Coverage for msstools/strings.py: 100.00%

19 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-05-13 23:05 +0000

1from pathlib import Path 

2import unicodedata 

3 

def remove_accents(input_text: str) -> str:
    """Return ``input_text`` with its accent marks stripped.

    The text is first decomposed with Unicode NFKD normalization, which
    splits an accented character into its base character followed by
    combining marks. Every character whose Unicode general category is
    ``Mn`` (nonspacing mark) is then dropped. The result is left in its
    decomposed form; it is not recomposed afterwards.

    Args:
        input_text: The text to clean.

    Returns:
        The NFKD-normalized text without nonspacing marks.
    """
    decomposed = unicodedata.normalize('NFKD', input_text)

    kept_chars = []
    for char in decomposed:
        if unicodedata.category(char) == 'Mn':
            # Nonspacing mark (the accent itself): skip it.
            continue
        kept_chars.append(char)

    return ''.join(kept_chars)

9 

10 

11def remove_accents_from_file(input_file_path: str|Path, output_file_path: str|Path) -> None: 

12 """Reads a file, removes accents from each line, and writes the cleaned lines to a new file.""" 

13 assert input_file_path, f"Input file path must be provided." 

14 assert output_file_path, f"Output file path must be provided." 

15 

16 assert input_file_path != output_file_path, "Input and output file paths must be different." 

17 

18 input_file_path = Path(input_file_path) 

19 assert input_file_path.exists(), f"Input file '{input_file_path}' does not exist." 

20 

21 # Ensure the output directory exists 

22 output_file_path = Path(output_file_path) 

23 output_file_path.parent.mkdir(parents=True, exist_ok=True) 

24 

25 with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file: 

26 for line in input_file: 

27 cleaned_line = remove_accents(line) 

28 output_file.write(cleaned_line) 

29 print(f"Accents removed and saved to {output_file_path}")