Coverage for msstools/strings.py: 100.00%
19 statements
« prev ^ index » next coverage.py v7.9.1, created at 2026-05-13 23:05 +0000
1from pathlib import Path
2import unicodedata
def remove_accents(input_text: str) -> str:
    """Return *input_text* with accents (combining marks) removed.

    The text is decomposed with Unicode NFKD normalization, which splits an
    accented character into its base character followed by combining marks,
    and every character whose Unicode category is ``Mn`` (nonspacing mark) is
    then dropped.  Because NFKD is a *compatibility* decomposition, it also
    rewrites compatibility characters (for example a ligature into its
    component letters); the result is left in decomposed form.

    Args:
        input_text: The text to clean.

    Returns:
        The normalized text without nonspacing marks.

    >>> remove_accents("café")
    'cafe'
    """
    normalized_text = unicodedata.normalize('NFKD', input_text)
    # A generator feeds join() directly; no intermediate list is needed.
    return ''.join(
        char for char in normalized_text if unicodedata.category(char) != 'Mn'
    )
11def remove_accents_from_file(input_file_path: str|Path, output_file_path: str|Path) -> None:
12 """Reads a file, removes accents from each line, and writes the cleaned lines to a new file."""
13 assert input_file_path, f"Input file path must be provided."
14 assert output_file_path, f"Output file path must be provided."
16 assert input_file_path != output_file_path, "Input and output file paths must be different."
18 input_file_path = Path(input_file_path)
19 assert input_file_path.exists(), f"Input file '{input_file_path}' does not exist."
21 # Ensure the output directory exists
22 output_file_path = Path(output_file_path)
23 output_file_path.parent.mkdir(parents=True, exist_ok=True)
25 with open(input_file_path, 'r', encoding='utf-8') as input_file, open(output_file_path, 'w', encoding='utf-8') as output_file:
26 for line in input_file:
27 cleaned_line = remove_accents(line)
28 output_file.write(cleaned_line)
29 print(f"Accents removed and saved to {output_file_path}")