Coverage for msstools/tei.py: 100.00%

58 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-05-13 23:05 +0000

1import xml.etree.ElementTree as ET 

2from pathlib import Path 

3 

4def csv_to_tei(input_csv:Path, output_xml:Path, dates:Path|None=None, max_readings:int=0): 

5 root = ET.Element('TEI', xmlns="http://www.tei-c.org/ns/1.0") 

6 teiHeader = ET.SubElement(root, 'teiHeader') 

7 fileDesc = ET.SubElement(teiHeader, 'fileDesc') 

8 titleStmt = ET.SubElement(fileDesc, 'titleStmt') 

9 respStmt = ET.SubElement(titleStmt, 'respStmt') 

10 resp = ET.SubElement(respStmt, 'resp') 

11 resp.text = "Collated using msstools" 

12 

13 publicationStmt = ET.SubElement(fileDesc, 'publicationStmt') 

14 ET.SubElement(publicationStmt, 'p').text = "Not for distribution." 

15 

16 sourceDesc = ET.SubElement(fileDesc, 'sourceDesc') 

17 ET.SubElement(sourceDesc, 'p').text = f"Derived from `{input_csv.name}`" 

18 

19 listWit = ET.SubElement(sourceDesc, 'listWit') 

20 

21 body = ET.SubElement(root, 'body') 

22 

23 all_witnesses = set() 

24 import csv 

25 with open(input_csv) as f: 

26 reader = csv.reader(f, delimiter=',') 

27 for row in reader: 

28 location = row[0] 

29 if not location: 

30 continue 

31 app = ET.SubElement(body, 'app') 

32 for reading_index, witnesses_for_reading in enumerate(row[1:]): 

33 if max_readings and reading_index >= max_readings: 

34 break 

35 

36 if witnesses_for_reading: 

37 rdg = ET.SubElement(app, 'rdg', wit=witnesses_for_reading) 

38 rdg.text = str(reading_index) 

39 all_witnesses.update(witnesses_for_reading.strip().split()) 

40 

41 

42 if dates: 

43 dates_dict = {} 

44 with open(dates) as f: 

45 reader = csv.reader(f, delimiter=',') 

46 for row in reader: 

47 siglum = row[0] 

48 start = str(int(row[1])) 

49 end = str(int(row[2])) 

50 dates_dict[siglum] = (start, end) 

51 

52 all_witnesses = sorted(all_witnesses) 

53 for siglum in all_witnesses: 

54 witness = ET.SubElement(listWit, 'witness', n=siglum) 

55 if dates: 

56 start, end = None, None 

57 if siglum in dates_dict: 

58 start, end = dates_dict[siglum] 

59 elif siglum.endswith("K") and siglum[:-1] in dates_dict: 

60 start, end = dates_dict[siglum[:-1]] 

61 

62 if start is not None: 

63 if start == end: 

64 ET.SubElement(witness, 'origDate', when=start) 

65 else: 

66 ET.SubElement(witness, 'origDate', notBefore=start, notAfter=end) 

67 else: 

68 print(f"Witness {siglum} not in dates") 

69 

70 tree = ET.ElementTree(root) 

71 ET.indent(tree, space="\t", level=0) 

72 tree.write(output_xml, encoding="utf-8")