Coverage for msstools/tei.py: 100.00%
58 statements
« prev ^ index » next coverage.py v7.9.1, created at 2026-05-13 23:05 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2026-05-13 23:05 +0000
1import xml.etree.ElementTree as ET
2from pathlib import Path
4def csv_to_tei(input_csv:Path, output_xml:Path, dates:Path|None=None, max_readings:int=0):
5 root = ET.Element('TEI', xmlns="http://www.tei-c.org/ns/1.0")
6 teiHeader = ET.SubElement(root, 'teiHeader')
7 fileDesc = ET.SubElement(teiHeader, 'fileDesc')
8 titleStmt = ET.SubElement(fileDesc, 'titleStmt')
9 respStmt = ET.SubElement(titleStmt, 'respStmt')
10 resp = ET.SubElement(respStmt, 'resp')
11 resp.text = "Collated using msstools"
13 publicationStmt = ET.SubElement(fileDesc, 'publicationStmt')
14 ET.SubElement(publicationStmt, 'p').text = "Not for distribution."
16 sourceDesc = ET.SubElement(fileDesc, 'sourceDesc')
17 ET.SubElement(sourceDesc, 'p').text = f"Derived from `{input_csv.name}`"
19 listWit = ET.SubElement(sourceDesc, 'listWit')
21 body = ET.SubElement(root, 'body')
23 all_witnesses = set()
24 import csv
25 with open(input_csv) as f:
26 reader = csv.reader(f, delimiter=',')
27 for row in reader:
28 location = row[0]
29 if not location:
30 continue
31 app = ET.SubElement(body, 'app')
32 for reading_index, witnesses_for_reading in enumerate(row[1:]):
33 if max_readings and reading_index >= max_readings:
34 break
36 if witnesses_for_reading:
37 rdg = ET.SubElement(app, 'rdg', wit=witnesses_for_reading)
38 rdg.text = str(reading_index)
39 all_witnesses.update(witnesses_for_reading.strip().split())
42 if dates:
43 dates_dict = {}
44 with open(dates) as f:
45 reader = csv.reader(f, delimiter=',')
46 for row in reader:
47 siglum = row[0]
48 start = str(int(row[1]))
49 end = str(int(row[2]))
50 dates_dict[siglum] = (start, end)
52 all_witnesses = sorted(all_witnesses)
53 for siglum in all_witnesses:
54 witness = ET.SubElement(listWit, 'witness', n=siglum)
55 if dates:
56 start, end = None, None
57 if siglum in dates_dict:
58 start, end = dates_dict[siglum]
59 elif siglum.endswith("K") and siglum[:-1] in dates_dict:
60 start, end = dates_dict[siglum[:-1]]
62 if start is not None:
63 if start == end:
64 ET.SubElement(witness, 'origDate', when=start)
65 else:
66 ET.SubElement(witness, 'origDate', notBefore=start, notAfter=end)
67 else:
68 print(f"Witness {siglum} not in dates")
70 tree = ET.ElementTree(root)
71 ET.indent(tree, space="\t", level=0)
72 tree.write(output_xml, encoding="utf-8")