Coverage for rdgai/export.py: 100.00%
75 statements
« prev ^ index » next coverage.py v7.6.4, created at 2025-01-03 01:37 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2025-01-03 01:37 +0000
1from pathlib import Path
2from openpyxl import Workbook
3from openpyxl.worksheet.datavalidation import DataValidation
4from openpyxl.styles import Font
5import pandas as pd
7from .apparatus import Doc
def export_variants_to_excel(doc:Doc, output:Path):
    """Export the reading pairs of ``doc`` to an Excel workbook.

    The workbook contains two sheets:

    * ``Variants`` - a bold header row followed by one row for every pair in
      ``app.non_redundant_pairs`` of every app in ``doc.apps``.  Columns A-G
      hold the app, its context, the reading IDs, the reading texts and the
      description; the relation types of the pair are written one per cell
      from column H rightwards.  The relation-type cells carry a list
      validation built from the keys of ``doc.relation_types``.
    * ``Categories`` - one row per relation type with its inverse, COUNTIF
      formulas counting how often each occurs on the ``Variants`` sheet, their
      sum, and the category description.

    Args:
        doc: Document providing ``apps`` and ``relation_types``.
        output: Destination passed to ``Workbook.save``.
    """
    # Imported here so this function does not depend on a change to the
    # module-level import block.
    from openpyxl.utils import get_column_letter

    first_type_column = 8   # Column H: first column holding relation types.
    min_type_columns = 10   # Always validate at least this many type columns.

    wb = Workbook()
    header_font = Font(bold=True)

    relation_types = doc.relation_types

    # Rename the default sheet
    ws = wb.active
    ws.title = 'Variants'

    headers = [
        'App ID', 'Context',
        'Active Reading ID', 'Passive Reading ID',
        'Active Reading Text', 'Passive Reading Text',
        'Description', 'Relation Type(s)',
    ]
    for col_num, header in enumerate(headers, start=1):  # Start from column A (1)
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = header_font

    current_row = 2
    for app in doc.apps:
        for pair in app.non_redundant_pairs:
            ws[f'A{current_row}'] = str(app)
            ws[f'B{current_row}'] = app.text_in_context()
            ws[f'C{current_row}'] = pair.active.n
            ws[f'D{current_row}'] = pair.passive.n
            ws[f'E{current_row}'] = pair.active.text
            ws[f'F{current_row}'] = pair.passive.text
            ws[f'G{current_row}'] = pair.get_description()

            # Address the cells numerically: building the letter with
            # chr(ord('H') + i) runs past 'Z' after 19 relation types.
            for offset, relation_type in enumerate(pair.types):
                type_cell = ws.cell(row=current_row, column=first_type_column + offset)
                type_cell.value = str(relation_type)

            current_row += 1

    # NOTE(review): Excel limits inline validation lists to a fairly short
    # string; a long list of relation types may need a cell-range source.
    data_val = DataValidation(type="list",formula1=f'"{",".join(relation_types.keys())}"')
    ws.add_data_validation(data_val)

    # default=0 keeps the export working for a document without any pairs,
    # where max() over an empty generator would raise ValueError.
    most_types_on_a_pair = max(
        (len(pair.types) for app in doc.apps for pair in app.pairs),
        default=0,
    )
    max_relation_types = max(min_type_columns, most_types_on_a_pair)
    end_column = get_column_letter(first_type_column + max_relation_types - 1)

    data_val.add(f"H2:{end_column}{current_row}")

    # Create new sheet with descriptions of categories and counts
    categories_worksheet = wb.create_sheet('Categories')

    headers = ['Category', 'Inverse', 'Count', 'Inverse Count', 'Total', 'Description']
    for col_num, header in enumerate(headers, start=1):
        cell = categories_worksheet.cell(row=1, column=col_num)
        cell.value = header
        cell.font = header_font

    # One row per relation type, starting below the header.
    for idx, category in enumerate(relation_types.values(), start=2):
        category_name = str(category)
        inverse_name = str(category.inverse) if category.inverse else category_name
        categories_worksheet[f'A{idx}'] = category_name
        categories_worksheet[f'B{idx}'] = inverse_name
        # Count only the relation-type columns (H onwards).  Starting at G
        # would also count a description that happens to equal a category name.
        categories_worksheet[f'C{idx}'] = f'=COUNTIF(Variants!H:{end_column}, "{category_name}")'
        categories_worksheet[f'D{idx}'] = f'=COUNTIF(Variants!H:{end_column}, "{inverse_name}")'
        categories_worksheet[f'E{idx}'] = f'=SUM(C{idx}:D{idx})'
        categories_worksheet[f'F{idx}'] = category.description

    wb.save(output)
def import_classifications_from_dataframe(doc:Doc, variants_df:pd.DataFrame, output:Path, responsible:str|None=None):
    """Apply the classifications in a table of reading pairs to ``doc``.

    Every row identifies a pair through its ``'App ID'``,
    ``'Active Reading ID'`` and ``'Passive Reading ID'`` columns.  The
    relation types of the row are read from every column whose name starts
    with ``'Relation Type'`` or ``'Unnamed: '``; values that are not keys of
    ``doc.relation_types`` are ignored.  For each matching pair the relation
    types are synchronised with the row (missing ones added, surplus ones
    removed) and the description is set, or removed when the cell is empty.
    Finally ``doc.write(output)`` is called.

    Args:
        doc: Document providing ``apps``, ``relation_types`` and ``write``.
        variants_df: Table of classifications.  It is not modified; empty
            cells are filled on an internal copy.
        output: Destination passed to ``doc.write``.
        responsible: Forwarded to ``pair.add_type_with_inverse`` for every
            relation type that is added.

    Raises:
        KeyError: If one of the ID columns is missing or an ``'App ID'``
            value does not equal ``str(app)`` for any app in ``doc.apps``.
    """
    # Work on a filled copy so the caller's DataFrame keeps its NaN values.
    table = variants_df.fillna('')

    apps_by_id = {str(app): app for app in doc.apps}
    relation_types = doc.relation_types

    # The column layout is the same for every row, so resolve it once.
    # pandas labels header-less columns 'Unnamed: N'; those hold the
    # relation types that spill to the right of the 'Relation Type(s)' header.
    type_columns = [
        column for column in table.columns
        if isinstance(column, str) and column.startswith(('Relation Type', 'Unnamed: '))
    ]
    has_description = 'Description' in table.columns

    for _, row in table.iterrows():
        app_id = row['App ID']
        active_reading_id = str(row['Active Reading ID'])
        passive_reading_id = str(row['Passive Reading ID'])
        # str() guards against non-text cells (e.g. a number typed as the
        # description), which have no .strip().
        description = str(row['Description']).strip() if has_description else None
        app = apps_by_id[app_id]

        # Unknown or empty cell values are skipped rather than rejected.
        cell_values = {row[column] for column in type_columns if row[column]}
        wanted_types = {
            relation_types[value] for value in cell_values if value in relation_types
        }

        for pair in app.pairs:
            if str(pair.active.n) != active_reading_id or str(pair.passive.n) != passive_reading_id:
                continue

            # Add relations
            for relation_type in wanted_types - pair.types:
                pair.add_type_with_inverse(relation_type, responsible=responsible)

            # Remove relations
            for relation_type in pair.types - wanted_types:
                pair.remove_type_with_inverse(relation_type)

            if description:
                pair.add_description(description)
            elif description == "":
                # An empty cell removes the description; a missing
                # 'Description' column (None) leaves it untouched.
                pair.remove_description()

    doc.write(output)