Coverage for rdgai/export.py: 100.00%

1from pathlib import Path

2from openpyxl import Workbook

3from openpyxl.worksheet.datavalidation import DataValidation

4from openpyxl.styles import Font

5import pandas as pd

7from .apparatus import Doc

def export_variants_to_excel(doc:Doc, output:Path) -> None:
    """Export the variant reading pairs of ``doc`` to an Excel workbook.

    The workbook contains two sheets:

    * ``Variants`` -- one row per pair in ``app.non_redundant_pairs`` for every
      app in ``doc.apps``. Columns A-G hold the app ID, its text in context,
      the active/passive reading IDs and texts, and the pair description.
      The relation types of the pair are written one per cell from column H
      onwards, and those cells get a drop-down list validation built from the
      keys of ``doc.relation_types``.
    * ``Categories`` -- one row per value of ``doc.relation_types`` with its
      name, the name of its inverse (or its own name if it has none),
      ``COUNTIF`` formulas counting both names in the relation-type columns
      of the ``Variants`` sheet, their sum, and the category description.

    Args:
        doc: The document whose ``apps`` and ``relation_types`` are exported.
        output: Path the workbook is saved to.
    """
    # Imported locally so the function stays self-contained with respect to
    # the module-level imports.
    from openpyxl.utils import get_column_letter

    first_type_column = 8   # column H, directly after 'Description' (G)
    min_type_columns = 10   # always offer at least this many relation-type cells

    wb = Workbook()
    header_font = Font(bold=True)

    relation_types = doc.relation_types

    # Rename the default sheet
    ws = wb.active
    ws.title = 'Variants'

    headers = [
        'App ID', 'Context',
        'Active Reading ID', 'Passive Reading ID',
        'Active Reading Text', 'Passive Reading Text',
        'Description', 'Relation Type(s)',
    ]
    for col_num, header in enumerate(headers, start=1):  # Start from column A (1)
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = header_font

    current_row = 2
    for app in doc.apps:
        for pair in app.non_redundant_pairs:
            ws[f'A{current_row}'] = str(app)
            ws[f'B{current_row}'] = app.text_in_context()
            ws[f'C{current_row}'] = pair.active.n
            ws[f'D{current_row}'] = pair.passive.n
            ws[f'E{current_row}'] = pair.active.text
            ws[f'F{current_row}'] = pair.passive.text
            ws[f'G{current_row}'] = pair.get_description()

            # Address the cells numerically: building letters with chr() would
            # produce invalid references once the columns run past 'Z'.
            for offset, relation_type in enumerate(pair.types):
                ws.cell(
                    row=current_row,
                    column=first_type_column + offset,
                    value=str(relation_type),
                )

            current_row += 1

    data_val = DataValidation(type="list", formula1=f'"{",".join(relation_types.keys())}"')
    ws.add_data_validation(data_val)

    # ``default=0`` keeps the export working for a document without any pairs.
    most_types_on_a_pair = max(
        (len(pair.types) for app in doc.apps for pair in app.pairs),
        default=0,
    )
    max_relation_types = max(min_type_columns, most_types_on_a_pair)
    first_type_letter = get_column_letter(first_type_column)
    end_column = get_column_letter(first_type_column + max_relation_types - 1)

    # ``current_row`` is one past the last written row, so the validation also
    # covers one empty row below the data.
    data_val.add(f"{first_type_letter}2:{end_column}{current_row}")

    # Create new sheet with descriptions of categories and counts
    categories_worksheet = wb.create_sheet('Categories')

    headers = ['Category', 'Inverse', 'Count', 'Inverse Count', 'Total', 'Description']
    for col_num, header in enumerate(headers, start=1):
        cell = categories_worksheet.cell(row=1, column=col_num)
        cell.value = header
        cell.font = header_font

    # Count only the relation-type columns (H onwards); column G holds the
    # free-text description and must not contribute to the category counts.
    count_range = f'Variants!{first_type_letter}:{end_column}'

    # Populate the categories from relation_types.values()
    for idx, category in enumerate(relation_types.values(), start=2):  # Start from row 2
        category_name = str(category)
        inverse_name = str(category.inverse) if category.inverse else category_name
        categories_worksheet[f'A{idx}'] = category_name
        categories_worksheet[f'B{idx}'] = inverse_name
        categories_worksheet[f'C{idx}'] = f'=COUNTIF({count_range}, "{category_name}")'
        categories_worksheet[f'D{idx}'] = f'=COUNTIF({count_range}, "{inverse_name}")'
        categories_worksheet[f'E{idx}'] = f'=SUM(C{idx}:D{idx})'
        categories_worksheet[f'F{idx}'] = category.description

    wb.save(output)

def import_classifications_from_dataframe(doc:Doc, variants_df:pd.DataFrame, output:Path, responsible:str|None=None) -> None:
    """Apply the classifications in ``variants_df`` to ``doc`` and write it out.

    Each row is matched to an app through its ``'App ID'`` column (compared
    with ``str(app)``) and to the pairs of that app through the
    ``'Active Reading ID'`` and ``'Passive Reading ID'`` columns (compared as
    strings with ``pair.active.n`` / ``pair.passive.n``). The relation types
    of a row are the non-empty cells of every column whose name starts with
    ``'Relation Type'`` or ``'Unnamed: '``.

    For every matching pair, relation types present in the row but not on the
    pair are added (with their inverse), and types on the pair but absent from
    the row are removed (with their inverse). If a ``'Description'`` column
    exists, a non-empty value sets the pair description and an empty value
    removes it; without that column, descriptions are left untouched.

    Args:
        doc: The document to update; its ``apps`` and ``relation_types`` are read.
        variants_df: The table of classifications. It is not modified.
        output: Destination passed to ``doc.write``.
        responsible: Passed through to ``pair.add_type_with_inverse``.

    Raises:
        KeyError: If a row has an ``'App ID'`` that matches no app in ``doc``,
            or a required ID column is missing.
    """
    # Work on a filled copy so the caller's DataFrame is left untouched.
    variants_df = variants_df.fillna('')
    apps_dict = {str(app): app for app in doc.apps}
    relation_types = doc.relation_types

    # The set of relation-type columns is the same for every row, so find it
    # once. Non-string column labels cannot be relation-type columns.
    type_column_prefixes = ('Relation Type', 'Unnamed: ')
    type_columns = [
        column for column in variants_df.columns
        if isinstance(column, str) and column.startswith(type_column_prefixes)
    ]
    has_description = 'Description' in variants_df.columns

    for _, row in variants_df.iterrows():
        app_id = row['App ID']
        active_reading_id = str(row['Active Reading ID'])
        passive_reading_id = str(row['Passive Reading ID'])
        # str() guards against non-string cells (e.g. a number typed as description).
        description = str(row['Description']).strip() if has_description else None
        app = apps_dict[app_id]

        type_names = {row[column] for column in type_columns if row[column]}
        # Names that are not known relation types are silently ignored.
        types = {relation_types[name] for name in type_names if name in relation_types}

        for pair in app.pairs:
            if str(pair.active.n) != active_reading_id or str(pair.passive.n) != passive_reading_id:
                continue

            # Add relations
            for relation_type in types - pair.types:
                pair.add_type_with_inverse(relation_type, responsible=responsible)

            # Remove relations
            for relation_type in pair.types - types:
                pair.remove_type_with_inverse(relation_type)

            if description:
                pair.add_description(description)
            elif description == "":
                # Remove the description if it is an empty string;
                # do nothing if there is no 'Description' column (None).
                pair.remove_description()

    doc.write(output)