Coverage for rdgai/export.py: 100.00%

75 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2025-01-03 01:37 +0000

1from pathlib import Path 

2from openpyxl import Workbook 

3from openpyxl.worksheet.datavalidation import DataValidation 

4from openpyxl.styles import Font 

5import pandas as pd 

6 

7from .apparatus import Doc 

8 

9 

def _write_header_row(worksheet, headers, font):
    """Write ``headers`` into row 1 of ``worksheet`` (column A onwards) using ``font``."""
    for col_num, header in enumerate(headers, start=1):
        cell = worksheet.cell(row=1, column=col_num)
        cell.value = header
        cell.font = font


def export_variants_to_excel(doc:Doc, output:Path):
    """Export the variants to an Excel file.

    Two sheets are written to ``output``:

    * ``Variants`` - one row per pair in ``app.non_redundant_pairs`` for every
      app in ``doc.apps``: the app, its context, both reading IDs and texts,
      the pair description, and then one relation type per cell from column H
      onwards. A list data validation built from the keys of
      ``doc.relation_types`` is attached to the relation-type cells.
    * ``Categories`` - one row per value of ``doc.relation_types`` with its
      inverse, COUNTIF formulas over the relation-type columns of ``Variants``,
      a total, and the category description.

    Args:
        doc: Document providing ``relation_types`` (a mapping) and ``apps``.
        output: Path passed to ``Workbook.save``.
    """
    # openpyxl is already a dependency of this module; imported here so the
    # function is self-contained.
    from openpyxl.utils import get_column_letter

    first_type_column = 8  # Column H: the first "Relation Type(s)" column.
    min_type_columns = 10  # Always offer at least this many relation-type columns.

    wb = Workbook()
    header_font = Font(bold=True)

    relation_types = doc.relation_types

    # Rename the default sheet
    ws = wb.active
    ws.title = 'Variants'

    _write_header_row(
        ws,
        [
            'App ID', 'Context',
            'Active Reading ID', 'Passive Reading ID',
            'Active Reading Text', 'Passive Reading Text',
            'Description', 'Relation Type(s)',
        ],
        header_font,
    )

    current_row = 2
    for app in doc.apps:
        for pair in app.non_redundant_pairs:
            ws[f'A{current_row}'] = str(app)
            ws[f'B{current_row}'] = app.text_in_context()
            ws[f'C{current_row}'] = pair.active.n
            ws[f'D{current_row}'] = pair.passive.n
            ws[f'E{current_row}'] = pair.active.text
            ws[f'F{current_row}'] = pair.passive.text
            ws[f'G{current_row}'] = pair.get_description()

            # Address cells numerically: building letters with chr() breaks
            # once the column index passes 'Z'.
            for offset, relation_type in enumerate(pair.types):
                ws.cell(
                    row=current_row,
                    column=first_type_column + offset,
                    value=str(relation_type),
                )

            current_row += 1

    data_val = DataValidation(type="list",formula1=f'"{",".join(relation_types.keys())}"')
    ws.add_data_validation(data_val)

    # default=0 keeps this from raising ValueError when there are no pairs.
    widest_pair = max(
        (len(pair.types) for app in doc.apps for pair in app.pairs),
        default=0,
    )
    max_relation_types = max(min_type_columns, widest_pair)
    first_type_letter = get_column_letter(first_type_column)
    end_column = get_column_letter(first_type_column + max_relation_types - 1)

    data_val.add(f"{first_type_letter}2:{end_column}{current_row}")

    # Create new sheet with descriptions of categories and counts
    categories_worksheet = wb.create_sheet('Categories')
    _write_header_row(
        categories_worksheet,
        ['Category', 'Inverse', 'Count', 'Inverse Count', 'Total', 'Description'],
        header_font,
    )

    # Count only the relation-type columns; including the free-text
    # Description column (G) would count a description that happens to equal
    # a category name.
    count_range = f'Variants!{first_type_letter}:{end_column}'

    # One row per relation type, starting from row 2.
    # NOTE(review): when a category has no inverse, columns C and D count the
    # same name, so 'Total' is twice 'Count' - confirm this is intended.
    for idx, category in enumerate(relation_types.values(), start=2):
        category_name = str(category)
        inverse_name = str(category.inverse) if category.inverse else category_name
        categories_worksheet[f'A{idx}'] = category_name
        categories_worksheet[f'B{idx}'] = inverse_name
        categories_worksheet[f'C{idx}'] = f'=COUNTIF({count_range}, "{category_name}")'
        categories_worksheet[f'D{idx}'] = f'=COUNTIF({count_range}, "{inverse_name}")'
        categories_worksheet[f'E{idx}'] = f'=SUM(C{idx}:D{idx})'
        categories_worksheet[f'F{idx}'] = category.description

    wb.save(output)

80 

81 

82def import_classifications_from_dataframe(doc:Doc, variants_df:pd.DataFrame, output:Path, responsible:str|None=None): 

83 variants_df.fillna('', inplace=True) 

84 apps_dict = {str(app): app for app in doc.apps} 

85 relation_types = doc.relation_types 

86 for _, row in variants_df.iterrows(): 

87 app_id = row['App ID'] 

88 active_reading_id = row['Active Reading ID'] 

89 passive_reading_id = row['Passive Reading ID'] 

90 description = row['Description'].strip() if 'Description' in row else None 

91 app = apps_dict[app_id] 

92 

93 types = set(row[key] for key in row.keys() if (key.startswith('Relation Type') or key.startswith('Unnamed: ')) and row[key]) 

94 # for type in types: 

95 # assert type in relation_types, f'{type} not in {relation_types.keys()}' 

96 types = set(relation_types[type] for type in types if type in relation_types) 

97 

98 for pair in app.pairs: 

99 if str(pair.active.n) == str(active_reading_id) and str(pair.passive.n) == str(passive_reading_id): 

100 # Add relations 

101 for type in types - pair.types: 

102 pair.add_type_with_inverse(type, responsible=responsible) 

103 

104 # Remove relations 

105 for type in pair.types - types: 

106 pair.remove_type_with_inverse(type) 

107 

108 if description: 

109 pair.add_description(description) 

110 elif description == "": 

111 # remove description if it is an empty string 

112 # don't do anything if description is 'None' 

113 pair.remove_description() 

114 

115 doc.write(output)