Coverage for rdgai/validation.py: 100.00%

20 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2025-01-03 01:37 +0000

1import random 

2from pathlib import Path 

3from rich.console import Console 

4import llmloader 

5 

6from .apparatus import Doc, Pair 

7from .classification import classify, DEFAULT_MODEL_ID 

8from .evaluation import evaluate_docs 

9 

10 

def validate(
    ground_truth: Doc,
    output: Path,
    validation_pairs: list[Pair] | None = None,
    verbose: bool = False,
    proportion: float = 0.5,
    seed: int = 42,
    api_key: str = "",
    llm: str = DEFAULT_MODEL_ID,
    temperature: float = 0.1,
    examples: int = 10,
    console: Console | None = None,
    confusion_matrix: Path | None = None,
    confusion_matrix_plot: Path | None = None,
    report: Path | None = None,
):
    """
    Hold out part of the classified pairs, re-classify them and score the result.

    The ground truth document is written to ``output`` and loaded again as a
    separate working ``Doc``. The held-out pairs have their types removed, are
    passed to ``classify`` and are then compared with ``ground_truth`` by
    ``evaluate_docs``.

    Args:
        ground_truth: Document holding the reference classifications.
        output: Path the working copy is written to and read back from.
        validation_pairs: Pairs to hold out. When empty or ``None``, a random
            sample of the working copy's classified pairs is used instead.
        verbose: Forwarded to ``classify``.
        proportion: Fraction of the classified pairs to sample; only used when
            ``validation_pairs`` is not supplied.
        seed: Seed for the global ``random`` generator; only used when
            ``validation_pairs`` is not supplied.
        api_key: Forwarded to ``llmloader.load``.
        llm: Model identifier forwarded to ``llmloader.load``.
        temperature: Forwarded to ``llmloader.load``.
        examples: Forwarded to both ``classify`` and ``evaluate_docs``.
        console: Forwarded to ``classify``.
        confusion_matrix: Forwarded to ``evaluate_docs``.
        confusion_matrix_plot: Forwarded to ``evaluate_docs``.
        report: Forwarded to ``evaluate_docs``.
    """

    def _pair_order(pair):
        # Order by apparatus entry first, then by the two reading identifiers.
        return (str(pair.active.app), pair.active.n, pair.passive.n)

    # Work on a copy loaded from disk so that `ground_truth` itself is not
    # the object handed to `classify`.
    ground_truth.write(output)
    working_doc = Doc(output)

    model = llmloader.load(model=llm, api_key=api_key, temperature=temperature)

    held_out = validation_pairs
    if not held_out:
        # Seeds the module-level generator (not a private one), so later
        # users of `random` also see the seeded state.
        random.seed(seed)
        candidates = working_doc.get_classified_pairs(redundant=False)
        sample_size = int(len(candidates) * proportion)
        held_out = random.sample(candidates, sample_size)
        held_out.sort(key=_pair_order)

    # Strip the existing classifications from the held-out pairs.
    for held_out_pair in held_out:
        held_out_pair.remove_all_types()

    # Re-classify the held-out pairs.
    classify(
        working_doc,
        output,
        pairs=held_out,
        verbose=verbose,
        llm=model,
        examples=examples,
        console=console,
    )

    # Score the new classifications against the ground truth.
    evaluate_docs(
        working_doc,
        ground_truth,
        pairs=held_out,
        confusion_matrix=confusion_matrix,
        confusion_matrix_plot=confusion_matrix_plot,
        report=report,
        examples=examples,
        llm=model,
    )

69