Coverage for msstools/split.py: 100.00%

72 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-05-13 23:05 +0000

1from pathlib import Path 

2import shutil 

3from PIL import Image 

4 

5 

def split_images(
    prefix: Path,
    images: list[Path],
    rtl: bool = False,
    overlap: float = 10.0,
    skip: int = 0,
    margin_left: int = 0,
    margin_right: int = 0,
    force: bool = False,
    recto: list[str] | None = None,
    ignore: list[str] | set[str] | None = None,
) -> None:
    """
    Split double-page images into separate verso and recto files.

    Outputs are written to ``prefix.parent`` and named
    ``<prefix.name>-<index><folio ref><suffix>``, where ``index`` is a
    zero-padded running counter and ``suffix`` is the source file extension.

    Args:
        prefix: Path whose parent is the output directory (created if
            missing) and whose name starts every output file name.
        images: Source images, processed in the given order.
        rtl: When true the right half is saved as the verso and the left
            half as the recto; otherwise the other way round.
        overlap: Width of the band shared by both halves, as a percentage of
            the (margin-cropped) image width, centred on the midpoint.
        skip: Number of leading images copied unchanged instead of split.
        margin_left: Pixels removed from the left edge before splitting.
        margin_right: Pixels removed from the right edge before splitting.
        force: Overwrite outputs that already exist.
        recto: ``FILENAME=FOLIO`` entries giving the recto folio number of an
            image; later images continue counting from there.
        ignore: Names of images (past the ``skip`` range) that are neither
            split nor given an output index.

    Raises:
        ValueError: If an entry of ``recto`` is malformed.
    """
    recto_by_image = _parse_recto_refs(recto)
    output_index = 0
    output_width = _number_width(images_count=len(images), skip=skip)
    current_folio: int | None = None

    prefix = Path(prefix)
    prefix.parent.mkdir(parents=True, exist_ok=True)

    # Match on the basename only, the same normalisation applied to the
    # ``recto`` entries, so an ignore entry given with a directory still hits.
    ignored_names = {Path(name).name for name in ignore or ()}

    for image_index, image_path in enumerate(images):
        # Accept plain strings as well, as is already done for ``prefix``.
        image_path = Path(image_path)
        suffix = image_path.suffix

        if image_index < skip:
            path = _output_path(prefix, output_index, output_width, "", suffix)
            if force or not path.exists():
                shutil.copy(image_path, path)
            output_index += 1
            continue

        if image_path.name in ignored_names:
            print(f"Skipping {image_path.name} (in ignore list)")
            continue

        print(f"Splitting {image_path.name}")

        # Output names depend only on the counters, not on the pixel data,
        # so they are resolved before the image is opened.
        verso_ref = f"-{current_folio}v" if current_folio is not None else ""
        verso_path = _output_path(
            prefix, output_index, output_width, verso_ref, suffix
        )
        print("\tVerso", verso_path)
        output_index += 1

        recto_folio = recto_by_image.get(image_path.name)
        if recto_folio is None and current_folio is not None:
            # No explicit reference: continue from the previous recto.
            recto_folio = current_folio + 1

        recto_ref = f"-{recto_folio}r" if recto_folio is not None else ""
        recto_path = _output_path(
            prefix, output_index, output_width, recto_ref, suffix
        )
        print("\tRecto", recto_path)
        output_index += 1

        if recto_folio is not None:
            current_folio = recto_folio

        write_verso = force or not verso_path.exists()
        write_recto = force or not recto_path.exists()
        if not (write_verso or write_recto):
            # Both halves are already on disk: skip decoding the image.
            continue

        with Image.open(image_path) as source:
            page = source
            if margin_left or margin_right:
                width, height = source.size
                page = source.crop(
                    (margin_left, 0, width - margin_right, height)
                )

            width, height = page.size
            overlap_px = int(width * (overlap / 100.0))
            half = width // 2

            # The overlap is centered around the midpoint. The boxes are
            # clamped to the image so an overlap above 100% cannot reach
            # outside it (Pillow would pad the excess with empty pixels).
            left_crop = (0, 0, min(width, half + overlap_px // 2), height)
            right_crop = (max(0, half - overlap_px // 2), 0, width, height)

            if rtl:
                verso_crop, recto_crop = right_crop, left_crop
            else:
                verso_crop, recto_crop = left_crop, right_crop

            if write_verso:
                page.crop(verso_crop).save(verso_path)
            if write_recto:
                page.crop(recto_crop).save(recto_path)

90 

91 

92def _parse_recto_refs(recto: list[str] | None = None) -> dict[str, int]: 

93 recto_by_image: dict[str, int] = {} 

94 for value in recto or []: 

95 try: 

96 filename, folio = value.split("=", 1) 

97 recto_by_image[Path(filename).name] = int(folio) 

98 except ValueError as error: 

99 raise ValueError( 

100 f"Invalid recto reference {value!r}. Expected FILENAME=FOLIO." 

101 ) from error 

102 

103 return recto_by_image 

104 

105 

106def _number_width(images_count: int, skip: int) -> int: 

107 output_count = min(skip, images_count) + max(0, images_count - skip) * 2 

108 highest_index = max(0, output_count - 1) 

109 return len(str(highest_index)) 

110 

111 

112def _format_number(number: int, width: int) -> str: 

113 return f"{number:0{width}d}" 

114 

115 

def _output_path(
    prefix: Path,
    output_index: int,
    output_width: int,
    folio_ref: str,
    suffix: str,
) -> Path:
    """
    Build the path of one output file.

    The file lives in ``prefix.parent`` and is named ``prefix.name``, a
    hyphen, the zero-padded ``output_index``, then ``folio_ref`` and
    ``suffix`` appended verbatim.
    """
    padded_index = _format_number(output_index, output_width)
    stem = f"{prefix.name}-{padded_index}"
    filename = f"{stem}{folio_ref}{suffix}"
    return prefix.parent / filename