Coverage for msstools/combine.py: 100.00%

28 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-05-13 23:05 +0000

1from pathlib import Path 

2import re 

3import img2pdf 

4import pikepdf 

5 

6 

def create_pdf(
    image_paths: list[Path],
    output_pdf: Path,
    strip_pattern: str = "",
) -> None:
    """Combine images into a single PDF with one outline entry per page.

    Each image becomes one page, in the order given. Every page gets an
    outline (bookmark) item whose title is the image's file stem with all
    matches of ``strip_pattern`` removed. Each label is also printed to
    stdout as it is added.

    Args:
        image_paths: Image files to combine, in page order. Items are
            coerced with ``Path(...)``.
        output_pdf: Destination of the finished PDF. Missing parent
            directories are created. An intermediate file with the suffix
            ``.tmp.pdf`` is written next to it and removed afterwards.
        strip_pattern: Regular expression passed to ``re.sub``; its matches
            are deleted from each file stem to form the outline label. An
            empty string leaves the stem unchanged.

    Raises:
        ValueError: If ``image_paths`` is empty.
        RuntimeError: If the generated PDF's page count differs from the
            number of images.
    """
    image_paths = [Path(p) for p in image_paths]
    # Coerce like image_paths so a plain string destination also works.
    output_pdf = Path(output_pdf)

    if not image_paths:
        raise ValueError("No image paths provided.")

    def folio_label(path: Path) -> str:
        """Return the outline title for *path* (stem minus strip_pattern)."""
        label = path.stem
        if strip_pattern:
            label = re.sub(strip_pattern, "", label)
        return label

    # img2pdf embeds JPEG/JPEG2000 losslessly where possible.
    # It does not resample or recompress images.
    pdf_bytes = img2pdf.convert([str(p) for p in image_paths])

    output_pdf.parent.mkdir(parents=True, exist_ok=True)

    tmp_pdf = output_pdf.with_suffix(".tmp.pdf")
    try:
        tmp_pdf.write_bytes(pdf_bytes)

        # The context manager closes the PDF (and its handle on tmp_pdf)
        # before the finally-clause deletes the file; deleting a file that
        # is still open fails on Windows.
        with pikepdf.Pdf.open(tmp_pdf) as pdf:
            if len(pdf.pages) != len(image_paths):
                raise RuntimeError(
                    f"Expected {len(image_paths)} pages, got {len(pdf.pages)}."
                )

            with pdf.open_outline() as outline:
                # Start from an empty outline so only our entries remain.
                outline.root.clear()

                for page_index, image_path in enumerate(image_paths):
                    label = folio_label(image_path)
                    print(label)
                    outline.root.append(
                        pikepdf.OutlineItem(label, page_index)
                    )

            pdf.save(output_pdf)
    finally:
        # Always remove the intermediate file, including on failure.
        # missing_ok avoids masking an earlier error if it was never created.
        tmp_pdf.unlink(missing_ok=True)