Coverage for msstools/number.py: 100.00%

32 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-05-13 23:05 +0000

1 

2 

def number_sentences(data: str) -> str:
    """
    Number <S> sentence tags within <P> blocks, restarting at 1 after each </P>.

    A tag is the span from the last "<" preceding a ">" up to and including
    that ">". All other text is copied through unchanged, including a stray
    "<" that is never closed and a ">" that has no opening "<".

    Only opening tags whose element name is exactly ``S`` (``<S>``, ``<S 3>``,
    ``<S/>``) are rewritten; any attributes or previous number are replaced by
    the new number. Only closing tags whose element name is exactly ``P``
    reset the counter. Matching is case-sensitive.

    Args:
        data (str): The input text containing XML-like markup.

    Returns:
        str: The text with <S> tags numbered like <S 1>, <S 2>, etc.
    """
    pieces = []
    s_num = 1
    pos = 0  # start of the text that has not been emitted yet

    tag_end = data.find(">")
    while tag_end != -1:
        # The tag opens at the *last* "<" before this ">", so an earlier
        # unmatched "<" stays part of the plain text instead of being lost.
        tag_start = data.rfind("<", pos, tag_end)
        if tag_start != -1:
            pieces.append(data[pos:tag_start])
            tag = data[tag_start:tag_end + 1]

            is_closing = tag.startswith("</")
            name = _tag_name(tag[2:-1] if is_closing else tag[1:-1])

            if is_closing and name == "P":
                s_num = 1
            elif not is_closing and name == "S":
                tag = f"<S {s_num}>"
                s_num += 1

            pieces.append(tag)
            pos = tag_end + 1
        # When no "<" was found, this ">" is ordinary text; it is emitted
        # later together with the surrounding text.
        tag_end = data.find(">", tag_end + 1)

    # Trailing text, including any unterminated "<...", is kept verbatim.
    pieces.append(data[pos:])
    return ''.join(pieces)


def _tag_name(tag_body: str) -> str:
    """Return the element name that starts tag_body (text up to whitespace or '/')."""
    for index, char in enumerate(tag_body):
        if char == "/" or char.isspace():
            return tag_body[:index]
    return tag_body