Coverage for seqbank/main.py: 100.00%

69 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-12-02 04:29 +0000

1import typer 

2from pathlib import Path 

3from rich.progress import track 

4import plotly.io as pio 

5 

6from .refseq import get_refseq_urls 

7from .seqbank import SeqBank 

8from .dfam import download_dfam 

9 

# Turn off MathJax in the Kaleido export scope used by plotly.
# NOTE(review): presumably this avoids MathJax artifacts in images written by
# the `histogram` command — confirm.
pio.kaleido.scope.mathjax = None


# Root Typer application: every function decorated with @app.command() is
# registered on it as a subcommand. Typer's "pretty" exception formatting is
# switched off via pretty_exceptions_enable=False.
app = typer.Typer(pretty_exceptions_enable=False)

14 

15 

@app.command()
def add(
    path: Path,
    files: list[Path],
    format: str = "",
    filter: Path = None,
) -> None:
    """Add sequences from one or more files to a SeqBank.

    The SeqBank at ``path`` is opened for writing and every entry in
    ``files`` is handed to ``SeqBank.add_files``.

    Args:
        path (Path): The path to the SeqBank.
        files (list[Path]): The sequence files to add.
        format (str, optional): The format of the sequence files. Defaults to "".
        filter (Path, optional): A filter file for sequences. Defaults to None.
    """
    print(f"Opening seqbank '{path}'")
    SeqBank(path=path, write=True).add_files(files, format=format, filter=filter)

29 

30 

@app.command()
def add_sequence_from_file(
    path: Path,
    accession: str,
    file: Path,
    format: str = "",
) -> None:
    """Add a sequence from a file with a single sequence to a SeqBank.

    The SeqBank at ``path`` is opened for writing, the sequence in ``file`` is
    stored under ``accession`` and a confirmation line is printed.

    Args:
        path (Path): The path to the SeqBank.
        accession (str): The accession to store the sequence under.
        file (Path): The file containing the single sequence to add.
        format (str, optional): The format of the sequence file. Defaults to "".
    """
    print(f"Opening seqbank '{path}'")
    seqbank = SeqBank(path=path, write=True)
    seqbank.add_sequence_from_file(accession, file, format=format)
    # Only the file name (not the full path) is echoed back to the user.
    print(f"Added {accession} from {file.name}")

51 

52 

@app.command()
def url(
    path: Path,
    urls: list[str],
    format: str = "",
    max: int = 0,
    workers: int = -1,
    tmp_dir: Path = None,
) -> None:
    """Add sequences from a list of URLs to a SeqBank.

    The SeqBank at ``path`` is opened for writing and all options are
    forwarded unchanged to ``SeqBank.add_urls``.

    Args:
        path (Path): The path to the SeqBank.
        urls (list[str]): The URLs that contain the sequences.
        format (str, optional): The format of the sequence files. Defaults to "".
        max (int, optional): Maximum number of sequences to add. Defaults to 0 (all).
        workers (int, optional): Number of workers to use for downloading. Defaults to -1.
        tmp_dir (Path, optional): Temporary directory for downloads. Defaults to None.
    """
    print(f"Opening seqbank '{path}'")
    bank = SeqBank(path=path, write=True)
    bank.add_urls(
        urls,
        format=format,
        max=max,
        workers=workers,
        tmp_dir=tmp_dir,
    )

69 

70 

@app.command()
def delete(path: Path, accessions: list[str]) -> None:
    """Delete sequences from a SeqBank.

    The SeqBank at ``path`` is opened for writing and the accessions are
    removed one at a time behind a progress bar.

    Args:
        path (Path): The path to the SeqBank.
        accessions (list[str]): The accessions to delete from the SeqBank.
    """
    print(f"Opening seqbank '{path}'")
    bank = SeqBank(path=path, write=True)

    # `track` wraps the iterable so a "Deleting" progress bar is displayed.
    for item in track(accessions, description="Deleting"):
        bank.delete(item)

84 

85 

@app.command()
def refseq(
    path: Path,
    max: int = 0,
    workers: int = -1,
    tmp_dir: Path = None,
) -> None:
    """Download all RefSeq sequences to a SeqBank.

    The list of RefSeq URLs is obtained from ``get_refseq_urls`` and then
    processed by the ``url`` command with the same options.

    Args:
        path (Path): The path to the SeqBank.
        max (int, optional): Maximum number of sequences to add. Defaults to 0 (all).
        workers (int, optional): Number of workers to use for downloading. Defaults to -1.
        tmp_dir (Path, optional): Temporary directory for downloads. Defaults to None.
    """
    print("Getting RefSeq files list")
    refseq_urls = get_refseq_urls(tmp_dir=tmp_dir)
    return url(path, refseq_urls, max=max, workers=workers, tmp_dir=tmp_dir)

98 

99 

@app.command()
def dfam(path: Path, release: str = "current", curated: bool = True) -> bool:
    """Download DFam sequences to a SeqBank.

    The SeqBank at ``path`` is opened for writing and passed to
    ``download_dfam`` together with the release and curation options.

    Args:
        path (Path): The path to the SeqBank.
        release (str, optional): The DFam release version to download. Defaults to "current".
        curated (bool, optional): Whether to download curated sequences. Defaults to True.

    Returns:
        bool: The value returned by ``download_dfam``.
    """
    print("Getting DFam")
    return download_dfam(
        SeqBank(path=path, write=True),
        release=release,
        curated=curated,
    )

115 

116 

@app.command()
def ls(path: Path) -> None:
    """List accessions in a SeqBank.

    The SeqBank is opened without the ``write`` flag and the listing is
    delegated to ``SeqBank.ls``.

    Args:
        path (Path): The path to the SeqBank.
    """
    SeqBank(path=path).ls()

126 

127 

@app.command()
def count(path: Path) -> None:
    """Display the number of accessions in a SeqBank.

    Prints ``len()`` of the SeqBank opened at ``path``.

    Args:
        path (Path): The path to the SeqBank.
    """
    total = len(SeqBank(path=path))
    print(total)

137 

138 

@app.command()
def cp(path: Path, new: Path) -> None:
    """Copy each sequence from one SeqBank to another.

    The source is opened without the ``write`` flag, the destination is opened
    for writing, and the transfer is delegated to ``SeqBank.copy``.

    Args:
        path (Path): The path to the source SeqBank.
        new (Path): The path to the destination SeqBank.
    """
    print(f"Copying seqbank '{path}' to '{new}'")
    source = SeqBank(path=path)
    destination = SeqBank(path=new, write=True)
    source.copy(destination)

151 

152 

@app.command()
def export(path: Path, output: Path, format: str = "fasta") -> None:
    """Export a SeqBank to a specified format.

    Announces the export, opens the SeqBank without the ``write`` flag and
    delegates the work to ``SeqBank.export``, whose result is returned as-is.

    Args:
        path (Path): The path to the SeqBank.
        output (Path): The path to save the exported sequences.
        format (str, optional): The format for the exported sequences. Defaults to "fasta".
    """
    print(f"Exporting seqbank '{path}' to '{output}' in {format} format")
    bank = SeqBank(path=path)
    result = bank.export(output, format=format)
    return result

165 

166 

@app.command()
def histogram(
    path: Path,
    output_path: Path = None,
    show: bool = False,
    nbins: int = 30,
    min: int = 0,
    max: int = 0,
) -> None:
    """Generate a histogram of sequence lengths from a SeqBank.

    The figure is built by ``SeqBank.histogram``. It is written to
    ``output_path`` when one is given; when none is given the figure is
    always displayed, regardless of ``show``.

    Args:
        path (Path): The path to the SeqBank.
        output_path (Path, optional): The path to save the histogram. If None,
            nothing is saved and the histogram is displayed instead.
        show (bool, optional): Whether to display the histogram in addition to
            saving it. Defaults to False.
        nbins (int, optional): The number of bins for the histogram. Defaults to 30.
        min (int, optional): Passed to ``SeqBank.histogram`` as its ``min``
            argument. Defaults to 0.
        max (int, optional): Passed to ``SeqBank.histogram`` as its ``max``
            argument. Defaults to 0.
    """
    # NOTE(review): `min`/`max` presumably bound the sequence lengths that are
    # plotted, with 0 meaning "no bound" — confirm against SeqBank.histogram.
    seqbank = SeqBank(path=path)
    fig = seqbank.histogram(nbins=nbins, min=min, max=max)

    if output_path is not None:
        fig.write_image(output_path)
        print(f"Histogram saved to {output_path}")

    # Without an output path there is nowhere to save the figure, so it is
    # displayed even when `show` was not requested.
    if show or output_path is None:
        fig.show()