Coverage for seqbank/main.py: 100.00%
69 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-02 04:29 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-02 04:29 +0000
1import typer
2from pathlib import Path
3from rich.progress import track
4import plotly.io as pio
6from .refseq import get_refseq_urls
7from .seqbank import SeqBank
8from .dfam import download_dfam
# Turn off MathJax on the Kaleido export scope when one is available.
# ``pio.kaleido.scope`` is ``None`` when Kaleido is not installed, so the
# assignment is guarded instead of letting an AttributeError abort the
# import of the whole CLI.
# NOTE(review): presumably MathJax is disabled to keep it out of static
# image exports -- confirm the original motivation.
_kaleido_scope = getattr(pio.kaleido, "scope", None)
if _kaleido_scope is not None:
    _kaleido_scope.mathjax = None

# Typer application object that the commands in this module register on;
# Typer's pretty exception rendering is switched off.
app = typer.Typer(pretty_exceptions_enable=False)
@app.command()
def add(
    path: Path,
    files: list[Path],
    format: str = "",
    filter: Path = None,
) -> None:
    """Add the sequences found in one or more files to a SeqBank.

    The SeqBank at ``path`` is opened in write mode and the whole list of
    files is handed to ``SeqBank.add_files`` in a single call.

    Args:
        path (Path): Location of the SeqBank to write to.
        files (list[Path]): Sequence files to add.
        format (str, optional): Format of the sequence files, passed through
            unchanged. Defaults to "".
        filter (Path, optional): Filter file for sequences, passed through
            unchanged. Defaults to None.
    """
    print(f"Opening seqbank '{path}'")
    bank = SeqBank(path=path, write=True)
    bank.add_files(files, format=format, filter=filter)
@app.command()
def add_sequence_from_file(
    path: Path,
    accession: str,
    file: Path,
    format: str = "",
) -> None:
    """
    Add a sequence from a file with a single sequence to a SeqBank.

    Args:
        path (Path): The path to the SeqBank.
        accession (str): The accession to store the sequence under.
        file (Path): The file containing the single sequence to add.
        format (str, optional): The format of the sequence file. Defaults to "".
    """
    print(f"Opening seqbank '{path}'")
    seqbank = SeqBank(path=path, write=True)
    seqbank.add_sequence_from_file(accession, file, format=format)
    # Report only the file name, not the full path, to keep the message short.
    print(f"Added {accession} from {file.name}")
@app.command()
def url(
    path: Path,
    urls: list[str],
    format: str = "",
    max: int = 0,
    workers: int = -1,
    tmp_dir: Path = None,
) -> None:
    """Add the sequences available at a list of URLs to a SeqBank.

    The SeqBank at ``path`` is opened in write mode and every option is
    forwarded unchanged to ``SeqBank.add_urls``.

    Args:
        path (Path): Location of the SeqBank to write to.
        urls (list[str]): URLs pointing at sequence data.
        format (str, optional): Format of the sequence files. Defaults to "".
        max (int, optional): Maximum number of sequences to add.
            Defaults to 0 (all).
        workers (int, optional): Number of workers to use for downloading.
            Defaults to -1.
        tmp_dir (Path, optional): Temporary directory for downloads.
            Defaults to None.
    """
    print(f"Opening seqbank '{path}'")
    bank = SeqBank(path=path, write=True)

    bank.add_urls(
        urls,
        format=format,
        max=max,
        workers=workers,
        tmp_dir=tmp_dir,
    )
@app.command()
def delete(path: Path, accessions: list[str]) -> None:
    """Remove the given accessions from a SeqBank.

    The SeqBank at ``path`` is opened in write mode and the accessions are
    deleted one at a time while a progress bar labelled "Deleting" is shown.

    Args:
        path (Path): Location of the SeqBank to modify.
        accessions (list[str]): Accessions to delete from the SeqBank.
    """
    print(f"Opening seqbank '{path}'")
    bank = SeqBank(path=path, write=True)

    for acc in track(accessions, description="Deleting"):
        bank.delete(acc)
@app.command()
def refseq(
    path: Path,
    max: int = 0,
    workers: int = -1,
    tmp_dir: Path = None,
) -> None:
    """Download all RefSeq sequences into a SeqBank.

    The list of RefSeq URLs is obtained from ``get_refseq_urls`` and then
    handed to the ``url`` command function of this module, which does the
    actual download and insertion.

    Args:
        path (Path): Location of the SeqBank to write to.
        max (int, optional): Maximum number of sequences to add.
            Defaults to 0 (all).
        workers (int, optional): Number of workers to use for downloading.
            Defaults to -1.
        tmp_dir (Path, optional): Temporary directory for downloads; also
            passed to ``get_refseq_urls``. Defaults to None.
    """
    print("Getting RefSeq files list")
    refseq_urls = get_refseq_urls(tmp_dir=tmp_dir)
    return url(path, refseq_urls, max=max, workers=workers, tmp_dir=tmp_dir)
@app.command()
def dfam(path: Path, release: str = "current", curated: bool = True) -> bool:
    """Download DFam sequences into a SeqBank.

    The SeqBank at ``path`` is opened in write mode and passed to
    ``download_dfam`` together with the release and curation options.

    Args:
        path (Path): Location of the SeqBank to write to.
        release (str, optional): DFam release version to download.
            Defaults to "current".
        curated (bool, optional): Whether to download curated sequences.
            Defaults to True.

    Returns:
        bool: The result of ``download_dfam``; True if the download and
        addition were successful, False otherwise.
    """
    print("Getting DFam")
    bank = SeqBank(path=path, write=True)
    succeeded = download_dfam(bank, release=release, curated=curated)
    return succeeded
@app.command()
def ls(path: Path) -> None:
    """List the accessions stored in a SeqBank.

    Opens the SeqBank at ``path`` with its default mode and delegates to
    ``SeqBank.ls``.

    Args:
        path (Path): Location of the SeqBank to inspect.
    """
    SeqBank(path=path).ls()
@app.command()
def count(path: Path) -> None:
    """Print how many accessions a SeqBank holds.

    Opens the SeqBank at ``path`` with its default mode and prints the value
    of ``len()`` applied to it.

    Args:
        path (Path): Location of the SeqBank to inspect.
    """
    bank = SeqBank(path=path)
    total = len(bank)
    print(total)
@app.command()
def cp(path: Path, new: Path) -> None:
    """Copy every sequence from one SeqBank into another.

    The source SeqBank is opened with its default mode, the destination is
    opened in write mode, and ``SeqBank.copy`` performs the transfer.

    Args:
        path (Path): Location of the source SeqBank.
        new (Path): Location of the destination SeqBank.
    """
    print(f"Copying seqbank '{path}' to '{new}'")
    source = SeqBank(path=path)
    # Use a separate name for the opened destination rather than rebinding
    # the ``new`` path parameter to a SeqBank object.
    destination = SeqBank(path=new, write=True)
    source.copy(destination)
@app.command()
def export(path: Path, output: Path, format: str = "fasta") -> None:
    """Export the contents of a SeqBank to a file in the given format.

    Opens the SeqBank at ``path`` with its default mode and delegates to
    ``SeqBank.export``; whatever that call returns is passed back to the
    caller.

    Args:
        path (Path): Location of the SeqBank to export.
        output (Path): Where to save the exported sequences.
        format (str, optional): Format for the exported sequences.
            Defaults to "fasta".
    """
    print(f"Exporting seqbank '{path}' to '{output}' in {format} format")
    bank = SeqBank(path=path)
    result = bank.export(output, format=format)
    return result
@app.command()
def histogram(
    path: Path,
    output_path: Path = None,
    show: bool = False,
    nbins: int = 30,
    min: int = 0,
    max: int = 0,
) -> None:
    """Build a histogram of sequence lengths from a SeqBank.

    The figure returned by ``SeqBank.histogram`` is written to
    ``output_path`` when one is given. It is displayed when ``show`` is set
    or when no output path was supplied.

    Args:
        path (Path): Location of the SeqBank to read.
        output_path (Path, optional): File to save the histogram to. If
            None, the histogram is displayed instead. Defaults to None.
        show (bool, optional): Whether to display the histogram even when it
            is also saved. Defaults to False.
        nbins (int, optional): Number of bins for the histogram.
            Defaults to 30.
        min (int, optional): Forwarded to ``SeqBank.histogram`` as ``min``.
            Defaults to 0.
        max (int, optional): Forwarded to ``SeqBank.histogram`` as ``max``.
            Defaults to 0.
    """
    bank = SeqBank(path=path)
    figure = bank.histogram(nbins=nbins, min=min, max=max)

    # Persist the figure only when a destination was provided.
    if output_path is not None:
        figure.write_image(output_path)
        print(f"Histogram saved to {output_path}")

    # With nowhere to save the figure, displaying it is the only output.
    if show or output_path is None:
        figure.show()