Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import pandas as pd
2from . import refseq
3import asyncio
def preprocess(categories=None, base_dir=None, max_files=None, file_indexes=None):
    """Run ``write_h5`` for each requested category and combine the results.

    Args:
        categories: A single category name, an iterable of names, or a falsy
            value (``None``, empty) to fall back to
            ``refseq.REFSEQ_CATEGORIES``.
        base_dir: Passed unchanged to ``refseq.RefSeqCategory``.
        max_files: Passed unchanged to ``refseq.RefSeqCategory``.
        file_indexes: Passed unchanged to ``RefSeqCategory.write_h5``.

    Returns:
        The result of ``pd.concat(..., ignore_index=True)`` over the
        per-category ``write_h5`` return values (presumably DataFrames --
        confirm against ``refseq.RefSeqCategory.write_h5``).
    """
    selected = categories or refseq.REFSEQ_CATEGORIES
    if isinstance(selected, str):
        # A bare string is one category name, not an iterable of characters.
        selected = [selected]

    frames = []
    for category_name in selected:
        # flush=True so the progress line is emitted before the category's
        # work starts rather than sitting in the output buffer.
        print(f"Preprocessing {category_name}", flush=True)
        handler = refseq.RefSeqCategory(
            name=category_name, max_files=max_files, base_dir=base_dir
        )
        frames.append(handler.write_h5(file_indexes=file_indexes))

    return pd.concat(frames, ignore_index=True)
def download(categories=None, base_dir=None, max_files=None):
    """Call ``download_all`` on each requested category.

    Args:
        categories: A single category name, an iterable of names, or a falsy
            value (``None``, empty) to fall back to
            ``refseq.REFSEQ_CATEGORIES``.
        base_dir: Passed unchanged to ``refseq.RefSeqCategory``.
        max_files: Passed unchanged to ``refseq.RefSeqCategory``.

    Returns:
        None.
    """
    selected = categories or refseq.REFSEQ_CATEGORIES
    if isinstance(selected, str):
        # A bare string is one category name, not an iterable of characters.
        selected = [selected]

    for category_name in selected:
        handler = refseq.RefSeqCategory(
            name=category_name, max_files=max_files, base_dir=base_dir
        )
        # The message interpolates the category object itself, not its name.
        print(f"Downloading raw files for {handler}")
        # NOTE(review): an ``asyncio.run(...)`` wrapper around this call was
        # previously sketched here; confirm ``download_all`` is a plain
        # synchronous method, otherwise this call would only create a
        # coroutine without running it.
        handler.download_all()