-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetTaxonomyfromNCBI.py
36 lines (32 loc) · 1.18 KB
/
getTaxonomyfromNCBI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import sys
import zipfile
import pandas as pd
from pprint import pprint
from datetime import datetime
from collections import defaultdict, Counter
from IPython.display import display
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Look up NCBI assembly metadata for every accession listed in
# '126_accessions.txt' and print one tab-separated summary row per assembly.
try:
    # Only the optional dependency import is guarded, so an ImportError raised
    # later by unrelated code is not misreported as a missing package.
    import ncbi.datasets
except ImportError:
    print('ncbi.datasets module not found. To install, run `pip install ncbi-datasets-pylib`.')
else:
    # NOTE(review): read_csv treats the first line of the file as a header
    # row. If the file has no header, the first accession is silently
    # skipped -- confirm against the input file.
    datasets = pd.read_csv('126_accessions.txt')

    # One single-element list per row, taken from the first column. Every row
    # is used instead of a hard-coded count of 125, which raised IndexError on
    # shorter files and ignored the extra rows of longer ones.
    assembly_accessions = [
        [accession] for accession in datasets.iloc[:, 0].tolist()
    ]

    # The API client does not depend on the accession, so build it once
    # rather than on every loop iteration.
    api_instance = ncbi.datasets.GenomeApi(ncbi.datasets.ApiClient())

    for data in assembly_accessions:
        print(data)
        genome_summary = api_instance.assembly_descriptors_by_accessions(data, limit='all')

        # Print the descriptive fields of each returned assembly.
        # `assemblies` and `chromosomes` fall back to an empty list so that a
        # response without them (presumably None when nothing matches --
        # verify against the client library) does not raise TypeError.
        for match in genome_summary.assemblies or []:
            assembly = match.assembly
            print(
                assembly.assembly_accession,
                assembly.display_name,
                assembly.org.sci_name,
                assembly.assembly_level,
                len(assembly.chromosomes or []),
                assembly.submission_date,
                sep='\t')