-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* added dna fetcher * fixed testing * added ncbi blastn running dna results * latest update * added parallel test * max running error * all in working order * updated doc * added missing import --------- Co-authored-by: Niklas Abraham - INFlux <[email protected]> Co-authored-by: Max Häußler <[email protected]>
- Loading branch information
1 parent
4610e89
commit da02f48
Showing
10 changed files
with
577 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import asyncio | ||
import json | ||
import logging | ||
from typing import List | ||
|
||
import nest_asyncio | ||
from rich.console import Console | ||
from rich.progress import Progress | ||
|
||
from pyeed.fetch.requester import AsyncRequester, AsyncParamRequester | ||
|
||
from .ncbidnamapper import NCBIDNAMapper | ||
|
||
LOGGER = logging.getLogger(__name__) | ||
|
||
|
||
class DNAFetcher:
    """Fetch nucleotide records for a list of IDs from NCBI and map them to objects."""

    def __init__(self, ids: List[str]):
        """
        Parameters:
            ids (List[str]): Identifiers of the sequences to request.
        """
        self.ids = ids
        # nest_asyncio patches asyncio so the event loop can be re-entered,
        # e.g. when `fetch` is driven from an environment that already runs one.
        nest_asyncio.apply()

    async def fetch(self, **console_kwargs):
        """
        Fetches DNA data from NCBI for the IDs given at construction time.

        Parameters:
            **console_kwargs: Keyword arguments forwarded unchanged to
                ``rich.console.Console`` (for example ``force_terminal``),
                which renders the progress display.

        Returns:
            The result of ``NCBIDNAMapper().map(...)`` for the responses
            identified as NCBI records (expected to be a list of DNA record
            objects).

        Raises:
            Exception: If there is an error during the fetching process.
        """
        # Right now, in the first batch version, we only fetch from NCBI.
        with Progress(
            console=Console(**console_kwargs),
        ) as progress:
            requesters: List[AsyncRequester] = []

            task_id = progress.add_task(
                "Requesting sequences from NCBI...", total=len(self.ids)
            )
            requesters.append(
                AsyncRequester(
                    ids=self.ids,
                    url="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&retmode=text&rettype=genbank&id=",
                    task_id=task_id,
                    progress=progress,
                    batch_size=10,
                    rate_limit=2,
                    n_concurrent=5,
                )
            )

            # One result per requester, in the same order as `requesters`.
            responses = await asyncio.gather(
                *[requester.make_request() for requester in requesters]
            )

            # In case of multiple databases, identify the source of the data.
            # Only the NCBI bucket is consumed here; the rest is discarded.
            ncbi_responses, _ = self.identify_data_source(responses)

            # Map data to objects.
            ncbi_entries = NCBIDNAMapper().map(responses=ncbi_responses)

            return ncbi_entries

    def identify_data_source(self, responses: List[str]) -> tuple:
        """
        Identifies the source of the data based on the response content.

        A response whose first item starts with ``"LOCUS"`` is treated as an
        NCBI record; every other response, including an empty one, goes into
        the second bucket.

        Parameters:
            responses: The responses to classify.
                NOTE(review): each response is indexed with ``[0]`` and that
                item is checked with ``startswith``, so each response is
                presumably a sequence of text chunks rather than a single
                string - confirm against ``AsyncRequester.make_request``.

        Returns:
            tuple: ``(ncbi_responses, uniprot_response)``, two lists that
            together contain every input response exactly once, in input order.
        """
        ncbi_responses = []
        uniprot_response = []

        for response in responses:
            # Guard against empty responses: indexing them would raise
            # IndexError, and without content they cannot be identified as NCBI.
            if response and response[0].startswith("LOCUS"):
                ncbi_responses.append(response)
            else:
                uniprot_response.append(response)

        return ncbi_responses, uniprot_response
|
||
|
||
|
Oops, something went wrong.