-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added code for function gapfill_analysis #52
- Loading branch information
Showing
1 changed file
with
74 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,82 @@ | ||
# Function originally from refineGEMs.genecomp/refineGEMs.KEGG_analysis --- Modified | ||
def compare_gene_lists(gps_in_model: pd.DataFrame, db_genes: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``db_genes`` whose locus tag is absent from the model.

    Args:
        gps_in_model (pd.DataFrame): Table whose column ``0`` holds the
            identifiers that are compared against the locus tags.
        db_genes (pd.DataFrame): Table with a ``'Locus_tag'`` column.

    Returns:
        pd.DataFrame: The rows of ``db_genes`` whose ``'Locus_tag'`` value does
        not occur in column ``0`` of ``gps_in_model``, with ``'Locus_tag'``
        restored as a regular column.
    """
    # Index both tables by the identifier so membership can be tested on the index.
    db_by_tag = db_genes.set_index('Locus_tag')
    model_by_tag = gps_in_model.set_index(0)

    absent_from_model = ~db_by_tag.index.isin(model_by_tag.index)
    only_in_db = db_by_tag[absent_from_model]
    return only_in_db.reset_index()
#!/usr/bin/env python | ||
from libsbml import * | ||
import refinegems.analysis_db as rga | ||
import refinegems.analysis_kegg as rga_kegg | ||
import refinegems.analysis_biocyc as rga_biocyc | ||
import refinegems.entities as rge | ||
from colorama import init as colorama_init | ||
from colorama import Fore | ||
|
||
def get_genes_from_gff():
    """Placeholder without an implementation; does nothing and returns ``None``.

    NOTE(review): judging by the name this is meant to read genes from a GFF
    file -- confirm the intended behaviour before implementing.
    """
    return None
__author__ = "Famke Baeuerle and Gwendolyn O. Gusak" | ||
|
||
|
||
def compare_model_to_gff():
    """Placeholder without an implementation; does nothing and returns ``None``.

    NOTE(review): the name suggests a comparison of a model against a GFF
    file -- confirm the intended behaviour before implementing.
    """
    return None
def get_genes_from_gff():
    """Placeholder without an implementation; does nothing and returns ``None``.

    NOTE(review): judging by the name this is meant to read genes from a GFF
    file -- confirm the intended behaviour before implementing.
    """
    return None
|
||
|
||
def get_related_metabs_reactions_blast():
    """Placeholder without an implementation; does nothing and returns ``None``.

    NOTE(review): the name hints at a BLAST-based lookup of related
    metabolites and reactions -- confirm the intended behaviour before
    implementing.
    """
    return None
|
||
|
||
def gff_gene_comp(model_libsbml=None, bigg_dbs=None, gff_file=None):
    """Placeholder for the GFF-based gene comparison; not implemented yet.

    The signature accepts the three positional arguments that
    ``gapfill_analysis`` passes for the ``'GFF'`` option, so that call no
    longer fails with a ``TypeError``. All parameters default to ``None`` to
    keep existing zero-argument calls working.

    Args:
        model_libsbml: First argument supplied by ``gapfill_analysis`` (the
            model it received).
        bigg_dbs: Value of ``gapfill_params['bigg_dbs']`` at the call site.
        gff_file: Value of ``gapfill_params['gff_file']`` at the call site.

    Returns:
        None: The arguments are currently ignored.
    """
    return None
|
||
def gapfill():
    """Main function to gapfill a model with comparison to KEGG/BioCyc/(Genbank) GFF file.

    Currently a placeholder: it performs no work and returns ``None``.
    """
    return None
|
||
def gapfill_analysis(model_libsbml: Model, gapfill_params: dict):
    """Main function to gapfill a model with comparison to KEGG/BioCyc/(Genbank) GFF file.

    Args:
        model_libsbml (Model): model loaded with libSBML
        gapfill_params (dict): Dictionary obtained from YAML file containing the
            parameter mappings. Keys read here: ``'db_to_compare'``,
            ``'organismid'``, ``'bigg_dbs'``, ``'gff_file'`` and
            ``'biocyc_files'``. The values are not all strings
            (``'bigg_dbs'`` is indexed with ``[0]``).

    Returns:
        Depends on ``gapfill_params['db_to_compare']``:

        - ``'KEGG'``: the result of ``rga_kegg.kegg_gene_comp``.
        - ``'BioCyc'``: the result of ``rga_biocyc.biocyc_gene_comp``.
        - ``'GFF'``: the result of ``gff_gene_comp``.
        - ``'KEGG+BioCyc'``: the tuple ``(stats, missing_biocyc_genes,
          missing_biocyc_metabs, missing_metabs_wo_BiGG_df,
          missing_combined_reacs, missing_kegg_reacs)``, where
          ``missing_combined_reacs`` is the BioCyc reaction table left-merged
          with the KEGG table on ``'bigg_id'``.
        - ``None`` (after printing a message in red) when ``'db_to_compare'``
          is missing or not one of the four options, or when a KEGG-based
          option is requested without an ``'organismid'``.
    """
    colorama_init(autoreset=True)
    # .get() so that a missing key is answered with the help text below
    # rather than a bare KeyError.
    db_to_compare = gapfill_params.get('db_to_compare')

    if db_to_compare not in ('KEGG', 'BioCyc', 'GFF', 'KEGG+BioCyc'):
        # NOTE(review): this text names the parameters 'biggreactions',
        # 'gapfill' and 'biocyc_tables', while the code below reads
        # 'bigg_dbs', 'gff_file' and 'biocyc_files' -- confirm against the
        # YAML schema which spelling is current.
        print(
            f'{Fore.RED}To use the module gapfill the parameter of db_to_compare has to be set to one of the following'
            ' options:\n- \'KEGG\'\n- \'BioCyc\'\n- \'GFF\'\n- \'KEGG+BioCyc\'\nAdditionally, the required parameters'
            ' for each option need to be specified.\n- \'biggreactions\' and \'gapfill\' are required for all options.'
            '\n- \'organismid\' is required only for the options \'KEGG\' and \'KEGG+BioCyc\'.\n- \'biocyc_tables\''
            ' is only required for the options \'BioCyc\' and \'KEGG+BioCyc\'.'
        )
        return None

    # Both KEGG-based options need an organism id; check it once up front so
    # 'KEGG+BioCyc' is validated the same way as 'KEGG'.
    if db_to_compare in ('KEGG', 'KEGG+BioCyc') and not gapfill_params.get('organismid'):
        print(
            f'{Fore.RED}To use the KEGG comparison the specification of the organismid is obligatory.\n'
            'If there is no organismid available for your organism in KEGG, use one of the options \'BioCyc\' or \'GFF\'.'
        )
        return None

    if db_to_compare == 'KEGG':
        return rga_kegg.kegg_gene_comp(
            model_libsbml,
            gapfill_params['organismid'],
            gapfill_params['bigg_dbs'][0],
            gapfill_params['gff_file'],
        )

    if db_to_compare == 'BioCyc':
        return rga_biocyc.biocyc_gene_comp(
            model_libsbml,
            gapfill_params['biocyc_files'],
            gapfill_params['bigg_dbs'],
        )

    if db_to_compare == 'GFF':
        return gff_gene_comp(
            model_libsbml,
            gapfill_params['bigg_dbs'],
            gapfill_params['gff_file'],
        )

    # Remaining option: 'KEGG+BioCyc' -- run both comparisons and combine
    # their reaction tables.
    missing_kegg_reacs = rga_kegg.kegg_gene_comp(
        model_libsbml,
        gapfill_params['organismid'],
        gapfill_params['bigg_dbs'][0],
        gapfill_params['gff_file'],
    )
    # These three KEGG columns are removed before the merge.
    missing_kegg_reacs.drop(['name', 'locus_tag', 'EC'], axis=1, inplace=True)

    missing_biocyc = rga_biocyc.biocyc_gene_comp(
        model_libsbml,
        gapfill_params['biocyc_files'],
        gapfill_params['bigg_dbs'],
    )
    (
        stats,
        missing_biocyc_genes,
        missing_biocyc_metabs,
        missing_metabs_wo_BiGG_df,
        missing_biocyc_reacs,
    ) = missing_biocyc

    # Left merge: every BioCyc reaction row is kept; KEGG columns are attached
    # where the 'bigg_id' matches.
    missing_combined_reacs = missing_biocyc_reacs.merge(missing_kegg_reacs, how='left', on='bigg_id')
    return (
        stats,
        missing_biocyc_genes,
        missing_biocyc_metabs,
        missing_metabs_wo_BiGG_df,
        missing_combined_reacs,
        missing_kegg_reacs,
    )