Skip to content

Commit

Permalink
Merge pull request #135 from draeger-lab/hot-fixes-2.0-pre-release
Browse files Browse the repository at this point in the history
Hot fixes for 2.0.0a0 pre-release
  • Loading branch information
GwennyGit authored Sep 1, 2024
2 parents dd50de8 + 25e6990 commit 47f5aa3
Show file tree
Hide file tree
Showing 8 changed files with 1,461 additions and 250 deletions.
839 changes: 615 additions & 224 deletions dev/gapfill-testing.ipynb

Large diffs are not rendered by default.

786 changes: 786 additions & 0 deletions dev/tutorial.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
author = 'Famke Bäuerle, Gwendolyn O. Döbel and Carolin Brune'

# The full version, including alpha/beta/rc tags
release = '2.0.0-alpha.0'
release = '2.0.0-alpha.1'


# -- General configuration ---------------------------------------------------
Expand Down
7 changes: 0 additions & 7 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,6 @@ Welcome to refineGEMs!
======================================
``refineGEMs`` is a Python-based toolbox for the curation and analysis of genome-scale metabolic models (GEMS).

.. warning::
| *Will be deprecated from version 2.0.0 onwards.*
| Due to massive restructuring and extension, all functions have been moved into new modules.
| If you used any functions from refineGEMs until now and want to use version 2.0.0 in the future, make sure to check your code after the version change.
| For the main pipeline in main.py see `SPECIMEN <https://github.com/draeger-lab/SPECIMEN>`__ as it will be deprecated from version 2.0.0 onwards.
| Beware that SPECIMEN only runs with refineGEMs 2.0.0 (starting with developmental versions).
Overview
--------

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "refineGEMs"
version = "2.0.0-alpha.0"
version = "2.0.0-alpha.1"
requires-python = ">=3.10, <3.12"
authors = [
{name = "Famke Baeuerle", email = "[email protected]"},
Expand Down
6 changes: 4 additions & 2 deletions src/refinegems/analysis/growth.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,8 @@ def test_auxotrophies(model:cobraModel, media_list:list[Medium], supplement_list
if len(entry) == 0:
warn_str = f'Amino acid {a} has no identifier for your chosen namespace {namespace}. Please contact support if you want to add one.'
warnings.warn(warn_str)
auxotrophies[a] = m.optimize().objective_value
growth_res = m.optimize()
auxotrophies[a] = growth_res.objective_value if growth_res.status == 'optimal' else 0.0
else:

# create and check IDs for the chosen namespace
Expand Down Expand Up @@ -834,7 +835,8 @@ def test_auxotrophies(model:cobraModel, media_list:list[Medium], supplement_list
m.reactions.get_by_id(exchange_reac).lower_bound = 0.0
m.reactions.get_by_id(exchange_reac).upper_bound = 0.0
# and calculate the new objective
auxotrophies[a] = m.optimize().objective_value
growth_res = m.optimize()
auxotrophies[a] = growth_res.objective_value if growth_res.status == 'optimal' else 0.0

# add the current test results to the list of all results
results[med.name] = auxotrophies
Expand Down
48 changes: 33 additions & 15 deletions src/refinegems/classes/gapfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _map_to_mnx(biocyc_reacs: pd.DataFrame) -> Union[tuple[pd.DataFrame, pd.Data
# Transform id column to only contain BioCyc/MetaCyc IDs
mnx2biocyc_reacs['id'] = mnx2biocyc_reacs['id'].str.split(':', n=1).str[-1]

# Crop table to contain BiGG IDs set per BioCyc ID
# Crop table to contain MetaNetX IDs set per BioCyc ID
mnx_as_list = mnx2biocyc_reacs.groupby('id')['MetaNetX'].apply(set).reset_index(name='MetaNetX')
mnx2biocyc_reacs.drop('MetaNetX', axis=1, inplace=True)
mnx2biocyc_reacs = mnx_as_list.merge(mnx2biocyc_reacs, on='id')
Expand All @@ -235,11 +235,24 @@ def _map_to_mnx(biocyc_reacs: pd.DataFrame) -> Union[tuple[pd.DataFrame, pd.Data
# Step 2.1: Clean-up of table containing MetaNetX IDs
# ---------------------------------------------------
if not mnx_reacs.empty:
# Replace id with MetaNetX ID
mnx_reacs.rename(
{'id': 'metacyc.reaction', 'MetaNetX': 'id'},
axis=1, inplace=True
)
# Rename id to metacyc.reaction for simpler addition to column reference
mnx_reacs.rename({'id': 'metacyc.reaction'}, axis=1, inplace=True)

# Turn MetaNetX column in single value, rename column to id
# & if multiple MetaNetX IDs exist add to column alias
mnx_reacs['id'] = mnx_reacs['MetaNetX'].map(list).str.get(0)
mnx_reacs['alias'] = mnx_reacs.apply(
lambda x:
x['MetaNetX'].remove(x['id']) if len(x['MetaNetX']) != 1 else None,
axis=1
)

# Specify origin of identified missing reactions &
# Move BioCyc ID & origin to reference
mnx_reacs['origin'] = 'BioCyc'
mnx_reacs['reference'] = mnx_reacs[
['metacyc.reaction', 'origin', 'alias']
].to_dict('records')

# Create list of EC codes in column ec-code_x,
# Join both ec-code columns into one & Create a set of ec-codes
Expand All @@ -249,15 +262,12 @@ def _map_to_mnx(biocyc_reacs: pd.DataFrame) -> Union[tuple[pd.DataFrame, pd.Data
).map(set).map(list)
mnx_reacs['ec-code'] = (mnx_reacs['ec-code_x'] + mnx_reacs['ec-code_y']).map(set).map(list)

# Specify origin of identified missing reactions &
# Move BioCyc ID, origin & ec-code to reference
mnx_reacs['origin'] = 'BioCyc'
mnx_reacs['reference'] = mnx_reacs[['metacyc.reaction', 'origin']].to_dict('records')

# Drop all unnecessary columns
mnx_reacs.drop(
['origin', 'metacyc.reaction', 'ec-code_x', 'ec-code_y', 'equation'],
axis=1, inplace=True
[
'origin', 'metacyc.reaction', 'MetaNetX', 'alias', 'ec-code_x',
'ec-code_y', 'equation'
], axis=1, inplace=True
)

# Replace equation with mnx_equation
Expand Down Expand Up @@ -796,8 +806,8 @@ def add_gene_reac_associations_from_table(self,model:libModel,

# get each unique ncbiprotein vs reaction mapping
reac_table = reac_table[['ncbiprotein','add_to_GPR']]
reac_table = reac_table.explode('ncbiprotein').explode('add_to_GPR')
reac_table.drop_duplicates(inplace=True)
reac_table = reac_table.explode('ncbiprotein')
reac_table.drop_duplicates(subset='ncbiprotein', inplace=True)

# add the genes to the corresponding GPRs
for idx,row in reac_table.iterrows():
Expand Down Expand Up @@ -985,6 +995,13 @@ def fill_model(self, model:Union[cobra.Model,libModel],
libModel:
The gap-filled model.
"""

# Filter out reactions without ncbiprotein
# @DISCUSSION
# @TODO Add also reactions without GPR to model
self.manual_curation['Reactions no GPR'] = self.missing_reactions[self.missing_reactions['ncbiprotein'].isnull()]
self.missing_reactions = self.missing_reactions[~self.missing_reactions['ncbiprotein'].isnull()]

# filter out duplicates genes to avoid duplicates IDs in the model
# @TODO: Better idea to add duplicate genes
if len(self.missing_genes) != len(self.missing_genes['ncbiprotein'].unique()):
Expand Down Expand Up @@ -1014,6 +1031,7 @@ def fill_model(self, model:Union[cobra.Model,libModel],
genes_with_reacs_in_model = self.missing_genes[self.missing_genes['ncbiprotein'].isin(ncbiprot_with_reacs_in_model)]
self._statistics['genes']['added'] = self._statistics['genes']['added'] + len(genes_with_reacs_in_model)
if len(genes_with_reacs_in_model) > 0:

# add genes as gene products to model
self.add_genes_from_table(model, genes_with_reacs_in_model)

Expand Down
21 changes: 21 additions & 0 deletions src/refinegems/utility/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,9 @@ def build_reaction_mnx(model:cobra.Model, id:str,
list:
List of matching reaction IDs (in model).
"""

# Get origin if not reaction was found missing with MetaNetX
origin = references.pop('origin') if 'origin' in references.keys() else None

# ---------------------
# check, if ID in model
Expand Down Expand Up @@ -1121,6 +1124,12 @@ def build_reaction_mnx(model:cobra.Model, id:str,
# update reactions direction, if MetaCyc has better information
if db == 'metacyc.reaction' and len(db_matches[db_matches['source'].str.contains('-->')]):
new_reac.bounds = (0.0,cobra.Configuration().upper_bound)
# get alias annotations for MetaNetX if available
if 'alias' in references.keys():
if references['alias'] != None:
references['metanetx.reaction'] = list(references['alias'])
del references['alias']

# add additional references from the parameter
_add_annotations_from_dict_cobra(references,new_reac)

Expand All @@ -1130,6 +1139,7 @@ def build_reaction_mnx(model:cobra.Model, id:str,

# add notes
# ---------
if origin: new_reac.notes['found with'] = f'refineGEMs GapFiller, {origin}'
new_reac.notes['created with'] = 'refineGEMs GapFiller, MetaNetX'

# match ID to namespace
Expand Down Expand Up @@ -1348,6 +1358,9 @@ def build_reaction_bigg(model:cobra.Model, id:str,
list:
List of matching reaction IDs (in model).
"""

# Get origin if not reaction was found missing with MetaNetX
origin = references.pop('origin') if 'origin' in references.keys() else None

# ---------------------
# check, if ID in model
Expand Down Expand Up @@ -1413,11 +1426,17 @@ def build_reaction_bigg(model:cobra.Model, id:str,
# add infos from BiGG
new_reac.annotation['bigg.reaction'] = [id]
_add_annotations_from_bigg_reac_row(bigg_reac_info, new_reac)
# get alias annotations for MetaNetX if available
if 'alias' in references.keys():
if references['alias'] != None:
references['metanetx.reaction'] = list(references['alias'])
del references['alias']
# add additional references from the parameter
_add_annotations_from_dict_cobra(references,new_reac)

# add notes
# ---------
if origin: new_reac.notes['found with'] = f'refineGEMs GapFiller, {origin}'
new_reac.notes['created with'] = 'refineGEMs GapFiller, BiGG'

# match ID to namespace
Expand Down Expand Up @@ -1596,6 +1615,7 @@ def old_create_gp(model: libModel, model_id: str, name: str, locus_tag: str, pro
# @NEW - substitues the one above
# --> WARNING: Output is different now
# @TODO generalise addition of references -> maybe kwargs
# @TODO: Check if ncbiprotein leads to valid ID -> Otherwise, replace invalid chars with '_'
def create_gp(model:libModel, protein_id:str,
model_id:str=None,
name:str=None, locus_tag:str=None,
Expand Down Expand Up @@ -1634,6 +1654,7 @@ def create_gp(model:libModel, protein_id:str,
gp.setSBOTerm('SBO:0000243') # SBOterm
gp.setMetaId(f'meta_G_{protein_id}') # Meta ID
# test for NCBI/RefSeq
id_db = None
if re.fullmatch('^(((AC|AP|NC|NG|NM|NP|NR|NT|NW|WP|XM|XP|XR|YP|ZP)_\d+)|(NZ_[A-Z]{2,4}\d+))(\.\d+)?$', protein_id, re.IGNORECASE):
id_db = 'REFSEQ'
elif re.fullmatch('^(\w+\d+(\.\d+)?)|(NP_\d+)$', protein_id, re.IGNORECASE): id_db = 'NCBI'
Expand Down

0 comments on commit 47f5aa3

Please sign in to comment.