diff --git a/dev/gapfill-testing.ipynb b/dev/gapfill-testing.ipynb index 416f508..b5eac63 100644 --- a/dev/gapfill-testing.ipynb +++ b/dev/gapfill-testing.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -66,117 +66,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ec-codencbiproteinidequationreferenceis_transportviaadd_to_GPR
05.6.2.2[WP_011274363.1]MNXR1156321 MNXM12437@MNXD1 + 1 MNXM40333@MNXD1 + 1 MNXM...metacycR:5.99.1.3-RXNNoneMetaNetXNone
15.6.2.2[WP_011274363.1]MNXR1728941 MNXM1100221@MNXD1 + 1 MNXM40333@MNXD1 + 1 MN...sabiorkR:15120NoneMetaNetXNone
25.6.2.2[WP_011274363.1]MNXR1728951 MNXM1100221@MNXD1 + 1 MNXM735047@MNXD1 + 1 M...sabiorkR:15121NoneMetaNetXNone
35.6.2.2[WP_011274363.1]MNXR1728961 MNXM1100223@MNXD1 + 1 MNXM40333@MNXD1 + 1 MN...sabiorkR:15122NoneMetaNetXNone
\n", - "
" - ], - "text/plain": [ - " ec-code ncbiprotein id \\\n", - "0 5.6.2.2 [WP_011274363.1] MNXR115632 \n", - "1 5.6.2.2 [WP_011274363.1] MNXR172894 \n", - "2 5.6.2.2 [WP_011274363.1] MNXR172895 \n", - "3 5.6.2.2 [WP_011274363.1] MNXR172896 \n", - "\n", - " equation reference \\\n", - "0 1 MNXM12437@MNXD1 + 1 MNXM40333@MNXD1 + 1 MNXM... metacycR:5.99.1.3-RXN \n", - "1 1 MNXM1100221@MNXD1 + 1 MNXM40333@MNXD1 + 1 MN... sabiorkR:15120 \n", - "2 1 MNXM1100221@MNXD1 + 1 MNXM735047@MNXD1 + 1 M... sabiorkR:15121 \n", - "3 1 MNXM1100223@MNXD1 + 1 MNXM40333@MNXD1 + 1 MN... sabiorkR:15122 \n", - "\n", - " is_transport via add_to_GPR \n", - "0 None MetaNetX None \n", - "1 None MetaNetX None \n", - "2 None MetaNetX None \n", - "3 None MetaNetX None " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapped_res[1]" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", @@ -202,68 +95,7 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ncbiproteinlocus_tagec-codeUniProt
4WP_011274363.1SH00055.6.2.2[Q5HK03, Q8CQK4, Q6GKU0, Q2FKQ1, Q6GD85, P0A0K...
\n", - "
import warnings


# @TODO : does it cover indeed all cases
# Where to sort it -> entities?
def create_gpr(reaction, gene):
    """Add one or more gene products to the GPR of a libSBML reaction.

    An existing gene product association is preserved: a single gene is
    merged with the new gene(s) into one 'OR' group, a nested AND/OR rule
    is cloned and combined with the new gene(s) via 'OR'.

    Args:
        reaction: libSBML reaction; its plugin at index 0 must provide the
            gene product association (presumably the fbc plugin).
        gene: ID of a single gene product (str) or a list/tuple of IDs.
            The passed container is never modified.

    Raises:
        TypeError: If ``gene`` is neither a string nor a list/tuple.
    """
    # local import: keeps the function usable independent of cell order
    from libsbml import FbcAnd, FbcOr, GeneProductRef

    # validate before touching the model, otherwise an unsupported type
    # would silently replace the existing GPR with an empty one
    if isinstance(gene, str):
        new_genes = [gene]
    elif isinstance(gene, (list, tuple)):
        new_genes = list(gene)  # copy, do not mutate the caller's list
    else:
        raise TypeError(f'gene must be a str or a list of str, not {type(gene).__name__}')
    if not new_genes:
        return  # nothing to add, keep the current GPR untouched

    fbc_plugin = reaction.getPlugin(0)

    # Step 1: test, if there is already a gpr
    # ---------------------------------------
    old_association_str = None
    old_association_fbc = None
    current = fbc_plugin.getGeneProductAssociation()
    if current:
        old_association = current.getListOfAllElements()
        # case 1: only a single association
        if len(old_association) == 1 and isinstance(old_association[0], GeneProductRef):
            old_association_str = old_association[0].getGeneProduct()
        # case 2: nested structure of associations
        elif len(old_association) > 0 and isinstance(old_association[0], (FbcOr, FbcAnd)):
            # the first element should be the highest level association
            # (that includes all others); clone it, as the original is
            # replaced when the new association is created
            old_association_fbc = old_association[0].clone()

    # Step 2: create new gene product association
    # -------------------------------------------
    if old_association_str:
        if isinstance(gene, str):
            new_genes = [old_association_str, gene]
        else:
            new_genes.append(old_association_str)
    # avoid rules like 'G_x or G_x' (order of first occurrence is kept)
    new_genes = list(dict.fromkeys(new_genes))

    # add the old association rule as an 'OR' (if needed)
    if old_association_fbc is None:
        new_association = fbc_plugin.createGeneProductAssociation()
    else:
        new_association = fbc_plugin.createGeneProductAssociation().createOr()
        new_association.addAssociation(old_association_fbc)

    # add the remaining genes
    # @TODO currently, only connection possible is 'OR'
    if len(new_genes) == 1:
        new_association.createGeneProductRef().setGeneProduct(new_genes[0])
    else:
        gpa_or = new_association.createOr()
        for gene_id in new_genes:
            gpa_or.createGeneProductRef().setGeneProduct(gene_id)


# @TODO seems very rigid, better ways to find the ids?
# probably sort into GapFiller
def add_gene_reac_associations_from_table(model, reac_table: pd.DataFrame):
    """Extend the GPRs of model reactions with the genes listed in a table.

    Every unique (ncbiprotein, add_to_GPR) pair of the table is translated
    into a gene ID ('G_' + protein ID with '.' replaced by '_') and a
    reaction ID ('R_' + reaction). Pairs with a missing value are skipped.
    A warning is issued when the gene or the reaction is not in the model.

    Args:
        model: libSBML model; its plugin at index 0 must list the gene
            products (presumably the fbc plugin).
        reac_table: Table with the columns 'ncbiprotein' and 'add_to_GPR';
            cells may hold single values or lists.
    """
    # set: constant-time membership test inside the loop
    model_gene_ids = {gp.getId() for gp in model.getPlugin(0).getListOfGeneProducts()}

    # get each unique ncbiprotein vs reaction mapping
    pairs = reac_table[['ncbiprotein', 'add_to_GPR']]
    pairs = pairs.explode('ncbiprotein').explode('add_to_GPR')
    # rows without a protein or a reaction cannot be mapped
    pairs = pairs.dropna().drop_duplicates()

    # add the genes to the corresponding GPRs
    for protein, reaction in pairs.itertuples(index=False, name=None):
        geneid = 'G_' + protein.replace('.', '_')
        reacid = 'R_' + reaction
        # check, if G_+ncbiprotein in model; if not, print warning
        if geneid not in model_gene_ids:
            mes = f'Cannot find {geneid} in model. Should be added to {reacid}'
            warnings.warn(mes, UserWarning)
            continue
        target = model.getReaction(reacid)
        if target is None:
            mes = f'Cannot find {reacid} in model. {geneid} was not added to a GPR.'
            warnings.warn(mes, UserWarning)
            continue
        create_gpr(target, geneid)
- ] + "data": { + "text/plain": [ + "'G_WP_011274363_1'" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "from libsbml import FbcOr, FbcAnd\n", + "testmodel.getReaction('R_12DGR160tipp').getPlugin(0).getGeneProductAssociation().getListOfAllElements()[0].getGeneProduct()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1 MNXM1100221@MNXD1 + 1 MNXM735047@MNXD1 + 1 MNXM9@MNXD1 = 1 MNXM1100222@MNXD1 + 1 MNXM286@MNXD1'" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testcase.iloc[2,3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Further ideas and Code snippets for the filling part\n", "\n", - "testmodel = model.clone()\n", - "x = 113 # 45 : None, 46 : One, 113 Or\n", - "id = 'WP_011274363_1'\n", - "reac = testmodel.getListOfReactions()[x].getPlugin(0)\n", - "# connection = 'or'\n", - "# test, if there is already a gpr\n", - "old_association_str = None\n", - "old_association_fbc = None\n", - "if reac.getGeneProductAssociation():\n", - " old_association = reac.clone().getGeneProductAssociation().getListOfAllElements()\n", - " if len(old_association) == 1:\n", - " old_association_str = old_association[0].getGeneProduct()\n", - " else:\n", - " for el in old_association:\n", - " if isinstance(el, FbcOr) or isinstance(el, FbcAnd):\n", - " old_association_fbc = el # there should only be one object od this type -> @TODO check\n", - " break\n", - " \n", - "# create new gene product association \n", - "if old_association_str and isinstance(id,str):\n", - " id = [old_association_str,id]\n", - "elif old_association_str and isinstance(id,list):\n", - " id.append(old_association_str)\n", - " \n", - "# this does not work!!!!\n", - "# @IDEA: create a dummy gp e.g. 
a copy of the current one and copy it from there\n", - "if not old_association_fbc:\n", - " new_association = reac.createGeneProductAssociation()\n", - "else:\n", - " new_association = reac.createGeneProductAssociation().createOr()\n", - " new_association.addAssociation(old_association_fbc)\n", - " \n", - "if isinstance(id,str):\n", - " new_association.createGeneProductRef().setGeneProduct(id)\n", - "elif isinstance(id,list) and len(id) == 1:\n", - " new_association.createGeneProductRef().setGeneProduct(id[0])\n", - "elif isinstance(id,list) and len(id) > 1:\n", - " gpa_or = new_association.createOr()\n", - " for i in id:\n", - " gpa_or.createGeneProductRef().setGeneProduct(i)\n", - " \n", - " \n", + "##### how to build the new entities:\n", + "\n", + "- option a) collection all information first, filter and then add them from table\n", + "- option b) iteratively collection information and add entities (reaction after reaction)\n", + "\n", + "use libsbml or cobrapy?\n", + "\n", + "available functions:\n", + "- libsbml-based create_reaction/create_species (needs all information beforehand + all other entities need to be in the model) -> required for the gene labels\n", + "- cobra-based add_reaction/add_metabolite (builds as it goes), also match_id_to_namespace and \n", + "finding possible matches might be easier using COBRApy <- namespace and annotation stuff far easier here\n", "\n", - "print(reac.getGeneProductAssociation().getListOfAllElements())\n", - " " + "definitly needed:\n", + "- parse reaction string of different formats:\n", + " - MetaNetX (can get this somewhat from SPECIMEN)\n", + " - KEGG (also somewhat in SPECIMEN)\n", + " - BiGG (new?)\n", + " - BioCyc (new?)\n", + "- retrieve needed information from the required databases (reaction/metabolites)\n", + " - cross referencing, if one db not enough?\n", + "- filter for when to include reactions and when not (e.g. missing metabolites, formulas, DNA/RNA etc.) 
# @TODO: BioCyc missing
def parse_reac_str(equation, type='MetaNetX'):
    """Parse a reaction equation string into its components.

    Supported formats (``type``):

    - 'MetaNetX': ``1 MNXM9@MNXD1 = 1 MNXM286@MNXD1`` (metabolite@compartment,
      separated by '=')
    - 'BiGG': ``aspsa_c + nadp_c <-> 4pasp_c`` ('-->' marks an irreversible,
      '<->' a reversible reaction; the compartment is the suffix after the
      last underscore)
    - 'KEGG': ``C00024 + C00025 <=> C00010`` (no compartment information)

    A missing stoichiometric factor counts as 1.0; factors may be
    fractional (e.g. ``0.5``). Tokens are separated by any whitespace.

    Args:
        equation: The reaction equation.
        type: Name of the format, see above. 'BioCyc' is not implemented
            yet; for it and for unknown names an empty result is returned.

    Returns:
        Tuple ``(reactants, products, compartments, reversible)``:
        two dicts mapping metabolite ID to factor, the list of compartments
        in order of appearance (``None`` for KEGG) and the reversibility.

    Raises:
        ValueError: If a MetaNetX metabolite is not of the form ``id@comp``.
    """
    # per format: arrow token -> is the reaction reversible?
    arrows_by_format = {
        'MetaNetX': {'=': True},
        'BiGG': {'-->': False, '<->': True},
        'KEGG': {'<=>': True},  # @TODO are there more options?
    }

    reactants = {}
    products = {}
    compartments = None if type == 'KEGG' else []
    reversible = True

    arrows = arrows_by_format.get(type)
    if arrows is None:
        # BioCyc (not implemented) or unknown format: nothing is parsed
        return (reactants, products, compartments, reversible)

    side = reactants  # switched to products once the arrow was passed
    factor = 1.0
    # split() without argument tolerates repeated blanks
    for token in equation.split():
        # skip
        if token == '+':
            continue
        # switch from reactants to products
        if token in arrows:
            side = products
            reversible = arrows[token]
            continue
        # found stoichiometric factor (integer or decimal number)
        if token.replace('.', '', 1).isdecimal():
            factor = float(token)
            continue

        # found metabolite
        if type == 'MetaNetX':
            parts = token.split('@')
            if len(parts) != 2:
                raise ValueError(f'Cannot parse MetaNetX metabolite: {token}')
            metabolite, compartment = parts
            compartments.append(compartment)
        elif type == 'BiGG':
            metabolite = token
            # IDs may contain underscores themselves (e.g. glc__D_c)
            _, sep, compartment = token.rpartition('_')
            compartments.append(compartment if sep else None)
        else:
            metabolite = token

        side[metabolite] = factor
        factor = 1.0  # a factor only applies to the metabolite following it

    return (reactants, products, compartments, reversible)
# originally from SPECIMEN HQTB
# @TODO
def isreaction_complete(reac:cobra.Reaction,
                        exclude_dna:bool=True, exclude_rna:bool=True) -> bool:
    """Check, if a reaction is complete enough to be added to a model.

    A reaction is rejected, if its name contains 'DNA' or 'RNA' (and the
    corresponding exclude flag is set) or if any of its metabolites has an
    empty or null ID, name or formula.

    Args:
        reac: The reaction to check.
        exclude_dna: Reject reactions with 'DNA' in their name.
        exclude_rna: Reject reactions with 'RNA' in their name.

    Returns:
        True, if the reaction passed all checks, else False.
    """
    # check reaction
    reaction_name = reac.name or ''  # a missing name cannot contain DNA/RNA
    if exclude_dna and 'DNA' in reaction_name:
        return False
    if exclude_rna and 'RNA' in reaction_name:
        return False

    # check metabolites
    for m in reac.metabolites:
        for value in (m.id, m.name, m.formula):
            if value == '' or pd.isnull(value):
                return False

    return True


# #### creating the reactions

genes_to_add = pd.DataFrame(columns=['ncbiprotein','reaction'])
# iterating a groupby yields (key, sub-table) pairs, so both have to be
# unpacked before the sub-table can be grouped again
# for every type of database
for via, via_table in mapped_res[1].groupby('via'):
    # for every unique ID per database
    for reac_id, reac_rows in via_table.groupby('id'):
        # try to rebuild the reaction

        # check, if reaction was build successfully
        #
        pass
[ - "len(testmodel.getListOfReactions())" + "# some decorators\n", + "def template(func):\n", + " def wrapper():\n", + " print('This function is a template for developers.\\nThis cannot be executed.')\n", + " return wrapper\n", + "\n", + "def implement(func):\n", + " def wrapper():\n", + " print('The current function is just a placeholder and will be implement in the fucture.')\n", + " return wrapper" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 171, "metadata": {}, + "outputs": [], + "source": [ + "from refinegems.utility.io import load_a_table_from_database\n", + "from refinegems.utility.entities import create_random_id, match_id_to_namespace\n", + "import cobra\n", + "import pandas as pd\n", + "from typing import Literal\n", + "from Bio.KEGG import REST, Compound\n", + "import urllib\n", + "\n", + "# @TODO : name is an issue\n", + "def get_BiGG_metabs_annotation_via_dbid(metabolite, id, dbcol, compartment):\n", + " if not 'bigg.metabolite' in metabolite.annotation.keys():\n", + " bigg_search = load_a_table_from_database(\n", + " f'SELECT * FROM bigg_metabolites WHERE \\'{dbcol}\\' = \\'{id}\\'',\n", + " query=True)\n", + " if len(bigg_search) > 0:\n", + " metabolite.annotation['bigg.metabolite'] = [_ for _ in bigg_search['id'].tolist() if _.endswith(f'_{compartment}')]\n", + " if len(metabolite.annotation['bigg.metabolite']) == 0:\n", + " metabolite.annotation.pop('bigg.metabolite')\n", + "\n", + "\n", + "def add_annotations_from_BiGG_metabs(metabolite:cobra.Metabolite):\n", + " if 'bigg.metabolite' in metabolite.annotation.keys():\n", + " bigg_information = load_a_table_from_database(\n", + " 'SELECT * FROM bigg_metabolites WHERE id = \\'' + f'\\' OR id = \\''.join(metabolite.annotation['bigg.metabolite']) + '\\'',\n", + " query=True)\n", + " db_id_bigg = {'BioCyc':'biocyc', 'MetaNetX (MNX) Chemical':'metanetx.chemical','SEED Compound':'seed.compound','CHEBI':'chebi', 'KEGG Compound':'kegg.compound'}\n", + " for db in 
from typing import Literal
import urllib.error
import warnings

from Bio.KEGG import REST, Compound
import cobra
import pandas as pd
from refinegems.utility.entities import create_random_id, match_id_to_namespace
from refinegems.utility.io import load_a_table_from_database


# databases whose IDs are taken over from the MetaNetX cross references
_MNX_XREF_DBS = ('kegg.compound', 'metacyc.compound', 'seed.compound',
                 'bigg.metabolite', 'chebi')


def _annotation_as_list(value) -> list:
    """Return an annotation entry as list (a bare string gets wrapped)."""
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    return list(value)


def _find_annotated_metabolite(model, annotation_key, db_id, compartment):
    """Return the first model metabolite of the compartment annotated with db_id."""
    for candidate in model.metabolites:
        if candidate.compartment != compartment:
            continue
        if db_id in _annotation_as_list(candidate.annotation.get(annotation_key)):
            return candidate
    return None


def _collect_mnx_xrefs(xref_table) -> dict:
    """Map each database of _MNX_XREF_DBS to the IDs found in the 'source' column."""
    sources = [str(s) for s in xref_table['source'].dropna().tolist()]
    found = {}
    for db in _MNX_XREF_DBS:
        prefix = f'{db}:'
        ids = [s[len(prefix):] for s in sources if s.startswith(prefix)]
        if ids:
            found[db] = ids
    return found


def add_annotations_from_BiGG_metabs(metabolite:cobra.Metabolite):
    """Extend the annotations of a metabolite with the cross references of BiGG.

    Requires a 'bigg.metabolite' annotation; without it nothing happens.
    The IDs found in the table bigg_metabolites are merged with already
    existing annotations of the same database (as sorted, unique list).

    Args:
        metabolite: The metabolite to annotate (modified in place).
    """
    bigg_ids = _annotation_as_list(metabolite.annotation.get('bigg.metabolite'))
    if not bigg_ids:
        return

    # NOTE(review): the query is built via string formatting, the IDs
    # are expected to come from trusted databases
    id_list = ', '.join("'" + bigg_id + "'" for bigg_id in bigg_ids)
    bigg_information = load_a_table_from_database(
        f'SELECT * FROM bigg_metabolites WHERE id IN ({id_list})',
        query=True)

    db_id_bigg = {'BioCyc':'biocyc', 'MetaNetX (MNX) Chemical':'metanetx.chemical','SEED Compound':'seed.compound','CHEBI':'chebi', 'KEGG Compound':'kegg.compound'}
    for column, annotation_key in db_id_bigg.items():
        # a cell can hold several comma-separated IDs,
        # make sure all entries are a separate list object
        info = {part.strip()
                for entry in bigg_information[column].dropna().to_list()
                for part in str(entry).split(',')
                if part.strip()}
        if not info:
            continue
        # existing annotations may be a plain string or a list
        known = _annotation_as_list(metabolite.annotation.get(annotation_key))
        metabolite.annotation[annotation_key] = sorted(info.union(known))


@template
def build_metabolite_xxx(id:str, model:cobra.Model,
                         namespace:str,
                         compartment:str,
                         idprefix:str) -> cobra.Metabolite:
    # check if id in model
    # get information via id
    # collection formation in a new metabolite object
    # add more annotations from other databases
    # adjust namespace
    # check model again for new namespace
    pass

# originally from SPECIMEN
# @TODO some issues left
# current version works on a couple of examples
def build_metabolite_mnx(id: str, model:cobra.Model,
                         namespace:str='BiGG',
                         compartment:str='c',
                         idprefix:str='refineGEMs') -> cobra.Metabolite | None:
    """Get or build a metabolite based on a MetaNetX ID.

    Args:
        id: The MetaNetX chemical ID.
        model: The model the metabolite is meant for; it is searched for
            an existing metabolite, but not modified.
        namespace: Namespace the ID of a new metabolite is matched to.
        compartment: Compartment of the metabolite.
        idprefix: Prefix for the random ID of a new metabolite.

    Returns:
        A metabolite already in the model (same MetaNetX annotation and
        compartment, or same ID after matching the namespace), a newly
        built metabolite, or None if MetaNetX has no entry for the ID.
    """
    # fast check if compound already in model
    # ------------------------------------------
    # @TODO what to do with multiple matches
    # currently, just the first one is taken
    existing = _find_annotated_metabolite(model, 'metanetx.chemical', id, compartment)
    if existing is not None:
        return existing

    # if not, create new metabolite
    # -----------------------------
    metabolite_prop = load_a_table_from_database(
        f'SELECT * FROM mnx_chem_prop WHERE id = \'{id}\'',
        query=True)
    if len(metabolite_prop) == 0: # cannot construct metabolite
        return None
    metabolite_anno = load_a_table_from_database(
        f'SELECT * FROM mnx_chem_xref WHERE id = \'{id}\'',
        query=True)
    prop = metabolite_prop.iloc[0]

    # step 1: create a random metabolite ID
    new_metabolite = cobra.Metabolite(create_random_id(model, 'meta', idprefix))

    # step 2: add features
    # --------------------
    new_metabolite.formula = prop['formula']
    new_metabolite.name = prop['name']
    new_metabolite.charge = prop['charge']
    new_metabolite.compartment = compartment

    # step 3: add notes
    # -----------------
    new_metabolite.notes['created with'] = 'refineGEMs GapFiller, metanetx.chemical'

    # step 4: add annotations
    # -----------------------
    # add SBOTerm
    new_metabolite.annotation['sbo'] = 'SBO:0000247'

    # add information directly available from the mnx_chem_prop table
    new_metabolite.annotation['metanetx.chemical'] = [prop['id']]
    if not pd.isnull(prop['InChIKey']):
        new_metabolite.annotation['inchikey'] = prop['InChIKey'].split('=', 1)[-1]

    # get more annotation from the mnx_chem_xref table
    for db, ids in _collect_mnx_xrefs(metabolite_anno).items():
        new_metabolite.annotation[db] = ids

    # Cleanup BiGG annotations (MetaNetX only saves universal)
    # @TODO : there is no guarantee, that the id with the specific compartment actually exists -> still do it? // keep the universal id?
    if 'bigg.metabolite' in new_metabolite.annotation:
        new_metabolite.annotation['bigg.metabolite'] = [_+'_'+compartment for _ in new_metabolite.annotation['bigg.metabolite']]
    # if no BiGG was found in MetaNetX, try reverse search in BiGG
    get_BiGG_metabs_annotation_via_dbid(new_metabolite, id, 'MetaNetX (MNX) Chemical', compartment)

    # add additional information from BiGG (if ID found)
    add_annotations_from_BiGG_metabs(new_metabolite)

    # step 5: change ID according to namespace
    # ----------------------------------------
    match_id_to_namespace(new_metabolite,namespace)

    # step 6: re-check existence of ID in model
    # -----------------------------------------
    # @TODO : check complete annotations?
    #        - or let those be covered by the duplicate check later on?
    if any(m.id == new_metabolite.id for m in model.metabolites):
        return model.metabolites.get_by_id(new_metabolite.id)

    return new_metabolite


# originally from SPECIMEN
# @TODO some issues left
# current version works on a couple of examples
def build_metabolite_kegg(kegg_id:str, model:cobra.Model,
                          namespace:Literal['BiGG']='BiGG',
                          compartment:str='c',
                          idprefix='refineGEMs') -> cobra.Metabolite | None:
    """Get or build a metabolite based on a KEGG compound ID.

    Args:
        kegg_id: The KEGG compound ID.
        model: The model the metabolite is meant for; it is searched for
            an existing metabolite, but not modified.
        namespace: Namespace the ID of a new metabolite is matched to.
        compartment: Compartment of the metabolite.
        idprefix: Prefix for the random ID of a new metabolite.

    Returns:
        A metabolite already in the model (same KEGG annotation and
        compartment, or same ID after matching the namespace), a newly
        built metabolite, or None if the KEGG entry could not be
        retrieved (a warning is issued in that case).
    """
    # ---------------------------------------
    # fast check if compound already in model
    # ---------------------------------------
    # @TODO what to do with multiple matches, currently the first one is taken
    existing = _find_annotated_metabolite(model, 'kegg.compound', kegg_id, compartment)
    if existing is not None:
        return existing

    # -----------------------------
    # if not, create new metabolite
    # -----------------------------

    # step 1: retrieve KEGG entry for compound
    # ----------------------------------------
    try:
        kegg_handle = REST.kegg_get(kegg_id)
        kegg_record = next(iter(Compound.parse(kegg_handle)), None)
    except (urllib.error.URLError, ConnectionResetError) as err:
        # HTTPError is a subclass of URLError, the warning names the actual type
        warnings.warn(f'{type(err).__name__}: {kegg_id}')
        return None
    if kegg_record is None:
        warnings.warn(f'No KEGG entry found: {kegg_id}')
        return None

    # step 2: create a random metabolite ID
    # -------------------------------------
    new_metabolite = cobra.Metabolite(create_random_id(model, 'meta',idprefix))

    # step 3: add features
    # --------------------
    # set name from KEGG
    if isinstance(kegg_record.name, list):
        # @TODO : better way to choose a name than to just take the first entry???
        new_metabolite.name = kegg_record.name[0]
    else:
        new_metabolite.name = kegg_record.name
    # set compartment
    new_metabolite.compartment = compartment
    # set formula
    new_metabolite.formula = kegg_record.formula

    # step 4: add notes
    # -----------------
    new_metabolite.notes['created with'] = 'refineGEMs GapFiller, KEGG.compound'

    # step 5: add annotations
    # -----------------------
    # add annotation from the KEGG entry
    # (as list, since the annotations are merged with lists later on)
    new_metabolite.annotation['kegg.compound'] = [kegg_id]
    db_idtf = {'CAS':'cas','PubChem':'pubchem.compound','ChEBI':'chebi'}
    for db,ids in kegg_record.dblinks:
        if db in db_idtf:
            new_metabolite.annotation[db_idtf[db]] = ids

    # add SBOTerm
    new_metabolite.annotation['sbo'] = 'SBO:0000247'

    # search for infos in MetaNetX
    mnx_info = load_a_table_from_database(
        f'SELECT * FROM mnx_chem_xref WHERE source = \'kegg.compound:{kegg_id}\'',
        query=True
    )
    mnx_ids = sorted(set(mnx_info['id'])) if len(mnx_info) > 0 else []
    # mapping is unambiguously
    if len(mnx_ids) == 1:
        mnx_id = mnx_ids[0]
        mnx_prop = load_a_table_from_database(
            f'SELECT * FROM mnx_chem_prop WHERE id = \'{mnx_id}\'',
            query=True
        )
        if len(mnx_prop) > 0:
            prop = mnx_prop.iloc[0]
            # add charge
            new_metabolite.charge = prop['charge']
            # add more annotations
            new_metabolite.annotation['metanetx.chemical'] = [prop['id']]
            if not pd.isnull(prop['InChIKey']):
                new_metabolite.annotation['inchikey'] = prop['InChIKey'].split('=', 1)[-1]

        # get more annotation from the mnx_chem_xref table
        metabolite_anno = load_a_table_from_database(
            f'SELECT * FROM mnx_chem_xref WHERE id = \'{mnx_id}\'',
            query=True)
        for db, ids in _collect_mnx_xrefs(metabolite_anno).items():
            known = _annotation_as_list(new_metabolite.annotation.get(db))
            new_metabolite.annotation[db] = sorted(set(ids).union(known))
    # @TODO : how to handle multiple matches, e.g. getting charge will be complicated

    # Cleanup BiGG annotations (MetaNetX only saves universal)
    # @TODO : there is no guarantee, that the id with the specific compartment actually exists -> still do it? // keep the universal id?
    if 'bigg.metabolite' in new_metabolite.annotation:
        new_metabolite.annotation['bigg.metabolite'] = [_+'_'+compartment for _ in new_metabolite.annotation['bigg.metabolite']]

    # if no BiGG ID, try reverse search
    get_BiGG_metabs_annotation_via_dbid(new_metabolite, kegg_id, 'KEGG Compound', compartment)

    # search for annotations in BiGG
    add_annotations_from_BiGG_metabs(new_metabolite)

    # step 6: change ID according to namespace
    # ----------------------------------------
    match_id_to_namespace(new_metabolite,namespace)

    # step 7: re-check existence of ID in model
    # -----------------------------------------
    # @TODO : check complete annotations?
    #        - or let those be covered by the duplicate check later on?
    if any(m.id == new_metabolite.id for m in model.metabolites):
        return model.metabolites.get_by_id(new_metabolite.id)

    return new_metabolite


@implement
def build_metatabolite_bigg(id:str, model:cobra.Model,
                            namespace:str,
                            compartment:str,
                            idprefix:str) -> cobra.Metabolite:
    pass

@implement
def build_metabolite_biocyc(id:str, model:cobra.Model,
                            namespace:str,
                            compartment:str,
                            idprefix:str) -> cobra.Metabolite:
    pass


# general functions
# -----------------

@template
def build_reaction_xxx():
    pass
cobra.Reaction(create_random_id(model,'reac',idprefix))\n", + " \n", + " # set name of reaction\n", + " name = ''\n", + " for desc in mnx_reac_refs['description']:\n", + " if '|' in desc: # entry has a name and an equation string\n", + " name = desc.split('|')[0]\n", + " break # one name is enough\n", + " new_reac.name = name \n", + " \n", + " # get metabolites\n", + " # ---------------\n", + " if reac_str:\n", + " pass\n", + " else:\n", + " mnx_reac_prop = load_a_table_from_database(\n", + " f'SELECT * FROM mnx_reac_prop WHERE id = \\'{id}\\'',\n", + " query=True)\n", + " reac_str = mnx_reac_prop['mnx_equation'][0]\n", + " if mnx_reac_prop['ec-code'][0]:\n", + " references['ec-code'] = mnx_reac_prop['ec-code'][0]\n", + " \n", + " reactants,products,comparts,rev = parse_reac_str(reac_str)\n", + " # ............................................................\n", + " # @TODO / Issue\n", + " # reac_prop / mnx equation only saves generic compartments 1 and 2 (MNXD1 / MNXD2)\n", + " # how to get the (correct) compartment?\n", + " # current solution 1 -> c, 2 -> e\n", + " comparts = ['c' if _ == 'MNXD1' else 'e' for _ in comparts ]\n", + " # ............................................................\n", + " metabolites = {}\n", + " meta_counter = 0\n", + " \n", + " # reconstruct reactants\n", + " for id,factor in reactants.items():\n", + " tmp_meta = build_metabolite_mnx(id,model,\n", + " namespace,\n", + " comparts[meta_counter],idprefix)\n", + " if tmp_meta:\n", + " metabolites[tmp_meta] = -1*factor\n", + " meta_counter += 1\n", + " else:\n", + " return None # not able to build reaction successfully\n", + " \n", + " # reconstruct products\n", + " for id,factor in products.items():\n", + " tmp_meta = build_metabolite_mnx(id,model,\n", + " namespace,\n", + " comparts[meta_counter],idprefix)\n", + " if tmp_meta:\n", + " metabolites[tmp_meta] = factor\n", + " meta_counter += 1\n", + " else:\n", + " return None # not able to build reaction successfully\n", + " \n", + " # 
add metabolites to reaction\n", + " # @TODO: does it need some kind of try and error, if - for some highly unlikely reason - two newly generated ids are the same\n", + " new_reac.add_metabolites(metabolites)\n", + " \n", + " # set reversibility\n", + " if rev:\n", + " new_reac.bounds = (1000.0,1000.0)\n", + " else:\n", + " new_reac.bounds = (0.0,1000.0)\n", + " \n", + " # get annotations\n", + " # ---------------\n", + " # get more annotation from the mnx_reac_xref table\n", + " for db in ['bigg.reaction','kegg.reaction','seed.reaction','metacyc.reaction','rhea']:\n", + " db_matches = mnx_reac_refs[mnx_reac_refs['source'].str.contains(db)]\n", + " if len(db_matches) > 0:\n", + " new_reac.annotation[db] = [m.split(':',1)[1] for m in db_matches['source'].tolist()]\n", + " # update reactions direction, if MetaCyc has better information\n", + " if db == 'metacyc.reaction' and len(db_matches[db_matches['source'].str.contains('-->')]):\n", + " new_reac.bounds = (0.0,1000.0)\n", + " # add additional references from the parameter\n", + " for db,idlist in references.items():\n", + " if not isinstance(idlist,list):\n", + " idlist = [idlist]\n", + " if db in new_reac.annotation.keys():\n", + " new_reac.annotation[db] = list(set(new_reac.annotation[db]) + set(idlist))\n", + " else:\n", + " new_reac.annotation[db] = idlist\n", + "\n", + " # add notes\n", + " # ---------\n", + " new_reac.notes['created with'] = 'refineGEMs GapFiller, MetaNetX'\n", + " \n", + " # match ID to namespace\n", + " # ---------------------\n", + " match_id_to_namespace(new_reac,namespace)\n", + " \n", + " return new_reac\n", + "\n", + "@implement\n", + "def build_reaction_kegg(model, id:str=None, reac_str:str=None,\n", + " references:dict={},\n", + " idprefix='refineGEMs',\n", + " namespace:Literal['BiGG']='BiGG'):\n", + " \n", + " # either reaction id or a reaction string needed for reconstruction\n", + " if not id and not reac_str:\n", + " return None # reconstruction not possible\n", + " \n", + " 
\n", + " if id:\n", + " # check, if reaction in model\n", + " \n", + " # retrieve information from KEGG\n", + " \n", + " pass\n", + " \n", + " if reac_str:\n", + " \n", + " pass\n", + " \n", + " else:\n", + " return None # reconstruction not possible\n", + " \n", + "\n", + "@implement\n", + "def build_reaction_bigg():\n", + " pass\n", + "\n", + "@implement\n", + "def build_reaction_biocyc():\n", + " pass\n", + "\n", + "@implement\n", + "def build_reaction():\n", + " pass\n", + "\n", + "# GapFiller functions\n", + "@implement\n", + "def add_reactions_from_table():\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [], "source": [ - "### Further ideas and Code snippets for the filling part" + "id = 'MNXR104661'\n", + "mnx_reac_refs = load_a_table_from_database(\n", + " f'SELECT * FROM mnx_reac_xref WHERE id = \\'{id}\\'',\n", + " query=True)\n", + "mnx_reac_refs = mnx_reac_refs[~(mnx_reac_refs['description']=='secondary/obsolete/fantasy identifier')]" ] }, { diff --git a/dev/growth_curves.ipynb b/dev/growth_curves.ipynb index d5a0bbb..02a8bce 100644 --- a/dev/growth_curves.ipynb +++ b/dev/growth_curves.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -35,9 +35,18 @@ "filepaths = [filepath,filepath1,filepath2]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "issue:\n", + "\n", + "reading in different formats can be very difficult - maybe set some guidelines on how to use this?" 
+ ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +106,7 @@ "\n", " return df,info\n", "\n", - "def read_in_experiments(filepaths, timeinterval=15, skiprows=51): # maybe quarks for read_csv\n", + "def read_in_experiments(filepaths, timeinterval=15, skiprows=51): # maybe kwargs for read_csv\n", "\n", " info = dict()\n", " data = 0\n", @@ -143,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -153,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -169,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -211,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -252,35 +261,47 @@ " " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### issues \n", + "\n", + "- RuntimeError / starting values leading to very different values \n", + "\n", + " - how to set good starting values\n", + " - how to deal with the RuntimeErrors (solution cannot be calculated)\n", + "\n", + "- bad/irregular data\n", + "\n", + " - perc parameter for cutting off the end -> good idea or not and how to choose it and when to set it\n", + " - the different fits (currently implemented) can lead to VERY different results -> option to calculate the mean to even it out? What if there are outliers? Which functions are more robust than others?" 
+ ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "RPMI_147\n", - "192.0428474846806\n" + "ename": "RuntimeError", + "evalue": "Optimal parameters not found: Number of calls to function has reached maxfev = 1000.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#fit = 'logistic'\u001b[39;00m\n\u001b[1;32m 3\u001b[0m fit \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgompertz4\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 4\u001b[0m cut, solu \u001b[38;5;241m=\u001b[39m \u001b[43mfit_growth_curve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdmean\u001b[49m\u001b[43m,\u001b[49m\u001b[43mdstd\u001b[49m\u001b[43m,\u001b[49m\u001b[43mfit\u001b[49m\u001b[43m,\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m80\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(dmean\u001b[38;5;241m.\u001b[39mcolumns[x])\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(np\u001b[38;5;241m.\u001b[39mlog(\u001b[38;5;241m2\u001b[39m)\u001b[38;5;241m/\u001b[39m(solu[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m2\u001b[39m]))\n", + "Cell \u001b[0;32mIn[7], line 25\u001b[0m, in \u001b[0;36mfit_growth_curve\u001b[0;34m(dmean, dstd, fit, col, perc)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mmatch\u001b[39;00m fit:\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mcase\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgompertz4\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m---> 25\u001b[0m solution \u001b[38;5;241m=\u001b[39m 
\u001b[43mcurve_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfourparam_gompertz_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mxdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mydata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msigma\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlm\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mp0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0.2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.005\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m#p0=np.asarray([0.2,0.005,0.05,30]) \u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mcase\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlogistic\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 30\u001b[0m solution \u001b[38;5;241m=\u001b[39m curve_fit(logistic_mod, xdata, ydata, \n\u001b[1;32m 31\u001b[0m sigma\u001b[38;5;241m=\u001b[39merrdata, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlm\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 32\u001b[0m p0\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39masarray([\u001b[38;5;241m0.2\u001b[39m,\u001b[38;5;241m0.005\u001b[39m,\u001b[38;5;241m0.05\u001b[39m])) \u001b[38;5;66;03m#p0=np.asarray([0.2,0.005,0.05,30]) \u001b[39;00m\n", + "File 
\u001b[0;32m~/miniconda3/envs/sprg/lib/python3.10/site-packages/scipy/optimize/_minpack_py.py:982\u001b[0m, in \u001b[0;36mcurve_fit\u001b[0;34m(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, full_output, nan_policy, **kwargs)\u001b[0m\n\u001b[1;32m 980\u001b[0m cost \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39msum(infodict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfvec\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 981\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ier \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m]:\n\u001b[0;32m--> 982\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOptimal parameters not found: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m errmsg)\n\u001b[1;32m 983\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 984\u001b[0m \u001b[38;5;66;03m# Rename maxfev (leastsq) to max_nfev (least_squares), if specified.\u001b[39;00m\n\u001b[1;32m 985\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmax_nfev\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs:\n", + "\u001b[0;31mRuntimeError\u001b[0m: Optimal parameters not found: Number of calls to function has reached maxfev = 1000." 
] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGdCAYAAADuR1K7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABT9klEQVR4nO3deXhTZd4//neSJulGF+gOLWUplKVS1lpkUelYlBmFUUTGRxAZ/LngqDiMggrjOM+D44g7I48zos/8ZhSGUZBRRLGAglSQAmKlbLKUrRt0X9Pk/v4RTkjSkzRps/X0/bquXLTn3OfkPoc0+eTePiohhAARERGRQqj9XQEiIiIiT2JwQ0RERIrC4IaIiIgUhcENERERKQqDGyIiIlIUBjdERESkKAxuiIiISFEY3BAREZGiBPm7Ap5gMplw4cIF9OjRAyqVyt/VISIiIhcIIVBbW4ukpCSo1Z5rb1FEcHPhwgUkJyf7uxpERETUAWfPnkWfPn08dj5FBDc9evQAYL45ERERfq4NERERuaKmpgbJycmWz3FPUURwI3VFRUREMLghIiLqYjw9pIQDiomIiEhRGNwQERGRojC4ISIiIkVhcENERESKwuCGiIiIFIXBDRERESkKgxsiIiJSFAY3REREpCgMboiIiEhRGNwQERGRojC4ISIiIkVhcENERESKwuCGiIiIFEURWcFJmVpaTVi1/QQA4OEbBkIXxFiciIjax08LIiIiUhS23FCXwZYcIiJyBT8diIiISFHYckPdHluEiIiUhe/iREREpCgMboiIiEhRGNwQERGRojC4ISIiIkVhcENERESKwuCGiIiIFIXBDRERESkKgxsiIiJSFC7iR4rDRfmIiLo3BjekeAx2iIi6F77LExERkaIwuCEiIiJFYXBDREREisLghoiIiBSFwQ0REREpCoMbIiIiUhQGN0RERKQoXOeGuhWueUNEpHx8ZyciIiJFYXBDREREisLghoiIiBSFwQ0REREpSoeCm1WrViE1NRXBwcHIysrC3r17nZZfv3490tPTERwcjIyMDGzevNlmf11dHRYuXIg+ffogJCQEQ4cOxerVqztSNSIiIurm3A5u1q1bh0WLFmH58uXYv38/RowYgdzcXJSVlcmW3717N2bPno358+fjwIEDmD59OqZPn47CwkJLmUWLFmHLli34xz/+gaKiIjz22GNYuHAhNm3a1PErIyIiom7J7eDm5ZdfxoIFCzBv3jxLC0toaCjWrFkjW/61117D1KlTsXjxYgwZMgTPP/88Ro0ahTfffNNSZvfu3Zg7dy6uv/56pKam4v7778eIESPabREi5WlpNeGVrcfwytZjaGk1+bs6RETUBbkV3LS0tKCgoAA5OTlXT6BWIycnB/n5+bLH5Ofn25QHgNzcXJvy48ePx6ZNm3D+/HkIIbB9+3YcO3YMN910k+w5m5ubUVNTY/MgIiIiAtwMbioqKmA0GhEfH2+zPT4+HiUlJbLHlJSUtFv+jTfewNChQ9GnTx/odDpMnToVq1atwqRJk2TPuWLFCkRGRloeycnJ7lwGKQBbeIiIyJGAmC31xhtv4Ntvv8WmTZtQUFCAlStX4uGHH8aXX34pW37JkiWorq62PM6ePevjGlOgMJoEXs877naQ09HjiIgo8LmVfiEmJgYajQalpaU220tLS5GQkCB7TEJCgtPyjY2NWLp0KTZs2IBp06YBAK655hocPHgQL730UpsuLQDQ6/XQ6/XuVJ2IiIi6CbdabnQ6HUaPHo28vDzLNpPJhLy8PGRnZ8sek52dbVMeALZu3WopbzAYYDAYoFbbVkWj0cBk4jdqIiIico/biTMXLVqEuXPnYsyYMRg3bhxeffVV1NfXY968eQCAOXPmoHfv3lixYgUA4NFHH8XkyZOxcuVKTJs2DWvXrsW+ffvw9ttvAwAiIiIwefJkLF68GCEhIejbty+++
uor/P3vf8fLL7/swUslck7qqtKoVUyqSUTUhbkd3MyaNQvl5eVYtmwZSkpKkJmZiS1btlgGDRcXF9u0wowfPx7vv/8+nnnmGSxduhRpaWnYuHEjhg8fbimzdu1aLFmyBHfffTcuX76Mvn374r//+7/xwAMPeOASqauTgg6JRq3yY22IiCjQuR3cAMDChQuxcOFC2X07duxos23mzJmYOXOmw/MlJCTg3Xff7UhViIiIiGyw3Z2IiIgUpUMtN0RKxrE3RERdG4MbCggtrSas2n4CRpMA4P64GuuAZMHE/t6oIhERdRH8SkpERESKwpYb8iv7FhtP4iwrIqLuiS03REREpCgMboiIiEhRGNwQERGRonDMDZET0pggAJwWTkTURfCdmoiIiBSFwQ0REREpCrulyOesu3q44B4REXkaW26IiIhIURjcEBERkaKwW4rIBUymSUTUdfAdmoiIiBSFLTdEbmALDhFR4OM7MxERESkKgxsiIiJSFHZLEXUQUzMQEQUmvhsTERGRorDlhnxGaukwmgQAQKNW+blGRESkRGy5ISIiIkVhcENERESKwuCGiIiIFIXBDRERESkKgxsiIiJSFM6WIuokpmQgIgosfBcmIiIiRWHLDZGHsAWHiCgw8N2XiIiIFIXBDRERESkKgxsiIiJSFAY3REREpCgcUEzkBVKSUAAcXExE5GN8xyUiIiJFYXBDREREisLghoiIiBSFY26IvIgL+xER+R6DG6IAxoHJRETuY3BDXid9QBtNwt9VISKiboDBDZEPsHuKiMh3+A5LREREisLghoiIiBSF3VJEPsZBwkRE3sXghrqUumYDappaUVrb6NHj6poNuFTXgqhQLcL1Wk9UlYiI/ITBDXUZhy9WY9uRcgDAR/vPI2dIHNITIjp93IcHivHOrtOW329Mj8XQxEjPVp6IiHyGwQ0FNKlFRatRWQIUSV5RGfpEhzhtaalrNjg9rq7ZgNfyjtvs33akHCk9QxEZovPchThh3U21YGJ//HXnSQDmLisiInIfgxsKWPYtKvYEgOoGg2xwIwVFjQaj0+MqGwyy565uMPgsuCEiIs9icEMBSa5FxZ4KQGRo28CmvaAIABpajKhrNiBa5ng4OG93x4HQRNRV8N2JApKjFhWJCsCUIXFtWm3qmg1Y/nFRu+f//HAp3tt9BmcuNeC524ZAZbXvxvRYDip2U0urCa9sPYZXth5DS6vJ5X1ERN7AlhvyCvtxJO5y1KIyc3RvmATw0A0DsOngRRhNAnXNBlQ1GBAVqkVNU6vscblD46FWq/BZYYnN9ryiMiy5JR33TWjE5boWRPpwtpS0arFEo1a5tC+Q2KfWCNR6ElH3wuCGAlK4XovnbhuC339cBCkj1Y3psYiPCIFGrUJ8jxAAtjOhAGDCwF6y50uMCpYNfASA4ooGhOu1CNF2vz8HdjURkRJ1v3dz6jJuH5mC4kuOW1RKaxrbzIT65sQlPHHTILz8xTGboChcr5VtVVABSIkJxcFz1QBg0wqklAHFgRzAOKtbINebiAIbgxsKaI5aVD48UIy1351rs10AGJ4YgfsmpLYJiuRag6YMiXPYCpQzJM7j1+NP7ibvtA8uPMEb5yQissfghrocZzOppJaY8HPyQZF1a1CQRgWjAEprGx2uh+PuSshEROR/DG6oy3E2k8q6JcaRcL0WpyrqbVYtHpUS1aacNB6H3ONK64zcgOnOtuqwG4uIJAxuqMtxNJPq5xkJ0Gk17ba2yLXS7C+uki0brAucD0i5D39Xxqu4M1tN7jhngYg0S4qIKJAwuKEuR27sTHp8OD75wTzNu728U45afjKTI3HwbLXNttl/3etyDitfcSfYkBtEbT32piPT9Lsytu4QdQ8MbqhLsh87s77gvM1+KX8UgDazn+RaflQAMpOjkBYX3uZcXxaVQaNSITEqmIv7dTEcwEzUPTG4oS5Lmkl1rrLtuBgB4ODZKpuWGGn2U7heixvTYy1dU9arHVc5a
NX5/HApAPO08pSeoahpalXUYGNvLBrY0XMGQl3YwkPUtTG4oS7B2fozUQ7G4Nh3MVnPfhqaGImUnqGobWq1We3Y0bkk1mN12uv+Is9j0EFErmBwQwGvvfVn5FpiRqZEtRkkbD/7KVxvDpSsZ1fZn6s9UvcXu6v8p6MpIBgoESkXgxsKOHXNBku3j6vrz9i3xLy/p7hNcGO/GrEj0rlKa5rb5KKyJwAcL6tDWlw4AxwfCoRB0QyOiAIXgxtyytdv4NatNO6uP2PdEiPXAuPKGjg254rX4doB0e1mGf/mxCV8c+ISbkyPxdDESJfOT0RE3sPghgKGq+vPuNsCU91gQGQHc0XdPjIFRRdqZVM92Nt2pBwpPUMVk5Oqu5AbbOxsjZ8FE/vjrztPMhM6UQBjcEMuc6UVp6PjHwA4nKlkv/6Muy0wne0uio8IadMKlBYXhuNl9W3KnqqoR69wZc2kIiLqajrUx7Bq1SqkpqYiODgYWVlZ2Lt3r9Py69evR3p6OoKDg5GRkYHNmze3KVNUVIRbb70VkZGRCAsLw9ixY1FcXNyR6pEHtLSa8MrWY3g977jDVWilMq9sPYaWVlOnn1NuppK0/sy94/tiRmYS7h3fF8OSfN/1MzQx0qYOkwbFypb76lgFPtp/HlNWfo0fLzhvWSJlkFp3Xtl6DHVNrTZ/E9Z/I/b7vM3Tf59EXYnbwc26deuwaNEiLF++HPv378eIESOQm5uLsrIy2fK7d+/G7NmzMX/+fBw4cADTp0/H9OnTUVhYaCnz008/YcKECUhPT8eOHTtw6NAhPPvsswgODu74lQWo7v6GU9dswLnKBtQ1G9psA8zryEis158J12vROzrUr4N2resgrZLsTF5RGeqaDbLXTGTN/n3B2ftEd38PIXKF291SL7/8MhYsWIB58+YBAFavXo1PP/0Ua9aswVNPPdWm/GuvvYapU6di8eLFAIDnn38eW7duxZtvvonVq1cDAJ5++mnccsstePHFFy3HDRgwoEMXRIHLfkr3qJQoBGlUeGfXacu2G9Njce/4vm3WnwlEt49Mwbc/VTqcUSUA7DpegRPlV7uv7Kexu8J69lggc1bPrnINnuJs0UDrmV723bveWMCQqDtyq+WmpaUFBQUFyMnJuXoCtRo5OTnIz8+XPSY/P9+mPADk5uZayptMJnz66acYNGgQcnNzERcXh6ysLGzcuNHNSyFfkd6AnXVZ2XM0WHjlF8dstkll+kSHujyuxp+Sopy3LloHNoA5lcMP5ytdPv/hi9V4b/eZgO/qclbPrnINXZE7LT5E3YlbwU1FRQWMRiPi4+NttsfHx6OkRP7ba0lJidPyZWVlqKurwwsvvICpU6fiiy++wIwZM/DLX/4SX331lew5m5ubUVNTY/OgwOYoWaWci1VNXqyJZ0lTzt0x+697XfqAd7TGjz+7t+qaDThb2WDTAuOsnnL7viwqw/HSWst1yJ3T/vlKalxr8XG3fFcQyAELu88oUPl9tpTJZH7R33bbbXj88ccBAJmZmdi9ezdWr16NyZMntzlmxYoVeO6553xaz+7CUXbpzpJLVunI54dLYRSB2RUlx3rKeZBGhZKaZuw8XuH0GFdWNpabPSYAVDcY/DL2yH4NIin1xMXqtsGoVE9H/4tSrq7k6GCcrWxqc07759uw/zxeuD0DM0b2QV2zAZfqWtqk4nBUPlD5eiFCudmOcrMbpanu1uWIuhq3gpuYmBhoNBqUlpbabC8tLUVCQoLsMQkJCU7Lx8TEICgoCEOHDrUpM2TIEOzatUv2nEuWLMGiRYssv9fU1CA5OdmdSyEf60hag640PsN6ynlEiLbd4MbZysbSh3erse23XRWASDcCRftzajUqGIzCJjCwDhac1UWuBeZCdSMOX6iVfc6GFiMiQpy/xUiBjfU5NSoVIkKCbJ5PAFj6USEu1bfYjNEalRKFWzMT27QQSeWzB/Rqc+1ajQpGAcsK2O1du9w+6zKOcp4pkbMs6
46+GHHcEPmDW8GNTqfD6NGjkZeXh+nTpwMwt7zk5eVh4cKFssdkZ2cjLy8Pjz32mGXb1q1bkZ2dbTnn2LFjcfToUZvjjh07hr59+8qeU6/XQ6/Xu1N1CgBSC4d1tm4VgGFJESi8YNu16GgV4q5Amkn1+4+LHLZcALYrG0uZxt/dfcrmw9ua9ewxd3x4oFj2nDlD4trsc7UuEkeBDSDfOuMK6Th7RiHw4hbb94n9xVWYsvJrDE9qm7zUKATW7DyN9/JPy57vo/3nbX63XmFa7r7Yrz7dXs4zIvIft7ulFi1ahLlz52LMmDEYN24cXn31VdTX11tmT82ZMwe9e/fGihUrAACPPvooJk+ejJUrV2LatGlYu3Yt9u3bh7fffttyzsWLF2PWrFmYNGkSbrjhBmzZsgX/+c9/sGPHDs9cJQWMcL0WEwbGIjM5yiYPlH1w4+oqxIHq9pEpKL7UiMt1LYgM1aL4coPDVitXW7PeXzAO356sbLfL0Lo1QaNW4TWr2TfWviwqw5dFtks4uFoXd0iBzfDeESg83/HxcWoAjkZu2L9+JI4CGznbjpQjVKuBUQh8llcqu0+n1aC0thGlNY0Oc551tjWoI1yZnWW9z1vdz0SBwu3gZtasWSgvL8eyZctQUlKCzMxMbNmyxTJouLi4GGr11T7a8ePH4/3338czzzyDpUuXIi0tDRs3bsTw4cMtZWbMmIHVq1djxYoV+M1vfoPBgwfjww8/xIQJEzxwieQL7r5ZejIPVKAK12sRojX/iUmtVj+V17fbZeVIU4upzYej/RRr+9aEtLiwzl+Ih/x4vgZP3DSozQw5V40f2Au7Tlxyufwomczw7fnkB8eJUqV99i0+EgHgre0n8e/9V1N1OGsJ68q5yDhlnQJdhwYUL1y40GE3lFxry8yZMzFz5kyn57zvvvtw3333daQ6pACeyAMV6ML1WgyKD+9QcKMCUHixxubDMT0+HEdK6wCYP3DH9o3Gd2dsp5nLpYjwpglOAhABYHhiBOZPSEXBmUqblBrtteq8dHsGzlY1QRekdql1KXdoPPr0DHE7uOks68AGcN4Stu1IOXqF6ZAUFQqg7bgo67FBgG1A4eq6Qd5qKSIKdH6fLUUk8UQeqEDn6ngce9cN7NWmxUMKbCT2gY235AyJw8RBvWyuITM5EpnJUYgM0eFnw+JkM6lLXY3h5652TVoHszPH9HZ43MjUaJw9eFF23JacxKjgDt9rX1pfcF527JO1j/afx4SBvRATrkeUXRen/Qwz4GrgE0gtRa7kpSPyJAY3RD5mPR4nSKPC8bI6mwHW1w3shdhwPYI0KrQaBSJDtahpau30804Y2AtRIVqnXS9yx9jXJTJE12ZMkXVQKu2zb52x72q0D2bdOc563FZqTBhe/uKYJYC5MT3Wcl77e91qFAjSqGASwEM3DMBfvz7pUsZ3b5Ib+2TPWXecNMMsMSq43bFdvcJ0Nq1BwNUxWvazyCobDCipaURKz8Dp2iRyFYMb8gk2j9sK12txqqK+TTqKP8+8RjblRGfHNKgADLwy7dx6fJMUTMl9eH7QzgBm6zFFcvvkWmfa485x0riteeP7oaqhRTbQclRPjVqF+B4hshnfgasz00xCeGWQtac5mmFmb32BebyQ1OIjd33WY4qk9YJmjU3pUL3su8/kUk+wVYe8gcENeZ0r02q7G7mVew84GR8idbHIddu0x34KudS1Y52/y34sS86QOGT0jsa3JzvX1dXRrkZ3j3MWaLXHfhFGqVVHCjLl9r2/p1h2JpwKwBiZsU8SuXFR/tJeaxFgHie15MMfcKy0FuF6LRZM7O90ULv1uCHrFkm57jMib2JwQ15V12xoMxV525FypPQMVeSgYVfJpaNob22f20emoOhCrWw3yv0T++GvO0/ZdM3YBzDWLTDWs9WA7jGg2xnrYEpq1XG2T24mnPW97mG3CKF1q5z9vkBngnm17HC9ts0XFfsVpp1pb1XuzrbqsAWIrDG4I
a9ylFOqusHQ7T5Arcmlo3BlbR+5bpScIXH4zZRBMArRpmvGOoBpT3cY0O0trgSL1vt6heksXUTWpg6NR4+QIJuxQakxYR2ePj9cZoFMd6kBRIVqZb+ouLM4o/Wq3N35b598g8ENeZWjnFIdSSGgJJ1Z28dRK0tnumbI85wFiwaj/DimEJ0G8RFXXwMatQrzxvfD4Qs1+Kyw7UDwe7NTHS5U+NLtGSgqre1UcKMG8PyMYThZXo9jpY5Xo3aVtCo3V3Mmb+M7IXmN1Bf/xE2DHM5m6c460xXEVpauLcpBy52joD8pKli2/D3jU9DU2tqmq9Iyfb6qqU0Q7WhNoZ9nJECv1VjGFFU1GPBoThp2Hi9vNw2Hu74sKsOhc5UYk9qrzT5nCUWtu57sE3wSWWNwQ15hv1KuNKVYbjZLd8YgpXuSm7XmLG+Ys5Y+ua5K61ZAuSB65pjeNuv/3Jgei9SYcJsxRVJdntnwY6euNS0uTHYxyTtWf4s/2WV6d5TYVZqWnhDRtVctJ99hcEMeJzcT6JsTlzB3fF9+kBNdITcQ2Vn6Emctfe21AjpaU8jR9HnJ6Yp62QUQB8aG4UT51YBFGjAtzSKzXhdJo1Y5XClbLtO7xH5xw4/2n7fMtHS0NALX5yEJgxvyuCoHM4Gqr8y4IPI2V9MT+Jv9QGRXyjtr3fH09PnUmDCoAJsARwVgQloMJqTFtBkw7WhNIUfLGMhlepc4SuxqncDUOpix7j7r7Po81PUxuCGPc3c8AZGrpGnCACxjL+xZd4lyfZXOSYgIwR9nDMMzG36EQNvuM1eDKUfLGDjL9O6IXAJT+2noAuZWoUmDYpEYya6s7ogLAQS4llYTXtl6DK9sPYaWVnffBvxDGh8gaW88AZEr6poNOFvZgJKaRuiC1Hj8Z4Pw+M8G2axnItclmldUhrpm+SUJOkOjVuE3U9Lwmylplm4SqV7W2yTO9gWymaNTcN+EVNw+qje+fGIShiV1bAFOaWyQRA3gyZvT4a07YRQCL31+tMu8b5JnseUmAPh78Sn7GQie4O54AiJnrMdeOOpy0KhVuDE9Du/tPmOz3VGXqBRsSK9/vj4dk7q8XO0+c8T6feHRnDSk9AxDeLDG0jJkTUrQ2pFVuSUNLUbLQOQ/bSlCZYMBj1153pZWU5ttSuXvzxh/YHDjJ86mO3Z1UiK+qFAt+kSHdvoNkbo3+8Xj7LscpCAFAC5WN7YZI6JRqfD4TYPwYcF52QDG+niJ9G3ferVca/bl7X+XO6f9PusPHGfl5c5vX09nXOnK8yVpnJE082nm6BScLK9vMxBZStDqaFVuV3xWWILPCkvarKQ8YWAv1DW3WtJDSAHzjJF9ZIMg68AAgMPp6EoNGrpicMTghjzKfgo4F+sia/Yf4tZvmo7IrXJtFAKnKxrajKdIjLQdI6IG8D+/HI6UnmFOgwen9RQCMBoBk+nqv84erpQxmaBqNiDu+BmohAmq8EpAozI/l/Sc9g+Z7apWI/ocPA+TSUAlBDQq4NYRidAKFQzbT6LvwfPmcs3HoVWr8PiV8xg+P4L+318AIHDL8ARopW4yIYDNhXhcCBhajfj8uc9gMgEDIaAGEHQxFoOPlMJkMkElxYlCQKMSCCqOxdAjZTAJ2wBSrVIh6Ewshh4tg+lKcKlWq6AujgM0aqhbjRh3pAzC6rgmgxFNBiMaDsfhxuIqDKpsQFHJ1UUEVQ4a2VSyc7sAlV2dcND8Tzpg6RY7eHALSqP0aKhqhh7A6jUCKdEhuH10H2ScrYZJCHy+wYjGZiNS1OaxQo0HYzHo9GU0tBhR930sIoK1+OJwCRpajLgxPQ49w/RX76uVVqPAV8fKoAIwKS0WQRq7zjn7+rqwr7XVhJ3Hze+9E63PKQTURoGRJ8z71D/EohXAriu/Z/fvhfyT5sS5EwbK1AWwHG9Sa4Abfu+4bgGEwU0A8kY3kS84Gu8Q6DNWqPMsLRwqgZbaeoRUXoKqpRlqgwE6owG/ykyA1
mgA8r8BmpuBlhaguRm6lhY83twMNDejtegLjPrxPFSGFmgMLQgytmJMQiiaGpowcu8ZaExGaExGBAkjgkwmZB7taf6Ua221ecw2tGJSRQ2EoRUJYUHQbTa1KWPzaC8o8RItgLs9cI6ZTvb90sm+21w4989ltt/i5JjcDuwLaue4m5zs84m1QI6T3dPsfnd2fyRBAKZ0vEYOz3mDk33X22273sHPzo5v1epgevX3btfNHxjcBBCpq0rSlQYdAh1LBkl+YDJB29SAkKYGqI40I/7IUQTV1ULfUAfdlYe+uRG6vGBMOn4RmqZGaFuaENTUBG1zE4JamhCiE/iviioENZl/1zY3QdvSBLS0QAfggQ5UKwjAZJntYQDukTvgsPx51ACSO/D8HqFWX31oNLa/2z2EWo06g4BQqdAjRAuVSgWorvzNSz9bP+S2e7GsCSpcqG4CVCokRYdCrW5b1gTg7OVGQKVCcq9QqNXqq8cLgTOXzH/7fXuFQn2lfJttQuD05UZAAK0mE85VXv0yJK6cKzk6BAPjwgEAp68cn9IrDCfL61B82fb9RTqmLcfvp46OcdR+4rh82+3xEXpo1SoYoYJaZT7ngNhwnL3cgOZWE7RBagSpzd08KhXQPyYcJyvq2pxLpQL6xYTjVEUdTDL7+seE41hZLZpbTdBp1AjSqC1XrdOq8YPVqtTxEXqE6oJgMJqQltAD56902aXFm+/x8dI64Eo9fyqvs7SqiSAtuspa0AxuyGM6mgyS3KcyGhFcV4PgmkqE1ddAU1uEoflHoKupQnBNFUJqqxBcW42QuhqEaAyYU3rpSuBSD11jvU0z/a+cPM9oJ/tinewDAOh05ode3/ZfmW0mrRZHK1tg1GoxpG8sNMFXymm1MGqC8PWpSjQYgfHpCYiOCAWCgmQfBpUam49UQGiCMG1UMrR6ncOy0Ghsg5B2AhKn5Rx+qMoztJrwtwAex9DaasL6durX2mrCRw7KqAH0kym/0a58a6sJH1/ZNrpvFO5b812b5/lgwbUYNKAXWqzKLpjYH5/uPIkfzle16Qrv7EBkX8tMjkRmchQiQ3SW67IfHyaNz7TfJ63p1M8uwaq04GGjodU8GN/JH/OEgb0QFxGMx3LSkBARgs+s7vFnV55Pep7HusgCiQxuAoz1YNyuljm3M8kgCdC0NEN15gzijxxCyKVyhFWWI6yyAqGVlxBeVYGQ1lrMKz6P4JoqBNe1zQ3krGm/bQYfwKTWQBUZgRpdKFpCwtAcGg5DqPnf1pBQDBmYCE14GBASAoSGolUfgrziWrTqgnHT2H7Q9giHQReMdT9WoFUfjF/dMAS6iHBz+eBg8we+G9QAhjjYZ2w14eCVN9yf3TAQcBIIiFYTTl4pK9op60/tDSJWovauWW7RQI1KhdSYUIfHZPSOwu9vHYYzFQ3YcawM4XqtZQXmgjOVlkHDKgALJvVDVKgOf95y1LJuz+ysZHyw56zDVhpfOHi2GgfPVmNUShRuzUxsk45Cq1HBKGBZuFDad7ysznJ99qQFD6ub2l8GYdcJ85ibDfvP448zhlkWR5Sez/o+dpUFEhncBBAlDMbtTDJIJVMZWxF+qQxRl0oQdGEvxuz6HuGlF9CjvAQ9yi+iR/lFhNZUAXDekhJl93tTWA80RUShR2IczqpD0BgeicaIKDT1iEJjj0gYIiIxeWwaPjldh6bgUDSHhqPlykMEB2PBpAFYY/VN0NHsIEB+bIQWwH9N6tg9oa7P0wGa/aKB0oBwaeC4/fNZ/5zSMwwTB5nbE1taTQjXazF5UBxenz0S5yubkBoTajnPjJG9cbqiwbJtWFKEzUKF1w3sZfnA96X9xVWYsvJrh/vtFytsj7TgoasEgKetconJPV9XWSCRwU2A6KqDceWWue+uySA1Lc2IulCMniXF0O6rw5Rv9iPy/BlEXyhGeEUJ1FaDUyc6OIfQ6VAbFYP66BjUR/VCfc8YNETHoqFnDMaPH4ZNJa2o7xGNxh5Ra
OoRAaEJsjRXb7DLkCwNSp8wsT/OOZiu2t6UZyJvsw9YpKnhUlbyjnSB2J/T/hyJkSE2H8x3Z6XixvR4m4DnYnUjTlc04MDZSpuWnvsmpKK+uRUGo8BDNwzAX78+KZuVXcmrJjmarRhIGNwEiK44GNd+mfsJA3shJlyPKIVn/g6puoy44uPQ/rAZU7Z9aw5gzp9Bj4oSm7Es19gdZwzSoi4mHuFp/XFMF4Xq2ETUxiaiJjYRtbEJaIhLxJybM/HOrlOy/e1ZE/vjgkxffHu6Y/cHeYa/ElFKX5B8mQXcPuCRfh/dNxqX6podBlu3ZvZuE9wIAPOv64c135xSZJDTXldhIGBwEyC62mBcuZYm62ZcaTBbV6Y2tEB9YD+Gf7YVPU8eRcyZ44g5fRxhVVev0z6AaQ4NR1Xvvug5YhgKdL1QmZiCyt59UR3fG/XRsdAEabBgYn9scTBg0N1BqZ3FwIccWV/Q/qrQHdXVFoVzFmz1czBO6NeT+uHXk/rhdEUDQnVqNLSYLP/2jg7G/+0+bTOWxRPG9o3Gd2cqZfepAGQP6IndP122bBuVEoUb0uPw8hfHLC1T7QVjKth2FQYqBjcBoqsMxpUGszUajE7LbTtSjpSeoV1mzI3K2IqYUyeQcLwQCccLEX+sELGnjiDIYMDP7MoKlQrVickIH5WJgh5JuNQ7FVVJKahMSkVjZDQ0GjUWTOyP/A60skjkVpWVW9lW+mCwX6mWQQt1xsXqRjxjNfaiq4yz8Ib2/pYcLRwp3Se5+yWNCZowMBaZyVE4eLaqU0HOqJQo/HnmNdh08CJ6hAS1+eIp5fdLT4jAoPgeNmMi543vh6qGFkvL1M7j5bLpMKTnefWuTM6WItfVNRsQEazFzNG9bZYfDyTW+X1cUd1gCLhrkGgb65FUdBDJPxYg+I8/4uE9e6BtbmpTTkRHozh1CMr7DUJ56iBUpKbhcsoAmELDsGBif+xuJ4Dx1dL3DGbIk05V1Lf5cPP0OAt/dXl5g7vjhORW6v7TliJUNRjw0A0D8P6eYpt0FEEaFUwCbfaZBGye7/Gf9bCcS66M9MUoMkRn88XIumXK+lqsn0/6TPJlV2FnMLgJAPZBw43psegdHVj9mfb5fVwRKdPV5i/6uhr0PbQXuv8cx+zP8xB3oghqk23rU3NoOErThqEkbThKBw1H+eAM3DlzEj6SGwPj5vNbv5k5ywck96bXXhkiT3PU1eKpcRbe7PLyB12QGs/+fFinzmGdnDRcr0WI1vbjWaNWtdmnUatkgw1HZVx972ivLl1B16uxwsgFDTuOlKNfTFjADMqtazbgp/I62X25Q+MRHhyE1JgwS78tYA7Q/Fp/kwnxxwrRb98upO7biaQj31uCmYQrRarjknAhYwz6zZiKdfq+KO/dz2ZtFn+MgbHHQIb8ob2uls5gl1dbzqa422vv/cDd9wxXvlB1RQxu/ExulpQJQFWDISCCG/u1d+xFhAQhKSrU0m8rNV/6o+7BtVUY8N3X0K85gP/vsy0IrbYdWHe5Tz/0mJqDrTGDcXbYaNTGJlqmUV/eeRLo4PgYIiXyxJRsOb7o8qKOcyfQCmQMbvxMbpaUGkBUAHTpyM2Isre+4LxlsUF/NF+GXSrDwPw8pH2zFSnf77G0zmgBNIeGoTgzG6dGT8Dp0RNQn5SMBRP742gnBvp6grOBwUSBxBtTstvr8mJrJXkCgxs/C9dr8dxtQ/D7j4sszb/PzxiGspoWf1cNVTKtSnJ8vdhgcE0lgt5ejTv/+h6Sftxvs7ZMeb9BiLxjBjbGZ+Bc+giYgq4Gie6OkyEiz/NmlxeRhMGNmzz5bVta3ffWzAG4b0Kjpfk3ISIEf9pSZMnt4S9ajWvjTXyx2GBQcxMG7NmOIXmb0G/fTmiMreh9Zd+F9BE4ft1NOH5dDmr7pGLBxP44v/MkTD5unbFf5ZffPv2D3/wDn7e6vIgkDG78xH5135whcRiWFImEiBCbmQTW+/xVP4mjnCveX
Gyw16ljGP7pOgzdtskmWaQxMxO7xk1F0cSpqItNsGz3duuMdQCjlIF3RP7gj1WIyTGldZMzuPEDR3mk+vYKRUmN7UwCaV+f6BC8nnfcJzmAHI21eX/BOHx7shK6ILVXFxsMam7CkJ1bELLsY8z5Nt+yvSYuCYdv/AWOTrkVt//XTdjv57EzEuZl8j62xigL/z/J2xjc+IGjPFJVDQaclplJIGBeEM9XM5Dk6gcATS3mlgpvZf4OraxA5ifvI/OTtQitNi8RblJrcCL7Rhy6+U6cHnUdoFabp2gHAL5BExEFJgY3XmTdzLdgYn/89UpLQ0Rw29uugnmGVKrMTAIVfLsgnk5mrI1915MnM3/3OnUMIz98D0O2b0KQwRxY1cQlQf/wg/i/QdejJjrWI89DRK5j8E5dGYMbP7DPIyXl/ZD6n61nEljv84UPDxS3yXALeCfPVeyJwwh+43eY85+PLdsupI9AwS/vxU8Tb8Kvrx+E+nbWn/FVegNvdD3xw4OIAomSUmIwuPETqWuntqkVD90wAJsOXrTss8/tsengRZ+MLXGUYmHm6N5IivJcOoj444XI/udfMPDbbQDMiSiPX/czfPfLebg4dCQA+KXryVmySiIiJVNaSgwGN34UrjePV5FrEbHO7SFl4o7ycjJNR2NtWo2eCax6nj2JiWtWIi0/DwBgUqthnHkn3s+5B+XJAzzyHERE5B4lpsRgcBPg7JNqSqsBe4PcaslA58f7qEpLcePry5GxeT3UJiNMajWKrv85vrv7QdzxqxymPiAi8iMlpsRgcBPA5LqJ8orKvNYfar9aMtC5BJhBTY0Yu+E9hH74DkbUmRNvnrj2Rnw9/7e4nNzfo11PrmbdJiIiW97OAu8PDG4CmKMp42cqGrw22Ov2kSkovtTY6QSY/ffswJS/PI/I0vMAgJLBGdjx69/hXMZYj9TTlbEwHR1szHE2RNSdKDElBoObACbXTaQC0NfL0XRnEmD2KL+IG1b/DwZ9sxUAUBuTAO1Lf8YHCWNghP/Xp5FLWklE1N0pLSUGg5sAYN9SIHWryHUTTRkSF5jLlRuNGPXvd5H999eha2qASa1BwYy52DNnIe69KQPYeRIawCfTtu2xJYaIqH1KSonB4CbA2XcTeXO2VEdFnj+DkJx7MflKqoTzQ0di68Lfo6L/4IBZTdgex+gQESkXg5suoDPdRHLsE6QBVxdvcisLuRAY8elaXP/XF6FpbkRzaBh2LHgSP+TeAaiZZ4mIqKtQWgs3gxuSzUKenhDh9JjwilLkvvI0+hXsAgC0TpqM/3/BctTG98Zv/ND1REREJOHX627MaBL4n82H8bRMFvK6ZvkF/QAgdd9OzH3wVvQr2AWDTo8dDy5F02dfoDa+t7erTERE1C623HSQ0STwet5xr+QccqSu2YCaplaPrnNT5WC6uWwWcqMR2e+9iqwPVkMlBEoGDsXmJ19Cdd8BGGPXDWXfxCk3YJqIiMgbGNx0EYcvVlsSbXoy70eUg+nm9qsSh1ReQvAvHsS12835oA78/FfYcf9TMOp00HS6Fp6ltL5jIiJyD7uluoC6ZoMlsAGu5v24WO3G4F8HpAzlErks5ImHD+C/HpqOoO3b0BIcik+e/DPyFi6DURd4M7eIiIjYchOA7Kcpy61U7Mm8H3IZyqUs5OnbP8HUl5ciyNACU/oQfPDblSjv07/Tz+mOjq40TERE3RODmy5AbqViT+f9aJOhXAhk/3MVrvvHmwCAE+NzkPDxelw+UCab5JJdQUREFCgY3PiJOwORpa4jqWvK23k/NC0tyF25FEO3/wcAsPeO+fjm17/FgvBwAGVeeU5/YmBGRKQsDG46yVezpqy7jrya9+PyZdz+1L3oXVgAoyYIXy5chh9uvjMgMngzCCEiIlcwuOmEumYDqhoMiPJRWgQp78e/vjvnlWAqtLICIVPvQHjhD2gK64FNz7yO4pHZHjs/ERGRLzC46SDrqdkAkDMkzo+16bwe5Rcxc
8k8aM6dRn3PWKz/n3dQkcpWEiIi6no4FbwD7KdmA8CXRWU4dK7STzXqnKgLZ3DXE3ej57nTMCWnYN3KfzKwISKiLovBTQfITc0GgDtWf4t13xX7uDadE3P6GO564r8QWXYBl/ukojFvB6p79/V3tYiIiDqM3VIdoNM4Hlz71Ic/4FhpLZ6cOsSHNeqYnsU/4c4n5yK0uhLlqYPw0Z/exd3JycDpky4d74lxP9IgYSlTuVFmmjkREZE7GNy4aX1BMdZ+d87hfgH5fE2BJuLiWcxcMg+h1ZUoGTgU/16xBobIaH9XywZnRxERUUewW8oNF6sb8YxdBm17Ksjnawoo58/jjifvRY9LZahIGYgP//sdNPWI8netiIiIPIItN244VVEPZ50mcnmZvKFT2cHLy6G9ORe6knOoTEzB+hVr0BhALTa+zLJORETKxODGDf1iwqAC2gQ4M0f3hknAkpfJmzqVHby6GsjNhaqoCLUxCVi/Yg3qe3XtKexERET2+PXYDYmRIfjjjGGwHk58Y3os4iNC0Cc61JKXSVq1+PW84x4dINup7OAtLcCMGcCBAxCxsfjwT++iJqGPx+pGREQUKNhy46aZo1Nwsrwel+taEBmq9XoXlDW5gcouZQcXAnjoIWD7diA8HK2bt6CyMlw2AaYz9jObiIiIAhGDmw4I12sRojXfOusxIi2tJtQ1G3CprgVRXgh85AYqu5Qd/KWXgHfeAdRqYN06iMxMgMEJEREpFIMbD1pfUIx3dp22/H5jeiyGJkZ67Pwdyg6+YQPw5JPmn199FbjlFsCNZJVERERdTYfG3KxatQqpqakIDg5GVlYW9u7d67T8+vXrkZ6ejuDgYGRkZGDz5s0Oyz7wwANQqVR49dVXO1I1v5GbJr7tSDnqmj275s3QxEjcO74vbh/VGzt+d73zwcQFBcDdd5u7pR5+GHjkEY/WxZOkLq/HfzaIM6WIiKhT3P4UWbduHRYtWoTly5dj//79GDFiBHJzc1FWViZbfvfu3Zg9ezbmz5+PAwcOYPr06Zg+fToKCwvblN2wYQO+/fZbJCUluX8lPqZRq/CbKWmWD2NH08SrO7mgn/Sh/5spadCozUOZw/Va9IkORUKEkxabCxeAX/wCaGwEpk41t9pYqWs24FxlQ4eCLwYiREQUyNz+ZHr55ZexYMECzJs3D0OHDsXq1asRGhqKNWvWyJZ/7bXXMHXqVCxevBhDhgzB888/j1GjRuHNN9+0KXf+/Hk88sgj+Oc//wmtNsAXwZMhTRO3F+mhBf3kghyHWluBu+4CLl4Ehg0D1q0Dgq72QErdZxsPXsB7u89g1wnPtzARERH5i1vBTUtLCwoKCpCTk3P1BGo1cnJykJ+fL3tMfn6+TXkAyM3NtSlvMplwzz33YPHixRg2bFi79WhubkZNTY3Nw98cTRP35Wwqi6efBnbuBHr0MI+5iYiw7JLrPjt4thrv7T6DHy9U+7qmREREHufWgOKKigoYjUbEx8fbbI+Pj8eRI0dkjykpKZEtX1JSYvn9T3/6E4KCgvCb3/zGpXqsWLECzz33nDtV9wnraeJBGhUMRoG6ZgMiQ3S+q8SmTcCLL5p/fvddIC3NZrezVZbzispQWuvCmjlEREQBzO+zpQoKCvDaa69h//79UKna6W65YsmSJVi0aJHl95qaGiQnJ3urim4J12txqqLeZrG9nCE+WgX41Clg7lzzz48+Ctx+e5sijlZZxpVtxRUN3qwhERGR17nVLRUTEwONRoPS0lKb7aWlpUhISJA9JiEhwWn5nTt3oqysDCkpKQgKCkJQUBDOnDmDJ554AqmpqbLn1Ov1iIiIsHkECvtVhAHvtYi0tJrwytZjeGXrMbTUNwJ33glUVQHXXnu19caO1H0mRwUgxW7NHPuB00RERIHOrU8rnU6H0aNHIy8vz7LNZDIhLy8P2dnZssdkZ2fblAeArVu3Wsrfc889OHToEA4ePGh5JCUlYfHixfj888/dvR6/q
5SZHeXJFhFHM5U0v30C2LcP6NnTPIBY57grbOboFMyfkIrMZNs1eKYMibOkkCAiIuqq3O6WWrRoEebOnYsxY8Zg3LhxePXVV1FfX4958+YBAObMmYPevXtjxYoVAIBHH30UkydPxsqVKzFt2jSsXbsW+/btw9tvvw0A6NWrF3r16mXzHFqtFgkJCRg8eHBnr8/nomVmR0ktIgfPeX7Abl2zAQm7tkGz+i3zhn/8A0hpP5FmuF6LCQNjkZkcheoGAyJDtb4dG0REROQlbgc3s2bNQnl5OZYtW4aSkhJkZmZiy5YtlkHDxcXFUKuvtiiMHz8e77//Pp555hksXboUaWlp2LhxI4YPH+65qwgg9qsIA95rEVlfUIwPtx7CF+8sBwAcvWs+Bt98s1vnCNf7Nj8WERGRt3VoQPHChQuxcOFC2X07duxos23mzJmYOXOmy+c/ffp0R6oVMIYmRiKlZ6hXW0SkKd2vf/EW4uorcaJnH8zoPQ151Y3O0zEQEREpnN9nSymJfdZsRy0idc0G1DS1oqSmESk9wzr0XKcq6jGt6Gv84shOtKrUePznT6AhSNd+hnAXWScEJSIi6koY3PjY4YvVli6rDfvP44XbM5znh3JgQGsNnv/CPM5mVfYs/JCY5lqGcCIiIoVjcOND9tPEBYClHxVi0qBY91pbhED8okeAplr8ED8Ab4yf5VqGcBdIrU9ERERdFYMbH5KbJm4Uwv2upDVrgM2bIfR6fPPsStwW2xeP5qR1uIuLiIhISRjc+JDcNHG3u5LKyoDf/hYAoHr+eTzw8G0uHSaNAwKABRP729aB42uIiEhB+GnmQ9I0cUmHupIWLzavQpyZCTz+uGXzxepG7P6pAhermRuKiIi6N7bceIH9rClr0jTx2qZW97uSvvoK+PvfAZUKWL0aCDL/9/1zz2k8bZXpO2dIHP5y92i2xBARUbfETz8/CNdr0Sc6FAkRbrTYtLQADz5o/vn++4GsLABX17uxlldUhpIatuAQEVH3xOCmq3j5ZaCoCIiNBa6ktgDM693YZ/gWAM4wuzcREXVTDG66gtOngT/8wfzzSy8B0dGWXf1iwqCyK64C0Jfr3RARUTfF4CbQCQE88gjQ2AhMngzcc4/N7sTIELxwewY0qqshzsiUKB9XkoiIKHAwuAl0mzcDn3wCaLXAW2+ZBxPbmTU2BbueugHzr+sHANhfXIXJL+7Auu+K0dJqwitbj+H1vOOobmzB2coGlNZyPA4RESkXZ0sFstZW89RvAHjsMWDIEKfF13xzyvKztPpx9oBeAGzTPny0/zxyhsQhPSHCG7UmIiLyK7bcBLI1a8yDiHv1ApYudVpUbmCxUQicqWhok/YBMM+oqmtuu2IyERFRV8eWGzf5LPdSXR2wbJn552efBaKinBaXBhZbBzgalQp9Y0JRebBtECMAVDcYHGYuJyIi6qrYchOoXnoJKC0FBgy4ur6NE4mRIfjjjGGWmVPS6scJESGyaR9UACJlthMREXV1bLkJRBcvAn/+s/nnFSsAnc6lw2aOTsHJ8npUNRgsqx+3tJosaR+krikVgClD4thqQ0REisTgJhAtXw40NADXXgvccYdbh4brtQjXa9usfmyd9uGhGwZg08GLMJrsR+kQERF1feyWCjQ//gi8847555dekp367a66ZgPOVZpXLO4THYr4Hm6kfSAiIupi2HITaJ56CjCZgBkzgOuu6/Tp1hcU451dpy2/5wyJ892gaCIiIj9gy00g2bPHvGCfRgO88EKnT8ekmkRE1B2x5SaQSPmj5swBBrnfsmLdItPSasIf/nPYYVLNlJ5hnasrERFRgGLLTaDYt8+cakGjaXfBPlesLyjGZ4UlbbYzqSYRESkdg5tAIbXa3H03MHBgp04l1x0lmTIkrs1MKiIiIiVht1QgOHAA+M9/ALUaePrpTp9OLhUDAOQOjUd6IvNJERGRsjG4CQRSq83s2R0aa2NPLhWDCsDTPx/CsTZERKR47
Jbyt++/BzZuNK9n44FWG6BtKgaA3VFERNR9sOXG355/3vzvrFnAkCEeO62UiuFyXQsiQ7WIDHEthQMREVFXx+DGj1SFhcCHH5pbbZ55xqPn1gWp8ezPh6Gl1YRV20949NxERESBjN1SfqT+0wrzD3fcAQwb5t/KEBERKQSDGz+JKDkH9fr15l88sK4NERERmbFbyk9Gbvg/qIxG4Gc/AzIzvfY8zCNFRETdDVtu/EBfW43hn/3b/Mvixf6tDBERkcIwuPGDzE8/gK6pAaZrRgA5Of6uDhERkaIwuPExTUsLRn78TwCAadEi80wpIiIi8hgGNz42+OvPEF5ZjtqYeJjunOXv6hARESkOgxtfEgKjPv47AOD7X/wK0Gr9XCEiIiLlYXDjQ4mHDyDh+I9o1erwwy13+rs6REREisTgxodGbjS32hTd8As0Rfb0c22IiIiUicGNj6jOnUPazi8AAPun3+Pn2hARESkXgxsf0a75G9QmI85mjEF5/3R/V4eIiEixGNz4gLrVgKB31wAA8m64A3XNBj/XiIiISLmYfsEHBuzOg7rkIsrDovDfusEw7D6DnCFx/q4WERGRIrHlxgeGbXofALDumptg0Jinf+cVlaGkptGf1SIiIlIkBjdeFn32JPod2gMTVPhgxFTLdgHgTEWD/ypGRESkUAxuvCxj878AANsGjMH5yKtdUSoAfWNC/VQrIiIi5WJw403NzRjyxQYAQOO8X8M6i9SUIXFIiAjxT72IiIgUjAOKvWjPa2swsbYKJeE98Vh1Aq4b1Aux4XpEhmoRGaLzd/WIiIgUiS03XnKxuhGmv5mnf/87IwdGtQbfnLiEyFAtwvXMKUVEROQtDG685Pyho5h46gAA4F8ZPwNgHkRc3cA1boiIiLyJwY2XDP7sQ6ghkJ+SgeLoRADmQcSRoWy1ISIi8iYGN95gMqHHB/8AAPzrmpsAmAObKUPi2CVFRETkZRxQ7A1ffw2cPg0REYG4e2fjdqMGD90wAJsOXoTRJPxdOyIiIkVjcOMNf/87AMB0x0wER/RAHwDxPTjtm4iIyBfYLeVpDQ3A+vUAANM99/i5MkRERN0PgxtP27gRqKsD+vWDuG6Cv2tDRETU7TC48bQrXVKYMwdQqZyXJSIiIo9jcONJFy4AW7eaf2aXFBERkV8wuPGktWsBkwkYPx4YMMDftSEiIuqWGNx40gcfmP/91a/8Ww8iIqJujMGNpxw/DuzbB2g0wMyZ/q4NERFRt8XgxlPWrjX/O2UKEBfn37oQERF1YwxuPEGIq11Ss2f7ty5ERETdHIMbTzh0CCgqAvR6YMYMf9eGiIioW2Nw4wlSq80ttwCRkf6tCxERUTfH4KazhAD+9S/zz3fd5d+6EBEREYObTjtwADh1CggJAaZN83dtiIiIur0OBTerVq1CamoqgoODkZWVhb179zotv379eqSnpyM4OBgZGRnYvHmzZZ/BYMCTTz6JjIwMhIWFISkpCXPmzMGFCxc6UjXf+/e/zf/ecgsQFubfuhAREZH7wc26deuwaNEiLF++HPv378eIESOQm5uLsrIy2fK7d+/G7NmzMX/+fBw4cADTp0/H9OnTUVhYCABoaGjA/v378eyzz2L//v346KOPcPToUdx6662duzJfEMKSARx33OHfuhAREREAQCWEEO4ckJWVhbFjx+LNN98EAJhMJiQnJ+ORRx7BU0891ab8rFmzUF9fj08++cSy7dprr0VmZiZWr14t+xzfffcdxo0bhzNnziAlJaXdOtXU1CAyMhLV1dWIiIhw53I65/vvgcxM8yyp8nKgRw+b3S2tJqzafgIAsGBif/x150kYTebbrVGr8PANA6ELYs8gERF1T976/Hbrk7WlpQUFBQXIycm5egK1Gjk5OcjPz5c9Jj8/36Y8AOTm5josDwDV1dVQqVSIioqS3d/c3Iyamhqbh19IXVI339wmsCEiIiL/cCu4qaiogNFoRHx8vM32+Ph4lJSUyB5TU
lLiVvmmpiY8+eSTmD17tsMobsWKFYiMjLQ8kpOT3bkMzxDianDDLikiIqKAEVB9IgaDAXfeeSeEEHjrrbcclluyZAmqq6stj7Nnz/qwllccOWJ+6HTAL37h++cnIiIiWUHuFI6JiYFGo0FpaanN9tLSUiQkJMgek5CQ4FJ5KbA5c+YMtm3b5rTvTa/XQ6/Xu1N1z9u40fzvlCmAL8f5EBERkVNutdzodDqMHj0aeXl5lm0mkwl5eXnIzs6WPSY7O9umPABs3brVprwU2Bw/fhxffvklevXq5U61/EMKbqZP92ctiIiIyI5bLTcAsGjRIsydOxdjxozBuHHj8Oqrr6K+vh7z5s0DAMyZMwe9e/fGihUrAACPPvooJk+ejJUrV2LatGlYu3Yt9u3bh7fffhuAObC54447sH//fnzyyScwGo2W8Tg9e/aETqfz1LV6zoULwN69gEoFdIUp60RERN2I28HNrFmzUF5ejmXLlqGkpASZmZnYsmWLZdBwcXEx1OqrDULjx4/H+++/j2eeeQZLly5FWloaNm7ciOHDhwMAzp8/j02bNgEAMjMzbZ5r+/btuP766zt4aV50pb7IygIcdMcRERGRf7gd3ADAwoULsXDhQtl9O3bsaLNt5syZmDlzpmz51NRUuLnUjv+xS4qIiChgBdRsqS6huhrYts38M4MbIiKigMPgxl1btgAGAzB4sPlBREREAYXBjbukNBK33ebfehAREZEsBjfuMBoBKaP5tGn+rQsRERHJYnDjjm+/BS5fBqKigPHj/V0bIiIiksHgxh1Sl9TNNwNBHZpoRkRERF7G4MYdn35q/vfnP/dvPYiIiMghBjeuOnMG+OEHQK0Gpk71d22IiIjIAQY3rpJabcaPB3r29G9diIiIyCEGN66SghvOkiIiIgpoDG5c0dQEbN9u/vmWW/xbFyIiInKKwY0rdu0CGhuBxEQgI8PftSEiIiInGNy4YssW87+5uYBK5d+6EBERkVMMblwhBTecJUVERBTwGNy059w54McfzVPAc3L8XRsiIiJqB4Ob9nz+ufnfceOAXr38WxciIiJqF4Ob9kjBDbukiIiIugQmSHKmtRXYutX8c26uR06pUavw8A0DoQtiXElEROQNDG6cOXcOiI42z5AaO9bftSEiIiIXMLhxJjUV+OknoKwM0Gj8XRsiIiJyAftG2qNSAfHx/q4FERERuYjBDRERESkKgxsiIiJSFAY3REREpCgMboiIiEhRGNy042J1I3b/VIGL1Y3+rgoRERG5gFPBnfjnntN4esOPAAAVgBduz8CssSn+rRQRERE5xZYbBy5WN+KZK4ENAAgASz8qZAsOERFRgGNw48CpinoIu21GIXC6osEv9SEiIiLXMLhxoF9MGFR22zQqFVJjQv1SHyIiInINgxsHEiND8MLtGdCozCGORqXC//xyOBIjQ/xcMyIiInKGA4qdmDU2BZMGxeJ0RQNSY0IZ2BAREXUBDG7akRgZgl5heqzafgIA8PANA6ELYoMXERFRoOKnNBERESkKgxsX1TUbcLayASU1nApOREQUyBjcuGB9QTHe2XUaH+0/j8kv7sC674r9XSUiIiJygMFNO7iYHxERUdfC4KYdXMyPiIioa2Fw0w4u5kdERNS1MLhpR2JkCP44Y5glwFEDXMyPiIgogHGdGxfMHJ2Ck+X1qGow4NGcNKT0DPN3lYiIiMgBBjcuCtdrEa7XIiGCLTZERESBjMGNC3RBajz+s0H+rgYRERG5gGNuiIiISFEY3BAREZGiMLghIiIiRWFwQ0RERIrC4IaIiIgUhcENERERKQqDGyIiIlIUBjdERESkKAxuiIiISFEY3BAREZGiMP2CjzCFAxERkW+w5YaIiIgUhcENERERKQqDGyIiIlIUBjdERESkKAxuiIiISFEY3BAREZGiMLghIiIiRWFwQ0RERIrC4IaIiIgUhSsUexFXJSYiIvI9ttwQERGRojC4ISIiIkVhcENERESK0
qHgZtWqVUhNTUVwcDCysrKwd+9ep+XXr1+P9PR0BAcHIyMjA5s3b7bZL4TAsmXLkJiYiJCQEOTk5OD48eMdqRoRERF1c24HN+vWrcOiRYuwfPly7N+/HyNGjEBubi7Kyspky+/evRuzZ8/G/PnzceDAAUyfPh3Tp09HYWGhpcyLL76I119/HatXr8aePXsQFhaG3NxcNDU1dfzKiIiIqFtSCSGEOwdkZWVh7NixePPNNwEAJpMJycnJeOSRR/DUU0+1KT9r1izU19fjk08+sWy79tprkZmZidWrV0MIgaSkJDzxxBP47W9/CwCorq5GfHw83nvvPdx1113t1qmmpgaRkZGorq5GRESEO5dDREREfuKtz2+3Wm5aWlpQUFCAnJycqydQq5GTk4P8/HzZY/Lz823KA0Bubq6l/KlTp1BSUmJTJjIyEllZWQ7P2dzcjJqaGpsHEREREeBmcFNRUQGj0Yj4+Hib7fHx8SgpKZE9pqSkxGl56V93zrlixQpERkZaHsnJye5cBhERESlYl5wttWTJElRXV1seZ8+e9XeViIiIKEC4FdzExMRAo9GgtLTUZntpaSkSEhJkj0lISHBaXvrXnXPq9XpERETYPIiIiIgAN4MbnU6H0aNHIy8vz7LNZDIhLy8P2dnZssdkZ2fblAeArVu3Wsr369cPCQkJNmVqamqwZ88eh+ckIiIicsTt3FKLFi3C3LlzMWbMGIwbNw6vvvoq6uvrMW/ePADAnDlz0Lt3b6xYsQIA8Oijj2Ly5MlYuXIlpk2bhrVr12Lfvn14++23AQAqlQqPPfYY/vjHPyItLQ39+vXDs88+i6SkJEyfPt1zV0pERETdgtvBzaxZs1BeXo5ly5ahpKQEmZmZ2LJli2VAcHFxMdTqqw1C48ePx/vvv49nnnkGS5cuRVpaGjZu3Ijhw4dbyvzud79DfX097r//flRVVWHChAnYsmULgoODPXCJRERE1J24vc5NIOI6N0RERF1PQKxzQ0RERBTo3O6WCkRS4xMX8yMiIuo6pM9tT3ciKSK4qa2tBQAu5kdERNQF1dbWIjIy0mPnU8SYG5PJhAsXLqBHjx5QqVQePXdNTQ2Sk5Nx9uxZjudxA+9bx/C+dQzvW8fwvrmP96xjHN03IQRqa2uRlJRkMxmpsxTRcqNWq9GnTx+vPgcXC+wY3reO4X3rGN63juF9cx/vWcfI3TdPtthIOKCYiIiIFIXBDRERESkKg5t26PV6LF++HHq93t9V6VJ43zqG961jeN86hvfNfbxnHePr+6aIAcVEREREErbcEBERkaIwuCEiIiJFYXBDREREisLghoiIiBSFwU07Vq1ahdTUVAQHByMrKwt79+71d5X85ve//z1UKpXNIz093bK/qakJDz/8MHr16oXw8HDcfvvtKC0ttTlHcXExpk2bhtDQUMTFxWHx4sVobW319aV41ddff41f/OIXSEpKgkqlwsaNG232CyGwbNkyJCYmIiQkBDk5OTh+/LhNmcuXL+Puu+9GREQEoqKiMH/+fNTV1dmUOXToECZOnIjg4GAkJyfjxRdf9PaleVV79+3ee+9t8/qbOnWqTZnudt9WrFiBsWPHokePHoiLi8P06dNx9OhRmzKe+rvcsWMHRo0aBb1ej4EDB+K9997z9uV5jSv37frrr2/zenvggQdsynS3+/bWW2/hmmuusSzEl52djc8++8yyP6Bea4IcWrt2rdDpdGLNmjXixx9/FAsWLBBRUVGitLTU31Xzi+XLl4thw4aJixcvWh7l5eWW/Q888IBITk4WeXl5Yt++feLaa68V48ePt+xvbW0Vw4cPFzk5OeLAgQNi8+bNIiYmRixZssQfl+M1mzdvFk8//bT46KOPBACxYcMGm/0vvPCCiIyMFBs3bhTff/+9uPXWW0W/fv1EY2OjpczUqVPFiBEjxLfffit27twpBg4cKGbPnm3ZX11dLeLj48Xdd98tCgsLxQcffCBCQkLE//7v//rqM
j2uvfs2d+5cMXXqVJvX3+XLl23KdLf7lpubK959911RWFgoDh48KG655RaRkpIi6urqLGU88Xd58uRJERoaKhYtWiQOHz4s3njjDaHRaMSWLVt8er2e4sp9mzx5sliwYIHN6626utqyvzvet02bNolPP/1UHDt2TBw9elQsXbpUaLVaUVhYKIQIrNcagxsnxo0bJx5++GHL70ajUSQlJYkVK1b4sVb+s3z5cjFixAjZfVVVVUKr1Yr169dbthUVFQkAIj8/Xwhh/vBSq9WipKTEUuatt94SERERorm52at19xf7D2mTySQSEhLEn//8Z8u2qqoqodfrxQcffCCEEOLw4cMCgPjuu+8sZT777DOhUqnE+fPnhRBC/OUvfxHR0dE29+3JJ58UgwcP9vIV+Yaj4Oa2225zeAzvmxBlZWUCgPjqq6+EEJ77u/zd734nhg0bZvNcs2bNErm5ud6+JJ+wv29CmIObRx991OExvG9m0dHR4m9/+1vAvdbYLeVAS0sLCgoKkJOTY9mmVquRk5OD/Px8P9bMv44fP46kpCT0798fd999N4qLiwEABQUFMBgMNvcrPT0dKSkplvuVn5+PjIwMxMfHW8rk5uaipqYGP/74o28vxE9OnTqFkpISm/sUGRmJrKwsm/sUFRWFMWPGWMrk5ORArVZjz549ljKTJk2CTqezlMnNzcXRo0dRWVnpo6vxvR07diAuLg6DBw/Ggw8+iEuXLln28b4B1dXVAICePXsC8NzfZX5+vs05pDJKeS+0v2+Sf/7zn4iJicHw4cOxZMkSNDQ0WPZ19/tmNBqxdu1a1NfXIzs7O+Bea4pInOkNFRUVMBqNNv8JABAfH48jR474qVb+lZWVhffeew+DBw/GxYsX8dxzz2HixIkoLCxESUkJdDodoqKibI6Jj49HSUkJAKCkpET2fkr7ugPpOuXug/V9iouLs9kfFBSEnj172pTp169fm3NI+6Kjo71Sf3+aOnUqfvnLX6Jfv3746aefsHTpUtx8883Iz8+HRqPp9vfNZDLhsccew3XXXYfhw4cDgMf+Lh2VqampQWNjI0JCQrxxST4hd98A4Fe/+hX69u2LpKQkHDp0CE8++SSOHj2Kjz76CED3vW8//PADsrOz0dTUhPDwcGzYsAFDhw7FwYMHA+q1xuCGXHbzzTdbfr7mmmuQlZWFvn374l//+leX/COlruWuu+6y/JyRkYFrrrkGAwYMwI4dOzBlyhQ/1iwwPPzwwygsLMSuXbv8XZUuxdF9u//++y0/Z2RkIDExEVOmTMFPP/2EAQMG+LqaAWPw4ME4ePAgqqur8e9//xtz587FV1995e9qtcFuKQdiYmKg0WjajPQuLS1FQkKCn2oVWKKiojBo0CCcOHECCQkJaGlpQVVVlU0Z6/uVkJAgez+lfd2BdJ3OXlcJCQkoKyuz2d/a2orLly/zXlrp378/YmJicOLECQDd+74tXLgQn3zyCbZv344+ffpYtnvq79JRmYiIiC79xcbRfZOTlZUFADavt+5433Q6HQYOHIjRo0djxYoVGDFiBF577bWAe60xuHFAp9Nh9OjRyMvLs2wzmUzIy8tDdna2H2sWOOrq6vDTTz8hMTERo0ePhlartblfR48eRXFxseV+ZWdn44cffrD5ANq6dSsiIiIwdOhQn9ffH/r164eEhASb+1RTU4M9e/bY3KeqqioUFBRYymzbtg0mk8nyBpudnY2vv/4aBoPBUmbr1q0YPHhwl+5acce5c+dw6dIlJCYmAuie900IgYULF2LDhg3Ytm1bmy43T/1dZmdn25xDKtNV3wvbu29yDh48CAA2r7fudt/kmEwmNDc3B95rrWPjo7uHtWvXCr1eL9577z1x+PBhcf/994uoqCibkd7dyRNPPCF27NghTp06Jb755huRk5MjYmJiRFlZmRDCPA0wJSVFbNu2Tezbt09kZ2eL7Oxsy/HSNMCbbrpJHDx4UGzZskXExsYqbip4bW2tO
HDggDhw4IAAIF5++WVx4MABcebMGSGEeSp4VFSU+Pjjj8WhQ4fEbbfdJjsVfOTIkWLPnj1i165dIi0tzWZKc1VVlYiPjxf33HOPKCwsFGvXrhWhoaFddkqzEM7vW21trfjtb38r8vPzxalTp8SXX34pRo0aJdLS0kRTU5PlHN3tvj344IMiMjJS7Nixw2bKckNDg6WMJ/4upem5ixcvFkVFRWLVqlVdekpze/ftxIkT4g9/+IPYt2+fOHXqlPj4449F//79xaRJkyzn6I737amnnhJfffWVOHXqlDh06JB46qmnhEqlEl988YUQIrBeawxu2vHGG2+IlJQUodPpxLhx48S3337r7yr5zaxZs0RiYqLQ6XSid+/eYtasWeLEiROW/Y2NjeKhhx4S0dHRIjQ0VMyYMUNcvHjR5hynT58WN998swgJCRExMTHiiSeeEAaDwdeX4lXbt28XANo85s6dK4QwTwd/9tlnRXx8vNDr9WLKlCni6NGjNue4dOmSmD17tggPDxcRERFi3rx5ora21qbM999/LyZMmCD0er3o3bu3eOGFF3x1iV7h7L41NDSIm266ScTGxgqtViv69u0rFixY0OaLRne7b3L3C4B49913LWU89Xe5fft2kZmZKXQ6nejfv7/Nc3Q17d234uJiMWnSJNGzZ0+h1+vFwIEDxeLFi23WuRGi+923++67T/Tt21fodDoRGxsrpkyZYglshAis15pKCCHca+shIiIiClwcc0NERESKwuCGiIiIFIXBDRERESkKgxsiIiJSFAY3REREpCgMboiIiEhRGNwQERGRojC4ISIiIkVhcENERESKwuCGiIiIFIXBDRERESkKgxsiIiJSlP8HgEnX95Psnq4AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ "x = 5\n", "#fit = 'logistic'\n", "fit = 'gompertz4'\n", - "cut, solu = fit_growth_curve(dmean,dstd,fit,x,None)\n", + "cut, solu = fit_growth_curve(dmean,dstd,fit,x,80)\n", "print(dmean.columns[x])\n", "print(np.log(2)/(solu[0][2]))\n", "plot_growth_curves(dmean,dstd,solu,cut,x,fit)" diff --git a/dev/stuff.ipynb b/dev/stuff.ipynb new file mode 100644 index 0000000..5961536 --- /dev/null +++ b/dev/stuff.ipynb @@ -0,0 +1,55 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Check, if a metabolite is in a libsbml model\n", + "\n", + "Note: takes way to much time -> maybe still include it in the toolbox, it is already programmed ...." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from refinegems.curation.polish import get_set_of_curies\n", + "\n", + "# @TODO make it a bit more suffisticated\n", + "def getAnnotationDict_libsbml(entity):\n", + " try: \n", + " for cvterm in entity.getCVTerms():\n", + " current_uris = [cvterm.getResourceURI(i) for i in range(cvterm.getNumResources())]\n", + " return get_set_of_curies(current_uris)[0]\n", + " except Exception as e:\n", + " return None\n", + " \n", + "def hasAnnotation_libmodel(id, idtype, entitytype, libmodel):\n", + " match entitytype:\n", + " case 'reaction':\n", + " entitylist = libmodel.getListOfReactions()\n", + " case 'species':\n", + " entitylist = libmodel.getListOfSpecies()\n", + " case _:\n", + " mes = f'Unknown entity type: {entitytype}'\n", + " raise ValueError(mes)\n", + " \n", + " found = []\n", + " for r in entitylist:\n", + " annots = getAnnotationDict_libsbml(r)\n", + " if annots and idtype in annots.keys() and id in annots[idtype]:\n", + " found.append(r.getId())\n", + " return found \n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/dev/under_construction.ipynb b/dev/under_construction.ipynb index 8b53f6d..ce9224a 100644 --- a/dev/under_construction.ipynb +++ b/dev/under_construction.ipynb @@ -350,12 +350,24 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/brune/miniconda3/envs/sprg/lib/python3.10/site-packages/pydantic/_internal/_config.py:322: UserWarning: Valid config keys have changed in V2:\n", + "* 'underscore_attrs_are_private' has been removed\n", + " warnings.warn(message, UserWarning)\n" + ] + } + ], "source": [ "import sqlite3\n", "from refinegems.utility.databases import PATH_TO_DB\n", + "import pandas as pd\n", + "from tqdm import tqdm\n", "\n", "# DISCLAIMER:\n", "# Database information from MetaNetX\n", @@ -368,26 +380,43 @@ " ['id','mnx_equation','reference','ec-code','is_balanced','is_transport']),\n", " 'reac_xref': ('https://www.metanetx.org/cgi-bin/mnxget/mnxref/reac_xref.tsv',\n", " ['source','id','description']),\n", - " 'chem_prop': ('https://www.metanetx.org/cgi-bin/mnxget/mnxref/chem_xref.tsv',\n", + " 'chem_prop': ('https://www.metanetx.org/cgi-bin/mnxget/mnxref/chem_prop.tsv',\n", " ['id','name','reference','formula','charge','mass','InChI','InChIKey','SMILES']),\n", - " #'chem_xref': ('https://www.metanetx.org/cgi-bin/mnxget/mnxref/chem_prop.tsv',\n", - " # ['source','id','description'])\n", + " 'chem_xref': ('https://www.metanetx.org/cgi-bin/mnxget/mnxref/chem_xref.tsv',\n", + " ['source','id','description'])\n", " }\n", "\n", "# @TODO chemxref missing\n", - "def update_mnx_namespaces(db_connection):\n", + "# @TODO time warning + progress bar (waiting w/o info is tedious)\n", + "def update_mnx_namespaces(db_connection, chunksize=1):\n", " for name,values in mnx_db_namespace.items():\n", " link,colnames = values\n", - " mnx_table = pd.read_csv(link, sep='\\t', comment='#', names=colnames)\n", - " \n", + " mnx_table = 
[]\n", + " for chunk in tqdm(pd.read_csv(link, sep='\\t', comment='#', \n", + " names=colnames, \n", + " chunksize=chunksize*1024), \n", + " desc=f'Downloading {name}'\n", + " ):# progress bar will not work -> no totel length info\n", + " mnx_table.append(chunk) \n", + "\n", " match name:\n", " # Reaction property table\n", " case 'reac_prop':\n", - " mnx_table.to_sql(\n", - " 'mnx_'+name, db_connection, \n", - " if_exists='replace', index=False, \n", - " dtype={'id':'TEXT PRIMARY KEY'}\n", - " )\n", + " total_len = sum([len(_) for _ in mnx_table])\n", + " with tqdm(total=total_len, unit='entries', \n", + " desc='Add to DB') as pbar:\n", + " for i,chunk in enumerate(mnx_table):\n", + " if i == 0:\n", + " exists = 'replace'\n", + " else:\n", + " exists = 'append'\n", + " chunk.to_sql(\n", + " 'mnx_'+name, db_connection, \n", + " if_exists=exists, index=False, \n", + " dtype={'id':'TEXT PRIMARY KEY'}\n", + " )\n", + " pbar.update(len(chunk)) \n", + " \n", " # Reaction cross-reference table\n", " case 'reac_xref':\n", " cursor = db_connection.cursor()\n", @@ -401,44 +430,77 @@ " );\n", " \"\"\"\n", " cursor.execute(empty_table)\n", - " mnx_table.to_sql(\n", - " 'mnx_'+name, db_connection, \n", - " if_exists='append', index=False\n", - " )\n", - " # Metabolite property table\n", + " total_len = sum([len(_) for _ in mnx_table])\n", + " with tqdm(total=total_len, unit='entries',\n", + " desc='Add to DB') as pbar:\n", + " for i,chunk in enumerate(mnx_table):\n", + " chunk.to_sql(\n", + " 'mnx_'+name, db_connection, \n", + " if_exists='append', index=False\n", + " )\n", + " pbar.update(len(chunk))\n", + " \n", + " # Metabolite properties table\n", " case 'chem_prop':\n", - " mnx_table.to_sql(\n", - " 'mnx_'+name, db_connection, \n", - " if_exists='replace', index=False, \n", - " dtype={'id':'TEXT PRIMARY KEY'}\n", - " )\n", - " # Metabolite cross-reference table\n", + " total_len = sum([len(_) for _ in mnx_table])\n", + " with tqdm(total=total_len, unit='entries',\n", + " 
desc='Add to DB') as pbar:\n", + " for i,chunk in enumerate(mnx_table):\n", + " if i == 0:\n", + " exists = 'replace'\n", + " else:\n", + " exists = 'append'\n", + " chunk.to_sql(\n", + " 'mnx_'+name, db_connection, \n", + " if_exists=exists, index=False, \n", + " dtype={'id':'TEXT PRIMARY KEY'}\n", + " )\n", + " pbar.update(len(chunk))\n", " # @TODO : there seems to be a problem with the unique constraint and case-sensitivity\n", - " # case 'chem_xref':\n", - " # cursor = db_connection.cursor()\n", - " # cursor.execute('DROP TABLE IF EXISTS mnx_chem_xref')\n", - " # empty_table = \"\"\" CREATE TABLE mnx_chem_xref (\n", - " # source VARCHAR(100) COLLATE SQL_Latin1_General_CP1_CS_AS,\n", - " # id VARCHAR(100) COLLATE SQL_Latin1_General_CP1_CS_AS,\n", - " # description TEXT,\n", - " # CONSTRAINT PK_chem_reac_xref PRIMARY KEY (source,id)\n", - " # FOREIGN KEY(id) REFERENCES mnx_chem_prop(id)\n", - " # );\n", - " # \"\"\"\n", - " # cursor.execute(empty_table)\n", - " # mnx_table.to_sql(\n", - " # 'mnx_'+name, db_connection, \n", - " # if_exists='append', index=False\n", - " # )\n", - " \n", + " case 'chem_xref':\n", + " total_len = sum([len(_) for _ in mnx_table])\n", + " cursor = db_connection.cursor()\n", + " cursor.execute('DROP TABLE IF EXISTS mnx_chem_xref')\n", + " empty_table = \"\"\" CREATE TABLE mnx_chem_xref (\n", + " source TEXT,\n", + " id TEXT,\n", + " description TEXT,\n", + " CONSTRAINT PK_mnx_chem_xref PRIMARY KEY (source,id)\n", + " FOREIGN KEY(id) REFERENCES mnx_chem_prop(id)\n", + " );\n", + " \"\"\"\n", + " cursor.execute(empty_table)\n", + " with tqdm(total=total_len, unit='entries',\n", + " desc='Add to DB') as pbar:\n", + " for i,chunk in enumerate(mnx_table):\n", + " chunk.to_sql(\n", + " 'mnx_'+name, db_connection, \n", + " if_exists='append', index=False\n", + " )\n", + " pbar.update(len(chunk))\n", " \n" ] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Downloading reac_prop: 73it [00:00, 349.12it/s]\n", + "Add to DB: 100%|██████████| 74613/74613 [00:00<00:00, 297720.88entries/s]\n", + "Downloading reac_xref: 376it [00:00, 513.48it/s]\n", + "Add to DB: 100%|██████████| 384802/384802 [00:03<00:00, 122374.97entries/s]\n", + "Downloading chem_prop: 1262it [00:05, 228.97it/s]\n", + "Add to DB: 100%|██████████| 1292154/1292154 [00:05<00:00, 242204.86entries/s]\n", + "Downloading chem_xref: 2927it [00:05, 512.50it/s]\n", + "Add to DB: 100%|██████████| 2996510/2996510 [00:28<00:00, 105285.64entries/s]\n" + ] + } + ], "source": [ "con = sqlite3.connect(PATH_TO_DB)\n", "update_mnx_namespaces(con)\n", diff --git a/src/refinegems/classes/gapfill.py b/src/refinegems/classes/gapfill.py index 2195509..be4e742 100644 --- a/src/refinegems/classes/gapfill.py +++ b/src/refinegems/classes/gapfill.py @@ -667,7 +667,7 @@ def get_missing_reacs(self, model:cobra.Model, # ----------------------------------- # -> access ncbi for ec (optional) # @DEBUGGING ................... - mapped_reacs = mapped_reacs.iloc[0:5,:] + mapped_reacs = mapped_reacs.iloc[300:350,:] print(UserWarning('Running in debugging mode.')) # .............................. if check_NCBI and mail: