Update cmd #118

draeger-lab · Aug 26, 2024 · 9fab756 · 9fab756
1 parent 35bab49
commit 9fab756
Show file tree

Hide file tree

Showing 2 changed files with 81 additions and 19 deletions.
diff --git a/docs/source/cmd_desc.rst b/docs/source/cmd_desc.rst
@@ -170,7 +170,7 @@ Add or update tables for additional namespaces/databases into/of the in-build da
 
 Options:
 
-- ``--chunksize/-c`` Size (in kB) of data to download per chunk, if a download is required.
+- ``--chunksize/-c``: Size (in kB) of data to download per chunk, if a download is required.
 
 .. code:: bash
 
@@ -184,21 +184,57 @@ refinegems gaps
 
 .. code:: bash 
   
-  refinegems gaps find [MODELPATH] [GFF_FILE] [ORGANISMID] [GAPFILL_PARAMS] [FILENAME]
+  refinegems gaps find [ALGORITHM] [MODELPATH] [OPTIONS]
 
-Find gaps in a model based on the genes/gene products of the underlying organism.
+Find gaps in a model and optionally try to fill them.
+The algorithm used for the gap filling is chosen by the first argument.
 
-.. code:: bash 
-  
-  refinegems gaps fill [MODEL] [GAP_ANALYSIS_RESULTS]
+General Options
 
-Fill the gaps in a model based on a user-provided input file.
+- ``-o``, ``--outdir``: Path to a directory to write the output to.  [default: ``./``]
+- ``-f``, ``--fill``: If True, tries to fill the gaps in the model.
+- ``--fc``, ``--formula-check``: [none,existence,wildcard,strict] Set the filter for which metabolite formulas are valid to be added to the model. [default: existence]
+- ``--no-dna``: Exclude DNA reactions (name-based) from being added to the model.
+- ``--no-rna``: Exclude RNA reactions (name-based) from being added to the model.
+- ``-p``, ``--idprefix``: Prefix for the random IDs, if an ID does not exist for the given namespace. [default: refineGEMs]
+- ``-n``, ``--namespace``: [BiGG]  Namespace used in the model.  [default: BiGG]
 
-.. code:: bash 
-  
-  refinegems gaps autofill [MODELPATH] [GAFILL_PARAMS] [FILENAME]
+| KEGG required parameters: [all required if ``ALG="KEGG"``]
+| Parameters required when running the KEGG gap filling algorithm
+
+- ``--orgid``: KEGG organism ID
+
+| BioCyc required parameters: [all required if ``ALG="BioCyc"``]
+| Parameters required when running the BioCyc gap filling algorithm
+
+- ``--gt``, ``--genetable``: Path to the BioCyc gene smart table.
+- ``--rt``, ``--reactable``: Path to the BioCyc reaction smart table.
+- ``--gff-bc``: Path to the GFF.
+
+| Gene required parameters: [all required if ``ALG="Gene"``]
+| Parameters required when running the GeneGapFiller algorithm
+
+- ``--gff-g``: Path to the GFF.
+
+| Gene optional parameters:
+| Optional / conditionally interdependent parameters for the gene gap filling algorithm
+
+- ``--prot-prefix``: Prefix for pseudo-protein IDs.  [default: refineGEMs]
+- ``--mail``: Mail address for NCBI requests.
+- ``--ncbi``, ``--check-ncbi``: Enable searching protein IDs in NCBI. This increases the runtime significantly.
+- ``--fasta``: Path to the protein FASTA of the model.
+- ``--dmnd-db``: Path to the SwissProt DIAMOND database.
+- ``--sp-map``, ``--swissprot-mapping``: Path to the SwissProt mapping file (ID against EC and BRENDA)
+- ``-s``, ``--sensitivity``: [sensitive,more-sensitive,very-sensitive,ultra-sensitive] Sensitivity mode for running DIAMOND.  [default: more-sensitive]
+- ``--cov``: Coverage value (passed to DIAMOND)  [default: 90.0]
+- ``--pid``: Percentage identity threshold value for filtering DIAMOND results.
+- ``-t``, ``--threads``: Number of threads to be used by DIAMOND.  [default: 2]
+
+Constraints:
 
-Automatically find and fill the gaps based on the genes/gene products.
+- ``--mail`` is required if ``--check-ncbi`` is set
+- if one of ``--fasta``, ``--dmnd-db``, ``--swissprot-mapping`` is set, all need to be set
+- ``--fasta``, ``--dmnd-db``, ``--swissprot-mapping`` are all required if any of ``--sensitivity``, ``--cov``, ``--pid`` and ``--threads`` is set
 
 
 refinegems media

diff --git a/src/refinegems/cmd_access.py b/src/refinegems/cmd_access.py
@@ -192,12 +192,12 @@ def run(model,email,path,id_db,refseq_gff,protein_fasta,lab_strain,kegg_organism
 # Find and fill gaps in a model automatically/Fill gaps with manually created tables
 # ----------------------------------------------------------------------------------
 # @TODO gaps group still for the old gapfill - rewrite or delete
-
+# @TEST help is displayed alright but untested
 @cli.group()
 def gaps():
    """Find and fill gaps in a model."""
 
-@gaps.command()
+@gaps.command(show_constraints=True)
 @cloup.argument('alg', type=click.Choice(['KEGG','BioCyc','Gene']),
                 help='Type of automated gap filling algorithm, that shall be used.')
 @cloup.argument('modelpath', type=click.Path(exists=True, dir_okay=False), 
@@ -255,7 +255,7 @@ def gaps():
                 show_default = True,
                 help='Prefix for pseudo-protein IDs.'),
    cloup.option('--mail', type=str, default=None, help='Mail address for NCBI requests.'),
-   cloup.option('--nbci','--check-ncbi', is_flag=True, default=False,
+   cloup.option('--ncbi','--check-ncbi', is_flag=True, default=False,
                 help='Enable searching protein IDs in NCBI. This increases the runtime significantly.'),
    cloup.option('--fasta', type=click.Path(exists=True, dir_okay=False), default=None,
                 help='Path to the protein FASTA of the model.'),
@@ -274,8 +274,8 @@ def gaps():
                 help='Number of threads to be used by DIAMOND.'), 
 )
 @cloup.constraints.constraint(cloup.constraints.If(cloup.constraints.IsSet('ncbi'), then=cloup.constraints.require_all), ['mail'])
-@cloup.constraints.constraint(cloup.constraints.If(cloup.constraints.AnySet('fasta','dmnd_db','sp_map'),then=cloup.constraints.AllSet), ['fasta','dmnd_db','sp_map'])
-@cloup.constraints.constraint(cloup.constraints.If(cloup.constraints.AnySet('s','cov','pid','t'),then=cloup.constraints.AllSet), ['fasta','dmnd_db','sp_map'])
+@cloup.constraints.constraint(cloup.constraints.If(cloup.constraints.AnySet('fasta','dmnd_db','sp_map'),then=cloup.constraints.require_all), ['fasta','dmnd_db','sp_map'])
+@cloup.constraints.constraint(cloup.constraints.If(cloup.constraints.AnySet('sensitivity','cov','pid','threads'),then=cloup.constraints.require_all), ['fasta','dmnd_db','sp_map'])
 def automated_gapfill(alg,modelpath,outdir,fill,
                       formula_check, no_dna, no_rna,
                       idprefix, namespace,
@@ -290,16 +290,42 @@ def automated_gapfill(alg,modelpath,outdir,fill,
 
    cmodel = rg.utility.io.load_model(modelpath, 'cobra')
    model = rg.utility.io.load_model(modelpath, 'libsbml')
-   # find gaps
 
-   gapfiller = rg.classes.gapfill.KEGGapFiller(orgid)
+   # set class instance
+   match alg:
+      case 'KEGG':
+         gapfiller = rg.classes.gapfill.KEGGapFiller(orgid)
+         # find gaps
+         gapfiller.missing_genes(model)
+         gapfiller.missing_reacs(cmodel)
+      case 'BioCyc':
+         gapfiller = rg.classes.gapfill.BioCycGapFiller(genetable,
+                                                        reactable,
+                                                        gff_bc)
+         # find gaps
+         gapfiller.missing_genes(model)
+         gapfiller.missing_reacs(cmodel)
+      case 'Gene':
+         gapfiller = rg.classes.gapfill.GeneGapFiller()
+         # find gaps
+         gapfiller.missing_genes(gff_g,model)
+         gapfiller.missing_reacs(cmodel, prot_prefix, mail, ncbi, fasta, dmnd_db, sp_map,
+                                 sensitivity, cov, pid, threads)
+      case _:
+         mes = f'Unknown option for algorthmn type: {alg}'
+         raise ValueError(mes)
+   # find gaps
    gapfiller.missing_genes(model)
    gapfiller.missing_reacs(cmodel)
    # fill gaps
    if fill:
-      model = gapfiller.fill_model(model)
+      model = gapfiller.fill_model(model, 
+                                   formula_check=formula_check,
+                                   exclude_dnae=no_dna, exclude_rna=no_rna,
+                                   idprefix=idprefix, namespace=namespace)
       # save model
       write_model_to_file(model, Path(outdir, 'gapfilled_model.xml'))
+
    # @TODO report stats
    # @TODO report manual curation