docs + workflow update #8 #5

draeger-lab · May 21, 2024 · 9d2098e · 9d2098e
1 parent f77272f
commit 9d2098e
Show file tree

Hide file tree

Showing 24 changed files with 598 additions and 15 deletions.
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
diff --git a/docs/build/doctrees/hqtb/hqtb-config.doctree b/docs/build/doctrees/hqtb/hqtb-config.doctree
diff --git a/docs/build/doctrees/overview-pipes.doctree b/docs/build/doctrees/overview-pipes.doctree
diff --git a/docs/build/html/_sources/hqtb/hqtb-config.rst.txt b/docs/build/html/_sources/hqtb/hqtb-config.rst.txt
@@ -0,0 +1,125 @@
+HQTB Configuration File
+=======================
+
+Below, the configuration file with its underlying default values is listed.
+
+.. code-block:: yaml 
+
+    # information about the genome to be used to generate the new model
+    subject:
+        annotated_genome: __USER__
+        full_sequence: __USER__
+
+    # information about the template model/genome
+    template:
+        annotated_genome: __USER__
+        model: __USER__
+        namespace: BiGG
+
+    # information about the output
+    out:
+        dir: ./specimen_run/
+        name: specimen_model
+        memote: False
+
+    # data(bases) required to run the program
+    data:
+        # if this parameter is set, assumes that the directory structure from setup
+        # is used and uses this path to a directory as the parent folder for the
+        # following paths (assumes all data paths are relative ones)
+        data_direc: null
+        # required
+        diamond: __USER__
+        # needed but potentially downloaded
+        mnx_chem_prop: MetaNetX/chem_prop.tsv
+        mnx_chem_xref: MetaNetX/chem_xref.tsv
+        mnx_reac_prop: MetaNetX/reac_prop.tsv
+        mnx_reac_xref: MetaNetX/reac_xref.tsv
+        # optional, but good and manual
+        ncbi_map: null
+        ncbi_dat: null
+        # optional for directionality control
+        biocyc: null
+        # optional:
+        #   the pan-core model is used for analysis and if no universal model
+        #   is given, also for gapfilling
+        #   if the pan-core model is too small for useful gapfilling, use an
+        #   additional universal model for gapfilling
+        #   if none is given, gapfilling (and core-pan analysis) is skipped
+        universal: null
+        pan-core: null
+
+    # parameters for the single steps of the pipeline
+    parameters:
+        bidirectional_blast:
+            # default should suffice except special cases
+            template_name: null
+            input_name: null
+            temp_header: null
+            in_header: null
+            # can be set by user if wanted, but not necessary
+            sensitivity: more-sensitive
+
+        generate_draft_model:
+            edit_names: no
+            pid: 80.0
+            medium: default
+
+        refinement_extension:
+            # default (usually) fine
+            id: locus_tag
+            # default fine
+            sensitivity: more-sensitive
+            # default alright but good to edit for trying different options
+            coverage: 95.0
+            pid: 90.0
+            # default almost needed, except for special cases
+            exclude_dna: True
+            exclude_rna: True
+
+        refinement_cleanup:
+            # default as standard
+            check_dupl_reac: True
+            check_dupl_meta: default
+            remove_unused_meta: False
+            remove_dupl_reac: True
+            remove_dupl_meta: True
+            # current default means no gapfilling
+            media_gap: null
+
+        refinement_annotation:
+            # for KEGG pathway annotation
+            viaEC: False
+            viaRC: False
+
+        refinement_smoothing:
+            # useful
+            mcc: skip
+            # EGC correction
+            egc: null
+            # depends on organism (current: Klebsiella)
+            dna_weight_frac: 0.023
+            ion_weight_frac: 0.05
+
+        # validation:
+            # default should suffice
+
+        analysis:
+            # default is currently only option
+            pc_based_on: id
+            # can be default but useful to edit
+            media_analysis: __USER__ # edit to fit a default media config file
+            test_aa_auxotrophies: True
+            # perform pathway analysis with KEGG
+            pathway: True
+
+    # options for performance
+    performance:
+        threads: 2
+        # for the gapfilling, if iterations and chunk_size are set (not null)
+        # use a heuristic for faster performance:
+        #     instead of using all reactions that can be added at once,
+        #     run x iterations of gapfilling with n-size randomised chunks of reactions
+        gapfilling:
+            iterations: 3
+            chunk_size: 2000
diff --git a/docs/build/html/_sources/overview-pipes.rst.txt b/docs/build/html/_sources/overview-pipes.rst.txt
@@ -3,8 +3,8 @@ Overview of the Pipelines
 
 The following pipelines are currently available:
 
-- CarveMe + ModelPolisher based - CMPB
-- High-quality template based - HQTB
+- ``CMPB``: CarveMe + ModelPolisher based 
+- ``HQTB``: High-quality template based 
 
 More information about these different types of pipelines can be found below.
 
@@ -27,12 +27,13 @@ The HQTB pipeline curates a new model from an annotated genome based on a high-q
 
 This pipeline aims to profit from already performed (manual) curation of the already existing model, 
 to carry this knowledge into the new model. The closer the template is to the original, the more knowledge 
-can potential be carried over. Therefore, this pipeline is useful, if the user already has a model of
+can potentially be carried over. Therefore, this pipeline is more useful if the user already has a model of
 a similar organism to the one the new model should be curated for.
 
 .. toctree::
     :maxdepth: 2
     :caption: Further Information 
 
     About HQTB <hqtb/about-pipeline>
-    Run HQTB <hqtb/run-pipeline>
+    Run HQTB <hqtb/run-pipeline>
+    HQTB Configuration <hqtb/hqtb-config>
diff --git a/docs/build/html/hqtb/about-pipeline.html b/docs/build/html/hqtb/about-pipeline.html
@@ -62,6 +62,7 @@
 </ul>
 </li>
 <li class="toctree-l3"><a class="reference internal" href="run-pipeline.html">Run HQTB</a></li>
+<li class="toctree-l3"><a class="reference internal" href="hqtb-config.html">HQTB Configuration</a></li>
 </ul>
 </li>
 </ul>
-Original file line number
+Diff line change
@@ Expand Up / @@ -62,6 +62,7 @@ @@
     </ul>
     </li>
     <li class="toctree-l3"><a class="reference internal" href="run-pipeline.html">Run HQTB</a></li>
+    <li class="toctree-l3"><a class="reference internal" href="hqtb-config.html">HQTB Configuration</a></li>
     </ul>
     </li>
     </ul>
@@ Expand Down @@