Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix/documentation #181

Merged
merged 3 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions big_scape/cli/cli_common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ def common_all(fn):
"--verbose",
is_flag=True,
help=(
"output all kinds of logs, "
"including debugging log info, and write to logfile."
"Prints more detailed information of each step in the analysis, "
"outputs all kinds of logs, including debugging log info, and writes to logfile. "
"Toggle to activate."
CatarinaCarolina marked this conversation as resolved.
Show resolved Hide resolved
),
),
click.option(
Expand All @@ -64,7 +65,8 @@ def common_all(fn):
"--label",
default=None,
type=str,
help="A run label to be added to the output results folder name. "
help="A run label to be added to the output results folder name, as well as "
"to the dropdown menu in the visualization page. "
"By default, BiG-SCAPE runs will have a name such as YYYY-MM-DD_HH-MM-SS_[label]",
),
click.option(
Expand Down Expand Up @@ -98,15 +100,15 @@ def common_all(fn):
is_flag=True,
default=False,
help="Do not dump the sqlite database to disk. This will speed up your run,"
" but in case of a crashed run no info will be stored and youll have to"
" but in case of a crashed run no info will be stored and you'll have to"
" re-start the run from scratch",
),
click.option(
"--no-interactive",
type=bool,
is_flag=True,
default=False,
help="Do not generate an interactive visualization. This greatly speeds up runs, and "
help="Do not generate an interactive visualization. This speeds up runs, and "
"for runs with a large amount of BGCs, the interactive visualization can fail to load.",
),
click.option(
Expand Down Expand Up @@ -171,7 +173,7 @@ def common_cluster_query(fn):
"--mibig_version",
type=str,
required=False,
help="MIBiG release number (e.g. 3.1). If not provided, MIBiG will not be "
help="MIBiG release number (from 3.1 onwards). If not provided, MIBiG will not be "
"included in the analysis. If required, BiG-SCAPE will download the "
"MIBiG database to ./big_scape/MIBiG/mibig_antismash_<version>_gbk. "
"(Advanced) Any custom MIBiG collection can be used as long as the expected "
Expand All @@ -192,8 +194,8 @@ def common_cluster_query(fn):
default="cluster,region",
callback=validate_filter_gbk,
help=(
"A comma separated list of strings. "
"Only gbk files with this string(s) will be used for the analysis "
"A comma-separated list of strings. Only gbk files that have "
"the string(s) in their filename will be used for the analysis "
"(default: 'cluster,region'). Use an asterisk to accept every "
"file (overrides '--exclude_gbk_str')."
),
Expand Down Expand Up @@ -278,7 +280,7 @@ def common_cluster_query(fn):
"Does not use antiSMASH BGC classes to run analyses on "
"class-based bins, instead it uses BiG-SCAPE v1 predefined groups: "
"PKS1, PKSOther, NRPS, NRPS-PKS-hybrid, RiPP, Saccharide, Terpene, Others. "
"Will also use BiG-SCAPEv1 legacy_weights for distance calculations. "
"Will also use BiG-SCAPE v1 legacy_weights for distance calculations. "
"This feature is available for backwards compatibility with "
"antiSMASH versions up to v7. For higher antiSMASH versions, use "
"at your own risk, as BGC classes may have changed. All antiSMASH "
Expand All @@ -301,7 +303,8 @@ def common_cluster_query(fn):
"of the shortest upstream/downstream arms in a compared pair. "
"'auto': use glocal when at least one of the BGCs in each pair "
"has the 'contig_edge' annotation from antiSMASH v4+, otherwise "
"use global mode on that pair (default: glocal)."
"use global mode on that pair. For an in-depth description, see the wiki."
" (default: glocal)."
),
),
click.option(
Expand All @@ -310,7 +313,8 @@ def common_cluster_query(fn):
default="legacy",
callback=validate_extend_strategy,
help="Strategy to extend BGCs. 'legacy' will use the original BiG-SCAPE extension strategy, "
"while 'greedy' will use a new greedy extension strategy. (default: legacy).",
"while 'greedy' will use a new greedy extension strategy. For an in-depth description,"
" see the wiki. (default: legacy).",
),
# networking parameters
click.option(
Expand Down
2 changes: 1 addition & 1 deletion big_scape/cli/cluster_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@click.option(
"--include_singletons",
is_flag=True,
help=("Include singletons in the network. (default: False)"),
help=("Include singletons in the network."),
)
@click.pass_context
def cluster(ctx, *args, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions big_scape/cli/query_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
required=False,
help=(
"Query BGC record number. Used to select the specific record "
"from the query BGC gbk. Warning: interleaved or chemical hybrid proto "
"cluster/cores are merged, and the relevant number is that of the "
"from the query BGC gbk. Warning: if interleaved or chemical hybrid proto "
"cluster/cores are merged (see config), the relevant number is that of the "
"first record of the merged cluster (the one with the lowest number). "
"e.g. if records 1 and 2 get merged, the relevant number is 1. "
),
Expand Down
38 changes: 20 additions & 18 deletions config.yml
Original file line number Diff line number Diff line change
@@ -1,50 +1,52 @@
# PROFILER
# Update interval in seconds when profiler functionality is active
# Update interval in seconds when profiler functionality is active.
PROFILER_UPDATE_INTERVAL: 0.5

# INPUT
# list of cand_cluster types where subrecords will be merged
# list of cand_cluster types where subrecords will be merged.
MERGED_CAND_CLUSTER_TYPE:
- chemical_hybrid
- interleaved
# Minimum and maximum bgc lengths to be included in the analysis in number of base pairs
# Minimum and maximum bgc lengths to be included in the analysis in number of base pairs.
MIN_BGC_LENGTH: 0
MAX_BGC_LENGTH: 500000

# CDS and DOMAIN
# Specify at which overlap percentage (as a decimal) two CDS in a gbk
# are considered to overlap. This preserves longest overlapping CDS
# are considered to overlap. This preserves longest overlapping CDS.
CDS_OVERLAP_CUTOFF: 0.1
# Specify at which overlap percentage (as a decimal) two domains
# in a CDS are considered to overlap. Domain with the best score is kept
# in a CDS are considered to overlap. Domain with the best score is kept.
DOMAIN_OVERLAP_CUTOFF: 0.1

# LCS
# Minimum length criteria for accepting a Longest Common Subcluster (LCS), measured in
# fraction of included domains in one of the compared records, when comparing:
# Regions or Cand_clusters
# Minimum length for accepting a Longest Common Subcluster (LCS), measured as a
# fraction of included domains (decimal, e.g. 0.1), which must be satisfied in at
# least one of the compared records, when comparing:
# - Regions or Cand_clusters
REGION_MIN_LCS_LEN: 0.1
#Protoclusters or Proto_cores
# - Protoclusters or Proto_cores
PROTO_MIN_LCS_LEN: 0

# EXPAND
# Minimum length citeria for accepting an extended LCS slice, measured in fraction of
# included domains in one of the compared records, when comparing:
# Regions or Cand_clusters with no biosynthetic domains in the extended slice
# Minimum length for accepting an extended LCS slice, measured as a fraction of
# included domains (decimal, e.g. 0.3), which must be satisfied in at least one
# of the compared records, when comparing:
# - Regions or Cand_clusters with no biosynthetic domains in the extended slice
REGION_MIN_EXPAND_LEN: 0.3
# Regions or Cand_clusters with at least one biosynthetic domain in the extended slice
# - Regions or Cand_clusters with at least one biosynthetic domain in the extended slice
REGION_MIN_EXPAND_LEN_BIO: 0.2
# Protoclusters or Proto_cores with at least one biosynthetic domain in the extended slice
# - Protoclusters or Proto_cores with at least one biosynthetic domain in the extended slice
PROTO_MIN_EXPAND_LEN: 0.2
# list of product classes that do not require a minimum length
# List of product classes that do not require a minimum length.
NO_MIN_CLASSES:
- terpene
# Integer scoring metrics used in the LCS extension algorithm for match, mismatch and gap
# Integer scoring metrics used in the LCS extension algorithm for match, mismatch and gap.
EXPAND_MATCH_SCORE: 5
EXPAND_MISMATCH_SCORE: -3
EXPAND_GAP_SCORE: -2
# Maximum distance to accept a matching domain as an actual match during LCS extension
# as a percentage of total domains present in the compared record
# as a percentage of total domains present in the compared record.
EXPAND_MAX_MATCH_PERC: 0.1

# CLUSTER
Expand All @@ -53,7 +55,7 @@ EXPAND_MAX_MATCH_PERC: 0.1
PREFERENCE: 0.0

# TREE
# The number of common domains used to generate GCF trees in top frequencies of occurrence
# The number of common domains used to generate GCF trees in top frequencies of occurrence.
TOP_FREQS: 3

# ANCHOR DOMAINS
Expand Down
Loading