Skip to content

Commit

Permalink
Merge pull request #191 from medema-group/hotfix/disk-only-db
Browse files Browse the repository at this point in the history
re-implement (?) disk-only database
  • Loading branch information
adraismawur authored Oct 14, 2024
2 parents 205b485 + d434016 commit 0f35ec0
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 2 deletions.
7 changes: 6 additions & 1 deletion big_scape/cli/benchmark_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@

# from this module
from .cli_common_options import common_all
from .cli_validations import set_start, validate_output_paths
from .cli_validations import (
set_start,
validate_output_paths,
validate_disk_only,
)


# BiG-SCAPE benchmark mode
Expand Down Expand Up @@ -49,6 +53,7 @@ def benchmark(ctx, *args, **kwargs):

# workflow validations
validate_output_paths(ctx)
validate_disk_only(ctx)

# set start time and label
set_start(ctx.obj)
Expand Down
10 changes: 10 additions & 0 deletions big_scape/cli/cli_common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ def common_all(fn):
"but in case of a crashed run no info will be stored and you'll have to "
"re-start the run from scratch",
),
click.option(
"--disk-only",
type=bool,
is_flag=True,
default=False,
help="Do not store any results in memory, only on disk. This is almost certainly "
"slower than the default behaviour, but can be useful for very large runs or "
"runs with limited memory.",
),
click.option(
"--no-interactive",
type=bool,
Expand Down Expand Up @@ -336,6 +345,7 @@ def common_cluster_query(fn):
help="Use a specific type of antiSMASH record for comparison. (default: region).",
),
]

for opt in options[::-1]:
fn = opt(fn)
return fn
19 changes: 19 additions & 0 deletions big_scape/cli/cli_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,25 @@ def validate_output_paths(ctx) -> None:
return None


# db modes validations
def validate_disk_only(ctx) -> None:
    """Rejects runs that enable both the --no-db-dump and --disk-only modes

    Args:
        ctx: click context; ctx.obj is read as a mapping of run parameters

    Raises:
        RuntimeError: "no_db_dump" or "disk_only" is missing from ctx.obj
        click.UsageError: both "no_db_dump" and "disk_only" are truthy
    """
    params = ctx.obj

    # a missing key is not a user error, so it is reported as an internal problem
    if "no_db_dump" not in params or "disk_only" not in params:
        raise RuntimeError(
            "Something went wrong with the database storage/dumping mode parameters. "
            "Please contact the developers."
        )

    # nothing to complain about unless both modes are switched on together
    if not (params["no_db_dump"] and params["disk_only"]):
        return None

    conflict_msg = (
        "You have selected both --no-db-dump and --disk-only. Please select only one"
    )
    logging.error(conflict_msg)
    raise click.UsageError(conflict_msg)


# comparison validations


Expand Down
2 changes: 2 additions & 0 deletions big_scape/cli/cluster_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .cli_common_options import common_all, common_cluster_query
from .cli_validations import (
validate_output_paths,
validate_disk_only,
validate_binning_cluster_workflow,
validate_pfam_path,
validate_domain_include_list,
Expand Down Expand Up @@ -77,6 +78,7 @@ def cluster(ctx, *args, **kwargs):
validate_pfam_path(ctx)
validate_domain_include_list(ctx)
validate_output_paths(ctx)
validate_disk_only(ctx)

# set start time and run label
set_start(ctx.obj)
Expand Down
2 changes: 2 additions & 0 deletions big_scape/cli/query_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .cli_common_options import common_all, common_cluster_query
from .cli_validations import (
validate_output_paths,
validate_disk_only,
validate_query_bgc,
validate_pfam_path,
set_start,
Expand Down Expand Up @@ -81,6 +82,7 @@ def query(ctx, *args, **kwarg):
validate_output_paths(ctx)
validate_binning_query_workflow(ctx)
validate_query_record(ctx)
validate_disk_only(ctx)

# set start time and label
set_start(ctx.obj)
Expand Down
34 changes: 34 additions & 0 deletions big_scape/data/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,26 @@ def open_memory_connection() -> None:
)
DB.connection = DB.engine.connect()

@staticmethod
def open_disk_connection(db_path: Path) -> None:
    """Open a connection to the SQLite database file at db_path

    On success DB.engine holds the new engine and DB.connection holds a
    connection obtained from it.

    Args:
        db_path (Path): path of the database file, appended to the
            "sqlite:///" URL prefix

    Raises:
        DBAlreadyOpenError: DB.opened() reports that a database is already open
    """
    if DB.opened():
        raise DBAlreadyOpenError()

    DB.engine = create_engine(
        "sqlite:///" + str(db_path),
        # let the underlying sqlite3 connection be used from threads other
        # than the one that created it
        connect_args={"check_same_thread": False},
        # StaticPool maintains exactly one connection for all requests
        poolclass=StaticPool,
    )
    DB.connection = DB.engine.connect()

@staticmethod
def create_on_disk(db_path: Path) -> None:
    """Open a connection to a database file, then create and reflect its tables

    Args:
        db_path (Path): path of the database file, passed on to
            DB.open_disk_connection

    Raises:
        DBAlreadyOpenError: raised by DB.open_disk_connection when a database
            is already open
    """
    # must come first: sets DB.engine and DB.connection
    DB.open_disk_connection(db_path)

    # presumably creates the schema in the freshly opened database
    # (defined outside this view -- TODO confirm)
    DB.create_tables()

    # presumably refreshes the table metadata from the database
    # (defined outside this view -- TODO confirm)
    DB.reflect()

@staticmethod
def create_in_mem() -> None:
"""Create a new database in-memory"""
Expand Down Expand Up @@ -124,6 +144,14 @@ def save_to_disk(db_path: Path, force=False) -> None:
if not force:
return

# skip this if we are using disk-only mode
if click_context and click_context.obj["disk_only"]:
return

# skip this if we are using disk-only mode
if click_context and click_context.obj["disk_only"]:
return

if not DB.opened():
raise DBClosedError()

Expand Down Expand Up @@ -181,6 +209,12 @@ def load_from_disk(db_path: Path) -> None:
if not db_path.exists():
raise FileNotFoundError()

# disk only means we don't have to dump to memory
click_context = click.get_current_context(silent=True)
if click_context and click_context.obj["disk_only"]:
DB.create_on_disk(db_path)
return

file_engine = create_engine("sqlite:///" + str(db_path))
file_engine.connect()

Expand Down
8 changes: 7 additions & 1 deletion big_scape/run_bigscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,14 @@ def signal_handler(sig, frame):

# INPUT - create an in memory bs_data.DB or load from disk
if not run["db_path"].exists():
bs_data.DB.create_in_mem()
if run["disk_only"]:
logging.info("Creating on disk database")
bs_data.DB.create_on_disk(run["db_path"])
else:
logging.info("Creating in memory database")
bs_data.DB.create_in_mem()
else:
logging.info("Loading database from disk")
bs_data.DB.load_from_disk(run["db_path"])
bs_data.DB.check_config_hash()

Expand Down

0 comments on commit 0f35ec0

Please sign in to comment.