diff --git a/docs/source/index.md b/docs/source/index.md index 7ceb832a..994e0d67 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -157,13 +157,11 @@ cli/mv :hidden: :maxdepth: 2 :caption: Snakemake Integration +snakemake/overview.md snakemake/quickstart.md -snakemake/tutorial.md -snakemake/metadata.md -snakemake/environments.md -snakemake/lifecycle.md -snakemake/resources.md snakemake/cloud.md +snakemake/configuration.md +snakemake/tutorial.md snakemake/debugging.md snakemake/troubleshooting.md ``` diff --git a/docs/source/snakemake/configuration.md b/docs/source/snakemake/configuration.md new file mode 100644 index 00000000..895a0ad5 --- /dev/null +++ b/docs/source/snakemake/configuration.md @@ -0,0 +1,7 @@ +# Advanced Configuration + +```{toctree} +:maxdepth: 2 +configuration/resources.md +configuration/registries.md +``` diff --git a/docs/source/snakemake/configuration/registries.md b/docs/source/snakemake/configuration/registries.md new file mode 100644 index 00000000..e47a2f7b --- /dev/null +++ b/docs/source/snakemake/configuration/registries.md @@ -0,0 +1,32 @@ +# Private Container Registries + +When executing Snakemake workflows in containers, the container images may exist in a private registry that the Latch cloud cannot access. Downloading images from private registries at runtime requires two steps: + +1. Upload your private container registry's password/access token to the Latch platform. See [Storing and using Secrets](../../basics/adding_secrets.md). +2. Add the `docker_metadata` field to your workflow's `SnakemakeMetadata` object so the workflow engine knows where to pull your credentials. 
For example: + +``` +# latch_metadata.py +from latch.types.metadata import SnakemakeMetadata, SnakemakeFileParameter, DockerMetadata, EnvironmentConfig +from latch.types.directory import LatchDir +from latch.types.metadata import LatchAuthor, LatchMetadata, LatchParameter +from pathlib import Path + +SnakemakeMetadata( + display_name="snakemake_tutorial_workflow", + author=LatchAuthor( + name="latchbio", + ), + env_config=EnvironmentConfig( + use_conda=False, + use_container=True, + ), + docker_metadata=DockerMetadata( + username="user0", + secret_name="LATCH_SECRET_NAME", + ), + ... +) +``` + +**Note**: the `secret_name` field specifies the name of the Latch Secret uploaded in step #1, NOT the actual registry password. diff --git a/docs/source/snakemake/resources.md b/docs/source/snakemake/configuration/resources.md similarity index 98% rename from docs/source/snakemake/resources.md rename to docs/source/snakemake/configuration/resources.md index 2fa9c9cb..479fae1d 100644 --- a/docs/source/snakemake/resources.md +++ b/docs/source/snakemake/configuration/resources.md @@ -28,7 +28,7 @@ SnakemakeMetadata( name="Your Name", ), parameters=generated_parameters, - cores=8, # added + cores=8, # updated ) ``` diff --git a/docs/source/snakemake/environments.md b/docs/source/snakemake/environments.md deleted file mode 100644 index 65ce2951..00000000 --- a/docs/source/snakemake/environments.md +++ /dev/null @@ -1,76 +0,0 @@ -# Environments - -When registering a Snakemake workflow on Latch, we need to build a single container image containing all your runtime dependencies and the Latch packages. By default, all tasks (including the JIT step) will run inside this container. - -To generate a Dockerfile with all the Latch-specific dependencies, run the following command from inside your workflow directory: - -```console -latch dockerfile . --snakemake -``` - -Inspect the resulting Dockerfile and add any runtime dependencies required for your workflow. 
- -## Configuring Task Environments - -Sometimes, it is preferable to use isolated environments for each Snakemake rule using the `container` and `conda` [Snakemake directives](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#running-jobs-in-containers) instead of building one large image. When using the `container` directive, the Latch workflow will: - -1. Launch the workflow container. -2. Execute Latch-specific commands to setup the Snakemake job. -3. Pull the user-specified image and execute the Snakemake job in a sub-container of the workflow container. - -To configure your environment on Latch, add the `env_config` field to your workflow's `SnakemakeMetadata` object (this field is similar to the `--use-conda` and `--use-singularity` flags in Snakemake). For example: - -``` -# latch_metadata.py -from latch.types.metadata import SnakemakeMetadata, SnakemakeFileParameter, EnvironmentConfig -from latch.types.directory import LatchDir -from latch.types.metadata import LatchAuthor, LatchMetadata, LatchParameter -from pathlib import Path - -SnakemakeMetadata( - display_name="snakemake_tutorial_workflow", - author=LatchAuthor( - name="latchbio", - ), - env_config=EnvironmentConfig( - use_conda=False, - use_container=True, - ), - ... -) -``` - -**Note**: If no `env_config` is defined, Snakemake tasks on Latch will NOT use containers or conda environments by default. - -## Using Private Container Registries - -When executing Snakemake workflows in containers, the container images may exist in a private registry that the Latch cloud cannot access. Downloading images from private registries at runtime requires two steps: - -1. Upload your private container registry's password/access token to the Latch platform. See [Storing and using Secrets](../basics/adding_secrets.md). -2. Add the `docker_metadata` field to your workflow's `SnakemakeMetadata` object so the workflow engine knows where to pull your credentials. 
For example: - -``` -# latch_metadata.py -from latch.types.metadata import SnakemakeMetadata, SnakemakeFileParameter, DockerMetadata -from latch.types.directory import LatchDir -from latch.types.metadata import LatchAuthor, LatchMetadata, LatchParameter -from pathlib import Path - -SnakemakeMetadata( - display_name="snakemake_tutorial_workflow", - author=LatchAuthor( - name="latchbio", - ), - env_config=EnvironmentConfig( - use_conda=False, - use_container=True, - ), - docker_metadata=DockerMetadata( - username="user0", - secret_name="LATCH_SECRET_NAME", - ), - ... -) -``` - -**Note**: the `secret_name` field specifies the name of the Latch Secret uploaded in step #1, NOT the actual registry password. diff --git a/docs/source/snakemake/metadata.md b/docs/source/snakemake/metadata.md deleted file mode 100644 index e6aa056c..00000000 --- a/docs/source/snakemake/metadata.md +++ /dev/null @@ -1,138 +0,0 @@ -# Metadata - -The Snakemake framework was designed to allow developers to both define and execute their workflows. This often means that the workflow parameters are sometimes ill-defined and scattered throughout the project as configuration values, static values in the `Snakefile`, or command line flags. - -To construct a graphical interface from a Snakemake workflow, the file parameters need to be explicitly identified and defined so that they can be presented to scientists through a web application. - -## Generating Latch Metadata - -The `latch_metadata` folder holds these parameter definitions. - -To generate Latch metadata from a config file, type: - -```console -latch generate-metadata -``` - -The command automatically parses the existing `config.yaml` file in the Snakemake repository to create a `SnakemakeMetadata` object. Below is an explanation of the most relevant fields: - -#### output_dir - -A `LatchDir` object that points to the location in Latch Data where the Snakemake outputs will be stored after the workflow has finished executing. 
- -#### parameters - -Input parameters to the workflow. The Latch Console will expose these parameters to scientists before they execute the workflow. - -#### file_metadata - -Every input parameter of type `LatchFile` or `LatchDir` must have a corresponding `SnakemakeFileMetadata` in the `file_metadata` field. The `SnakemakeFileMetadata` object provides important metadata about the file to the workflow, such as: - -1. `path`: The local path inside the container where the workflow engine will copy Latch Data files/directories before the job executes -2. `config`: If `True`, exposes the local file path in the Snakemake config -3. `download`: If `True`, downloads the file in the JIT step instead of creating an empty file. - **Note**: To limit network consumption, only files, such as configuration files, used by the Snakefile at compilation time should set this field to `True`. - -## Example - -Below is an example `config.yaml` file and corresponding latch metadata after running `latch generate-metadata` - -`config.yaml` - -```yaml -paths: - sample_dir: data/samples/ - reference_dir: reference/ - -manifest: manifest.tsv - -metadata: - threads: 32 - num_samples: 2 -``` - -The `latch_metadata/` folder generated from the `latch generate-metadata` command contains two files: - -``` -├── config.yaml -├── latch_metadata -│   └── __init__.py -│   └── parameters.py -``` - -```python -# latch_metadata/__init__.py -from latch.types.metadata import SnakemakeMetadata, LatchAuthor -from latch.types.directory import LatchDir - -from .parameters import generated_parameters, file_metadata - -SnakemakeMetadata( - output_dir=LatchDir("latch:///your_output_directory"), - display_name="Your Workflow Name", - author=LatchAuthor( - name="Your Name", - ), - parameters=generated_parameters, - file_metadata=file_metadata, -) -``` - -```python -# latch_metadata/parameters.py -from dataclasses import dataclass -import typing - -from latch.types.metadata import SnakemakeParameter, 
SnakemakeFileParameter, SnakemakeFileMetadata -from latch.types.file import LatchFile -from latch.types.directory import LatchDir - -@dataclass -class paths: - sample_dir: LatchDir - reference_dir: LatchDir - - -@dataclass -class metadata: - threads: int - num_samples: int - - -generated_parameters = { - 'paths': SnakemakeParameter( - display_name='Paths', - type=paths, - ), - 'manifest': SnakemakeParameter( - display_name='Manifest', - type=LatchFile, - ), - 'metadata': SnakemakeParameter( - display_name='Metadata', - type=metadata, - default=metadata(threads=32, num_samples=2), - ), -} - -file_metadata = { - 'paths': { - 'sample_dir': SnakemakeFileMetadata( - path='data/samples/', - config=True, - ), - 'reference_dir': SnakemakeFileMetadata( - path='reference/', - config=True, - ), - }, - 'manifest': SnakemakeFileMetadata( - path='manifest.tsv', - config=True, - ), -} -``` - -After registering the above workflow to Latch, you will see an interface like the one below: - -![Snakemake workflow GUI](../assets/snakemake/metadata.png) diff --git a/docs/source/snakemake/lifecycle.md b/docs/source/snakemake/overview.md similarity index 84% rename from docs/source/snakemake/lifecycle.md rename to docs/source/snakemake/overview.md index 03a5220e..f553b1de 100644 --- a/docs/source/snakemake/lifecycle.md +++ b/docs/source/snakemake/overview.md @@ -1,4 +1,10 @@ -# Snakemake Execution Lifecycle +# Overview + +Latch's Snakemake integration allows developers to build graphical interfaces to expose their Snakemake workflows to wet lab teams. It also provides managed cloud infrastructure for executing the workflow's jobs. + +A primary goal for the Snakemake integration is to allow developers to register existing Snakemake projects with minimal added boilerplate and modifications to code. + +## Snakemake Execution on Latch There are two stages to every Snakemake execution: @@ -12,15 +18,10 @@ The first ("JIT") workflow does the following: 1. 
Create empty input files; this enables the JIT task to mock the file structure at runtime without using unnecessary network bandwidth from downloading the entire file 2. Import the Snakefile, calculate the dependency graph, and determine which jobs need to be run 3. Generate a Latch SDK workflow Python script for the second ("runtime") workflow and register it -4. Run the runtime workflow using the same inputs +4. Run the runtime workflow ![JIT task execution](../assets/snakemake/jit-task-with-logs.jpg) -Debugging: - -- The generated runtime workflow entrypoint is uploaded to `latch:///.snakemake_latch/workflows//entrypoint.py` -- Internal workflow specifications are uploaded to `latch:///.snakemake_latch/workflows//spec` - ### Runtime Workflow The runtime workflow will spawn a task per each Snakemake job. This means there will be a separate task per each wildcard instantiation of each rule. This can lead to workflows with hundreds of tasks. Note that the execution graph can be filtered by task status. @@ -33,7 +34,7 @@ When a task executes, it will: ![Runtime execution](../assets/snakemake/snakemake-execution.jpg) -### Limitations +## Limitations 1. The workflow will execute the first rule defined in the Snakefile (matching standard Snakemake behavior). There is no way to change the default rule other than by moving the desired rule up in the file 1. Rules only download their inputs, which can be a subset of the input files. 
If the Snakefile tries to read input files outside of the ones explicitly defined in the rule, it will usually fail at runtime diff --git a/docs/source/snakemake/quickstart.md b/docs/source/snakemake/quickstart.md index ee921af3..8778b94c 100644 --- a/docs/source/snakemake/quickstart.md +++ b/docs/source/snakemake/quickstart.md @@ -1,16 +1,8 @@ -# Quickstart +# Uploading an Existing Snakemake Workflow -## Motivation +The following guide will walk you through how to upload an existing Snakemake workflow to the Latch console in four simple steps. If you do not already have a Snakemake workflow, see the [Tutorial](./tutorial.md) to get started. -Latch's Snakemake integration allows developers to build graphical interfaces to expose their Snakemake workflows to wet lab teams. It also provides managed cloud infrastructure for executing the workflow's jobs. - -A primary design goal for the Snakemake integration is to allow developers to register existing projects with minimal added boilerplate and modifications to code. - -## Uploading an Existing Workflow to Latch - -The following guide will outline how to upload an existing Snakemake workflow to the Latch console with three simple commands. If you do not already have a Snakemake workflow that you would like to register, see the [Tutorial](./tutorial.md) to get started. - -### Prerequisites +## Prerequisites - Register for an account and log into the [Latch Console](https://console.latch.bio) - Install a compatible version of Python. The Latch SDK is currently only supported for Python >=3.8 and <=3.11 @@ -29,42 +21,114 @@ $ latch --version latch, version 2.38.8 ``` -### Step 1: Generate Metadata +## Step 1: Ensure your Snakefile is Cloud Compatible -Every Latch workflow requires the developer to define a workflow metadata object. The Latch Console uses this metadata to expose workflow input parameters to scientists in the UI. 
The Latch SDK provides a command to automatically generate this metadata file from a `config.yaml`. +Update your workflow's `Snakefile` to ensure compatibility with [cloud execution](./cloud.md) on Latch. + +## Step 2: Define Metadata and Input Parameters + +To construct a graphical interface from a Snakemake workflow, the input parameters need to be explicitly identified and defined so that they can be presented to scientists through a web application. + +The Latch SDK expects these parameters to be defined as a Python `SnakemakeMetadata` object. Fortunately, the Latch SDK provides a command to automatically generate this object from an existing config file. Run the following command in the root directory of your project: ```console -latch generate-metadata +latch generate-metadata config.yaml +``` + +This command will generate two files: + ``` +latch_metadata/__init__.py +latch_metadata/parameters.py +``` + +The first file holds the `SnakemakeMetadata` object, and the second file contains the input parameter definitions. **Be sure to inspect the resulting files to verify that the input parameters are as expected.** + +Below is an explanation of the most relevant fields of the `SnakemakeMetadata` object: + +#### display_name + +Display name of the workflow, as it will appear in the Latch UI. + +#### output_dir -Be sure to inspect the generated files in the `latch_metadata` folder to verify that the types of the input parameters are as expected. Note that all input files hosted on Latch Data must be either a `LatchFile` or `LatchDir` type. +Points to the folder in Latch Data where Snakemake outputs are stored after the workflow finishes executing. -To learn more about Latch metadata for Snakemake, click [here](./metadata.md). +#### parameters -### Step 2: Define Container Environment +Input parameters to the workflow, defined as `SnakemakeParameter` objects. The Latch Console will expose these parameters to scientists before they execute the workflow. 
The `type` and `default` fields for each parameter are inferred from the config file when the `generate-metadata` command is run. -All Snakemake workflows run in a Docker container, which includes the Latch-specific dependencies required to run workflows on the Latch platform. To generate this Dockerfile, run the following command in your root directory: +#### file_metadata + +Every input parameter of type `LatchFile` or `LatchDir` must have a corresponding `SnakemakeFileMetadata` in the `file_metadata` field. `LatchFile` and `LatchDir` are pointers to remote files that are hosted on the Latch platform. The `SnakemakeFileMetadata` object provides required metadata about each remote file, such as: + +1. `path`: The local path inside the container where the workflow engine will copy Latch Data files/directories before the job executes +2. `config`: If `True`, exposes the local file path in the Snakemake config +3. `download`: If `True`, downloads the file in the JIT step instead of creating an empty file. + **Note**: To limit network consumption, the `download` field should only be `True` for files that the Snakefile reads at compilation time (such as configuration files). + +## Step 3: Define Container Environment + +When registering a Snakemake workflow on Latch, we need to build a single container image containing all your runtime dependencies and the Latch packages. By default, all tasks (including the JIT step) will run inside this container. + +The Latch SDK provides a command to generate a Dockerfile with all the Latch-specific dependencies required to run Snakemake workflows in the cloud. This command also attempts to infer and install the workflow's runtime dependencies based on the configuration files in your root directory. Read [here](../basics/defining_environment.md) for more details. + +Run the following command in the root directory of your project: ```console latch dockerfile . 
--snakemake ``` -By default, each Snakemake job will execute in this Docker container; therefore, this Dockerfile should specify runtime dependencies for your workflow. If your workflow has an `environment.yaml` in the root directory, the generated Dockerfile will use conda to install the packages in your environment file. Otherwise, you will need to install dependencies manually. +Inspect the resulting `Dockerfile` and **verify that it installs all required runtime dependencies for your workflow**. + +#### Per-Task Environments + +Sometimes, it is preferable to use isolated environments for each Snakemake rule using the `container` and `conda` [Snakemake directives](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#running-jobs-in-containers) instead of building one large image. -For more advanced environment setups (such as the use of the `container` and `conda` Snakemake directives), click [here](./environments.md). +When using the `container` directive, the Latch workflow will: -### Step 3: Register the Workflow +1. Launch the workflow container. +2. Execute Latch-specific commands to set up the Snakemake job. +3. Pull the user-specified image and execute the Snakemake job in a sub-container of the workflow container. + +To configure your environment on Latch, add the `env_config` field to your workflow's `SnakemakeMetadata` object in `latch_metadata/__init__.py` (this field is similar to the `--use-conda` and `--use-singularity` flags in Snakemake). For example: + +``` +# latch_metadata/__init__.py +from latch.types.metadata import SnakemakeMetadata, SnakemakeFileParameter, EnvironmentConfig +from latch.types.directory import LatchDir +from latch.types.metadata import LatchAuthor, LatchMetadata, LatchParameter +from pathlib import Path + +SnakemakeMetadata( + display_name="snakemake_tutorial_workflow", + author=LatchAuthor( + name="latchbio", + ), + env_config=EnvironmentConfig( + use_conda=False, + use_container=True, + ), + ... 
+) +``` + +## Step 4: Register the Workflow To register a Snakemake workflow to Latch, type: ```console +latch login latch register . --snakefile Snakefile ``` -We highly recommend reading about the [Snakemake Execution Lifecycle](./lifecycle.md) on Latch to understand what happens after registering and executing your workflow. +After registering your workflow to Latch, click on the link provided in the output of the `latch register` command. This will take you to an interface like the one below: + +![Snakemake workflow GUI](../assets/snakemake/metadata.png) + +To execute the workflow, provide appropriate input parameters and click `Launch Workflow` in the bottom right. -### Next Steps +## Next Steps -- Ensure your `Snakefile` is compatible with [cloud execution](./cloud.md) on Latch. -- See the [Resources](./resources.md) guide to configure resource requirements for your workflow. +- See the [Advanced Configuration](./configuration.md) section to learn about more advanced workflow settings. - See the [Troubleshooting](./troubleshooting.md) guide for debugging common workflow issues. diff --git a/docs/source/snakemake/tutorial.md b/docs/source/snakemake/tutorial.md index 64fa03dd..9c92be68 100644 --- a/docs/source/snakemake/tutorial.md +++ b/docs/source/snakemake/tutorial.md @@ -39,8 +39,6 @@ You can automatically generate the required metadata files from an existing `con latch generate-metadata config.yaml ``` -To learn more about the `generate-metadata` command, see [Metadata](./metadata.md) - This command will create a `latch_metadata` folder in your workflow directory: ``` @@ -121,7 +119,7 @@ How does the orchestrator know which local path to download the remote files? Fo ## Step 3: Define Workflow Environment -To execute Snakemake workflows in a cloud environment, we must define a single Docker container to run each task in. 
This container must contain both the runtime dependencies for the Snakemake tasks and Latch-specific dependencies (such as the Latch SDK). To learn more about managing task dependencies, read about [Environments](./environments.md). +To execute Snakemake workflows in a cloud environment, we must define a single Docker container to run each task in. This container must contain both the runtime dependencies for the Snakemake tasks and Latch-specific dependencies (such as the Latch SDK). Fortunately, the Latch SDK provides a convenient command to generate a Dockerfile with the required Latch dependencies. Run the following in your workflow directory: @@ -199,13 +197,12 @@ Once you have uploaded the data and selected the appropriate input parameters, c ![JIT task execution](../assets/snakemake/jit-task.png) -Snakemake support currently uses JIT (Just-In-Time) registration. This means that once the single-task workflow above is complete, it will produce a second workflow, which runs the actual Snakemake jobs. To learn more about the lifecycle of a Snakemake workflow on Latch, click [here](./lifecycle.md). +Snakemake support currently uses JIT (Just-In-Time) registration. This means that once the single-task workflow above is complete, it will produce a second workflow, which runs the actual Snakemake jobs. To learn more about the lifecycle of a Snakemake workflow on Latch, click [here](./overview.md). Once the workflow finishes running, results will be deposited under the `output_dir` folder, as defined in your Latch Metadata. ## Next Steps -- Learn more about the lifecycle of a Snakemake workflow on Latch by reading our [manual](../snakemake/lifecycle.md). - Learn about how to modify Snakemake workflows to be cloud-compatible [here](../snakemake/cloud.md). - Visit [troubleshooting](../snakemake/troubleshooting.md) to diagnose and find solutions to common issues. 
- Visit the repository of [public examples](https://github.com/latchbio/latch-snakemake-examples) of Snakemake workflows on Latch. diff --git a/latch_cli/services/register/utils.py b/latch_cli/services/register/utils.py index 148f8ea7..d492c5fe 100644 --- a/latch_cli/services/register/utils.py +++ b/latch_cli/services/register/utils.py @@ -176,7 +176,7 @@ def register_serialized_pkg( headers=headers, files=serialize_files, ) - + response.raise_for_status() return response.json() diff --git a/latch_cli/snakemake/config/parser.py b/latch_cli/snakemake/config/parser.py index dbd8c677..3b194440 100644 --- a/latch_cli/snakemake/config/parser.py +++ b/latch_cli/snakemake/config/parser.py @@ -209,7 +209,7 @@ def generate_metadata( metadata_path.write_text( reindent( r""" - from latch.types.metadata import SnakemakeMetadata, LatchAuthor + from latch.types.metadata import SnakemakeMetadata, LatchAuthor, EnvironmentConfig from latch.types.directory import LatchDir from .parameters import generated_parameters, file_metadata @@ -220,14 +220,21 @@ def generate_metadata( author=LatchAuthor( name="Your Name", ), + env_config=EnvironmentConfig( + use_conda=False, + use_container=False, + ), + cores=4, # Add more parameters parameters=generated_parameters, file_metadata=file_metadata, + ) """, 0, ) ) + click.secho("Generated `latch_metadata/__init__.py`.", fg="green") params_path = metadata_root / Path("parameters.py") if ( @@ -272,3 +279,4 @@ def generate_metadata( .replace("__params__", "\n".join(params)) .replace("__file_metadata__", "".join(file_metadata)) ) + click.secho("Generated `latch_metadata/parameters.py`.", fg="green")