Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for nested file paths in latch metadata #381

Merged
merged 24 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6b1328b
SnakemakeFileParameter -> SnakemakeFileMetadata
rahuldesai1 Jan 20, 2024
daaa19a
add separate file_metadata field in generate-metadata command
rahuldesai1 Jan 24, 2024
cae1552
use path in config for default field
rahuldesai1 Jan 24, 2024
4036ad9
move reindent function to utils file
rahuldesai1 Jan 24, 2024
6c400e4
recursively upload files in input parameters
rahuldesai1 Jan 24, 2024
6132847
bug fixes
rahuldesai1 Jan 25, 2024
fadbf1a
remove None value in file_metadata
rahuldesai1 Jan 25, 2024
7406718
add guard around input file list size
rahuldesai1 Jan 25, 2024
020c5e3
add defaults back
rahuldesai1 Jan 25, 2024
85d1f5f
more bug fixes
rahuldesai1 Jan 25, 2024
81d9080
cleanup config updating logic
rahuldesai1 Jan 25, 2024
444bedc
cleanup
rahuldesai1 Jan 25, 2024
2736aa7
Merge remote-tracking branch 'origin/main' into rahuldesai1/snakemake…
rahuldesai1 Jan 25, 2024
0a0bf41
update environments doc to remove params
rahuldesai1 Jan 25, 2024
02f1070
update snakemake metadata docs
rahuldesai1 Jan 25, 2024
78d7a8a
Merge remote-tracking branch 'origin/main' into rahuldesai1/snakemake…
rahuldesai1 Jan 25, 2024
3563666
resolve PR comments
rahuldesai1 Jan 26, 2024
f0a2c53
add none default check to metadata post-init
rahuldesai1 Jan 26, 2024
ef952de
update tutorial to include new metadata generation code
rahuldesai1 Jan 26, 2024
271208b
disable support for lists containing LatchFile/LatchDir
rahuldesai1 Jan 26, 2024
374be13
disable support for lists containing LatchFile/LatchDir in generate-m…
rahuldesai1 Jan 26, 2024
c3ef8c0
resolve pr comments
rahuldesai1 Jan 26, 2024
3aafd6c
add type checking for Snakemake metadata defaults
rahuldesai1 Jan 27, 2024
230af84
cleanup
rahuldesai1 Jan 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions latch/types/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ class _IsDataclass(Protocol):
Type[Enum],
Type[_IsDataclass],
Type[List["ParameterType"]],
Type[LatchFile],
Type[LatchDir],
]


Expand All @@ -393,6 +395,7 @@ class SnakemakeParameter(LatchParameter):
default: Optional[Any] = None


# DEPRECATED: use `file_metadata` keyword in `SnakemakeMetadata` instead
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
@dataclass
class SnakemakeFileParameter(SnakemakeParameter):
type: Optional[
Expand All @@ -418,6 +421,22 @@ class SnakemakeFileParameter(SnakemakeParameter):
"""


@dataclass
class SnakemakeFileMetadata:
path: Optional[Path] = None
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
"""
The path where the file passed to this parameter will be copied.
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
"""
config: bool = False
"""
Whether or not the file path is exposed in the Snakemake config
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
"""
download: bool = False
"""
Whether or not the file is downloaded in the JIT step
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
"""


@dataclass
class LatchMetadata:
"""Class for organizing workflow metadata
Expand Down Expand Up @@ -549,6 +568,9 @@ class EnvironmentConfig:
"""


FileMetadata: TypeAlias = Dict[str, Union[SnakemakeFileMetadata, "FileMetadata"]]


@dataclass
class SnakemakeMetadata(LatchMetadata):
"""Class for organizing Snakemake workflow metadata"""
Expand All @@ -573,6 +595,10 @@ class SnakemakeMetadata(LatchMetadata):
"""
A dictionary mapping parameter names (strings) to `SnakemakeParameter` objects
"""
file_metadata: FileMetadata = field(default_factory=dict)
"""
A dictionary mapping parameter names to `SnakemakeFileMetadata` objects, or to nested dictionaries of the same form
"""

def __post_init__(self):
if self.name is None:
Expand Down
106 changes: 83 additions & 23 deletions latch_cli/snakemake/config/parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dataclasses import fields, is_dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Type, TypeVar

Expand All @@ -6,11 +7,19 @@

from latch.types.directory import LatchDir
from latch.types.file import LatchFile
from latch_cli.snakemake.workflow import reindent
from latch_cli.snakemake.utils import reindent
from latch_cli.utils import identifier_from_str

from ..serialize_utils import best_effort_display_name
from .utils import JSONValue, get_preamble, parse_type, parse_value, type_repr
from .utils import (
JSONValue,
get_preamble,
is_list_type,
is_primitive_type,
parse_type,
parse_value,
type_repr,
)

T = TypeVar("T")

Expand Down Expand Up @@ -57,13 +66,60 @@ def parse_config(
parsed: Dict[str, Type] = {}
for k, v in res.items():
typ = parse_type(v, k, infer_files=infer_files)
val = parse_value(typ, v)
val, default = parse_value(typ, v)

parsed[k] = (typ, val)
parsed[k] = (typ, (val, default))

return parsed


def file_metadata_str(typ: Type, value: JSONValue, level: int = 0):
if is_primitive_type(typ):
return None
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved

if typ in {LatchFile, LatchDir}:
return reindent(
f"""\
SnakemakeFileMetadata(
path={repr(value)},
config=True,
),\n""",
level,
)

metadata: List[str] = []
if is_list_type(typ):
template = """
[
__metadata__],\n"""
for val in value:
metadata_str = file_metadata_str(typ.__args__[0], val, level + 1)
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
if metadata_str is None:
continue
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
metadata.append(metadata_str)
else:
template = """
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
{
__metadata__},\n"""
assert is_dataclass(typ)
for field in fields(typ):
metadata_str = file_metadata_str(
field.type, value.__getattribute__(field.name), level
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved
)
if metadata_str is None:
continue
metadata_str = f"{repr(identifier_from_str(field.name))}: {metadata_str}"
metadata.append(reindent(metadata_str, level + 1))

if len(metadata) == 0:
return None
rahuldesai1 marked this conversation as resolved.
Show resolved Hide resolved

return reindent(
template,
level,
).replace("__metadata__", "".join(metadata), level + 1)


# todo(ayush): print informative stuff here ala register
def generate_metadata(
config_path: Path,
Expand All @@ -76,37 +132,35 @@ def generate_metadata(

preambles: List[str] = []
params: List[str] = []
file_metadata: List[str] = []

for k, (typ, val) in parsed.items():
for k, (typ, (val, default)) in parsed.items():
preambles.append(get_preamble(typ))

is_file = typ in {LatchFile, LatchDir}
param_typ = "SnakemakeFileParameter" if is_file else "SnakemakeParameter"

param_str = reindent(
f"""\
{repr(identifier_from_str(k))}: {param_typ}(
{repr(identifier_from_str(k))}: SnakemakeParameter(
display_name={repr(best_effort_display_name(k))},
type={type_repr(typ)},
__config____default__),""",
__default__),""",
0,
)

config = ""
if is_file:
config = " config=True,\n"

param_str = param_str.replace("__config__", config)

default = ""
if generate_defaults and val is not None:
default = f" default={repr(val)},\n"
default_str = ""
if generate_defaults and default is not None:
default_str = f" default={repr(default)},\n"

param_str = param_str.replace("__default__", default)
param_str = param_str.replace("__default__", default_str)

param_str = reindent(param_str, 1)
params.append(param_str)

metadata_str = file_metadata_str(typ, val)
if metadata_str is None:
continue
metadata_str = f"{repr(identifier_from_str(k))}: {metadata_str}"
file_metadata.append(reindent(metadata_str, 1))

metadata_root = Path("latch_metadata")
if metadata_root.is_file():
if not click.confirm("A file exists at `latch_metadata`. Delete it?"):
Expand Down Expand Up @@ -146,7 +200,7 @@ def generate_metadata(
from latch.types.metadata import SnakemakeMetadata, LatchAuthor
from latch.types.directory import LatchDir

from .parameters import generated_parameters
from .parameters import generated_parameters, file_metadata

SnakemakeMetadata(
output_dir=LatchDir("latch:///your_output_directory"),
Expand All @@ -156,6 +210,7 @@ def generate_metadata(
),
# Add more parameters
parameters=generated_parameters,
file_metadata=file_metadata,
)
""",
0,
Expand All @@ -178,22 +233,27 @@ def generate_metadata(
from dataclasses import dataclass
import typing

from latch.types.metadata import SnakemakeParameter, SnakemakeFileParameter
from latch.types.metadata import SnakemakeParameter, SnakemakeFileParameter, SnakemakeFileMetadata
from latch.types.file import LatchFile
from latch.types.directory import LatchDir

__preambles__

# Import these into your `__init__.py` file:
#
# from .parameters import generated_parameters
# from .parameters import generated_parameters, file_metadata

generated_parameters = {
__params__
}

file_metadata = {
__file_metadata__}

""",
0,
)
.replace("__preambles__", "".join(preambles))
.replace("__params__", "\n".join(params))
.replace("__file_metadata__", "".join(file_metadata))
)
33 changes: 22 additions & 11 deletions latch_cli/snakemake/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
"hdf5",
"he5",
"h5ad",
"yaml",
"yml",
}


Expand All @@ -108,11 +110,13 @@ def parse_type(
parse_type(
x,
name,
infer_files=False, # todo(ayush): enable recursive file inference
infer_files=infer_files,
)
for x in v
)
return List[Union[parsed_types]]
if len(set(parsed_types)) > 1:
raise ValueError("all types in List must be same")
return List[parsed_types[0]]

assert isinstance(v, dict)

Expand All @@ -124,44 +128,47 @@ def parse_type(
fields[identifier_from_str(k)] = parse_type(
x,
k,
infer_files=False, # todo(ayush): enable recursive file inference
infer_files=infer_files,
)

return make_dataclass(identifier_from_str(name), fields.items())


# returns raw value and generated default
def parse_value(t: Type, v: JSONValue):
if v is None:
return None
return None, None

if t in {LatchFile, LatchDir}:
# ayush: autogenerated defaults don't make sense for files/dirs since their
# value in the config is their local path
return None
return v, None

if is_primitive_value(v):
return v
return v, v

if isinstance(v, list):
assert get_origin(t) is list

sub_type = get_args(t)[0]

return [parse_value(sub_type, x) for x in v]
res = [parse_value(sub_type, x) for x in v]
return [x[0] for x in res], [x[1] for x in res]

assert isinstance(v, dict), v
assert is_dataclass(t), t

ret = {}
defaults = {}
fs = {identifier_from_str(f.name): f for f in fields(t)}

for k, x in v.items():
sanitized = identifier_from_str(k)
assert sanitized in fs, sanitized
val, default = parse_value(fs[sanitized].type, x)
ret[sanitized] = val
defaults[sanitized] = default

ret[sanitized] = parse_value(fs[sanitized].type, x)

return t(**ret)
return t(**ret), t(**defaults)


def is_primitive_type(
Expand All @@ -174,6 +181,10 @@ def is_primitive_value(val: object) -> TypeGuard[Union[None, str, bool, int, flo
return is_primitive_type(type(val))


def is_list_type(typ: Type) -> TypeGuard[Type[List]]:
return get_origin(typ) is list


def type_repr(t: Type, *, add_namespace: bool = False) -> str:
if is_primitive_type(t) or t is LatchFile or t is LatchDir:
return t.__name__
Expand Down
8 changes: 8 additions & 0 deletions latch_cli/snakemake/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from pathlib import Path
from typing import Optional

Expand All @@ -16,3 +17,10 @@ def load_snakemake_metadata(pkg_root: Path) -> Optional[Path]:
import_module_by_path(old_meta)

return old_meta


# todo(maximsmol): use a stateful writer that keeps track of indent level
def reindent(x: str, level: int) -> str:
    """Dedent ``x`` and re-indent it by ``level`` indentation units.

    A single leading newline is dropped first, so triple-quoted templates
    can start on the line after the opening quotes.

    Args:
        x: Text to normalise; may be empty.
        level: Number of indentation units prefixed to each line.

    Returns:
        The dedented text with the indent prefix applied by
        ``textwrap.indent``.
    """
    # ``startswith`` rather than ``x[0]``: indexing an empty string raises
    # IndexError, while an empty template should simply stay empty.
    if x.startswith("\n"):
        x = x[1:]
    return textwrap.indent(textwrap.dedent(x), " " * level)
Loading
Loading