Skip to content

Commit

Permalink
feat: Validate dependencies.yaml using jsonschema (#29)
Browse files Browse the repository at this point in the history
This PR enables validating the contents of a dependencies.yaml file
directly without doing any processing. The schema is encoded using [JSON
Schema](https://json-schema.org/) and validated using [the Python
implementation](https://python-jsonschema.readthedocs.io/). The new
Python code is fairly minimal, and it would be even shorter except that
I leveraged the object-oriented API to show all errors in a file instead
of simply showing the first error using `jsonschema.validate`. The
majority of the new lines are from the schema definition. The validation
is injected into the normal CLI usage so that schemas are always
validated before dependency files are generated, ensuring that
developers see useful errors about why their dependencies.yaml file is
invalid rather than opaque runtime errors when dfg fails to use the
file.

---------

Co-authored-by: Simon Adorf <[email protected]>
  • Loading branch information
vyasr and csadorf authored Feb 8, 2023
1 parent e710ac0 commit f7e8234
Show file tree
Hide file tree
Showing 10 changed files with 310 additions and 17 deletions.
11 changes: 10 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ repos:
args:
- --py38-plus
- repo: https://github.com/PyCQA/isort
rev: '5.10.1'
rev: '5.12.0'
hooks:
- id: isort
- repo: https://github.com/psf/black
Expand All @@ -30,3 +30,12 @@ repos:
- id: flake8
args:
- --show-source
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.21.0
hooks:
- id: check-metaschema
files: ^src/rapids_dependency_file_generator/schema.json$
- id: check-jsonschema
files: ^tests/examples/([^/]*)/dependencies.yaml$
args: ["--schemafile", "src/rapids_dependency_file_generator/schema.json"]
- id: check-github-workflows
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include src/rapids_dependency_file_generator/schema.json
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
]
requires-python = ">=3.8"
dependencies = [
"PyYAML",
"jsonschema",
]

[project.scripts]
Expand Down
3 changes: 3 additions & 0 deletions src/rapids_dependency_file_generator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ._version import __version__ as version
from .constants import OutputTypes, default_dependency_file_path
from .rapids_dependency_file_generator import make_dependency_files
from .rapids_dependency_file_validator import validate_dependencies


def validate_args(argv):
Expand Down Expand Up @@ -63,6 +64,8 @@ def main(argv=None):
with open(args.config) as f:
parsed_config = yaml.load(f, Loader=yaml.FullLoader)

validate_dependencies(parsed_config)

matrix = generate_matrix(args.matrix)
to_stdout = all([args.file_key, args.output, args.matrix is not None])

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Logic for validating dependency files."""

import json
import textwrap

import jsonschema
import pkg_resources
from jsonschema.exceptions import best_match

# Parse the JSON Schema shipped alongside this module once, at import time, so
# every validation call reuses the same in-memory schema object.
SCHEMA = json.loads(pkg_resources.resource_string(__name__, "schema.json"))


def validate_dependencies(dependencies):
    """Validate a dictionary against the dependencies.yaml spec.

    All schema violations are collected, but only the single most relevant
    one (as ranked by ``jsonschema.exceptions.best_match``) is printed.

    Parameters
    ----------
    dependencies : dict
        The parsed dependencies.yaml file.

    Raises
    ------
    RuntimeError
        If the dependencies do not conform to the schema.
    """
    validator = jsonschema.Draft7Validator(SCHEMA)
    errors = list(validator.iter_errors(dependencies))
    if errors:
        print("The provided dependency file contains schema errors.")
        # Report only the best-matching error to keep the output readable.
        best_matching_error = best_match(errors)
        print("\n", textwrap.indent(str(best_matching_error), "\t"), "\n")
        raise RuntimeError("The provided dependencies data is invalid.")
161 changes: 161 additions & 0 deletions src/rapids_dependency_file_generator/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://raw.githubusercontent.com/rapidsai/dependency-file-generator/v1.0.0/src/rapids_dependency_file_generator/schema.json",
"type": "object",
"title": "RAPIDS Package Dependency Specification Format",
"description": "Consolidated specification of RAPIDS project dependencies",
"properties": {
"files": {
"type": "object",
"patternProperties": {
".*": {
"type": "object",
"properties": {
"output": {"$ref": "#/$defs/outputs"},
"includes": {"type": "array", "items": {"type": "string"}},
"matrix": {"$ref": "#/$defs/matrix"},
"requirements_dir": {"type": "string"},
"conda_dir": {"type": "string"}
},
"additionalProperties": false,
"required": ["output", "includes"]
}
},
"minProperties": 1
},
"dependencies": {
"type": "object",
"patternProperties": {
".*": {
"type": "object",
"properties": {
"common": {
"type": "array",
"items": {
"type": "object",
"properties": {
"output_types": {"$ref": "#/$defs/outputs"},
"packages": {"$ref": "#/$defs/packages"}
},
"required": ["output_types", "packages"],
"additionalProperties": false
}
},
"specific": {
"type": "array",
"items": {
"type": "object",
"properties": {
"output_types": {"$ref": "#/$defs/outputs"},
"matrices": {"$ref": "#/$defs/matrices"}
},
"required": ["output_types", "matrices"],
"additionalProperties": false
}
}
},
"minProperties": 1,
"additionalProperties": false
}
}
},
"channels": {"$ref": "#/$defs/channels"}
},
"required": ["files", "dependencies"],
"additionalProperties": false,
"$defs": {
"channel": {
"type": "string",
"format": "iri-reference"
},
"channel-list": {
"type": "array",
"items": {
"$ref": "#/$defs/channel"
}
},
"channels": {
"oneOf": [
{"$ref": "#/$defs/channel"},
{"$ref": "#/$defs/channel-list"}
]
},
"matrix": {
"type": "object",
"patternProperties": {
".*": {
"type": "array",
"items": {"type": "string"}
}
}
},
"matrix-matcher": {
"type": "object",
"properties": {
"matrix": {
"oneOf": [
{
"type": "object",
"patternProperties": {
".*": {"type": "string"}
}
},
{"type": "null"}
]
},
"packages": {"oneOf": [
{"$ref": "#/$defs/requirements"},
{"type": "null"}
]}
},
"required": ["matrix", "packages"],
"additionalProperties": false
},
"matrices": {
"type": "array",
"items": {"$ref": "#/$defs/matrix-matcher"}
},
"output-types": {
"enum": ["conda", "requirements"]
},
"output-types-array": {
"type": "array",
"items": {"$ref": "#/$defs/output-types"}
},
"outputs": {
"oneOf": [
{"$ref": "#/$defs/output-types"},
{"$ref": "#/$defs/output-types-array"},
{"const": "none"}
]
},
"packages": {
"type": "array",
"items": {
"oneOf": [
{"$ref": "#/$defs/requirement"},
{"$ref": "#/$defs/pip-requirements"}
]
}
},
"requirement": {
"type": "string"
},
"requirements": {
"type": "array",
"items": {
"$ref": "#/$defs/requirement"
},
"minItems": 1

},
"pip-requirements": {
"type": "object",
"properties": {
"pip": {"$ref": "#/$defs/requirements"}
},
"additionalProperties": false,
"required": ["pip"]
}
}
}
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import pytest

from rapids_dependency_file_generator.rapids_dependency_file_validator import SCHEMA


@pytest.fixture(scope="session")
def schema():
    """Provide the package's dependencies.yaml JSON Schema to tests."""
    return SCHEMA
46 changes: 46 additions & 0 deletions tests/examples/invalid/invalid-requirement/dependencies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
files:
build:
output: conda
conda_dir: output/actual
matrix:
cuda: ["11.5", "11.6"]
arch: [x86_64]
includes:
- build
channels:
- rapidsai
- conda-forge
dependencies:
build:
common:
- output_types: [conda, requirements]
packages:
- clang=11.1.0
- spdlog>=1.8.5,<1.9
- output_types: conda
packages:
- pip
- pip:
- git+https://github.com/python-streamz/streamz.git@master
specific:
- output_types: [conda, requirements]
matrices:
- matrix:
cuda: "11.5"
packages:
- 1234
- cuda-python>=11.5,<11.7.1
- matrix:
cuda: "11.6"
packages:
- cuda-python>=11.6,<11.7.1
- output_types: conda
matrices:
- matrix:
cuda: "11.5"
packages:
- cudatoolkit=11.5
- matrix:
cuda: "11.6"
packages:
- cudatoolkit=11.6
58 changes: 42 additions & 16 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,22 @@
import pathlib
import shutil

import jsonschema
import pytest
import yaml
from jsonschema.exceptions import ValidationError

from rapids_dependency_file_generator.cli import main

CURRENT_DIR = pathlib.Path(__file__).parent

# Example inputs that drive the parametrized tests. The glob results are
# sorted because directory listing order is filesystem-dependent, and these
# lists feed pytest parametrization, which should be stable between runs.
EXAMPLE_FILES = sorted(
    path
    for path in CURRENT_DIR.glob("examples/*/dependencies.yaml")
    # Skip any example whose absolute path mentions "no-specific-match".
    if "no-specific-match" not in str(path.absolute())
)
INVALID_EXAMPLE_FILES = sorted(
    CURRENT_DIR.glob("examples/invalid/*/dependencies.yaml")
)


@pytest.fixture(scope="session", autouse=True)
def clean_actual_files():
Expand All @@ -25,23 +35,26 @@ def make_file_set(file_dir):
}


@pytest.mark.parametrize(
"test_name",
[
"conda-minimal",
"integration",
"matrix",
"no-matrix",
"requirements-minimal",
"specific-fallback-first",
"specific-fallback",
],
@pytest.fixture(
params=[example_file.parent for example_file in EXAMPLE_FILES],
ids=[example_file.parent.stem for example_file in EXAMPLE_FILES],
)
def test_examples(test_name):
test_dir = CURRENT_DIR.joinpath("examples", test_name)
expected_dir = test_dir.joinpath("output", "expected")
actual_dir = test_dir.joinpath("output", "actual")
dep_file_path = test_dir.joinpath("dependencies.yaml")
def example_dir(request):
return request.param


@pytest.fixture(
    params=[path.parent for path in INVALID_EXAMPLE_FILES],
    ids=[path.parent.stem for path in INVALID_EXAMPLE_FILES],
)
def invalid_example_dir(request):
    """Return the directory of one invalid example per parametrized run."""
    return request.param


def test_examples(example_dir):
expected_dir = example_dir.joinpath("output", "expected")
actual_dir = example_dir.joinpath("output", "actual")
dep_file_path = example_dir.joinpath("dependencies.yaml")

main(["--config", str(dep_file_path)])

Expand All @@ -63,3 +76,16 @@ def test_error_examples(test_name):

with pytest.raises(ValueError):
main(["--config", str(dep_file_path)])


def test_examples_are_valid(schema, example_dir):
    """Check that an example's dependencies.yaml satisfies the schema."""
    yaml_text = (example_dir / "dependencies.yaml").read_text()
    parsed = yaml.safe_load(yaml_text)
    jsonschema.validate(instance=parsed, schema=schema)


def test_invalid_examples_are_invalid(schema, invalid_example_dir):
    """Check that an invalid example is rejected by schema validation."""
    yaml_text = (invalid_example_dir / "dependencies.yaml").read_text()
    parsed = yaml.safe_load(yaml_text)
    with pytest.raises(ValidationError):
        jsonschema.validate(instance=parsed, schema=schema)
5 changes: 5 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import jsonschema


def test_schema_is_valid(schema):
    """Check the packaged schema itself against the Draft 7 metaschema."""
    jsonschema.Draft7Validator.check_schema(schema)

0 comments on commit f7e8234

Please sign in to comment.