From eed1aea0d03efcb23093a5e7fc7a2534c39e85a5 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 20 Jan 2023 13:13:25 -0600 Subject: [PATCH] Fix how '--check-metaschema' builds validators (#230) * Simplify: remove `make_validator` This was an intermediate method which doesn't appear to be necessary, and leverages caching behavior which is never used. Remove it and move the body into `get_validator` (its only caller). * Minor cleanup to acceptance test naming * Fix how '--check-metaschema' builds a validator Metaschema checking was not building the validator correctly. Primarily two fixes are applied here: - the metaschema's schema dialect is no longer copied from the schema being checked, but is taken from the metaschema document - metaschema checking now automatically includes format checking and applies the CLI parameters for format checking (including the ability to disable format checking) Add test for an invalid format under metaschema. This test requires one of the format checking libraries, and therefore drives the need for new features in the testsuite, including the addition of config for the example file tests. Example file config is schema-validated, which serves as another dogfooding self-check. The test file imitates the driving use-case in #220 The acceptance test definition is refactored to make managing the test data a little easier. --- .pre-commit-config.yaml | 7 +- CHANGELOG.rst | 4 + src/check_jsonschema/schema_loader/main.py | 36 +++---- tests/acceptance/test_example_files.py | 89 +++++++++++++++--- tests/example-files/config_schema.json | 37 ++++++++ .../metaschema/2020_invalid_format_value.json | 93 +++++++++++++++++++ .../hooks/negative/metaschema/_config.yaml | 4 + .../metaschema/2020_invalid_format_value.json | 93 +++++++++++++++++++ .../hooks/positive/metaschema/_config.yaml | 3 + 9 files changed, 334 insertions(+), 32 deletions(-) create mode 100644 tests/example-files/config_schema.json create mode 100644 tests/example-files/hooks/negative/metaschema/2020_invalid_format_value.json create mode 100644 tests/example-files/hooks/negative/metaschema/_config.yaml create mode 100644 tests/example-files/hooks/positive/metaschema/2020_invalid_format_value.json create mode 100644 tests/example-files/hooks/positive/metaschema/_config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e296ddb3..2e0fae5e7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,10 +6,13 @@ repos: - id: check-dependabot - id: check-github-workflows - id: check-readthedocs - - id: check-jsonschema + - id: check-metaschema name: Validate Vendored Schemas - args: ["--check-metaschema"] files: ^src/check_jsonschema/builtin_schemas/vendor/.*\.json$ + - id: check-jsonschema + name: Validate Test Configs + args: ["--schemafile", "tests/example-files/config_schema.json"] + files: ^tests/example-files/.*/_config.yaml$ - repo: https://github.com/pre-commit/pre-commit-hooks.git rev: v4.4.0 hooks: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 467c5b9ee..baeaf3539 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,10 @@ Unreleased .. vendor-insert-here - Update vendored schemas (2023-01-18) +- Fix a bug in which `--check-metaschema` was not building validators correctly. + The metaschema's schema dialect is chosen correctly now, and metaschema + formats are now checked by default. This can be disabled with + `--disable-format`. 0.20.0 ------ diff --git a/src/check_jsonschema/schema_loader/main.py b/src/check_jsonschema/schema_loader/main.py index 3e1cae2e0..8f7a49336 100644 --- a/src/check_jsonschema/schema_loader/main.py +++ b/src/check_jsonschema/schema_loader/main.py @@ -74,9 +74,6 @@ def __init__( # setup a schema reader lazily, when needed self._reader: LocalSchemaReader | HttpSchemaReader | None = None - # setup a location to store the validator so that it is only built once by default - self._validator: jsonschema.Validator | None = None - @property def reader(self) -> LocalSchemaReader | HttpSchemaReader: if self._reader is None: @@ -105,8 +102,12 @@ def get_schema_ref_base(self) -> str | None: def get_schema(self) -> dict[str, t.Any]: return self.reader.read_schema() - def make_validator( - self, format_opts: FormatOptions, fill_defaults: bool + def get_validator( + self, + path: pathlib.Path, + instance_doc: dict[str, t.Any], + format_opts: FormatOptions, + fill_defaults: bool, ) -> jsonschema.Validator: schema_uri = self.get_schema_ref_base() schema = self.get_schema() @@ -138,16 +139,6 @@ def make_validator( ) return t.cast(jsonschema.Validator, validator) - def get_validator( - self, - path: pathlib.Path, - instance_doc: dict[str, t.Any], - format_opts: FormatOptions, - fill_defaults: bool, - ) -> jsonschema.Validator: - self._validator = self.make_validator(format_opts, fill_defaults) - return self._validator - class BuiltinSchemaLoader(SchemaLoader): def __init__(self, schema_name: str) -> None: @@ -168,5 +159,16 @@ def get_validator( format_opts: FormatOptions, fill_defaults: bool, ) -> jsonschema.Validator: - validator = jsonschema.validators.validator_for(instance_doc) - return t.cast(jsonschema.Validator, validator(validator.META_SCHEMA)) + schema_validator = jsonschema.validators.validator_for(instance_doc) + meta_validator_class = jsonschema.validators.validator_for( + schema_validator.META_SCHEMA, default=schema_validator + ) + + # format checker (which may be None) + meta_schema_dialect = schema_validator.META_SCHEMA.get("$schema") + format_checker = make_format_checker(format_opts, meta_schema_dialect) + + meta_validator = meta_validator_class( + schema_validator.META_SCHEMA, format_checker=format_checker + ) + return meta_validator diff --git a/tests/acceptance/test_example_files.py b/tests/acceptance/test_example_files.py index 9df46a173..3b23e2c2f 100644 --- a/tests/acceptance/test_example_files.py +++ b/tests/acceptance/test_example_files.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +import dataclasses +import importlib.util import shlex from pathlib import Path @@ -36,7 +40,9 @@ def _build_hook_cases(category): example_dir = EXAMPLE_HOOK_FILES / category / hookid if example_dir.exists(): for example in example_dir.iterdir(): - res[str(example.relative_to(EXAMPLE_HOOK_FILES))] = hookid + if example.name == "_config.yaml": + continue + res[str(example.relative_to(EXAMPLE_HOOK_FILES / category))] = hookid return res @@ -48,37 +54,31 @@ def _get_explicit_cases(category): return res -def _check_case_skip(case_name): - if case_name.endswith("json5") and not JSON5_ENABLED: - pytest.skip("cannot check json5 support without json5 enabled") - if case_name.endswith("toml") and not TOML_ENABLED: - pytest.skip("cannot check toml support without toml enabled") - - POSITIVE_HOOK_CASES = _build_hook_cases("positive") NEGATIVE_HOOK_CASES = _build_hook_cases("negative") @pytest.mark.parametrize("case_name", POSITIVE_HOOK_CASES.keys()) def test_hook_positive_examples(case_name, run_line): - _check_case_skip(case_name) + rcase = ResolvedCase.load_positive(case_name) hook_id = POSITIVE_HOOK_CASES[case_name] - ret = run_line(HOOK_CONFIG[hook_id] + [str(EXAMPLE_HOOK_FILES / case_name)]) + ret = run_line(HOOK_CONFIG[hook_id] + [rcase.path] + rcase.add_args) assert ret.exit_code == 0 @pytest.mark.parametrize("case_name", NEGATIVE_HOOK_CASES.keys()) def test_hook_negative_examples(case_name, run_line): - _check_case_skip(case_name) + rcase = ResolvedCase.load_negative(case_name) + hook_id = NEGATIVE_HOOK_CASES[case_name] - ret = run_line(HOOK_CONFIG[hook_id] + [str(EXAMPLE_HOOK_FILES / case_name)]) + ret = run_line(HOOK_CONFIG[hook_id] + [rcase.path] + rcase.add_args) assert ret.exit_code == 1 @pytest.mark.parametrize("case_name", _get_explicit_cases("positive")) def test_explicit_positive_examples(case_name, run_line): - _check_case_skip(case_name) + _check_file_format_skip(case_name) casedir = EXAMPLE_EXPLICIT_FILES / "positive" / case_name instance = casedir / "instance.json" @@ -104,3 +104,66 @@ def test_explicit_positive_examples(case_name, run_line): ] ) assert ret.exit_code == 0 + + +def _check_file_format_skip(case_name): + if case_name.endswith("json5") and not JSON5_ENABLED: + pytest.skip("cannot check json5 support without json5 enabled") + if case_name.endswith("toml") and not TOML_ENABLED: + pytest.skip("cannot check toml support without toml enabled") + + +@dataclasses.dataclass +class ResolvedCase: + category: str + path: str + add_args: list[str] + config: dict + + def check_skip(self) -> None: + if "requires_packages" in self.config: + for pkg in self.config["requires_packages"]: + if _package_is_installed(pkg): + continue + pytest.skip(f"cannot check because '{pkg}' is not installed") + + def __post_init__(self) -> None: + self.check_skip() + + @classmethod + def load_positive(cls: type[ResolvedCase], case_name: str) -> ResolvedCase: + return cls._load("positive", case_name) + + @classmethod + def load_negative(cls: type[ResolvedCase], case_name: str) -> ResolvedCase: + return cls._load("negative", case_name) + + @classmethod + def _load(cls: type[ResolvedCase], category: str, case_name: str) -> ResolvedCase: + _check_file_format_skip(case_name) + + path = EXAMPLE_HOOK_FILES / category / case_name + config = cls._load_file_config(path.parent / "_config.yaml", path.name) + + return cls( + category=category, + path=str(path), + add_args=config.get("add_args", []), + config=config, + ) + + @staticmethod + def _load_file_config(config_path, name): + if not config_path.is_file(): + return {} + with open(config_path) as fp: + loaded_conf = yaml.load(fp) + files_section = loaded_conf.get("files", {}) + return files_section.get(name, {}) + + +def _package_is_installed(pkg: str) -> bool: + spec = importlib.util.find_spec(pkg) + if spec is None: + return False + return True diff --git a/tests/example-files/config_schema.json b/tests/example-files/config_schema.json new file mode 100644 index 000000000..911aef780 --- /dev/null +++ b/tests/example-files/config_schema.json @@ -0,0 +1,37 @@ +{ + "$comment": "An internal schema used to check the testsuite _config.yaml files.", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "spec": { + "type": "object", + "properties": { + "requires_packages": { + "type": "array", + "items": { + "type": "string" + } + }, + "add_args": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false + } + }, + "type": "object", + "properties": { + "files": { + "type": "object", + "patternProperties": { + "^.+\\.(json|yml|yaml|json5|toml)$": { + "$ref": "#/$defs/spec" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/tests/example-files/hooks/negative/metaschema/2020_invalid_format_value.json b/tests/example-files/hooks/negative/metaschema/2020_invalid_format_value.json new file mode 100644 index 000000000..92b0ddb38 --- /dev/null +++ b/tests/example-files/hooks/negative/metaschema/2020_invalid_format_value.json @@ -0,0 +1,93 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Meta-schema defining a ref with invalid URI reference", + "$defs": { + "prop<(str|list)>": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": true + } + ] + }, + "anchorString": { + "type": "string", + "pattern": "^[A-Za-z_][-A-Za-z0-9._]*$" + }, + "uriString": { + "type": "string", + "format": "uri" + }, + "uriReferenceString": { + "type": "string", + "format": "uri-reference" + }, + "original2020metaschema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$vocabulary": { + "https://json-schema.org/draft/2020-12/vocab/core": true + }, + "$dynamicAnchor": "meta", + "title": "Core vocabulary meta-schema", + "type": [ + "object", + "boolean" + ], + "properties": { + "$id": { + "$ref": "#/$defs/uriReferenceString", + "$comment": "Non-empty fragments not allowed.", + "pattern": "^[^#]*#?$" + }, + "$schema": { + "$ref": "#/$defs/uriString" + }, + "$ref": { + "$ref": "#/$defs/uriReferenceString" + }, + "$anchor": { + "$ref": "#/$defs/anchorString" + }, + "$dynamicRef": { + "$ref": "#/$defs/uriReferenceString" + }, + "$dynamicAnchor": { + "$ref": "#/$defs/anchorString" + }, + "$vocabulary": { + "type": "object", + "propertyNames": { + "$ref": "#/$defs/uriString" + }, + "additionalProperties": { + "type": "boolean" + } + }, + "$comment": { + "type": "string" + }, + "$defs": { + "type": "object", + "additionalProperties": { + "$dynamicRef": "#meta" + } + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/original2020metaschema" + }, + { + "properties": { + "title": { + "$ref": "#/$defs/prop<(str|list)>" + } + } + } + ] +} diff --git a/tests/example-files/hooks/negative/metaschema/_config.yaml b/tests/example-files/hooks/negative/metaschema/_config.yaml new file mode 100644 index 000000000..16f8bdfb2 --- /dev/null +++ b/tests/example-files/hooks/negative/metaschema/_config.yaml @@ -0,0 +1,4 @@ +files: + 2020_invalid_format_value.json: + requires_packages: + - rfc3987 diff --git a/tests/example-files/hooks/positive/metaschema/2020_invalid_format_value.json b/tests/example-files/hooks/positive/metaschema/2020_invalid_format_value.json new file mode 100644 index 000000000..92b0ddb38 --- /dev/null +++ b/tests/example-files/hooks/positive/metaschema/2020_invalid_format_value.json @@ -0,0 +1,93 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Meta-schema defining a ref with invalid URI reference", + "$defs": { + "prop<(str|list)>": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": true + } + ] + }, + "anchorString": { + "type": "string", + "pattern": "^[A-Za-z_][-A-Za-z0-9._]*$" + }, + "uriString": { + "type": "string", + "format": "uri" + }, + "uriReferenceString": { + "type": "string", + "format": "uri-reference" + }, + "original2020metaschema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$vocabulary": { + "https://json-schema.org/draft/2020-12/vocab/core": true + }, + "$dynamicAnchor": "meta", + "title": "Core vocabulary meta-schema", + "type": [ + "object", + "boolean" + ], + "properties": { + "$id": { + "$ref": "#/$defs/uriReferenceString", + "$comment": "Non-empty fragments not allowed.", + "pattern": "^[^#]*#?$" + }, + "$schema": { + "$ref": "#/$defs/uriString" + }, + "$ref": { + "$ref": "#/$defs/uriReferenceString" + }, + "$anchor": { + "$ref": "#/$defs/anchorString" + }, + "$dynamicRef": { + "$ref": "#/$defs/uriReferenceString" + }, + "$dynamicAnchor": { + "$ref": "#/$defs/anchorString" + }, + "$vocabulary": { + "type": "object", + "propertyNames": { + "$ref": "#/$defs/uriString" + }, + "additionalProperties": { + "type": "boolean" + } + }, + "$comment": { + "type": "string" + }, + "$defs": { + "type": "object", + "additionalProperties": { + "$dynamicRef": "#meta" + } + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/original2020metaschema" + }, + { + "properties": { + "title": { + "$ref": "#/$defs/prop<(str|list)>" + } + } + } + ] +} diff --git a/tests/example-files/hooks/positive/metaschema/_config.yaml b/tests/example-files/hooks/positive/metaschema/_config.yaml new file mode 100644 index 000000000..2b604563f --- /dev/null +++ b/tests/example-files/hooks/positive/metaschema/_config.yaml @@ -0,0 +1,3 @@ +files: + 2020_invalid_format_value.json: + add_args: ["--disable-format"]