From 4ddd4e967061901f0634e59c95abbec7e68ec9d4 Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Sat, 7 Jan 2023 22:57:28 +0100 Subject: [PATCH] Dynamically determine jsonschema validator (#2812) * Use property to dynamically determine jsonschema validator * Fix regression introduced in #2771 for older jsonschema versions which did not yet make format checker accessible on validator class * Add test --- altair/utils/display.py | 5 ++- altair/utils/schemapi.py | 33 ++++++++++++-------- tests/utils/tests/test_schemapi.py | 44 ++++++++++++++++++++++++++- tools/schemapi/schemapi.py | 33 ++++++++++++-------- tools/schemapi/tests/test_schemapi.py | 44 ++++++++++++++++++++++++++- 5 files changed, 132 insertions(+), 27 deletions(-) diff --git a/altair/utils/display.py b/altair/utils/display.py index bcf8232b3..50c87b10e 100644 --- a/altair/utils/display.py +++ b/altair/utils/display.py @@ -125,7 +125,10 @@ def _validate(self): # type: () -> None """Validate the spec against the schema.""" schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8")) - validate_jsonschema(self.spec, schema_dict) + validate_jsonschema( + self.spec, + schema_dict, + ) def _repr_mimebundle_(self, include=None, exclude=None): """Return a MIME bundle for display in Jupyter frontends.""" diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 2c86b1d2e..b94017c94 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -9,12 +9,12 @@ import jsonschema import jsonschema.exceptions +import jsonschema.validators import numpy as np import pandas as pd from altair import vegalite -JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. 
@@ -44,7 +44,7 @@ def debug_mode(arg): DEBUG_MODE = original -def validate_jsonschema(spec, schema, resolver=None): +def validate_jsonschema(spec, schema, rootschema=None): # We don't use jsonschema.validate as this would validate the schema itself. # Instead, we pass the schema directly to the validator class. This is done for # two reasons: The schema comes from Vega-Lite and is not based on the user @@ -54,9 +54,18 @@ def validate_jsonschema(spec, schema, resolver=None): # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef, # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but # it is not a valid URI reference due to the characters such as '<'. - validator = JSONSCHEMA_VALIDATOR( - schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver - ) + if rootschema is not None: + validator_cls = jsonschema.validators.validator_for(rootschema) + resolver = jsonschema.RefResolver.from_schema(rootschema) + else: + validator_cls = jsonschema.validators.validator_for(schema) + # No resolver is necessary if the schema is already the full schema + resolver = None + + validator_kwargs = {"resolver": resolver} + if hasattr(validator_cls, "FORMAT_CHECKER"): + validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER + validator = validator_cls(schema, **validator_kwargs) error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) if error is not None: raise error @@ -177,7 +186,6 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -466,8 +474,9 @@ def validate(cls, instance, schema=None): """ if schema is None: schema = cls._schema - resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return validate_jsonschema(instance, schema, resolver=resolver) + return validate_jsonschema( + instance, schema, rootschema=cls._rootschema or cls._schema + 
) @classmethod def resolve_references(cls, schema=None): @@ -485,8 +494,9 @@ def validate_property(cls, name, value, schema=None): """ value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return validate_jsonschema(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema( + value, props.get(name, {}), rootschema=cls._rootschema or cls._schema + ) def __dir__(self): return list(self._kwds.keys()) @@ -578,9 +588,8 @@ def from_dict( if "anyOf" in schema or "oneOf" in schema: schemas = schema.get("anyOf", []) + schema.get("oneOf", []) for possible_schema in schemas: - resolver = jsonschema.RefResolver.from_schema(rootschema) try: - validate_jsonschema(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, rootschema=rootschema) except jsonschema.ValidationError: continue else: diff --git a/tests/utils/tests/test_schemapi.py b/tests/utils/tests/test_schemapi.py index 00ce019d3..b83ac67c4 100644 --- a/tests/utils/tests/test_schemapi.py +++ b/tests/utils/tests/test_schemapi.py @@ -9,6 +9,7 @@ import numpy as np +from altair import load_schema from altair.utils.schemapi import ( UndefinedType, SchemaBase, @@ -17,6 +18,7 @@ SchemaValidationError, ) +_JSONSCHEMA_DRAFT = load_schema()["$schema"] # Make tests inherit from _TestSchema, so that when we test from_dict it won't # try to use SchemaBase objects defined elsewhere as wrappers. 
@@ -29,6 +31,7 @@ def _default_wrapper_classes(cls): class MySchema(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "definitions": { "StringMapping": { "type": "object", @@ -65,6 +68,7 @@ class StringArray(_TestSchema): class Derived(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "definitions": { "Foo": {"type": "object", "properties": {"d": {"type": "string"}}}, "Bar": {"type": "string", "enum": ["A", "B"]}, @@ -90,7 +94,10 @@ class Bar(_TestSchema): class SimpleUnion(_TestSchema): - _schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]} + _schema = { + "$schema": _JSONSCHEMA_DRAFT, + "anyOf": [{"type": "integer"}, {"type": "string"}], + } class DefinitionUnion(_TestSchema): @@ -100,6 +107,7 @@ class DefinitionUnion(_TestSchema): class SimpleArray(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "type": "array", "items": {"anyOf": [{"type": "integer"}, {"type": "string"}]}, } @@ -107,11 +115,30 @@ class SimpleArray(_TestSchema): class InvalidProperties(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "type": "object", "properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}}, } +class Draft7Schema(_TestSchema): + _schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "e": {"items": [{"type": "string"}, {"type": "string"}]}, + }, + } + + +class Draft202012Schema(_TestSchema): + _schema = { + "$schema": "http://json-schema.org/draft/2020-12/schema#", + "properties": { + "e": {"items": [{"type": "string"}, {"type": "string"}]}, + }, + } + + def test_construct_multifaceted_schema(): dct = { "a": {"foo": "bar"}, @@ -221,6 +248,21 @@ def test_undefined_singleton(): assert Undefined is UndefinedType() +def test_schema_validator_selection(): + # Tests if the correct validator class is chosen based on the $schema + # property in the schema. 
Reason for the AttributeError below is, that Draft 2020-12 + # introduced changes to the "items" keyword, see + # https://json-schema.org/draft/2020-12/release-notes.html#changes-to- + # items-and-additionalitems + dct = { + "e": ["a", "b"], + } + + assert Draft7Schema.from_dict(dct).to_dict() == dct + with pytest.raises(AttributeError, match="'list' object has no attribute 'get'"): + Draft202012Schema.from_dict(dct) + + @pytest.fixture def dct(): return { diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e652d2c3f..73e530fc8 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -7,12 +7,12 @@ import jsonschema import jsonschema.exceptions +import jsonschema.validators import numpy as np import pandas as pd from altair import vegalite -JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. @@ -42,7 +42,7 @@ def debug_mode(arg): DEBUG_MODE = original -def validate_jsonschema(spec, schema, resolver=None): +def validate_jsonschema(spec, schema, rootschema=None): # We don't use jsonschema.validate as this would validate the schema itself. # Instead, we pass the schema directly to the validator class. This is done for # two reasons: The schema comes from Vega-Lite and is not based on the user @@ -52,9 +52,18 @@ def validate_jsonschema(spec, schema, resolver=None): # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef, # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but # it is not a valid URI reference due to the characters such as '<'. 
- validator = JSONSCHEMA_VALIDATOR( - schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver - ) + if rootschema is not None: + validator_cls = jsonschema.validators.validator_for(rootschema) + resolver = jsonschema.RefResolver.from_schema(rootschema) + else: + validator_cls = jsonschema.validators.validator_for(schema) + # No resolver is necessary if the schema is already the full schema + resolver = None + + validator_kwargs = {"resolver": resolver} + if hasattr(validator_cls, "FORMAT_CHECKER"): + validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER + validator = validator_cls(schema, **validator_kwargs) error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) if error is not None: raise error @@ -175,7 +184,6 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -464,8 +472,9 @@ def validate(cls, instance, schema=None): """ if schema is None: schema = cls._schema - resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return validate_jsonschema(instance, schema, resolver=resolver) + return validate_jsonschema( + instance, schema, rootschema=cls._rootschema or cls._schema + ) @classmethod def resolve_references(cls, schema=None): @@ -483,8 +492,9 @@ def validate_property(cls, name, value, schema=None): """ value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return validate_jsonschema(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema( + value, props.get(name, {}), rootschema=cls._rootschema or cls._schema + ) def __dir__(self): return list(self._kwds.keys()) @@ -576,9 +586,8 @@ def from_dict( if "anyOf" in schema or "oneOf" 
in schema: schemas = schema.get("anyOf", []) + schema.get("oneOf", []) for possible_schema in schemas: - resolver = jsonschema.RefResolver.from_schema(rootschema) try: - validate_jsonschema(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, rootschema=rootschema) except jsonschema.ValidationError: continue else: diff --git a/tools/schemapi/tests/test_schemapi.py b/tools/schemapi/tests/test_schemapi.py index de2ef6fe8..40b9b971e 100644 --- a/tools/schemapi/tests/test_schemapi.py +++ b/tools/schemapi/tests/test_schemapi.py @@ -7,6 +7,7 @@ import numpy as np +from altair import load_schema from altair.utils.schemapi import ( UndefinedType, SchemaBase, @@ -15,6 +16,7 @@ SchemaValidationError, ) +_JSONSCHEMA_DRAFT = load_schema()["$schema"] # Make tests inherit from _TestSchema, so that when we test from_dict it won't # try to use SchemaBase objects defined elsewhere as wrappers. @@ -27,6 +29,7 @@ def _default_wrapper_classes(cls): class MySchema(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "definitions": { "StringMapping": { "type": "object", @@ -63,6 +66,7 @@ class StringArray(_TestSchema): class Derived(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "definitions": { "Foo": {"type": "object", "properties": {"d": {"type": "string"}}}, "Bar": {"type": "string", "enum": ["A", "B"]}, @@ -88,7 +92,10 @@ class Bar(_TestSchema): class SimpleUnion(_TestSchema): - _schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]} + _schema = { + "$schema": _JSONSCHEMA_DRAFT, + "anyOf": [{"type": "integer"}, {"type": "string"}], + } class DefinitionUnion(_TestSchema): @@ -98,6 +105,7 @@ class DefinitionUnion(_TestSchema): class SimpleArray(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "type": "array", "items": {"anyOf": [{"type": "integer"}, {"type": "string"}]}, } @@ -105,11 +113,30 @@ class SimpleArray(_TestSchema): class InvalidProperties(_TestSchema): _schema = { + "$schema": _JSONSCHEMA_DRAFT, "type": 
"object", "properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}}, } +class Draft7Schema(_TestSchema): + _schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "e": {"items": [{"type": "string"}, {"type": "string"}]}, + }, + } + + +class Draft202012Schema(_TestSchema): + _schema = { + "$schema": "http://json-schema.org/draft/2020-12/schema#", + "properties": { + "e": {"items": [{"type": "string"}, {"type": "string"}]}, + }, + } + + def test_construct_multifaceted_schema(): dct = { "a": {"foo": "bar"}, @@ -219,6 +246,21 @@ def test_undefined_singleton(): assert Undefined is UndefinedType() +def test_schema_validator_selection(): + # Tests if the correct validator class is chosen based on the $schema + # property in the schema. Reason for the AttributeError below is, that Draft 2020-12 + # introduced changes to the "items" keyword, see + # https://json-schema.org/draft/2020-12/release-notes.html#changes-to- + # items-and-additionalitems + dct = { + "e": ["a", "b"], + } + + assert Draft7Schema.from_dict(dct).to_dict() == dct + with pytest.raises(AttributeError, match="'list' object has no attribute 'get'"): + Draft202012Schema.from_dict(dct) + + @pytest.fixture def dct(): return {