Skip to content

Commit

Permalink
Dynamically determine jsonschema validator (#2812)
Browse files Browse the repository at this point in the history
* Use `$schema` property to dynamically determine jsonschema validator

* Fix regression introduced in #2771 for older jsonschema versions, which did not yet expose the format checker on the validator class

* Add test
  • Loading branch information
binste committed Jan 7, 2023
1 parent 4933664 commit 4ddd4e9
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 27 deletions.
5 changes: 4 additions & 1 deletion altair/utils/display.py
Expand Up @@ -125,7 +125,10 @@ def _validate(self):
# type: () -> None
"""Validate the spec against the schema."""
schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8"))
validate_jsonschema(self.spec, schema_dict)
validate_jsonschema(
self.spec,
schema_dict,
)

def _repr_mimebundle_(self, include=None, exclude=None):
"""Return a MIME bundle for display in Jupyter frontends."""
Expand Down
33 changes: 21 additions & 12 deletions altair/utils/schemapi.py
Expand Up @@ -9,12 +9,12 @@

import jsonschema
import jsonschema.exceptions
import jsonschema.validators
import numpy as np
import pandas as pd

from altair import vegalite

JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -44,7 +44,7 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
def validate_jsonschema(spec, schema, rootschema=None):
# We don't use jsonschema.validate as this would validate the schema itself.
# Instead, we pass the schema directly to the validator class. This is done for
# two reasons: The schema comes from Vega-Lite and is not based on the user
Expand All @@ -54,9 +54,18 @@ def validate_jsonschema(spec, schema, resolver=None):
# e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
# (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
# it is not a valid URI reference due to the characters such as '<'.
validator = JSONSCHEMA_VALIDATOR(
schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver
)
if rootschema is not None:
validator_cls = jsonschema.validators.validator_for(rootschema)
resolver = jsonschema.RefResolver.from_schema(rootschema)
else:
validator_cls = jsonschema.validators.validator_for(schema)
# No resolver is necessary if the schema is already the full schema
resolver = None

validator_kwargs = {"resolver": resolver}
if hasattr(validator_cls, "FORMAT_CHECKER"):
validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER
validator = validator_cls(schema, **validator_kwargs)
error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
if error is not None:
raise error
Expand Down Expand Up @@ -177,7 +186,6 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -466,8 +474,9 @@ def validate(cls, instance, schema=None):
"""
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(instance, schema, resolver=resolver)
return validate_jsonschema(
instance, schema, rootschema=cls._rootschema or cls._schema
)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -485,8 +494,9 @@ def validate_property(cls, name, value, schema=None):
"""
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(
value, props.get(name, {}), rootschema=cls._rootschema or cls._schema
)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -578,9 +588,8 @@ def from_dict(
if "anyOf" in schema or "oneOf" in schema:
schemas = schema.get("anyOf", []) + schema.get("oneOf", [])
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
validate_jsonschema(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, rootschema=rootschema)
except jsonschema.ValidationError:
continue
else:
Expand Down
44 changes: 43 additions & 1 deletion tests/utils/tests/test_schemapi.py
Expand Up @@ -9,6 +9,7 @@

import numpy as np

from altair import load_schema
from altair.utils.schemapi import (
UndefinedType,
SchemaBase,
Expand All @@ -17,6 +18,7 @@
SchemaValidationError,
)

_JSONSCHEMA_DRAFT = load_schema()["$schema"]
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.

Expand All @@ -29,6 +31,7 @@ def _default_wrapper_classes(cls):

class MySchema(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"StringMapping": {
"type": "object",
Expand Down Expand Up @@ -65,6 +68,7 @@ class StringArray(_TestSchema):

class Derived(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
"Bar": {"type": "string", "enum": ["A", "B"]},
Expand All @@ -90,7 +94,10 @@ class Bar(_TestSchema):


class SimpleUnion(_TestSchema):
_schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"anyOf": [{"type": "integer"}, {"type": "string"}],
}


class DefinitionUnion(_TestSchema):
Expand All @@ -100,18 +107,38 @@ class DefinitionUnion(_TestSchema):

class SimpleArray(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"type": "array",
"items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
}


class InvalidProperties(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"type": "object",
"properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
}


class Draft7Schema(_TestSchema):
_schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"e": {"items": [{"type": "string"}, {"type": "string"}]},
},
}


class Draft202012Schema(_TestSchema):
_schema = {
"$schema": "http://json-schema.org/draft/2020-12/schema#",
"properties": {
"e": {"items": [{"type": "string"}, {"type": "string"}]},
},
}


def test_construct_multifaceted_schema():
dct = {
"a": {"foo": "bar"},
Expand Down Expand Up @@ -221,6 +248,21 @@ def test_undefined_singleton():
assert Undefined is UndefinedType()


def test_schema_validator_selection():
# Tests that the correct validator class is chosen based on the $schema
# property in the schema. The AttributeError below is expected because
# Draft 2020-12 changed the "items" keyword, see
# https://json-schema.org/draft/2020-12/release-notes.html#changes-to-
# items-and-additionalitems
dct = {
"e": ["a", "b"],
}

assert Draft7Schema.from_dict(dct).to_dict() == dct
with pytest.raises(AttributeError, match="'list' object has no attribute 'get'"):
Draft202012Schema.from_dict(dct)


@pytest.fixture
def dct():
return {
Expand Down
33 changes: 21 additions & 12 deletions tools/schemapi/schemapi.py
Expand Up @@ -7,12 +7,12 @@

import jsonschema
import jsonschema.exceptions
import jsonschema.validators
import numpy as np
import pandas as pd

from altair import vegalite

JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -42,7 +42,7 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
def validate_jsonschema(spec, schema, rootschema=None):
# We don't use jsonschema.validate as this would validate the schema itself.
# Instead, we pass the schema directly to the validator class. This is done for
# two reasons: The schema comes from Vega-Lite and is not based on the user
Expand All @@ -52,9 +52,18 @@ def validate_jsonschema(spec, schema, resolver=None):
# e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
# (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
# it is not a valid URI reference due to the characters such as '<'.
validator = JSONSCHEMA_VALIDATOR(
schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver
)
if rootschema is not None:
validator_cls = jsonschema.validators.validator_for(rootschema)
resolver = jsonschema.RefResolver.from_schema(rootschema)
else:
validator_cls = jsonschema.validators.validator_for(schema)
# No resolver is necessary if the schema is already the full schema
resolver = None

validator_kwargs = {"resolver": resolver}
if hasattr(validator_cls, "FORMAT_CHECKER"):
validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER
validator = validator_cls(schema, **validator_kwargs)
error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
if error is not None:
raise error
Expand Down Expand Up @@ -175,7 +184,6 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -464,8 +472,9 @@ def validate(cls, instance, schema=None):
"""
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(instance, schema, resolver=resolver)
return validate_jsonschema(
instance, schema, rootschema=cls._rootschema or cls._schema
)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -483,8 +492,9 @@ def validate_property(cls, name, value, schema=None):
"""
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(
value, props.get(name, {}), rootschema=cls._rootschema or cls._schema
)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -576,9 +586,8 @@ def from_dict(
if "anyOf" in schema or "oneOf" in schema:
schemas = schema.get("anyOf", []) + schema.get("oneOf", [])
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
validate_jsonschema(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, rootschema=rootschema)
except jsonschema.ValidationError:
continue
else:
Expand Down
44 changes: 43 additions & 1 deletion tools/schemapi/tests/test_schemapi.py
Expand Up @@ -7,6 +7,7 @@

import numpy as np

from altair import load_schema
from altair.utils.schemapi import (
UndefinedType,
SchemaBase,
Expand All @@ -15,6 +16,7 @@
SchemaValidationError,
)

_JSONSCHEMA_DRAFT = load_schema()["$schema"]
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.

Expand All @@ -27,6 +29,7 @@ def _default_wrapper_classes(cls):

class MySchema(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"StringMapping": {
"type": "object",
Expand Down Expand Up @@ -63,6 +66,7 @@ class StringArray(_TestSchema):

class Derived(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"definitions": {
"Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
"Bar": {"type": "string", "enum": ["A", "B"]},
Expand All @@ -88,7 +92,10 @@ class Bar(_TestSchema):


class SimpleUnion(_TestSchema):
_schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"anyOf": [{"type": "integer"}, {"type": "string"}],
}


class DefinitionUnion(_TestSchema):
Expand All @@ -98,18 +105,38 @@ class DefinitionUnion(_TestSchema):

class SimpleArray(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"type": "array",
"items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
}


class InvalidProperties(_TestSchema):
_schema = {
"$schema": _JSONSCHEMA_DRAFT,
"type": "object",
"properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
}


class Draft7Schema(_TestSchema):
_schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"e": {"items": [{"type": "string"}, {"type": "string"}]},
},
}


class Draft202012Schema(_TestSchema):
_schema = {
"$schema": "http://json-schema.org/draft/2020-12/schema#",
"properties": {
"e": {"items": [{"type": "string"}, {"type": "string"}]},
},
}


def test_construct_multifaceted_schema():
dct = {
"a": {"foo": "bar"},
Expand Down Expand Up @@ -219,6 +246,21 @@ def test_undefined_singleton():
assert Undefined is UndefinedType()


def test_schema_validator_selection():
# Tests that the correct validator class is chosen based on the $schema
# property in the schema. The AttributeError below is expected because
# Draft 2020-12 changed the "items" keyword, see
# https://json-schema.org/draft/2020-12/release-notes.html#changes-to-
# items-and-additionalitems
dct = {
"e": ["a", "b"],
}

assert Draft7Schema.from_dict(dct).to_dict() == dct
with pytest.raises(AttributeError, match="'list' object has no attribute 'get'"):
Draft202012Schema.from_dict(dct)


@pytest.fixture
def dct():
return {
Expand Down

0 comments on commit 4ddd4e9

Please sign in to comment.