Skip to content

Commit

Permalink
Disable uri-reference format check in jsonschema (#2771)
Browse files Browse the repository at this point in the history
* Disable uri-reference format check. Consistently use same validator across codebase

* Remove validation in SchemaInfo as not used anywhere and it referenced the wrong jsonschema draft

* Add compatibility for older jsonschema versions

* Improve comments

* Simplify validate_jsonschema
  • Loading branch information
binste committed Jan 4, 2023
1 parent 1d90aec commit b1774e6
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 179 deletions.
5 changes: 2 additions & 3 deletions altair/utils/display.py
Expand Up @@ -4,10 +4,9 @@
from typing import Callable, Dict
import uuid

from jsonschema import validate

from .plugin_registry import PluginRegistry
from .mimebundle import spec_to_mimebundle
from .schemapi import validate_jsonschema


# ==============================================================================
Expand Down Expand Up @@ -126,7 +125,7 @@ def _validate(self):
# type: () -> None
"""Validate the spec against the schema."""
schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8"))
validate(self.spec, schema_dict)
validate_jsonschema(self.spec, schema_dict)

def _repr_mimebundle_(self, include=None, exclude=None):
"""Return a MIME bundle for display in Jupyter frontends."""
Expand Down
31 changes: 24 additions & 7 deletions altair/utils/schemapi.py
Expand Up @@ -8,12 +8,13 @@
from typing import Any

import jsonschema
import jsonschema.exceptions
import numpy as np
import pandas as pd

from altair import vegalite


JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -43,6 +44,24 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
    """Validate ``spec`` against ``schema`` and raise the best-matching error.

    Parameters
    ----------
    spec
        The instance to validate.
    schema
        The JSON schema to validate against. The schema itself is NOT
        validated (see the comment below).
    resolver
        Optional reference resolver forwarded to the validator class.

    Raises
    ------
    jsonschema.ValidationError
        The error picked by ``jsonschema.exceptions.best_match`` when ``spec``
        does not conform to ``schema``. Nothing is returned on success.
    """
    # We don't use jsonschema.validate as this would validate the schema itself.
    # Instead, we pass the schema directly to the validator class. This is done for
    # two reasons: The schema comes from Vega-Lite and is not based on the user
    # input, therefore there is no need to validate it in the first place.
    # Furthermore, the "uri-reference" format checker fails for some of the
    # references as URIs in "$ref" are not encoded,
    # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
    # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
    # it is not a valid URI reference due to the characters such as '<'.
    validator_kwargs = {"resolver": resolver}
    # Only newer jsonschema releases expose FORMAT_CHECKER on the validator
    # classes. On older releases the attribute is missing, so we skip format
    # checking instead of failing with an AttributeError.
    format_checker = getattr(JSONSCHEMA_VALIDATOR, "FORMAT_CHECKER", None)
    if format_checker is not None:
        validator_kwargs["format_checker"] = format_checker
    validator = JSONSCHEMA_VALIDATOR(schema, **validator_kwargs)
    error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
    if error is not None:
        raise error


def _subclasses(cls):
"""Breadth-first sequence of all classes which inherit from cls."""
seen = set()
Expand Down Expand Up @@ -158,7 +177,7 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = jsonschema.Draft7Validator
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -448,9 +467,7 @@ def validate(cls, instance, schema=None):
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return jsonschema.validate(
instance, schema, cls=cls._validator, resolver=resolver
)
return validate_jsonschema(instance, schema, resolver=resolver)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -469,7 +486,7 @@ def validate_property(cls, name, value, schema=None):
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return jsonschema.validate(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -563,7 +580,7 @@ def from_dict(
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
jsonschema.validate(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, resolver=resolver)
except jsonschema.ValidationError:
continue
else:
Expand Down
149 changes: 0 additions & 149 deletions tools/schemapi/jsonschema-draft04.json

This file was deleted.

31 changes: 24 additions & 7 deletions tools/schemapi/schemapi.py
Expand Up @@ -6,12 +6,13 @@
from typing import Any

import jsonschema
import jsonschema.exceptions
import numpy as np
import pandas as pd

from altair import vegalite


JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator
# If DEBUG_MODE is True, then schema objects are converted to dict and
# validated at creation time. This slows things down, particularly for
# larger specs, but leads to much more useful tracebacks for the user.
Expand Down Expand Up @@ -41,6 +42,24 @@ def debug_mode(arg):
DEBUG_MODE = original


def validate_jsonschema(spec, schema, resolver=None):
    """Validate ``spec`` against ``schema`` and raise the best-matching error.

    Parameters
    ----------
    spec
        The instance to validate.
    schema
        The JSON schema to validate against. The schema itself is NOT
        validated (see the comment below).
    resolver
        Optional reference resolver forwarded to the validator class.

    Raises
    ------
    jsonschema.ValidationError
        The error picked by ``jsonschema.exceptions.best_match`` when ``spec``
        does not conform to ``schema``. Nothing is returned on success.
    """
    # We don't use jsonschema.validate as this would validate the schema itself.
    # Instead, we pass the schema directly to the validator class. This is done for
    # two reasons: The schema comes from Vega-Lite and is not based on the user
    # input, therefore there is no need to validate it in the first place.
    # Furthermore, the "uri-reference" format checker fails for some of the
    # references as URIs in "$ref" are not encoded,
    # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef,
    # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but
    # it is not a valid URI reference due to the characters such as '<'.
    validator_kwargs = {"resolver": resolver}
    # Only newer jsonschema releases expose FORMAT_CHECKER on the validator
    # classes. On older releases the attribute is missing, so we skip format
    # checking instead of failing with an AttributeError.
    format_checker = getattr(JSONSCHEMA_VALIDATOR, "FORMAT_CHECKER", None)
    if format_checker is not None:
        validator_kwargs["format_checker"] = format_checker
    validator = JSONSCHEMA_VALIDATOR(schema, **validator_kwargs)
    error = jsonschema.exceptions.best_match(validator.iter_errors(spec))
    if error is not None:
        raise error


def _subclasses(cls):
"""Breadth-first sequence of all classes which inherit from cls."""
seen = set()
Expand Down Expand Up @@ -156,7 +175,7 @@ class SchemaBase(object):
_schema = None
_rootschema = None
_class_is_valid_at_instantiation = True
_validator = jsonschema.Draft7Validator
_validator = JSONSCHEMA_VALIDATOR

def __init__(self, *args, **kwds):
# Two valid options for initialization, which should be handled by
Expand Down Expand Up @@ -446,9 +465,7 @@ def validate(cls, instance, schema=None):
if schema is None:
schema = cls._schema
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return jsonschema.validate(
instance, schema, cls=cls._validator, resolver=resolver
)
return validate_jsonschema(instance, schema, resolver=resolver)

@classmethod
def resolve_references(cls, schema=None):
Expand All @@ -467,7 +484,7 @@ def validate_property(cls, name, value, schema=None):
value = _todict(value, validate=False, context={})
props = cls.resolve_references(schema or cls._schema).get("properties", {})
resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema)
return jsonschema.validate(value, props.get(name, {}), resolver=resolver)
return validate_jsonschema(value, props.get(name, {}), resolver=resolver)

def __dir__(self):
return list(self._kwds.keys())
Expand Down Expand Up @@ -561,7 +578,7 @@ def from_dict(
for possible_schema in schemas:
resolver = jsonschema.RefResolver.from_schema(rootschema)
try:
jsonschema.validate(dct, possible_schema, resolver=resolver)
validate_jsonschema(dct, possible_schema, resolver=resolver)
except jsonschema.ValidationError:
continue
else:
Expand Down
14 changes: 1 addition & 13 deletions tools/schemapi/utils.py
@@ -1,8 +1,6 @@
"""Utilities for working with schemas"""

import json
import keyword
import pkgutil
import re
import textwrap
import urllib
Expand All @@ -13,12 +11,6 @@
EXCLUDE_KEYS = ("definitions", "title", "description", "$schema", "id")


def load_metaschema():
    """Read ``jsonschema-draft04.json`` from the ``schemapi`` package.

    Returns the file's contents parsed from JSON.
    """
    raw_bytes = pkgutil.get_data("schemapi", "jsonschema-draft04.json")
    return json.loads(raw_bytes.decode())


def resolve_references(schema, root=None):
"""Resolve References within a JSON schema"""
resolver = jsonschema.RefResolver.from_schema(root or schema)
Expand Down Expand Up @@ -144,18 +136,14 @@ def values(self):
class SchemaInfo(object):
"""A wrapper for inspecting a JSON schema"""

def __init__(self, schema, rootschema=None, validate=False):
def __init__(self, schema, rootschema=None):
if hasattr(schema, "_schema"):
if hasattr(schema, "_rootschema"):
schema, rootschema = schema._schema, schema._rootschema
else:
schema, rootschema = schema._schema, schema._schema
elif not rootschema:
rootschema = schema
if validate:
metaschema = load_metaschema()
jsonschema.validate(schema, metaschema)
jsonschema.validate(rootschema, metaschema)
self.raw_schema = schema
self.rootschema = rootschema
self.schema = resolve_references(schema, rootschema)
Expand Down

0 comments on commit b1774e6

Please sign in to comment.