From b1774e673423af87ff09a49567592c52479be349 Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Wed, 4 Jan 2023 19:24:28 +0100 Subject: [PATCH] Disable uri-reference format check in jsonsschema (#2771) * Disable uri-reference format check. Consistently use same validator across codebase * Remove validation in SchemaInfo as not used anywhere and it referenced the wrong jsonschema draft * Add compatibility for older jsonschema versions * Improve comments * Simplify validate_jsonschema --- altair/utils/display.py | 5 +- altair/utils/schemapi.py | 31 +++-- tools/schemapi/jsonschema-draft04.json | 149 ------------------------- tools/schemapi/schemapi.py | 31 +++-- tools/schemapi/utils.py | 14 +-- 5 files changed, 51 insertions(+), 179 deletions(-) delete mode 100644 tools/schemapi/jsonschema-draft04.json diff --git a/altair/utils/display.py b/altair/utils/display.py index 92d7c7835..bcf8232b3 100644 --- a/altair/utils/display.py +++ b/altair/utils/display.py @@ -4,10 +4,9 @@ from typing import Callable, Dict import uuid -from jsonschema import validate - from .plugin_registry import PluginRegistry from .mimebundle import spec_to_mimebundle +from .schemapi import validate_jsonschema # ============================================================================== @@ -126,7 +125,7 @@ def _validate(self): # type: () -> None """Validate the spec against the schema.""" schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8")) - validate(self.spec, schema_dict) + validate_jsonschema(self.spec, schema_dict) def _repr_mimebundle_(self, include=None, exclude=None): """Return a MIME bundle for display in Jupyter frontends.""" diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 1cdc27787..2c86b1d2e 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -8,12 +8,13 @@ from typing import Any import jsonschema +import jsonschema.exceptions import numpy as np import pandas as pd from altair import vegalite - +JSONSCHEMA_VALIDATOR 
= jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. @@ -43,6 +44,24 @@ def debug_mode(arg): DEBUG_MODE = original +def validate_jsonschema(spec, schema, resolver=None): + # We don't use jsonschema.validate as this would validate the schema itself. + # Instead, we pass the schema directly to the validator class. This is done for + # two reasons: The schema comes from Vega-Lite and is not based on the user + # input, therefore there is no need to validate it in the first place. Furthermore, + # the "uri-reference" format checker fails for some of the references as URIs in + # "$ref" are not encoded, + # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef, + # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but + # it is not a valid URI reference due to the characters such as '<'. + validator = JSONSCHEMA_VALIDATOR( + schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver + ) + error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) + if error is not None: + raise error + + def _subclasses(cls): """Breadth-first sequence of all classes which inherit from cls.""" seen = set() @@ -158,7 +177,7 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = jsonschema.Draft7Validator + _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -448,9 +467,7 @@ def validate(cls, instance, schema=None): if schema is None: schema = cls._schema resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate( - instance, schema, cls=cls._validator, resolver=resolver - ) + return validate_jsonschema(instance, schema, resolver=resolver) @classmethod def resolve_references(cls, schema=None): @@ 
-469,7 +486,7 @@ def validate_property(cls, name, value, schema=None): value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema(value, props.get(name, {}), resolver=resolver) def __dir__(self): return list(self._kwds.keys()) @@ -563,7 +580,7 @@ def from_dict( for possible_schema in schemas: resolver = jsonschema.RefResolver.from_schema(rootschema) try: - jsonschema.validate(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, resolver=resolver) except jsonschema.ValidationError: continue else: diff --git a/tools/schemapi/jsonschema-draft04.json b/tools/schemapi/jsonschema-draft04.json deleted file mode 100644 index bcbb84743..000000000 --- a/tools/schemapi/jsonschema-draft04.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "id": "http://json-schema.org/draft-04/schema#", - "$schema": "http://json-schema.org/draft-04/schema#", - "description": "Core schema meta-schema", - "definitions": { - "schemaArray": { - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }, - "positiveInteger": { - "type": "integer", - "minimum": 0 - }, - "positiveIntegerDefault0": { - "allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ] - }, - "simpleTypes": { - "enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ] - }, - "stringArray": { - "type": "array", - "items": { "type": "string" }, - "minItems": 1, - "uniqueItems": true - } - }, - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "$schema": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "default": {}, - "multipleOf": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true - }, - "maximum": { - "type": "number" - 
}, - "exclusiveMaximum": { - "type": "boolean", - "default": false - }, - "minimum": { - "type": "number" - }, - "exclusiveMinimum": { - "type": "boolean", - "default": false - }, - "maxLength": { "$ref": "#/definitions/positiveInteger" }, - "minLength": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "pattern": { - "type": "string", - "format": "regex" - }, - "additionalItems": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "items": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/schemaArray" } - ], - "default": {} - }, - "maxItems": { "$ref": "#/definitions/positiveInteger" }, - "minItems": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "uniqueItems": { - "type": "boolean", - "default": false - }, - "maxProperties": { "$ref": "#/definitions/positiveInteger" }, - "minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "required": { "$ref": "#/definitions/stringArray" }, - "additionalProperties": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "definitions": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "properties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "patternProperties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "dependencies": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/stringArray" } - ] - } - }, - "enum": { - "type": "array", - "minItems": 1, - "uniqueItems": true - }, - "type": { - "anyOf": [ - { "$ref": "#/definitions/simpleTypes" }, - { - "type": "array", - "items": { "$ref": "#/definitions/simpleTypes" }, - "minItems": 1, - "uniqueItems": true - } - ] - }, - "format": { "type": "string" }, - "allOf": { "$ref": "#/definitions/schemaArray" }, - "anyOf": { "$ref": "#/definitions/schemaArray" }, - "oneOf": { "$ref": 
"#/definitions/schemaArray" }, - "not": { "$ref": "#" } - }, - "dependencies": { - "exclusiveMaximum": [ "maximum" ], - "exclusiveMinimum": [ "minimum" ] - }, - "default": {} -} diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index a3f9cd3bd..e652d2c3f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -6,12 +6,13 @@ from typing import Any import jsonschema +import jsonschema.exceptions import numpy as np import pandas as pd from altair import vegalite - +JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. @@ -41,6 +42,24 @@ def debug_mode(arg): DEBUG_MODE = original +def validate_jsonschema(spec, schema, resolver=None): + # We don't use jsonschema.validate as this would validate the schema itself. + # Instead, we pass the schema directly to the validator class. This is done for + # two reasons: The schema comes from Vega-Lite and is not based on the user + # input, therefore there is no need to validate it in the first place. Furthermore, + # the "uri-reference" format checker fails for some of the references as URIs in + # "$ref" are not encoded, + # e.g. '#/definitions/ValueDefWithCondition<MarkPropFieldOrDatumDef, + # (Gradient|string|null)>' would be a valid $ref in a Vega-Lite schema but + # it is not a valid URI reference due to the characters such as '<'. 
+ validator = JSONSCHEMA_VALIDATOR( + schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver + ) + error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) + if error is not None: + raise error + + def _subclasses(cls): """Breadth-first sequence of all classes which inherit from cls.""" seen = set() @@ -156,7 +175,7 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = jsonschema.Draft7Validator + _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -446,9 +465,7 @@ def validate(cls, instance, schema=None): if schema is None: schema = cls._schema resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate( - instance, schema, cls=cls._validator, resolver=resolver - ) + return validate_jsonschema(instance, schema, resolver=resolver) @classmethod def resolve_references(cls, schema=None): @@ -467,7 +484,7 @@ def validate_property(cls, name, value, schema=None): value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema(value, props.get(name, {}), resolver=resolver) def __dir__(self): return list(self._kwds.keys()) @@ -561,7 +578,7 @@ def from_dict( for possible_schema in schemas: resolver = jsonschema.RefResolver.from_schema(rootschema) try: - jsonschema.validate(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, resolver=resolver) except jsonschema.ValidationError: continue else: diff --git a/tools/schemapi/utils.py b/tools/schemapi/utils.py index 38875ab0a..612617739 100644 --- a/tools/schemapi/utils.py +++ b/tools/schemapi/utils.py @@ -1,8 +1,6 @@ 
"""Utilities for working with schemas""" -import json import keyword -import pkgutil import re import textwrap import urllib @@ -13,12 +11,6 @@ EXCLUDE_KEYS = ("definitions", "title", "description", "$schema", "id") -def load_metaschema(): - schema = pkgutil.get_data("schemapi", "jsonschema-draft04.json") - schema = schema.decode() - return json.loads(schema) - - def resolve_references(schema, root=None): """Resolve References within a JSON schema""" resolver = jsonschema.RefResolver.from_schema(root or schema) @@ -144,7 +136,7 @@ def values(self): class SchemaInfo(object): """A wrapper for inspecting a JSON schema""" - def __init__(self, schema, rootschema=None, validate=False): + def __init__(self, schema, rootschema=None): if hasattr(schema, "_schema"): if hasattr(schema, "_rootschema"): schema, rootschema = schema._schema, schema._rootschema @@ -152,10 +144,6 @@ def __init__(self, schema, rootschema=None, validate=False): schema, rootschema = schema._schema, schema._schema elif not rootschema: rootschema = schema - if validate: - metaschema = load_metaschema() - jsonschema.validate(schema, metaschema) - jsonschema.validate(rootschema, metaschema) self.raw_schema = schema self.rootschema = rootschema self.schema = resolve_references(schema, rootschema)