From 6d214f9b767638e55710f2990fee92ebec69cb5a Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Fri, 20 Jan 2023 21:17:23 +0100 Subject: [PATCH] Backport bug fixes for a 4.2.1 release (#2827) * DOC: remove unused section * Disable uri-reference format check in jsonsschema (#2771) * Disable uri-reference format check. Consistently use same validator across codebase * Remove validation in SchemaInfo as not used anywhere and it referenced the wrong jsonschema draft * Add compatibility for older jsonschema versions * Improve comments * Simplify validate_jsonschema * Replace `iteritems` with `items` due to pandas deprecation (#2683) * Add changelog entry. * Bump version. * Run black and flake8. * Pin selenium in CI. Co-authored-by: Jake VanderPlas Co-authored-by: Stefan Binder Co-authored-by: Joel Ostblom --- .github/workflows/build.yml | 1 + .github/workflows/docbuild.yml | 3 +- altair/__init__.py | 2 +- altair/utils/core.py | 2 +- altair/utils/display.py | 5 +- altair/utils/schemapi.py | 31 ++++- altair/utils/tests/test_plugin_registry.py | 6 +- altair/utils/tests/test_utils.py | 12 +- doc/releases/changes.rst | 9 ++ tools/schemapi/jsonschema-draft04.json | 149 --------------------- tools/schemapi/schemapi.py | 31 ++++- tools/schemapi/utils.py | 14 +- 12 files changed, 73 insertions(+), 192 deletions(-) delete mode 100644 tools/schemapi/jsonschema-draft04.json diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 40922ad2f..43666249e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,6 +24,7 @@ jobs: run: | python -m pip install --upgrade pip pip install .[dev] + pip install "selenium<4.3.0" pip install altair_saver - name: Test with pytest run: | diff --git a/.github/workflows/docbuild.yml b/.github/workflows/docbuild.yml index 1c99835fd..10f25992b 100644 --- a/.github/workflows/docbuild.yml +++ b/.github/workflows/docbuild.yml @@ -19,10 +19,9 @@ jobs: run: | python -m pip install --upgrade pip pip install .[dev] + 
pip install "selenium<4.3.0" pip install altair_saver pip install -r doc/requirements.txt - name: Run docbuild run: | cd doc && make ${{ matrix.build-type }} - - diff --git a/altair/__init__.py b/altair/__init__.py index 7079e8ccc..bcbc87492 100644 --- a/altair/__init__.py +++ b/altair/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa -__version__ = "4.2.0" +__version__ = "4.2.1" from .vegalite import * from . import examples diff --git a/altair/utils/core.py b/altair/utils/core.py index 43bc1b4d5..588e8c608 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -314,7 +314,7 @@ def to_list_if_array(val): else: return val - for col_name, dtype in df.dtypes.iteritems(): + for col_name, dtype in df.dtypes.items(): if str(dtype) == "category": # XXXX: work around bug in to_json for categorical types # https://github.com/pydata/pandas/issues/10778 diff --git a/altair/utils/display.py b/altair/utils/display.py index 92d7c7835..bcf8232b3 100644 --- a/altair/utils/display.py +++ b/altair/utils/display.py @@ -4,10 +4,9 @@ from typing import Callable, Dict import uuid -from jsonschema import validate - from .plugin_registry import PluginRegistry from .mimebundle import spec_to_mimebundle +from .schemapi import validate_jsonschema # ============================================================================== @@ -126,7 +125,7 @@ def _validate(self): # type: () -> None """Validate the spec against the schema.""" schema_dict = json.loads(pkgutil.get_data(*self.schema_path).decode("utf-8")) - validate(self.spec, schema_dict) + validate_jsonschema(self.spec, schema_dict) def _repr_mimebundle_(self, include=None, exclude=None): """Return a MIME bundle for display in Jupyter frontends.""" diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 2dfdc8ee1..f170acd3f 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -6,10 +6,11 @@ import json import jsonschema +import jsonschema.exceptions import numpy as np import pandas as pd - 
+JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. @@ -39,6 +40,24 @@ def debug_mode(arg): DEBUG_MODE = original +def validate_jsonschema(spec, schema, resolver=None): + # We don't use jsonschema.validate as this would validate the schema itself. + # Instead, we pass the schema directly to the validator class. This is done for + # two reasons: The schema comes from Vega-Lite and is not based on the user + # input, therefore there is no need to validate it in the first place. Furthermore, + # the "uri-reference" format checker fails for some of the references as URIs in + # "$ref" are not encoded, + # e.g. '#/definitions/ValueDefWithCondition' would be a valid $ref in a Vega-Lite schema but + # it is not a valid URI reference due to the characters such as '<'. + validator = JSONSCHEMA_VALIDATOR( + schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver + ) + error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) + if error is not None: + raise error + + def _subclasses(cls): """Breadth-first sequence of all classes which inherit from cls.""" seen = set() @@ -150,7 +169,7 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = jsonschema.Draft7Validator + _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -440,9 +459,7 @@ def validate(cls, instance, schema=None): if schema is None: schema = cls._schema resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate( - instance, schema, cls=cls._validator, resolver=resolver - ) + return validate_jsonschema(instance, schema, resolver=resolver) @classmethod def resolve_references(cls, 
schema=None): @@ -461,7 +478,7 @@ def validate_property(cls, name, value, schema=None): value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema(value, props.get(name, {}), resolver=resolver) def __dir__(self): return list(self._kwds.keys()) @@ -555,7 +572,7 @@ def from_dict( for possible_schema in schemas: resolver = jsonschema.RefResolver.from_schema(rootschema) try: - jsonschema.validate(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, resolver=resolver) except jsonschema.ValidationError: continue else: diff --git a/altair/utils/tests/test_plugin_registry.py b/altair/utils/tests/test_plugin_registry.py index cbfb62ac2..38f9cc053 100644 --- a/altair/utils/tests/test_plugin_registry.py +++ b/altair/utils/tests/test_plugin_registry.py @@ -26,7 +26,7 @@ def test_plugin_registry(): assert plugins.get() is None assert repr(plugins) == "TypedCallableRegistry(active='', registered=[])" - plugins.register("new_plugin", lambda x: x ** 2) + plugins.register("new_plugin", lambda x: x**2) assert plugins.names() == ["new_plugin"] assert plugins.active == "" assert plugins.get() is None @@ -46,7 +46,7 @@ def test_plugin_registry(): def test_plugin_registry_extra_options(): plugins = GeneralCallableRegistry() - plugins.register("metadata_plugin", lambda x, p=2: x ** p) + plugins.register("metadata_plugin", lambda x, p=2: x**p) plugins.enable("metadata_plugin") assert plugins.get()(3) == 9 @@ -86,7 +86,7 @@ def test_plugin_registry_global_settings(): def test_plugin_registry_context(): plugins = GeneralCallableRegistry() - plugins.register("default", lambda x, p=2: x ** p) + plugins.register("default", lambda x, p=2: x**p) # At first there is no plugin enabled assert plugins.active == "" diff 
--git a/altair/utils/tests/test_utils.py b/altair/utils/tests/test_utils.py index 1ccede414..54a127216 100644 --- a/altair/utils/tests/test_utils.py +++ b/altair/utils/tests/test_utils.py @@ -129,9 +129,9 @@ def test_sanitize_nullable_integers(): ) df_clean = sanitize_dataframe(df) - assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"} + assert {col.dtype.name for _, col in df_clean.items()} == {"object"} - result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()} + result_python = {col_name: list(col) for col_name, col in df_clean.items()} assert result_python == { "int_np": [1, 2, 3, 4, 5], "int64": [1, 2, 3, None, 5], @@ -157,9 +157,9 @@ def test_sanitize_string_dtype(): ) df_clean = sanitize_dataframe(df) - assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"} + assert {col.dtype.name for _, col in df_clean.items()} == {"object"} - result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()} + result_python = {col_name: list(col) for col_name, col in df_clean.items()} assert result_python == { "string_object": ["a", "b", "c", "d"], "string_string": ["a", "b", "c", "d"], @@ -182,9 +182,9 @@ def test_sanitize_boolean_dtype(): ) df_clean = sanitize_dataframe(df) - assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"} + assert {col.dtype.name for _, col in df_clean.items()} == {"object"} - result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()} + result_python = {col_name: list(col) for col_name, col in df_clean.items()} assert result_python == { "bool_none": [True, False, None], "none": [None, None, None], diff --git a/doc/releases/changes.rst b/doc/releases/changes.rst index c69f85acb..4b58cdaff 100644 --- a/doc/releases/changes.rst +++ b/doc/releases/changes.rst @@ -3,6 +3,15 @@ Altair Change Log ================= +Version 4.2.1 (released XXX XX, 2023) +------------------------------------- + +Bug Fixes +~~~~~~~~~ +- Disable uri-reference 
format check in jsonschema (#2771) +- Replace ``iteritems`` with ``items`` due to pandas deprecation (#2683) + + Version 4.2.0 (released Dec 29, 2021) ------------------------------------- - Update Vega-Lite from version 4.8.1 to version 4.17.0; diff --git a/tools/schemapi/jsonschema-draft04.json b/tools/schemapi/jsonschema-draft04.json deleted file mode 100644 index bcbb84743..000000000 --- a/tools/schemapi/jsonschema-draft04.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "id": "http://json-schema.org/draft-04/schema#", - "$schema": "http://json-schema.org/draft-04/schema#", - "description": "Core schema meta-schema", - "definitions": { - "schemaArray": { - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }, - "positiveInteger": { - "type": "integer", - "minimum": 0 - }, - "positiveIntegerDefault0": { - "allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ] - }, - "simpleTypes": { - "enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ] - }, - "stringArray": { - "type": "array", - "items": { "type": "string" }, - "minItems": 1, - "uniqueItems": true - } - }, - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "$schema": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "default": {}, - "multipleOf": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true - }, - "maximum": { - "type": "number" - }, - "exclusiveMaximum": { - "type": "boolean", - "default": false - }, - "minimum": { - "type": "number" - }, - "exclusiveMinimum": { - "type": "boolean", - "default": false - }, - "maxLength": { "$ref": "#/definitions/positiveInteger" }, - "minLength": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "pattern": { - "type": "string", - "format": "regex" - }, - "additionalItems": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "items": { - "anyOf": [ - { "$ref": "#" }, - { "$ref":
"#/definitions/schemaArray" } - ], - "default": {} - }, - "maxItems": { "$ref": "#/definitions/positiveInteger" }, - "minItems": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "uniqueItems": { - "type": "boolean", - "default": false - }, - "maxProperties": { "$ref": "#/definitions/positiveInteger" }, - "minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "required": { "$ref": "#/definitions/stringArray" }, - "additionalProperties": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "definitions": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "properties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "patternProperties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "dependencies": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/stringArray" } - ] - } - }, - "enum": { - "type": "array", - "minItems": 1, - "uniqueItems": true - }, - "type": { - "anyOf": [ - { "$ref": "#/definitions/simpleTypes" }, - { - "type": "array", - "items": { "$ref": "#/definitions/simpleTypes" }, - "minItems": 1, - "uniqueItems": true - } - ] - }, - "format": { "type": "string" }, - "allOf": { "$ref": "#/definitions/schemaArray" }, - "anyOf": { "$ref": "#/definitions/schemaArray" }, - "oneOf": { "$ref": "#/definitions/schemaArray" }, - "not": { "$ref": "#" } - }, - "dependencies": { - "exclusiveMaximum": [ "maximum" ], - "exclusiveMinimum": [ "minimum" ] - }, - "default": {} -} diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 3ca87b991..3844483a7 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -4,10 +4,11 @@ import json import jsonschema +import jsonschema.exceptions import numpy as np import pandas as pd - +JSONSCHEMA_VALIDATOR = jsonschema.Draft7Validator # If DEBUG_MODE is True, then schema 
objects are converted to dict and # validated at creation time. This slows things down, particularly for # larger specs, but leads to much more useful tracebacks for the user. @@ -37,6 +38,24 @@ def debug_mode(arg): DEBUG_MODE = original +def validate_jsonschema(spec, schema, resolver=None): + # We don't use jsonschema.validate as this would validate the schema itself. + # Instead, we pass the schema directly to the validator class. This is done for + # two reasons: The schema comes from Vega-Lite and is not based on the user + # input, therefore there is no need to validate it in the first place. Furthermore, + # the "uri-reference" format checker fails for some of the references as URIs in + # "$ref" are not encoded, + # e.g. '#/definitions/ValueDefWithCondition' would be a valid $ref in a Vega-Lite schema but + # it is not a valid URI reference due to the characters such as '<'. + validator = JSONSCHEMA_VALIDATOR( + schema, format_checker=JSONSCHEMA_VALIDATOR.FORMAT_CHECKER, resolver=resolver + ) + error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) + if error is not None: + raise error + + def _subclasses(cls): """Breadth-first sequence of all classes which inherit from cls.""" seen = set() @@ -148,7 +167,7 @@ class SchemaBase(object): _schema = None _rootschema = None _class_is_valid_at_instantiation = True - _validator = jsonschema.Draft7Validator + _validator = JSONSCHEMA_VALIDATOR def __init__(self, *args, **kwds): # Two valid options for initialization, which should be handled by @@ -438,9 +457,7 @@ def validate(cls, instance, schema=None): if schema is None: schema = cls._schema resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate( - instance, schema, cls=cls._validator, resolver=resolver - ) + return validate_jsonschema(instance, schema, resolver=resolver) @classmethod def resolve_references(cls, schema=None): @@ -459,7 +476,7 @@ def validate_property(cls, name, value, schema=None): 
value = _todict(value, validate=False, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) resolver = jsonschema.RefResolver.from_schema(cls._rootschema or cls._schema) - return jsonschema.validate(value, props.get(name, {}), resolver=resolver) + return validate_jsonschema(value, props.get(name, {}), resolver=resolver) def __dir__(self): return list(self._kwds.keys()) @@ -553,7 +570,7 @@ def from_dict( for possible_schema in schemas: resolver = jsonschema.RefResolver.from_schema(rootschema) try: - jsonschema.validate(dct, possible_schema, resolver=resolver) + validate_jsonschema(dct, possible_schema, resolver=resolver) except jsonschema.ValidationError: continue else: diff --git a/tools/schemapi/utils.py b/tools/schemapi/utils.py index d0b149f3f..5a73c96cd 100644 --- a/tools/schemapi/utils.py +++ b/tools/schemapi/utils.py @@ -1,8 +1,6 @@ """Utilities for working with schemas""" -import json import keyword -import pkgutil import re import textwrap import urllib @@ -13,12 +11,6 @@ EXCLUDE_KEYS = ("definitions", "title", "description", "$schema", "id") -def load_metaschema(): - schema = pkgutil.get_data("schemapi", "jsonschema-draft04.json") - schema = schema.decode() - return json.loads(schema) - - def resolve_references(schema, root=None): """Resolve References within a JSON schema""" resolver = jsonschema.RefResolver.from_schema(root or schema) @@ -144,7 +136,7 @@ def values(self): class SchemaInfo(object): """A wrapper for inspecting a JSON schema""" - def __init__(self, schema, rootschema=None, validate=False): + def __init__(self, schema, rootschema=None): if hasattr(schema, "_schema"): if hasattr(schema, "_rootschema"): schema, rootschema = schema._schema, schema._rootschema @@ -152,10 +144,6 @@ def __init__(self, schema, rootschema=None, validate=False): schema, rootschema = schema._schema, schema._schema elif not rootschema: rootschema = schema - if validate: - metaschema = load_metaschema() - jsonschema.validate(schema, 
metaschema) - jsonschema.validate(rootschema, metaschema) self.raw_schema = schema self.rootschema = rootschema self.schema = resolve_references(schema, rootschema)