From 7b4274ccf150827388c3d372abea0b85289e7603 Mon Sep 17 00:00:00 2001 From: goanpeca Date: Thu, 24 Sep 2020 15:07:54 -0500 Subject: [PATCH] Split classes and add reading the current validator from environment variable --- .travis.yml | 2 - appveyor.yml | 2 - nbformat/__init__.py | 4 +- nbformat/json_compat.py | 71 +++++--- nbformat/tests/base.py | 9 +- nbformat/tests/test_validator.py | 297 ++++++++++++++++++------------- nbformat/validator.py | 21 ++- 7 files changed, 242 insertions(+), 164 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5a548e7d..23df02dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,6 @@ install: - pip freeze script: - py.test -v --cov nbformat nbformat - - pip uninstall fastjsonschema --yes - - py.test -v --cov nbformat nbformat after_success: - codecov matrix: diff --git a/appveyor.yml b/appveyor.yml index ddea683c..2edcdd36 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -23,8 +23,6 @@ build: off # to run your custom scripts instead of automatic tests test_script: - 'py.test -v --cov nbformat nbformat' - - 'pip uninstall fastjsonschema --yes' - - 'py.test -v --cov nbformat nbformat' on_success: - codecov diff --git a/nbformat/__init__.py b/nbformat/__init__.py index 24cf75e3..fa05fc27 100644 --- a/nbformat/__init__.py +++ b/nbformat/__init__.py @@ -74,7 +74,7 @@ def reads(s, as_version, **kwargs): if as_version is not NO_CONVERT: nb = convert(nb, as_version) try: - validate(nb, use_fast=True) + validate(nb) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -104,7 +104,7 @@ def writes(nb, version=NO_CONVERT, **kwargs): else: version, _ = reader.get_version(nb) try: - validate(nb, use_fast=True) + validate(nb) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py index 788b5723..6f458c2b 100644 --- a/nbformat/json_compat.py +++ b/nbformat/json_compat.py @@ -5,6 +5,9 @@ libraries. """ +import os + +import jsonschema from jsonschema import Draft4Validator as _JsonSchemaValidator from jsonschema import ValidationError @@ -16,32 +19,56 @@ _JsonSchemaException = ValidationError -class Validator: - """ - Common validator wrapper to provide a uniform usage of other schema validation - libraries. - """ +class JsonSchemaValidator: + name = "jsonschema" def __init__(self, schema): self._schema = schema - - # Validation libraries - self._jsonschema = _JsonSchemaValidator(schema) # Default - self._fastjsonschema_validate = fastjsonschema.compile(schema) if fastjsonschema else None + self._default_validator = _JsonSchemaValidator(schema) # Default + self._validator = self._default_validator def validate(self, data): - """ - Validate the schema of ``data``. - - Will use ``fastjsonschema`` if available. - """ - if fastjsonschema: - try: - self._fastjsonschema_validate(data) - except _JsonSchemaException as e: - raise ValidationError(e.message) - else: - self._jsonschema.validate(data) + self._default_validator.validate(data) def iter_errors(self, data, schema=None): - return self._jsonschema.iter_errors(data, schema) + return self._default_validator.iter_errors(data, schema) + + +class FastJsonSchemaValidator(JsonSchemaValidator): + name = "fastjsonschema" + + def __init__(self, schema): + super().__init__(schema) + + self._validator = fastjsonschema.compile(schema) + + def validate(self, data): + try: + self._validator(data) + except _JsonSchemaException as error: + raise ValidationError(error.message, schema_path=error.path) + + +_VALIDATOR_MAP = [ + ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator), + ("jsonschema", jsonschema, JsonSchemaValidator), +] +VALIDATORS = [item[0] for item in _VALIDATOR_MAP] + + +def _validator_for_name(validator_name): + if validator_name not in VALIDATORS: + raise ValueError("Invalid validator '{0}' value!\nValid values are: {1}".format( + validator_name, VALIDATORS)) + + for (name, module, validator_cls) in _VALIDATOR_MAP: + if module and validator_name == name: + return validator_cls + + +def get_current_validator(): + """ + Return the current validator based on the value of an environment variable. + """ + validator_name = os.environ.get("NBFORMAT_VALIDATOR", "jsonschema") + return _validator_for_name(validator_name) diff --git a/nbformat/tests/base.py b/nbformat/tests/base.py index 303d2f73..312c22bb 100644 --- a/nbformat/tests/base.py +++ b/nbformat/tests/base.py @@ -12,9 +12,10 @@ class TestsBase(unittest.TestCase): """Base tests class.""" - def fopen(self, f, mode=u'r',encoding='utf-8'): - return io.open(os.path.join(self._get_files_path(), f), mode, encoding=encoding) + @classmethod + def fopen(cls, f, mode=u'r',encoding='utf-8'): + return io.open(os.path.join(cls._get_files_path(), f), mode, encoding=encoding) - - def _get_files_path(self): + @classmethod + def _get_files_path(cls): return os.path.dirname(__file__) diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 67750d3c..7723dc91 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -4,147 +4,196 @@ # Distributed under the terms of the Modified BSD License. import os -import time +import re from .base import TestsBase from jsonschema import ValidationError from nbformat import read from ..validator import isvalid, validate, iter_validate +from ..json_compat import VALIDATORS + +import pytest + + +# Fixtures +@pytest.fixture(autouse=True) +def clean_env_before_and_after_tests(): + """Fixture to clean up env variables before and after tests.""" + os.environ.pop("NBFORMAT_VALIDATOR", None) + yield + os.environ.pop("NBFORMAT_VALIDATOR", None) + + +# Helpers +def set_validator(validator_name): + os.environ["NBFORMAT_VALIDATOR"] = validator_name + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb2(validator_name): + """Test that a v2 notebook converted to current passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb3(validator_name): + """Test that a v3 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test3.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4(validator_name): + """Test that a v4 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4_document_info(validator_name): + """Test that a notebook with document_info passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4docinfo.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4custom(validator_name): + """Test that a notebook with a custom JSON mimetype passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4custom.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata(validator_name): + """Test that a notebook with a jupyter metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata_timings(validator_name): + """Tests that a notebook with "timing" in metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_invalid(validator_name): + """Test than an invalid notebook does not pass validation""" + set_validator(validator_name) + # this notebook has a few different errors: + # - one cell is missing its source + # - invalid cell type + # - invalid output_type + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb) + assert not isvalid(nb) -try: - import fastjsonschema -except ImportError: - fastjsonschema = None +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validate_empty(validator_name): + """Test that an empty notebook (invalid) fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({}) -class TestValidator(TestsBase): - def test_nb2(self): - """Test that a v2 notebook converted to current passes validation""" - with self.fopen(u'test2.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_future(validator_name): + """Test that a notebook from the future with extra keys passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4plus.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb, version=4, version_minor=3) - def test_nb3(self): - """Test that a v3 notebook passes validation""" - with self.fopen(u'test3.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + assert not isvalid(nb, version=4, version_minor=3) + assert isvalid(nb) - def test_nb4(self): - """Test that a v4 notebook passes validation""" - with self.fopen(u'test4.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4_document_info(self): - """Test that a notebook with document_info passes validation""" - with self.fopen(u'test4docinfo.ipynb', u'r') as f: - nb = read(f, as_version=4) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError) as exception_info: validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4custom(self): - """Test that a notebook with a custom JSON mimetype passes validation""" - with self.fopen(u'test4custom.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + s = str(exception_info.value) + assert re.compile(r"validating .required. in markdown_cell").search(s) + assert re.compile(r"source.* is a required property").search(s) + assert re.compile(r"On instance\[u?['\"].*cells['\"]\]\[0\]").search(s) + assert len(s.splitlines()) < 10 - def test_nb4jupyter_metadata(self): - """Test that a notebook with a jupyter metadata passes validation""" - with self.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4jupyter_metadata_timings(self): - """Tests that a notebook with "timing" in metadata passes validation""" - with self.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertTrue(isvalid(nb)) - - def test_invalid(self): - """Test than an invalid notebook does not pass validation""" - # this notebook has a few different errors: - # - one cell is missing its source - # - invalid cell type - # - invalid output_type - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb) - self.assertEqual(isvalid(nb), False) - - def test_validate_empty(self): - """Test that an empty notebook (invalid) fails validation""" - with self.assertRaises(ValidationError) as e: - validate({}) - - def test_future(self): - """Test that a notebook from the future with extra keys passes validation""" - with self.fopen(u'test4plus.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb, version=4, version_minor=3) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_iter_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) - self.assertEqual(isvalid(nb, version=4, version_minor=3), False) - self.assertEqual(isvalid(nb), True) + errors = list(iter_validate(nb)) + assert len(errors) == 3 + assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} - def test_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError) as e: - validate(nb) - s = str(e.exception) - self.assertRegex(s, "validating.*required.* in markdown_cell") - self.assertRegex(s, "source.* is a required property") - self.assertRegex(s, r"On instance\[u?['\"].*cells['\"]\]\[0\]") - self.assertLess(len(s.splitlines()), 10) - - def test_iter_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_iter_validation_empty(validator_name): + """Test that an empty notebook (invalid) fails validation via iter_validate""" + set_validator(validator_name) + errors = list(iter_validate({})) + assert len(errors) == 1 + assert type(errors[0]) == ValidationError + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validation_no_version(validator_name): + """Test that an invalid notebook with no version fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({'invalid': 'notebook'}) + + +def test_invalid_validator_raises_value_error(): + """Test that an invalid notebook with no version fails validation""" + set_validator("foobar") + with pytest.raises(ValueError): + with TestsBase.fopen(u'test2.ipynb', u'r') as f: nb = read(f, as_version=4) - errors = list(iter_validate(nb)) - assert len(errors) == 3 - assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} - - def test_iter_validation_empty(self): - """Test that an empty notebook (invalid) fails validation via iter_validate""" - errors = list(iter_validate({})) - assert len(errors) == 1 - assert type(errors[0]) == ValidationError - - def test_validation_no_version(self): - """Test that an invalid notebook with no version fails validation""" - with self.assertRaises(ValidationError) as e: - validate({'invalid': 'notebook'}) - - def test_fast_validation(self): - """ - Test that valid notebook with too many outputs is parsed ~12 times - faster with fastjsonschema. - """ - if fastjsonschema: - with self.fopen(u'many_tracebacks.ipynb', u'r') as f: - nb = read(f, as_version=4) - - # Multiply output - base_output = nb["cells"][0]["outputs"][0] - for i in range(50000): - nb["cells"][0]["outputs"].append(base_output) - - start_time = time.time() - validate(nb, use_fast=True) - fast_time = time.time() - start_time - - start_time = time.time() - validate(nb) - slow_time = time.time() - start_time - - self.assertGreater(slow_time / fast_time, 12) + +def test_invalid_validator_raises_value_error_after_read(): + """Test that an invalid notebook with no version fails validation""" + set_validator("jsonschema") + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) + + set_validator("foobar") + with pytest.raises(ValueError): + validate(nb) diff --git a/nbformat/validator.py b/nbformat/validator.py index ecf8b735..baf37202 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -9,10 +9,9 @@ import warnings from ipython_genutils.importstring import import_item -from .json_compat import Validator, ValidationError +from .json_compat import get_current_validator, ValidationError from .reader import get_version, reads - validators = {} def _relax_additional_properties(obj): @@ -49,7 +48,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): if version_minor is None: version_minor = current_minor - version_tuple = (version, version_minor) + current_validator = get_current_validator() + version_tuple = (current_validator.name, version, version_minor) if version_tuple not in validators: try: @@ -63,7 +63,7 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # and allow undefined cell types and outputs schema_json = _allow_undefined(schema_json) - validators[version_tuple] = Validator(schema_json) + validators[version_tuple] = current_validator(schema_json) if relax_add_props: try: @@ -74,7 +74,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # this allows properties to be added for intermediate # representations while validating for all other kinds of errors schema_json = _relax_additional_properties(schema_json) - validators[version_tuple] = Validator(schema_json) + validators[version_tuple] = current_validator(schema_json) + return validators[version_tuple] @@ -260,12 +261,16 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, validator = get_validator(version, version_minor, relax_add_props=relax_add_props) if validator is None: raise ValidationError("No schema for validating v%s notebooks" % version) - elif use_fast: + elif validator.name != "jsonschema": + # If not using default validator then, skip iter_validate, and provide + # less legible errors validator.validate(nbdict) else: + # If using default validator then use iter_validate, and provide + # more readable errors for error in iter_validate(nbdict, ref=ref, version=version, - version_minor=version_minor, - relax_add_props=relax_add_props): + version_minor=version_minor, + relax_add_props=relax_add_props): raise error