diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f8377d92..0105c34f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -22,7 +22,7 @@ jobs:
- name: Install test dependencies
run: |
pip install --upgrade pip setuptools
- pip install nbformat[test]
+ pip install .[test]
pip install codecov
- name: Install nbformat
run: |
diff --git a/README.md b/README.md
index 0322a5e3..f1ddadd7 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,7 @@
[![codecov.io](https://codecov.io/github/jupyter/nbformat/coverage.svg?branch=master)](https://codecov.io/github/jupyter/nbformat?branch=master)
[![Code Health](https://landscape.io/github/jupyter/nbformat/master/landscape.svg?style=flat)](https://landscape.io/github/jupyter/nbformat/master)
-
-
+![CI Tests](https://github.com/jupyter/nbformat/workflows/Run%20tests/badge.svg)
`nbformat` contains the reference implementation of the [Jupyter Notebook format][],
and Python APIs for working with notebooks.
@@ -20,6 +19,16 @@ From the command line:
pip install nbformat
```
+## Using a different json schema validator
+
+You can install and use [fastjsonschema](https://horejsek.github.io/python-fastjsonschema/) by running:
+
+``` {.sourceCode .bash}
+pip install nbformat[fast]
+```
+
+To enable fast validation with `fastjsonschema`, set the environment variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`.
+
## Python Version Support
This library supported python 2.7 and python 3.5+ for `4.x.x` releases. With python 2's end-of-life nbformat `5.x.x` is now python 3.5+ only. Support for 3.5 will be dropped when it's officially sunset by the python organization.
diff --git a/docs/api.rst b/docs/api.rst
index 2e5493ec..dddde60b 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -120,3 +120,15 @@ bindings are available, and an in-memory store otherwise.
.. autoclass:: SQLiteSignatureStore
.. autoclass:: MemorySignatureStore
+
+Optional fast validation
+------------------------
+
+You can install and use `fastjsonschema `_ by running::
+
+ pip install nbformat[fast]
+
+
+To enable fast validation with `fastjsonschema`, set the environment variable::
+
+ NBFORMAT_VALIDATOR="fastjsonschema"
diff --git a/docs/changelog.rst b/docs/changelog.rst
index b6b3b89e..8367576f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,6 +7,10 @@ Changes in nbformat
In Development
==============
+- Add optional support for using `fastjsonschema` as the JSON validation library.
+ To enable fast validation, install `fastjsonschema` and set the environment
+ variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`.
+
5.0.7
=====
diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py
new file mode 100644
index 00000000..936d3c87
--- /dev/null
+++ b/nbformat/json_compat.py
@@ -0,0 +1,82 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+"""
+Common validator wrapper to provide a uniform usage of other schema validation
+libraries.
+"""
+
+import os
+
+import jsonschema
+from jsonschema import Draft4Validator as _JsonSchemaValidator
+from jsonschema import ValidationError
+
+try:
+ import fastjsonschema
+ from fastjsonschema import JsonSchemaException as _JsonSchemaException
+except ImportError:
+ fastjsonschema = None
+ _JsonSchemaException = ValidationError
+
+
+class JsonSchemaValidator:
+ name = "jsonschema"
+
+ def __init__(self, schema):
+ self._schema = schema
+ self._default_validator = _JsonSchemaValidator(schema) # Default
+ self._validator = self._default_validator
+
+ def validate(self, data):
+ self._default_validator.validate(data)
+
+ def iter_errors(self, data, schema=None):
+ return self._default_validator.iter_errors(data, schema)
+
+
+class FastJsonSchemaValidator(JsonSchemaValidator):
+ name = "fastjsonschema"
+
+ def __init__(self, schema):
+ self._validator = fastjsonschema.compile(schema)
+
+ def validate(self, data):
+ try:
+ self._validator(data)
+ except _JsonSchemaException as error:
+ raise ValidationError(error.message, schema_path=error.path)
+
+ def iter_errors(self, data, schema=None):
+ errors = []
+ validate_func = self._validator if schema is None else fastjsonschema.compile(schema)
+ try:
+ validate_func(data)
+ except _JsonSchemaException as error:
+ errors = [ValidationError(error.message, schema_path=error.path)]
+
+ return errors
+
+
+_VALIDATOR_MAP = [
+ ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator),
+ ("jsonschema", jsonschema, JsonSchemaValidator),
+]
+VALIDATORS = [item[0] for item in _VALIDATOR_MAP]
+
+
+def _validator_for_name(validator_name):
+ if validator_name not in VALIDATORS:
+ raise ValueError("Invalid validator '{0}' value!\nValid values are: {1}".format(
+ validator_name, VALIDATORS))
+
+ for (name, module, validator_cls) in _VALIDATOR_MAP:
+ if module and validator_name == name:
+ return validator_cls
+
+
+def get_current_validator():
+ """
+ Return the default validator based on the value of an environment variable.
+ """
+ validator_name = os.environ.get("NBFORMAT_VALIDATOR", "jsonschema")
+ return _validator_for_name(validator_name)
diff --git a/nbformat/tests/base.py b/nbformat/tests/base.py
index 303d2f73..312c22bb 100644
--- a/nbformat/tests/base.py
+++ b/nbformat/tests/base.py
@@ -12,9 +12,10 @@
class TestsBase(unittest.TestCase):
"""Base tests class."""
- def fopen(self, f, mode=u'r',encoding='utf-8'):
- return io.open(os.path.join(self._get_files_path(), f), mode, encoding=encoding)
+ @classmethod
+ def fopen(cls, f, mode=u'r',encoding='utf-8'):
+ return io.open(os.path.join(cls._get_files_path(), f), mode, encoding=encoding)
-
- def _get_files_path(self):
+ @classmethod
+ def _get_files_path(cls):
return os.path.dirname(__file__)
diff --git a/nbformat/tests/many_tracebacks.ipynb b/nbformat/tests/many_tracebacks.ipynb
new file mode 100644
index 00000000..a37eda86
--- /dev/null
+++ b/nbformat/tests/many_tracebacks.ipynb
@@ -0,0 +1,46 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'iAmNotDefined' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0miAmNotDefined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m: name 'iAmNotDefined' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "# Imagine this cell called a function which runs things on a cluster and you have an error"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py
index 308d98a2..7723dc91 100644
--- a/nbformat/tests/test_validator.py
+++ b/nbformat/tests/test_validator.py
@@ -4,117 +4,196 @@
# Distributed under the terms of the Modified BSD License.
import os
+import re
from .base import TestsBase
from jsonschema import ValidationError
from nbformat import read
from ..validator import isvalid, validate, iter_validate
+from ..json_compat import VALIDATORS
+
+import pytest
+
+
+# Fixtures
+@pytest.fixture(autouse=True)
+def clean_env_before_and_after_tests():
+ """Fixture to clean up env variables before and after tests."""
+ os.environ.pop("NBFORMAT_VALIDATOR", None)
+ yield
+ os.environ.pop("NBFORMAT_VALIDATOR", None)
+
+
+# Helpers
+def set_validator(validator_name):
+ os.environ["NBFORMAT_VALIDATOR"] = validator_name
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb2(validator_name):
+ """Test that a v2 notebook converted to current passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test2.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb) == True
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb3(validator_name):
+ """Test that a v3 notebook passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test3.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb) == True
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb4(validator_name):
+ """Test that a v4 notebook passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb) == True
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb4_document_info(validator_name):
+ """Test that a notebook with document_info passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4docinfo.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb)
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb4custom(validator_name):
+ """Test that a notebook with a custom JSON mimetype passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4custom.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb)
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb4jupyter_metadata(validator_name):
+ """Test that a notebook with a jupyter metadata passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4jupyter_metadata.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb)
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_nb4jupyter_metadata_timings(validator_name):
+ """Tests that a notebook with "timing" in metadata passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ validate(nb)
+ assert isvalid(nb)
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_invalid(validator_name):
+ """Test than an invalid notebook does not pass validation"""
+ set_validator(validator_name)
+ # this notebook has a few different errors:
+ # - one cell is missing its source
+ # - invalid cell type
+ # - invalid output_type
+ with TestsBase.fopen(u'invalid.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ with pytest.raises(ValidationError):
+ validate(nb)
+ assert not isvalid(nb)
-class TestValidator(TestsBase):
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_validate_empty(validator_name):
+ """Test that an empty notebook (invalid) fails validation"""
+ set_validator(validator_name)
+ with pytest.raises(ValidationError) as e:
+ validate({})
- def test_nb2(self):
- """Test that a v2 notebook converted to current passes validation"""
- with self.fopen(u'test2.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertEqual(isvalid(nb), True)
- def test_nb3(self):
- """Test that a v3 notebook passes validation"""
- with self.fopen(u'test3.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertEqual(isvalid(nb), True)
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_future(validator_name):
+ """Test that a notebook from the future with extra keys passes validation"""
+ set_validator(validator_name)
+ with TestsBase.fopen(u'test4plus.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ with pytest.raises(ValidationError):
+ validate(nb, version=4, version_minor=3)
- def test_nb4(self):
- """Test that a v4 notebook passes validation"""
- with self.fopen(u'test4.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertEqual(isvalid(nb), True)
+ assert not isvalid(nb, version=4, version_minor=3)
+ assert isvalid(nb)
- def test_nb4_document_info(self):
- """Test that a notebook with document_info passes validation"""
- with self.fopen(u'test4docinfo.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertEqual(isvalid(nb), True)
- def test_nb4custom(self):
- """Test that a notebook with a custom JSON mimetype passes validation"""
- with self.fopen(u'test4custom.ipynb', u'r') as f:
- nb = read(f, as_version=4)
+# This is only a valid test for the default validator, jsonschema
+@pytest.mark.parametrize("validator_name", ["jsonschema"])
+def test_validation_error(validator_name):
+ set_validator(validator_name)
+ with TestsBase.fopen(u'invalid.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
+ with pytest.raises(ValidationError) as exception_info:
validate(nb)
- self.assertEqual(isvalid(nb), True)
- def test_nb4jupyter_metadata(self):
- """Test that a notebook with a jupyter metadata passes validation"""
- with self.fopen(u'test4jupyter_metadata.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertEqual(isvalid(nb), True)
+ s = str(exception_info.value)
+ assert re.compile(r"validating .required. in markdown_cell").search(s)
+ assert re.compile(r"source.* is a required property").search(s)
+ assert re.compile(r"On instance\[u?['\"].*cells['\"]\]\[0\]").search(s)
+ assert len(s.splitlines()) < 10
- def test_nb4jupyter_metadata_timings(self):
- """Tests that a notebook with "timing" in metadata passes validation"""
- with self.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- validate(nb)
- self.assertTrue(isvalid(nb))
-
- def test_invalid(self):
- """Test than an invalid notebook does not pass validation"""
- # this notebook has a few different errors:
- # - one cell is missing its source
- # - invalid cell type
- # - invalid output_type
- with self.fopen(u'invalid.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- with self.assertRaises(ValidationError):
- validate(nb)
- self.assertEqual(isvalid(nb), False)
-
- def test_validate_empty(self):
- """Test that an empty notebook (invalid) fails validation"""
- with self.assertRaises(ValidationError) as e:
- validate({})
-
- def test_future(self):
- """Test that a notebook from the future with extra keys passes validation"""
- with self.fopen(u'test4plus.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- with self.assertRaises(ValidationError):
- validate(nb, version=4, version_minor=3)
- self.assertEqual(isvalid(nb, version=4, version_minor=3), False)
- self.assertEqual(isvalid(nb), True)
+# This is only a valid test for the default validator, jsonschema
+@pytest.mark.parametrize("validator_name", ["jsonschema"])
+def test_iter_validation_error(validator_name):
+ set_validator(validator_name)
+ with TestsBase.fopen(u'invalid.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
- def test_validation_error(self):
- with self.fopen(u'invalid.ipynb', u'r') as f:
- nb = read(f, as_version=4)
- with self.assertRaises(ValidationError) as e:
- validate(nb)
- s = str(e.exception)
- self.assertRegex(s, "validating.*required.* in markdown_cell")
- self.assertRegex(s, "source.* is a required property")
- self.assertRegex(s, r"On instance\[u?['\"].*cells['\"]\]\[0\]")
- self.assertLess(len(s.splitlines()), 10)
-
- def test_iter_validation_error(self):
- with self.fopen(u'invalid.ipynb', u'r') as f:
+ errors = list(iter_validate(nb))
+ assert len(errors) == 3
+ assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'}
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_iter_validation_empty(validator_name):
+ """Test that an empty notebook (invalid) fails validation via iter_validate"""
+ set_validator(validator_name)
+ errors = list(iter_validate({}))
+ assert len(errors) == 1
+ assert type(errors[0]) == ValidationError
+
+
+@pytest.mark.parametrize("validator_name", VALIDATORS)
+def test_validation_no_version(validator_name):
+ """Test that an invalid notebook with no version fails validation"""
+ set_validator(validator_name)
+ with pytest.raises(ValidationError) as e:
+ validate({'invalid': 'notebook'})
+
+
+def test_invalid_validator_raises_value_error():
+ """Test that an invalid notebook with no version fails validation"""
+ set_validator("foobar")
+ with pytest.raises(ValueError):
+ with TestsBase.fopen(u'test2.ipynb', u'r') as f:
nb = read(f, as_version=4)
- errors = list(iter_validate(nb))
- assert len(errors) == 3
- assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'}
- def test_iter_validation_empty(self):
- """Test that an empty notebook (invalid) fails validation via iter_validate"""
- errors = list(iter_validate({}))
- assert len(errors) == 1
- assert type(errors[0]) == ValidationError
+def test_invalid_validator_raises_value_error_after_read():
+ """Test that an invalid notebook with no version fails validation"""
+ set_validator("jsonschema")
+ with TestsBase.fopen(u'test2.ipynb', u'r') as f:
+ nb = read(f, as_version=4)
- def test_validation_no_version(self):
- """Test that an invalid notebook with no version fails validation"""
- with self.assertRaises(ValidationError) as e:
- validate({'invalid': 'notebook'})
+ set_validator("foobar")
+ with pytest.raises(ValueError):
+ validate(nb)
diff --git a/nbformat/validator.py b/nbformat/validator.py
index 1be77072..bd44729c 100644
--- a/nbformat/validator.py
+++ b/nbformat/validator.py
@@ -8,23 +8,10 @@
import sys
import warnings
-try:
- from jsonschema import ValidationError
- from jsonschema import Draft4Validator as Validator
-except ImportError as e:
- verbose_msg = """
- Jupyter notebook format depends on the jsonschema package:
-
- https://pypi.python.org/pypi/jsonschema
-
- Please install it first.
- """
- raise ImportError(verbose_msg) from e
-
from ipython_genutils.importstring import import_item
+from .json_compat import get_current_validator, ValidationError
from .reader import get_version, reads
-
validators = {}
def _relax_additional_properties(obj):
@@ -61,7 +48,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False):
if version_minor is None:
version_minor = current_minor
- version_tuple = (version, version_minor)
+ current_validator = get_current_validator()
+ version_tuple = (current_validator.name, version, version_minor)
if version_tuple not in validators:
try:
@@ -75,7 +63,7 @@ def get_validator(version=None, version_minor=None, relax_add_props=False):
# and allow undefined cell types and outputs
schema_json = _allow_undefined(schema_json)
- validators[version_tuple] = Validator(schema_json)
+ validators[version_tuple] = current_validator(schema_json)
if relax_add_props:
try:
@@ -86,8 +74,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False):
# this allows properties to be added for intermediate
# representations while validating for all other kinds of errors
schema_json = _relax_additional_properties(schema_json)
+ validators[version_tuple] = current_validator(schema_json)
- validators[version_tuple] = Validator(schema_json)
return validators[version_tuple]
@@ -257,7 +245,6 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
else:
raise TypeError("validate() missing 1 required argument: 'nbdict'")
-
if ref is None:
# if ref is not specified, we have a whole notebook, so we can get the version
nbdict_version, nbdict_version_minor = get_version(nbdict)
diff --git a/setup.py b/setup.py
index ea4c93af..5fb46c8d 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,8 @@
]
extras_require = setuptools_args['extras_require'] = {
- 'test': ['testpath', 'pytest', 'pytest-cov'],
+ 'fast': ['fastjsonschema'],
+ 'test': ['fastjsonschema', 'testpath', 'pytest', 'pytest-cov'],
}
if 'setuptools' in sys.modules: