diff --git a/.travis.yml b/.travis.yml index 9fe1c317..5a548e7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,11 +10,14 @@ sudo: false install: - pip install --upgrade setuptools pip - pip install --upgrade pytest + - pip install --upgrade fastjsonschema - pip install . codecov - pip install nbformat[test] - pip freeze script: - py.test -v --cov nbformat nbformat + - pip uninstall fastjsonschema --yes + - py.test -v --cov nbformat nbformat after_success: - codecov matrix: diff --git a/appveyor.yml b/appveyor.yml index dc7d1647..ddea683c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -15,6 +15,7 @@ install: - 'SET "PATH=%PYTHON%\\Scripts;%PATH%"' # Install our package: - pip install codecov + - pip install fastjsonschema - 'pip install --upgrade ".[test]"' build: off @@ -22,6 +23,8 @@ build: off # to run your custom scripts instead of automatic tests test_script: - 'py.test -v --cov nbformat nbformat' + - 'pip uninstall fastjsonschema --yes' + - 'py.test -v --cov nbformat nbformat' on_success: - codecov diff --git a/nbformat/__init__.py b/nbformat/__init__.py index fa05fc27..24cf75e3 100644 --- a/nbformat/__init__.py +++ b/nbformat/__init__.py @@ -74,7 +74,7 @@ def reads(s, as_version, **kwargs): if as_version is not NO_CONVERT: nb = convert(nb, as_version) try: - validate(nb) + validate(nb, use_fast=True) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return nb @@ -104,7 +104,7 @@ def writes(nb, version=NO_CONVERT, **kwargs): else: version, _ = reader.get_version(nb) try: - validate(nb) + validate(nb, use_fast=True) except ValidationError as e: get_logger().error("Notebook JSON is invalid: %s", e) return versions[version].writes_json(nb, **kwargs) diff --git a/nbformat/tests/many_tracebacks.ipynb b/nbformat/tests/many_tracebacks.ipynb new file mode 100644 index 00000000..a37eda86 --- /dev/null +++ b/nbformat/tests/many_tracebacks.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + 
"metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'iAmNotDefined' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0miAmNotDefined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'iAmNotDefined' is not defined" + ] + } + ], + "source": [ + "# Imagine this cell called a function which runs things on a cluster and you have an error" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 308d98a2..67750d3c 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -4,12 +4,18 @@ # Distributed under the terms of the Modified BSD License. 
import os +import time from .base import TestsBase from jsonschema import ValidationError from nbformat import read from ..validator import isvalid, validate, iter_validate +try: + import fastjsonschema +except ImportError: + fastjsonschema = None + class TestValidator(TestsBase): @@ -118,3 +124,27 @@ def test_validation_no_version(self): """Test that an invalid notebook with no version fails validation""" with self.assertRaises(ValidationError) as e: validate({'invalid': 'notebook'}) + + def test_fast_validation(self): + """ + Test that valid notebook with too many outputs is parsed ~12 times + faster with fastjsonschema. + """ + if fastjsonschema: + with self.fopen(u'many_tracebacks.ipynb', u'r') as f: + nb = read(f, as_version=4) + + # Multiply output + base_output = nb["cells"][0]["outputs"][0] + for i in range(50000): + nb["cells"][0]["outputs"].append(base_output) + + start_time = time.time() + validate(nb, use_fast=True) + fast_time = time.time() - start_time + + start_time = time.time() + validate(nb) + slow_time = time.time() - start_time + + self.assertGreater(slow_time / fast_time, 12) diff --git a/nbformat/validator.py b/nbformat/validator.py index 1be77072..0ea7ef84 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -21,11 +21,20 @@ """ raise ImportError(verbose_msg) from e +# Use fastjsonschema if installed +try: + import fastjsonschema + from fastjsonschema import JsonSchemaException +except ImportError: + fastjsonschema = None + JsonSchemaException = ValidationError + from ipython_genutils.importstring import import_item from .reader import get_version, reads validators = {} +fast_validators = {} def _relax_additional_properties(obj): """relax any `additionalProperties`""" @@ -50,7 +59,7 @@ def _allow_undefined(schema): ) return schema -def get_validator(version=None, version_minor=None, relax_add_props=False): +def get_validator(version=None, version_minor=None, relax_add_props=False, use_fast=False): """Load the JSON schema into a 
Validator""" if version is None: from . import current_nbformat @@ -77,6 +86,10 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): validators[version_tuple] = Validator(schema_json) + # If fastjsonschema is installed use it to validate + if use_fast and fastjsonschema is not None and version_tuple not in fast_validators: + fast_validators[version_tuple] = fastjsonschema.compile(schema_json) + if relax_add_props: try: schema_json = _get_schema_json(v, version=version, version_minor=version_minor) @@ -88,7 +101,15 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): schema_json = _relax_additional_properties(schema_json) validators[version_tuple] = Validator(schema_json) - return validators[version_tuple] + + # If fastjsonschema is installed use it to validate + if use_fast and fastjsonschema is not None: + fast_validators[version_tuple] = fastjsonschema.compile(schema_json) + + if use_fast and fastjsonschema is not None: + return fast_validators[version_tuple] + else: + return validators[version_tuple] def _get_schema_json(v, version=None, version_minor=None): @@ -241,7 +262,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None): + relax_add_props=False, nbjson=None, use_fast=False): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. 
@@ -270,10 +291,22 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = 1, 0 - for error in iter_validate(nbdict, ref=ref, version=version, - version_minor=version_minor, - relax_add_props=relax_add_props): - raise error + validator = get_validator(version, version_minor, relax_add_props=relax_add_props, + use_fast=use_fast) + + if fastjsonschema is not None and use_fast and ref is None: + if validator is None: + raise ValidationError("No schema for validating v%s notebooks" % version) + + try: + validator(nbdict) + except JsonSchemaException as e: + raise ValidationError(e.message, schema_path=e.path) from e + else: + for error in iter_validate(nbdict, ref=ref, version=version, + version_minor=version_minor, + relax_add_props=relax_add_props): + raise error def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, @@ -294,7 +327,8 @@ def iter_validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = get_version(nbdict) - validator = get_validator(version, version_minor, relax_add_props=relax_add_props) + validator = get_validator(version, version_minor, relax_add_props=relax_add_props, + use_fast=False) if validator is None: # no validator