From 26a82bddab5fd9d6fad4e9d75ef8c9d075e39171 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Tue, 26 Dec 2023 10:38:03 -0500 Subject: [PATCH] convert coalesce_streams function to CoalesceStreamsPreprocessor `coalesce_streams` was the last remaining "decorated function Preprocessor", and I couldn't find an example of how to use it. Here it is converted to be a Preprocessor subclass, like the others. A top-level --coalesce-streams flag is also added. --- docs/source/api/preprocessors.rst | 6 +- nbconvert/exporters/exporter.py | 2 +- nbconvert/nbconvertapp.py | 8 ++ nbconvert/preprocessors/__init__.py | 5 +- nbconvert/preprocessors/coalescestreams.py | 99 +++++++-------------- tests/preprocessors/test_coalescestreams.py | 79 ++++++++-------- 6 files changed, 89 insertions(+), 110 deletions(-) diff --git a/docs/source/api/preprocessors.rst b/docs/source/api/preprocessors.rst index 168d2b70e..189e51ad2 100644 --- a/docs/source/api/preprocessors.rst +++ b/docs/source/api/preprocessors.rst @@ -43,11 +43,13 @@ Metadata and header control .. autoclass:: CSSHTMLHeaderPreprocessor -Removing cells, inputs, and outputs +Removing/Manipulating cells, inputs, and outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ClearOutputPreprocessor +.. autoclass:: CoalesceStreamsPreprocessor + .. autoclass:: RegexRemovePreprocessor .. autoclass:: TagRemovePreprocessor @@ -59,5 +61,3 @@ Executing Notebooks :members: .. autoclass:: CellExecutionError - -.. 
autofunction:: coalesce_streams diff --git a/nbconvert/exporters/exporter.py b/nbconvert/exporters/exporter.py index 309a22fca..ca6739491 100644 --- a/nbconvert/exporters/exporter.py +++ b/nbconvert/exporters/exporter.py @@ -89,8 +89,8 @@ class Exporter(LoggingConfigurable): "nbconvert.preprocessors.TagRemovePreprocessor", "nbconvert.preprocessors.RegexRemovePreprocessor", "nbconvert.preprocessors.ClearOutputPreprocessor", + "nbconvert.preprocessors.CoalesceStreamsPreprocessor", "nbconvert.preprocessors.ExecutePreprocessor", - "nbconvert.preprocessors.coalesce_streams", "nbconvert.preprocessors.SVG2PDFPreprocessor", "nbconvert.preprocessors.LatexPreprocessor", "nbconvert.preprocessors.HighlightMagicsPreprocessor", diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py index 0bb71326b..cd305afbf 100755 --- a/nbconvert/nbconvertapp.py +++ b/nbconvert/nbconvertapp.py @@ -118,6 +118,14 @@ def validate(self, obj, value): """Clear output of current file and save in place, overwriting the existing notebook. 
""", ), + "coalesce-streams": ( + { + "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"}, + "FilesWriter": {"build_directory": ""}, + "CoalesceStreamsPreprocessor": {"enabled": True}, + }, + """Coalesce consecutive stdout and stderr outputs into one stream (within each cell).""", + ), "no-prompt": ( { "TemplateExporter": { diff --git a/nbconvert/preprocessors/__init__.py b/nbconvert/preprocessors/__init__.py index 44bfb2e55..d752edf90 100644 --- a/nbconvert/preprocessors/__init__.py +++ b/nbconvert/preprocessors/__init__.py @@ -5,9 +5,7 @@ from .base import Preprocessor from .clearmetadata import ClearMetadataPreprocessor from .clearoutput import ClearOutputPreprocessor - -# decorated function Preprocessors -from .coalescestreams import coalesce_streams +from .coalescestreams import CoalesceStreamsPreprocessor from .convertfigures import ConvertFiguresPreprocessor from .csshtmlheader import CSSHTMLHeaderPreprocessor from .execute import ExecutePreprocessor @@ -24,6 +22,7 @@ "Preprocessor", "ClearMetadataPreprocessor", "ClearOutputPreprocessor", + "CoalesceStreamsPreprocessor", "ConvertFiguresPreprocessor", "CSSHTMLHeaderPreprocessor", "ExecutePreprocessor", diff --git a/nbconvert/preprocessors/coalescestreams.py b/nbconvert/preprocessors/coalescestreams.py index d98b986f7..3da31bd9b 100644 --- a/nbconvert/preprocessors/coalescestreams.py +++ b/nbconvert/preprocessors/coalescestreams.py @@ -1,81 +1,44 @@ """Preprocessor for merging consecutive stream outputs for easier handling.""" +import re # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. 
+from nbconvert.preprocessors import Preprocessor -import functools -import re - -from traitlets.log import get_logger - - -def cell_preprocessor(function): - """ - Wrap a function to be executed on all cells of a notebook - - The wrapped function should have these parameters: - - cell : NotebookNode cell - Notebook cell being processed - resources : dictionary - Additional resources used in the conversion process. Allows - preprocessors to pass variables into the Jinja engine. - index : int - Index of the cell being processed - """ - - @functools.wraps(function) - def wrappedfunc(nb, resources): - get_logger().debug("Applying preprocessor: %s", function.__name__) - for index, cell in enumerate(nb.cells): - nb.cells[index], resources = function(cell, resources, index) - return nb, resources - - return wrappedfunc - +CR_PAT = re.compile(r".*\r(?=[^\n])") -cr_pat = re.compile(r".*\r(?=[^\n])") - -@cell_preprocessor -def coalesce_streams(cell, resources, index): +class CoalesceStreamsPreprocessor(Preprocessor): """ Merge consecutive sequences of stream output into single stream to prevent extra newlines inserted at flush calls - - Parameters - ---------- - cell : NotebookNode cell - Notebook cell being processed - resources : dictionary - Additional resources used in the conversion process. Allows - transformers to pass variables into the Jinja engine. - index : int - Index of the cell being processed """ - outputs = cell.get("outputs", []) - if not outputs: + def preprocess_cell(self, cell, resources, cell_index): + """ + Apply a transformation on each cell. See base.py for details. 
+ """ + outputs = cell.get("outputs", []) + if not outputs: + return cell, resources + + last = outputs[0] + new_outputs = [last] + for output in outputs[1:]: + if ( + output.output_type == "stream" + and last.output_type == "stream" + and last.name == output.name + ): + last.text += output.text + else: + new_outputs.append(output) + last = output + + # process \r characters + for output in new_outputs: + if output.output_type == "stream" and "\r" in output.text: + output.text = CR_PAT.sub("", output.text) + + cell.outputs = new_outputs return cell, resources - - last = outputs[0] - new_outputs = [last] - for output in outputs[1:]: - if ( - output.output_type == "stream" - and last.output_type == "stream" - and last.name == output.name - ): - last.text += output.text - - else: - new_outputs.append(output) - last = output - - # process \r characters - for output in new_outputs: - if output.output_type == "stream" and "\r" in output.text: - output.text = cr_pat.sub("", output.text) - - cell.outputs = new_outputs - return cell, resources diff --git a/tests/preprocessors/test_coalescestreams.py b/tests/preprocessors/test_coalescestreams.py index ad899013d..f9c804a9d 100644 --- a/tests/preprocessors/test_coalescestreams.py +++ b/tests/preprocessors/test_coalescestreams.py @@ -5,7 +5,7 @@ from nbformat import v4 as nbformat -from nbconvert.preprocessors.coalescestreams import coalesce_streams +from nbconvert.preprocessors.coalescestreams import CoalesceStreamsPreprocessor from .base import PreprocessorTestsBase @@ -13,12 +13,29 @@ class TestCoalesceStreams(PreprocessorTestsBase): """Contains test functions for coalescestreams.py""" + def build_preprocessor(self): + """Make an instance of a preprocessor""" + preprocessor = CoalesceStreamsPreprocessor() + preprocessor.enabled = True + return preprocessor + + def test_constructor(self): + """Can a CoalesceStreamsPreprocessor be constructed?""" + self.build_preprocessor() + + def process_outputs(self, outputs): + """Process 
outputs""" + cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)] + nb = nbformat.new_notebook(cells=cells) + res = self.build_resources() + preprocessor = self.build_preprocessor() + nb, res = preprocessor(nb, res) + return nb.cells[0].outputs + def test_coalesce_streams(self): - """coalesce_streams preprocessor output test""" + """Test the output of a CoalesceStreamsPreprocessor""" nb = self.build_notebook() - res = self.build_resources() - nb, res = coalesce_streams(nb, res) - outputs = nb.cells[0].outputs + outputs = self.process_outputs(nb.cells[0].outputs) self.assertEqual(outputs[0].text, "a") self.assertEqual(outputs[1].output_type, "display_data") self.assertEqual(outputs[2].text, "cd") @@ -26,38 +43,30 @@ def test_coalesce_streams(self): def test_coalesce_sequenced_streams(self): """Can the coalesce streams preprocessor merge a sequence of streams?""" - outputs = [ - nbformat.new_output(output_type="stream", name="stdout", text="0"), - nbformat.new_output(output_type="stream", name="stdout", text="1"), - nbformat.new_output(output_type="stream", name="stdout", text="2"), - nbformat.new_output(output_type="stream", name="stdout", text="3"), - nbformat.new_output(output_type="stream", name="stdout", text="4"), - nbformat.new_output(output_type="stream", name="stdout", text="5"), - nbformat.new_output(output_type="stream", name="stdout", text="6"), - nbformat.new_output(output_type="stream", name="stdout", text="7"), - ] - cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)] - - nb = nbformat.new_notebook(cells=cells) - res = self.build_resources() - nb, res = coalesce_streams(nb, res) - outputs = nb.cells[0].outputs + outputs = self.process_outputs( + [ + nbformat.new_output(output_type="stream", name="stdout", text="0"), + nbformat.new_output(output_type="stream", name="stdout", text="1"), + nbformat.new_output(output_type="stream", name="stdout", text="2"), + 
nbformat.new_output(output_type="stream", name="stdout", text="3"), + nbformat.new_output(output_type="stream", name="stdout", text="4"), + nbformat.new_output(output_type="stream", name="stdout", text="5"), + nbformat.new_output(output_type="stream", name="stdout", text="6"), + nbformat.new_output(output_type="stream", name="stdout", text="7"), + ] + ) self.assertEqual(outputs[0].text, "01234567") def test_coalesce_replace_streams(self): """Are \\r characters handled?""" - outputs = [ - nbformat.new_output(output_type="stream", name="stdout", text="z"), - nbformat.new_output(output_type="stream", name="stdout", text="\ra"), - nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"), - nbformat.new_output(output_type="stream", name="stdout", text="\nz"), - nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"), - nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"), - ] - cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)] - - nb = nbformat.new_notebook(cells=cells) - res = self.build_resources() - nb, res = coalesce_streams(nb, res) - outputs = nb.cells[0].outputs + outputs = self.process_outputs( + [ + nbformat.new_output(output_type="stream", name="stdout", text="z"), + nbformat.new_output(output_type="stream", name="stdout", text="\ra"), + nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"), + nbformat.new_output(output_type="stream", name="stdout", text="\nz"), + nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"), + nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"), + ] + ) self.assertEqual(outputs[0].text, "a\nb\nc\nd")