diff --git a/.github/workflows/ci-tests-colab.yml b/.github/workflows/ci-tests-colab.yml index fd4cbcee..1536e823 100644 --- a/.github/workflows/ci-tests-colab.yml +++ b/.github/workflows/ci-tests-colab.yml @@ -89,7 +89,6 @@ jobs: # install geckodriver driver_path=$(python -c ' - import shutil from pathlib import Path import geckodriver_autoinstaller diff --git a/.github/workflows/ci-tests-jupyter.yml b/.github/workflows/ci-tests-jupyter.yml index 10f5f0c6..0371b937 100644 --- a/.github/workflows/ci-tests-jupyter.yml +++ b/.github/workflows/ci-tests-jupyter.yml @@ -122,7 +122,6 @@ jobs: # install geckodriver driver_path=$(python -c ' - import shutil from pathlib import Path import geckodriver_autoinstaller diff --git a/davos/core/config.py b/davos/core/config.py index 00c76ac0..9fe07dbb 100644 --- a/davos/core/config.py +++ b/davos/core/config.py @@ -20,6 +20,7 @@ import os import pprint +import shlex import shutil import site import sys @@ -29,6 +30,8 @@ from io import StringIO from os.path import expandvars from pathlib import Path +from locale import getpreferredencoding +from subprocess import CalledProcessError, check_output from davos.core.exceptions import ( DavosConfigError, @@ -199,6 +202,7 @@ def __init__(self): self._conda_envs_dirs = None self._default_pip_executable = self._find_default_pip_executable() self._ipy_showsyntaxerror_orig = None + self._jupyter_interface = _get_jupyter_interface() self._repr_formatter = pprint.PrettyPrinter() if sys.version_info.minor >= 8: # sort_dicts constructor param added in Python 3.8, defaults @@ -558,6 +562,75 @@ def _block_greedy_ipython_completer(): raise Exception +def _get_jupyter_interface(): + """ + Determines whether the notebook is being run through the "classic" + Jupyter notebook interface or JupyterLab. Used to set the value of + `davos.config._jupyter_interface`. + + Returns + ------- + interface : str + "notebook" for classic Jupyter notebooks (or unknown); "lab" for + JupyterLab. + + Notes + ----- + 1. 
This distinction is needed because recent versions of the + `jupyterlab` package no longer depend on `notebook`, so if the + user is running JupyterLab, some `jupyter notebook ...` shell + commands davos runs internally may not be available and will need + to be run as `jupyter lab ...` commands instead. + 2. "notebook" is treated as a strong default assumption and returned + if the interface cannot be determined, for a few reasons: + - Only more recent JupyterLab versions have dropped `notebook` as + a dependency, so it's less likely the user will have JupyterLab + without `notebook` than vice versa. + - IDEs tend to run simple notebook servers rather than JupyterLab + for custom interfaces, but the command to launch the server may + not be the immediate parent process in that case and trying to + check all processes introduces a cascade of other issues. + - Colab notebooks also use the "classic" notebook server and make + up a fairly large percentage of Davos uses, but the Colab VM + environment is changed frequently and without notice, so it's + more likely to break or otherwise mess with users/packages' + ability to query running processes and/or the notebook server, + causing this function to return whatever is chosen as the + fallback/default value. + 3. `subprocess.check_output` is called directly rather than using + `davos.core.core.run_shell_command` like most other davos + functions that run shell commands. In IPython environments, + `run_shell_command` internally calls + `IPython.utils.process.system`, which for some strange reason + truncates the stdout from this particular command at 80 columns + rather than wrapping it like it does with seemingly every other + command. The shell commands we need to get from the output + of `ps` can be quite long because they include multiple absolute + paths, and the info we care about may not be in the first 80 + characters. 
+ """ + cmd = f'ps -o command= -p {os.getppid()}' + try: + parent_proc_cmd = check_output(shlex.split(cmd), + encoding=getpreferredencoding()) + except (FileNotFoundError, CalledProcessError): + # FileNotFoundError: `ps` command not available + # CalledProcessError: command failed for any other reason + interface = 'notebook' + else: + # when launched normally from the command line, the 2nd item in + # the list should be the notebook/lab executable, but safer to + # check more generally in case the user has something unusual + # like a custom script they called to launch the server + for item in parent_proc_cmd.split(): + if item.endswith(('notebook', 'lab')): + interface = 'lab' if item.endswith('lab') else 'notebook' + break + else: + interface = 'notebook' + return interface + + def _get_stdlib_modules(): """ Get names of standard library modules. diff --git a/davos/core/config.pyi b/davos/core/config.pyi index 10321c5a..e2c5a0fc 100644 --- a/davos/core/config.pyi +++ b/davos/core/config.pyi @@ -36,6 +36,7 @@ class DavosConfig(metaclass=SingletonConfig): _environment: _Environment _ipy_showsyntaxerror_orig: _IpyShowSyntaxErrorPre7 | _IpyShowSyntaxErrorPost7 | None _ipython_shell: IpythonShell | None + _jupyter_interface: Literal['notebook', 'lab'] _noninteractive: bool _pip_executable: str _project: AbstractProject | ConcreteProject | None @@ -102,4 +103,5 @@ class DavosConfig(metaclass=SingletonConfig): def _find_default_pip_executable(self) -> str: ... def _block_greedy_ipython_completer() -> None: ... +def _get_jupyter_interface() -> Literal['notebook', 'lab']: ... def _get_stdlib_modules() -> frozenset[str]: ... 
diff --git a/davos/core/core.py b/davos/core/core.py index 527c18ba..e4c001da 100644 --- a/davos/core/core.py +++ b/davos/core/core.py @@ -30,6 +30,7 @@ import importlib import itertools import sys +import warnings from contextlib import contextmanager, redirect_stdout from io import StringIO from pathlib import Path @@ -50,7 +51,7 @@ OnionParserError, ParserNotImplementedError, SmugglerError, - TheNightIsDarkAndFullOfTErrors + TheNightIsDarkAndFullOfErrors ) from davos.core.parsers import pip_parser from davos.core.regexps import ( @@ -967,8 +968,8 @@ def smuggle_wrapper(*args, **kwargs): # invalidate sys.meta_path finder caches so the global # working set is regenerated based on the updated sys.path. # Note: after pretty extensive spot checking, I haven't - # managed found a case where this is actually since - # migrating to importlib.metadata instead of pkg_resources, + # managed to find a case where this is actually necessary + # since migrating from pkg_resources to importlib.metadata, # but the docs recommend it and the overhead is extremely # minor, so probably worth including in case the user or # notebook environment has implemented some unusual custom @@ -1041,7 +1042,7 @@ def smuggle( pkg_name = name.split('.')[0] if pkg_name == 'davos': - raise TheNightIsDarkAndFullOfTErrors("Don't do that.") + raise TheNightIsDarkAndFullOfErrors("Don't do that.") onion = Onion(pkg_name, installer=installer, args_str=args_str, **installer_kwargs) @@ -1102,6 +1103,7 @@ def smuggle( failed_reloads = [] for dep_name in prev_imported_pkgs: dep_modules_old = {} + top_level_names_old = [] for mod_name in tuple(sys.modules.keys()): # remove submodules of previously imported packages so # new versions get imported when main package is @@ -1113,6 +1115,23 @@ def smuggle( # run, which crashes it... 
(-_-* ) if mod_name.startswith(f'{dep_name}.'): dep_modules_old[mod_name] = sys.modules.pop(mod_name) + # when reloading package below, importlib.reload + # doesn't seem to automatically follow and + # recursively reload submodules/subpackages loaded + # into the top-level module via relative import + # (e.g., `from . import submodule`) based on their + # *new* locations, if different from their old + # locations. So if a previously smuggled package + # came from the user's main Python environment, and + # the just-smuggled version is now in a project + # directory, the old subpackage/submodule object + # will be re-used in the new top-level module's + # namespace unless we explicitly remove them here + # and force their loaders' paths to be recomputed + submod_name = mod_name[len(dep_name) + 1:] + if submod_name in sys.modules[dep_name].__dict__: + top_level_names_old.append(submod_name) + del sys.modules[dep_name].__dict__[submod_name] # get (but don't pop) top-level package to that it can be # reloaded (must exist in sys.modules) @@ -1122,7 +1141,9 @@ def smuggle( except (ImportError, RuntimeError): # if we aren't able to reload the module, put the old # version's submodules we removed back in sys.modules - # for now and prepare to show a warning post-execution. + # for now, add their names back to the top-level + # module's __dict__, and prepare to show a warning + # post-execution. # This way: # 1. 
the user still has a working module until they # restart the runtime @@ -1130,6 +1151,10 @@ def smuggle( # we try to reload/import other modules that # import it sys.modules.update(dep_modules_old) + for submod_name in top_level_names_old: + sys.modules[dep_name].__dict__[submod_name] = ( + dep_modules_old[f'{dep_name}.{submod_name}'] + ) failed_reloads.append(dep_name) if any(failed_reloads): @@ -1158,6 +1183,30 @@ def smuggle( raise SmugglerError(msg) else: prompt_restart_rerun_buttons(failed_reloads) + # if the function above returns, the user has chosen to + # continue running the notebook rather than restarting + # to properly reload the package. Issue a warning to let + # them know to proceed with caution + if len(failed_reloads) == 1: + failed_reloads_str = failed_reloads[0] + verb = 'was' + failed_ver_string = f'{failed_reloads_str}.__version__' + else: + verb = 'were' + failed_ver_string = "These packages' '__version__' attributes" + if len(failed_reloads) == 2: + failed_reloads_str = " and ".join(failed_reloads) + else: + failed_reloads_str = ( + f"{', '.join(failed_reloads[:-1])}, and " + f"{failed_reloads[-1]}" + ) + + msg = ( + f"{failed_reloads_str} {verb} partially reloaded. " + f"{failed_ver_string} may be misleading." 
+ ) + warnings.warn(msg, RuntimeWarning, stacklevel=3) if ( config._project is None and diff --git a/davos/core/exceptions.py b/davos/core/exceptions.py index 46b35e09..131260c4 100644 --- a/davos/core/exceptions.py +++ b/davos/core/exceptions.py @@ -201,7 +201,7 @@ class SmugglerError(DavosError): """Base class for errors raised during the smuggle phase.""" -class TheNightIsDarkAndFullOfTErrors(SmugglerError): +class TheNightIsDarkAndFullOfErrors(SmugglerError): """A little Easter egg for anyone who tries to `smuggle davos`.""" diff --git a/davos/core/exceptions.pyi b/davos/core/exceptions.pyi index 1882211b..5879625a 100644 --- a/davos/core/exceptions.pyi +++ b/davos/core/exceptions.pyi @@ -29,7 +29,7 @@ class ProjectNotebookNotFoundError(DavosProjectError, FileNotFoundError): ... class SmugglerError(DavosError): ... -class TheNightIsDarkAndFullOfTErrors(SmugglerError): ... +class TheNightIsDarkAndFullOfErrors(SmugglerError): ... class InstallerError(SmugglerError, CalledProcessError): show_output: bool diff --git a/davos/core/project.py b/davos/core/project.py index f86cc65a..ee1a3e26 100644 --- a/davos/core/project.py +++ b/davos/core/project.py @@ -52,6 +52,7 @@ import warnings from os.path import expandvars from pathlib import Path +from subprocess import CalledProcessError from urllib.request import urlopen from urllib.parse import parse_qs, unquote, urlencode, urljoin, urlparse @@ -256,12 +257,14 @@ def remove(self, yes=False): if not yes: if config.noninteractive: raise DavosProjectError( - "To remove a project when noninteractive mode is " - "enabled, you must explicitly pass 'yes=True'." + "To remove a project when noninteractive mode is enabled, " + "you must explicitly pass 'yes=True'." ) prompt = f"Remove project {self.name!r} and all installed packages?" 
confirmed = prompt_input(prompt, default='n') if not confirmed: - print(f"{self.name} not removed") + # declining must abort removal even when stdout is suppressed + if not config.suppress_stdout: + print(f"{self.name} not removed") return shutil.rmtree(self.project_dir) @@ -645,8 +648,18 @@ def get_notebook_path(): kernel_filepath = ipykernel.connect.get_connection_file() kernel_id = kernel_filepath.split('/kernel-')[-1].split('.json')[0] - running_nbservers_stdout = run_shell_command('jupyter notebook list', - live_stdout=False) + nbserver_list_cmd = f'jupyter {config._jupyter_interface} list' + try: + running_nbservers_stdout = run_shell_command(nbserver_list_cmd, + live_stdout=False) + except CalledProcessError as e: + # raise RuntimeError so it's caught by `use_default_project` and + # the fallback project is used + raise RuntimeError( + "Shell command to get running Jupyter servers " + f"({nbserver_list_cmd}) failed" + ) from e + for line in running_nbservers_stdout.splitlines(): # should only need to exclude first line ("Currently running # servers:"), but handle safely in case output format changes in @@ -773,6 +786,12 @@ def prune_projects(yes=False): the interpreter is shut down -- they're only checked for and dealt with here as a fallback in case one somehow sneaks through. """ + if config.noninteractive and not yes: + raise DavosProjectError( + "To remove projects when noninteractive mode is enabled, you must " + "explicitly pass 'yes=True'." 
+ ) + # dict of projects to remove -- keys: "safe"-formatted project # directory names; values: corresponding notebook filepaths to_remove = {} @@ -848,7 +867,7 @@ def prune_projects(yes=False): clear_output(wait=False) # print final status for all projects processed print(template.format(*statuses)) - else: + elif not config.suppress_stdout: print("No unused projects found.") diff --git a/davos/implementations/js_functions.py b/davos/implementations/js_functions.py index eb496045..65a09d53 100644 --- a/davos/implementations/js_functions.py +++ b/davos/implementations/js_functions.py @@ -135,7 +135,7 @@ def __setattr__(self, name, value): * Value sent to the notebook kernel's stdin socket if the * given button is clicked and sendResult is true. Used to * forward user input information to Python. JS types are - * converted ty Python types, within reason (Boolean -> bool, + * converted to Python types, within reason (Boolean -> bool, * Object -> dict, Array -> list, null -> None, * undefined -> '', etc.). If omitted, the return value of * onClick will be used instead. 
diff --git a/paper/figs/example1.pdf b/paper/figs/example1.pdf index 5b8a02f6..a130bf94 100644 Binary files a/paper/figs/example1.pdf and b/paper/figs/example1.pdf differ diff --git a/paper/figs/example2.pdf b/paper/figs/example2.pdf index 425cd1ed..843adab6 100644 Binary files a/paper/figs/example2.pdf and b/paper/figs/example2.pdf differ diff --git a/paper/figs/example3.pdf b/paper/figs/example3.pdf index 308c7663..0f7f3464 100644 Binary files a/paper/figs/example3.pdf and b/paper/figs/example3.pdf differ diff --git a/paper/figs/example4.pdf b/paper/figs/example4.pdf index 506e3e93..715ecc85 100644 Binary files a/paper/figs/example4.pdf and b/paper/figs/example4.pdf differ diff --git a/paper/figs/example5.pdf b/paper/figs/example5.pdf index aa4f9357..b8a86719 100644 Binary files a/paper/figs/example5.pdf and b/paper/figs/example5.pdf differ diff --git a/paper/figs/example6.pdf b/paper/figs/example6.pdf index 66d36e6a..87022a88 100644 Binary files a/paper/figs/example6.pdf and b/paper/figs/example6.pdf differ diff --git a/paper/figs/example7.pdf b/paper/figs/example7.pdf index 4160c931..c2829cd2 100644 Binary files a/paper/figs/example7.pdf and b/paper/figs/example7.pdf differ diff --git a/paper/figs/example8.pdf b/paper/figs/example8.pdf index 399f076e..218a3152 100644 Binary files a/paper/figs/example8.pdf and b/paper/figs/example8.pdf differ diff --git a/paper/figs/illustrative_example.pdf b/paper/figs/illustrative_example.pdf index d150192a..10fe043b 100644 Binary files a/paper/figs/illustrative_example.pdf and b/paper/figs/illustrative_example.pdf differ diff --git a/paper/figs/shareable_code_2d.pdf b/paper/figs/shareable_code_2d.pdf new file mode 100644 index 00000000..17e56f70 Binary files /dev/null and b/paper/figs/shareable_code_2d.pdf differ diff --git a/paper/figs/snippet1.pdf b/paper/figs/snippet1.pdf index cb84eb81..fa630408 100644 Binary files a/paper/figs/snippet1.pdf and b/paper/figs/snippet1.pdf differ diff --git a/paper/figs/snippet5.pdf 
b/paper/figs/snippet5.pdf index 77f8e28a..706a1680 100644 Binary files a/paper/figs/snippet5.pdf and b/paper/figs/snippet5.pdf differ diff --git a/paper/figs/source/make_shareable_code_base.ipynb b/paper/figs/source/make_shareable_code_base.ipynb new file mode 100644 index 00000000..f9c30dbd --- /dev/null +++ b/paper/figs/source/make_shareable_code_base.ipynb @@ -0,0 +1,100 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M0Qv4nlz2p7L", + "outputId": "f47a5803-36ff-4cae-9897-6669c02d306f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting davos\n", + " Downloading davos-0.2.2-py3-none-any.whl (99 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.7/99.7 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from davos) (23.1)\n", + "Installing collected packages: davos\n", + "Successfully installed davos-0.2.2\n" + ] + } + ], + "source": [ + "%pip install davos\n", + "import davos" + ] + }, + { + "cell_type": "code", + "source": [ + "from matplotlib smuggle pyplot as plt\n", + "smuggle seaborn as sns" + ], + "metadata": { + "id": "re5FDpAE2sOi" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "plt.figure(figsize=(7, 7))\n", + "plt.plot([0, 7], [0, 7], '--', linewidth=2, color='lightgray')\n", + "plt.plot([1, 2, 2, 3, 4, 5, 6], [1, 2, 3, 3, 4, 5, 6], 'ko', markersize=10)\n", + "\n", + "plt.xticks([])\n", + "plt.yticks([])\n", + "plt.xlim([0, 7.1])\n", + "plt.ylim([0, 7.1])\n", + "\n", + 
"sns.despine(top=True, right=True)\n", + "\n", + "plt.xlabel('Setup cost', fontsize=18)\n", + "plt.ylabel('Reproducibility', fontsize=18);\n", + "\n", + "plt.savefig('shareable_code_base.pdf', bbox_inches='tight')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 605 + }, + "id": "FtPs8GRd2xaY", + "outputId": "928cf58a-1b52-4489-a9df-817808eadee7" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file diff --git a/paper/figs/source/shareable_code_base.pdf b/paper/figs/source/shareable_code_base.pdf new file mode 100644 index 00000000..b3a25312 Binary files /dev/null and b/paper/figs/source/shareable_code_base.pdf differ diff --git a/paper/main.bib b/paper/main.bib index e81c1b20..552ae4dd 100644 --- a/paper/main.bib +++ b/paper/main.bib @@ -1,3 +1,35 @@ +@misc{skle22, + author = {scikit-learn developers}, + howpublished = {\url{https://scikit-learn.org/1.1/model_persistence.html}}, + month = {May}, + title = {{scikit-learn User Guide: 9. Model persistence}}, + year = {2022}} + +@techreport{HeimCann19, + author = {Christian Heimes and Brett Cannon}, + institution = {Python Software Foundation}, + month = {May}, + number = {594}, + title = {Removing dead batteries from the standard library}, + type = {PEP}, + year = {2019}} + +@techreport{Smit17, + author = {Eric V. Smith}, + institution = {Python Software Foundation}, + month = {June}, + number = {557}, + title = {Data Classes}, + type = {PEP}, + year = {2017}} + +@misc{MIND23, + author = {{MIND Team}}, + howpublished = {\url{https://mindsummerschool.org}}, + month = {August}, + title = {{Methods in Neuroscience at Dartmouth (MIND) Computational Summer School}}, + year = {2023}} + @misc{BickEtal07, author = {I Bicking and B G{\'{a}}bor and {Python Packaging Authority}}, howpublished = {\url{https://github.com/pypa/virtualenv}}, @@ -83,7 +115,7 @@ @misc{cond15 @techreport{CoghStuf13, author = {Nick Coghlan and Donald Stufft}, - institution = {{Python} Software Foundation}, + institution = {Python Software Foundation}, month = {March}, number = {440}, title = {Version {I}dentification and {D}ependency {S}pecification}, diff --git a/paper/main.pdf b/paper/main.pdf index a52b6cc5..51af8efd 100644 Binary files a/paper/main.pdf and b/paper/main.pdf differ diff --git a/paper/main.tex b/paper/main.tex index 61896d69..587aab10 
100644 --- a/paper/main.tex +++ b/paper/main.tex @@ -10,6 +10,9 @@ \geometry{left=1in, right=1in, top=1in, bottom=1in, headsep=0pt} +\newcommand{\todo}[1]{\textcolor{red}{\textbf{TODO}: #1}} +\newcommand{\stoppedhere}{\bigskip\bigskip\textcolor{red}{\textbf{========== TODO: finish editing from here to end ==========}}\bigskip\bigskip} + \journal{SoftwareX} \begin{document} @@ -66,9 +69,9 @@ \section*{Current code version} \hline \textbf{Nr.} & \textbf{Code metadata description} & \textbf{Metadata value} \\ \hline -C1 & Current code version & v0.2.0 \\ +C1 & Current code version & v0.2.4 \\ \hline -C2 & Permanent link to code/repository used for this code version & \url{https://github.com/ContextLab/davos/tree/v0.2.0} \\ +C2 & Permanent link to code/repository used for this code version & \url{https://github.com/ContextLab/davos/tree/v0.2.4} \\ \hline C3 & Code Ocean compute capsule & \\ \hline @@ -79,7 +82,7 @@ \section*{Current code version} C6 & Software code languages, tools, and services used & Python, JavaScript, PyPI/pip, IPython, Jupyter, ipykernel, PyZMQ.\newline Additional tools used for tests: pytest, Selenium, Requests, mypy, GitHub Actions \\ \hline C7 & Compilation requirements, operating environments, and - dependencies & Dependencies:~Python $\geq 3.6$, packaging, setuptools.\newline Supported OSes: MacOS, Linux, Unix-like.\newline Supported IPython environments: Jupyter Notebooks, JupyterLab, Google Colaboratory, Binder, IDE-based notebook editors. \\ + dependencies & Dependencies:~Python $\geq 3.6$, packaging, setuptools.\newline Supported OSes: MacOS, Linux, Unix-like.\newline Supported IPython environments: Jupyter Notebooks, JupyterLab, Google Colaboratory, Binder, IDE-based notebook editors, IPython shell. 
\\ \hline C8 & Link to developer documentation/manual & \url{https://github.com/ContextLab/davos\#readme} \\ \hline @@ -221,14 +224,14 @@ \section{Motivation and significance} \section{Software description} The Davos package is named after Davos Seaworth, a smuggler referred -to as ``the Onion Knight" from the series \textit{A Song of Ice and Fire} by +to as ``the Onion Knight'' from the series \textit{A Song of Ice and Fire} by George R. R. Martin~\cite{Mart98}. The \texttt{smuggle} keyword provided by Davos is a play on Python's \texttt{import} keyword: whereas importing can load a package into the Python workspace within the existing rules and frameworks provided by the Python language, ``smuggling'' provides an alternative that expands the scope and reach of ``importing.'' Like the character Davos Seaworth (who became famous for smuggling onions through a -blockade on his homeland), we use ``onion'' comments to precisely control how +blockade on his homeland), the Davos package uses ``onion comments'' to precisely control how packages are smuggled into the Python workspace. \begin{figure}[tp] @@ -237,8 +240,8 @@ \section{Software description} \caption{\small \textbf{Package structure.} The Davos package comprises two interdependent subpackages. The \texttt{davos.core} subpackage includes modules for parsing \texttt{smuggle} statements - and onion comments, installing and validating packages, isolating and managing and - configuring Davos's behavior. The + and onion comments, installing and validating packages, isolating and managing + installed packages, and configuring Davos's behavior. 
The \texttt{davos.implementations} subpackage includes environment-specific modifications and features that are needed to support the core functionality across different notebook-based @@ -250,7 +253,7 @@ \section{Software description} \end{figure} -\subsection{Software architecture} +\subsection{Software architecture}\label{sec:architecture} The Davos package consists of two interdependent subpackages (see Fig.~\ref{fig:package-structure}). The first, @@ -336,7 +339,7 @@ \subsubsection{The onion comment}\label{subsec:onion} \end{center} Occasionally, a package's distribution name (i.e., the name used when installing it) may differ from its top-level module name (i.e., the name -used when importing it). In such cases, an onion comment may be used to ensure +used when importing it). In such cases, an onion comment can be used to ensure that Davos installs the proper package if it cannot be found locally: \begin{center} \includegraphics[width=0.9\textwidth]{figs/snippet2} @@ -346,7 +349,7 @@ \subsubsection{The onion comment}\label{subsec:onion} how, where, and when smuggled packages are installed. Critically, if an onion comment includes a version specifier~\cite{CoghStuf13}, Davos will ensure that the version of the package loaded into the notebook matches the specific -version requested, or satisfies the given version constraints. If the smuggled +version requested (or satisfies the given version constraints). If the smuggled package exists locally, Davos will extract its version information from its metadata and compare it to the specifier provided. If the two are incompatible (or no local installation is found), Davos will download, install, and load a @@ -361,22 +364,22 @@ \subsubsection{The onion comment}\label{subsec:onion} \end{center} Davos processes onion comments internally before forwarding arguments to the installer program. 
In addition to preventing shared notebooks from executing -arbitrary code in a user's shell, this enables Davos to adapt its behavior +arbitrary code in a user's shell, this enables Davos to adjust its behavior based on how particular flags will affect the behavior of the installer program. For example, including \texttt{pip}'s \texttt{--no-input} flag will also temporarily enable Davos's non-interactive mode (see Sec.~\ref{subsec:config}). -Similarly, if an onion comment contains either the \texttt{-I}/\texttt{--ignore-installed}, -\texttt{-U}/\texttt{--upgrade}, or \texttt{--force-reinstall} flag, Davos will -skip checking for a local copy of the smuggled package before installing a -new one: +Similarly, if an onion comment contains either \texttt{-I}/\texttt{--ignore-installed}, +\texttt{-U}/\texttt{--upgrade}, or \texttt{--force-reinstall}, Davos will +install and load a new copy of the smuggled package without first checking +for it locally: \begin{center} \includegraphics[width=0.9\textwidth]{figs/snippet5} \end{center} Since the purpose of an onion comment is to describe how a smuggled package should be installed (if necessary) so that it can be loaded and used -immediately, options that would cause the package not to be installed (such as -\texttt{-h}/\texttt{--help} or \texttt{--dry-run}) are disallowed. Additionally, -when using a Davos project to isolate smuggled packages (the default behavior; +immediately, options that would normally cause the package not to be installed +(such as \texttt{-h}/\texttt{--help} or \texttt{--dry-run}) are disallowed. Additionally, +when using a Davos ``project'' to isolate smuggled packages (the default behavior; see Sec.~\ref{subsec:projects}), onion comments may not contain options that would change the package's installation location (such as \texttt{-t}/\texttt{--target}, \texttt{--root}, or \texttt{--prefix}). 
However, if @@ -387,132 +390,78 @@ \subsubsection{The onion comment}\label{subsec:onion} \subsubsection{Projects}\label{subsec:projects} -Standard approaches to installing packages from within a notebook can alter the local Python environment in potentially unexpected and undesired ways. For example, running a notebook that installs its dependencies via system shell commands (prefixed with ``\texttt{!}'') or IPython magic commands (prefixed with ``\texttt{\%}'') may cause other existing packages in the user's environment to be uninstalled and replaced with alternate versions. This can lead to incompatibilities between installed packages, affect the behavior of the user's other scripts or notebooks, or even interfere with system applications. - -To prevent Davos-enhanced notebooks from having unwanted side-effects on the user's environment, Davos automatically isolates packages installed via \texttt{smuggle} statements using a custom scheme called ``projects.'' Functionally, a Davos project is similar to a standard Python virtual environment (e.g., created with the standard library's \texttt{venv} module or a third-party tool like \texttt{virtualenv}~\cite{BickEtal07}): it consists of a directory (within a hidden \texttt{.davos} folder in the user's home directory) that houses third-party packages needed for a particular project or task. However, unlike standard virtual environments, Davos projects do not need to be manually activated and deactivated, do not contain separate Python or \texttt{pip} executables, and \textit{extend} the user's main Python environment rather than replace it. - -When Davos is imported into a notebook, a notebook-specific project directory is automatically created (if it does not exist already). -%When Davos is imported into a notebook, a notebook-specific project directory is automatically created (if it does not exist already), named for the absolute path to the notebook file. 
- - -Notebook-specific projects are named for the absolute path to the notebook file. - - - %Davos projects function similarly to simplified versions of standard Python virtual environments (e.g., created with the standard library's \texttt{venv} module or a third-party tool like \texttt{virtualenv}~\cite{BickEtal07}) with a few differences: they do not need to be manually activated and deactivated, they do not contain separate Python or \texttt{pip} executables, and they \textit{extend} the main Python environment rather than replace it. - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% ADD THIS EITHER TO START OF PROJECTS SUBSUBSECTION OR IN IMPACT SECTION % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%A common way of avoiding this is to create a virtual environment in which to run the notebook, and instruct anyone with whom the notebook is shared to do the same. While effective, this added requirement introduces additional -% -% -%(or group of related notebooks) that require different -% -% -%\begin{itemize} -% \item{common solution is to use a virtual environment} -% \item{this introduces complexity} -% \item{davos's solution is projects} -%\end{itemize} - -\bigskip\bigskip\textcolor{red}{\textbf{========== TODO: finish editing from here to end ==========}}\bigskip\bigskip - -%Because Davos can install new packages, running the code in a -%Davos-enhanced notebook might (in principle) affect the behavior of -%\textit{other} Python-based software (e.g., other notebooks, scripts, etc.) by -%altering which packages are installed in the runtime environment. This could -%lead to undesired consequences. For example, suppose Person A develops a -%notebook (Notebook A) for their research project. We will assume that Notebook -%A does not use Davos to manage project dependencies. If Person A -%runs a Davos-enhanced Notebook B, e.g., sent by another developer, -%might this unexpectedly affect the behavior of Notebook A? 
- -%To prevent unwanted -%changes to the user's Python environment, Davos incorporates its -%own virtual environment-like scheme for isolating packages it installs. When Davos -%is imported, a new virtual environment (folder) is created automatically. The -%folder's name may be customized to support multi-notebook projects. Any -%\texttt{smuggle}d packages that were not available in the notebook's runtime -%environment are installed to the current project folder. The runtime -%environment remains unaffected by Davos's behavior (see -%Sec.~\ref{subsec:projects}). -%By default, projects are notebook-specific, but can also be shared by multiple notebooks, and can be managed interactively from within a Davos-enhanced notebook. - -%%%%%%%% JEREMY VERSION %%%%%%%%% -%Installing new packages in a notebook using standard approaches (e.g., system commands) affect the -%runtime environment. This could lead to undesired behaviors. For example, running a notebook that -%installs new packages in the user’s primary system environment might alter their main system installation -%and/or containerized environment in unexpected ways (e.g., changing package versions, causing conflicts -%with other packages, etc.). To protect against undesired changes to the runtime environment, Davos -%incorporates its own virtual environment for managing packages it installs. When Davos is imported, -%a new virtual environment (folder) is created automatically. The folder’s name may be customized to -%support multi-notebook projects. Any smuggled packages that were not available in the notebook’s -%runtime environment are installed to the current project folder. The runtime environment remains unaffected -%by Davos’s behavior (see Sec. 2.2.3). - -We implemented a ``project'' system in Davos to protect against the -above scenario. By default, importing Davos creates a new project -folder in the user's home directory (contained within a hidden \texttt{.davos} -folder). 
The default project name is computed to uniquely identify each -notebook according to its filename and path. Any packages that were not -originally available in the notebook's runtime environment are installed to the -notebook's project directory. When external libraries are \texttt{smuggle}d, -Davos temporally appends the current project directory to the search -path. Because the user's system path remains unchanged, and because none of the -runtime environment's packages are altered, the user's system and runtime -environment remain unaffected (aside from installing the Davos package -itself to the runtime environment). - -Each notebook's project may be customized by setting \texttt{davos.project} to -any string that can be used as a valid folder name in the user's operating -system. By customizing the project name, users can build multi-notebook -projects that share the same core set of dependencies without needing to -duplicate each package for each notebook in the project. - -Finally, if the user \textit{does} wish to modify their runtime environment, -this may be done by setting \texttt{davos.project} to \texttt{None}. Doing so -will cause any packages installed by Davos to affect the user's -runtime environment. This is generally not recommended, as it can lead to -unintended consequences for other code that shares the runtime environment. +Standard approaches to installing packages from within a notebook can alter the local Python environment in potentially unexpected and undesired ways. +For example, running a notebook that installs its dependencies via system shell commands (prefixed with ``\texttt{!}'') or IPython magic commands (prefixed with ``\texttt{\%}'') may cause other existing packages in the user's environment to be uninstalled and replaced with alternate versions. +This can lead to incompatibilities between installed packages, affect the behavior of the user's other scripts or notebooks, or even interfere with system applications. 
+ +To prevent Davos-enhanced notebooks from having unwanted side effects on the user's environment, any packages installed via \texttt{smuggle} statements are automatically isolated using a custom, virtual environment-like system called ``projects.'' +Davos projects are similar to standard Python virtual environments (e.g., created with the standard library's \texttt{venv} module or a third-party tool like \texttt{virtualenv}~\cite{BickEtal07}) but with a few noteworthy differences that make them generally lighter-weight and simpler to use. +Like a standard virtual environment, a Davos project consists of a directory (within a hidden \texttt{.davos} folder in the user's home directory) that houses third-party packages needed for a particular Python project, workflow, or task. +However, unlike standard virtual environments, Davos projects do not need to be manually created, activated, or deactivated, and function to \textit{extend} the user's existing Python environment rather than replace it. + +When Davos is imported into a notebook, a project directory for that notebook is automatically created (if it does not exist already). +When \texttt{smuggle} statements within that notebook are then executed, any packages (or specific versions of packages) that are not already available in the user's Python environment are installed into the notebook's project directory (along with any missing dependencies of those packages). +During each \texttt{smuggle} statement's execution, Davos also temporarily prepends the notebook's project directory to the module search path so that these project-installed packages are visible when searching for smuggled packages locally, and prioritized over those in the user's main environment. 
+ +Thus, rather than constructing fully separate Python environments from scratch, Davos projects work by supplementing the user's existing environment with any additional packages (or specific package versions) needed to satisfy the dependencies of their corresponding notebooks. +In some cases, this might include every package smuggled into a notebook (e.g., if the notebook is run inside a freshly created, empty virtual environment). +In other cases, the user's environment may already provide all required packages, and the notebook's project directory will go unused (in which case it will be deleted automatically when the notebook kernel is shut down). +But regardless of the extent to which the existing environment is augmented, Davos's project system ensures that all smuggled packages are installed locally and loaded successfully at runtime, while the contents of the user's Python environment are never altered. + +Additionally, because \texttt{smuggle} statements in a given notebook are evaluated every time it is run, this system also ensures that the notebook's requirements will remain satisfied even if the user's Python environment changes. +For example, suppose a user has \texttt{NumPy}~\cite{HarrEtal20} v1.24.3 installed in their current Python environment and runs a Davos-enhanced notebook that smuggles \texttt{NumPy} with ``\texttt{numpy==1.24.3}'' specified in an onion comment (see Sec.~\ref{subsec:onion}). +Since the user's existing version of the package satisfies this requirement, Davos will happily load it into the notebook. +But if the user later upgrades their environment's \texttt{NumPy} version to v1.25.0 (perhaps as a result of installing a different package that depends on it) and subsequently re-runs this notebook, the local version will no longer satisfy this requirement, so Davos will install \texttt{NumPy} v1.24.3 into the notebook's project directory and load that version instead. 
+From then on, any further changes to the user's \texttt{NumPy} installation would have no effect on Davos's behavior in this particular notebook, as a satisfactory version now exists in its project directory. +(If the version specified in the onion comment were changed, Davos would update the version installed in the project directory accordingly.) +For efficiency, Davos projects will generally not duplicate dependencies already satisfied by the user's Python environment. +However, if desired, adding \texttt{pip}'s \texttt{--ignore-installed} flag to an onion comment in the notebook will cause Davos to install the smuggled package into the project directory whether or not it already exists locally. + +By default, each Davos-enhanced notebook will create and use its own notebook-specific project named for the absolute path to the notebook file. +However, before smuggling its required packages, a notebook may be set to instead use an arbitrarily named, notebook-agnostic project by assigning any (non-empty) string to \texttt{davos.project} (see Sec.~\ref{subsec:config}). +This provides a convenient way for multiple related notebooks that share a common set of requirements to use the same Davos project, by setting \texttt{davos.project} to the same string in each one. +It is also possible (though typically not recommended) to disable Davos's project system entirely and install smuggled packages directly into the user's Python environment by setting \texttt{davos.project} to \texttt{None}. + +When accessed (unless its value has been set to \texttt{None}), \texttt{davos.project} will return a \texttt{Project} object that represents the project used by the current notebook (strings assigned to \texttt{davos.project} are converted to \texttt{Project}s internally). 
This object supports methods for interacting with the current project, including locating its directory on the file system, listing all installed packages' names and versions, changing the project's name, and deleting its contents altogether. +\texttt{Project} instances can also be created and managed programmatically, and Davos provides additional utilities for viewing and working with all existing projects (see Secs.~\ref{subsec:config} and \ref{subsec:toplevel}). \subsubsection{Configuring and querying Davos}\label{subsec:config} -Davos's behavior may be customized by modifying a set of attributes attached to -the \texttt{davos} module object that is added to the workspace when Davos is -imported. These attributes may be modified, displayed, or checked -programmatically at runtime (see Sec.~\ref{sec:illustrative-example} for an -illustrative example or Sec.~\ref{subsec:implementation} for implementation -details and additional information). These include: +After importing Davos into a notebook, the top-level \texttt{davos} module exposes a set of attributes whose values determine various aspects of Davos's behavior. +The majority of these are writeable options that can be modified to customize how, where, and when Davos installs smuggled packages (see Sec.~\ref{sec:illustrative-example} for an illustrative example). +These include: \begin{itemize} \item \texttt{.active}: This attribute controls whether support for \texttt{smuggle} statements and onion comments is enabled (\texttt{True}) or disabled (\texttt{False}). When Davos is first imported, - the \texttt{.active} attribute is set to \texttt{True}. + \texttt{davos.active} is set to \texttt{True} (see Sec.~\ref{subsec:implementation} for implementation details and additional information). 
\item \texttt{.auto\_rerun}: This attribute controls how Davos behaves when attempting to \texttt{smuggle} a new - version of a package that was previously imported and cannot be + version of a package that was previously loaded (via an \texttt{import} or \texttt{smuggle} statement) and cannot be reloaded. This can happen if the package includes extension modules that dynamically link C or C++ objects to the Python interpreter, and the code that generates those objects was changed between the - previously imported and to-be-smuggled versions. If this attribute + previously loaded and to-be-smuggled versions. If this attribute is set to \texttt{True}, Davos will automatically restart - the notebook kernel and rerun all code up to (and including) the + the notebook kernel and re-run all code up to (and including) the current \texttt{smuggle} statement. If set to \texttt{False} (the default), Davos will instead issue a warning, pause execution, and - prompt the user to either restart and rerun the notebook, or - continue running with the previously imported package version until + prompt the user to either restart and re-run the notebook, or + continue running with the previously loaded package version until the next time the kernel is restarted manually. Note that, as of - this writing, the \texttt{.auto\_rerun} attribute is not supported - in Google Colaboratory notebooks. + this writing, setting \texttt{davos.auto\_rerun} to \texttt{True} is not + supported in Google Colaboratory notebooks. \item \texttt{.confirm\_install}: If set to \texttt{True} (default: \texttt{False}), Davos will require user confirmation - before installing a smuggled package that does not yet exist in the - user's environment. + before installing a smuggled package that is not already + available locally. 
This is primarily useful if the user has disabled + Davos's ``project'' system for isolating smuggled packages (see + Sec.~\ref{subsec:projects}) but still wants to carefully control what + packages are installed into their main Python environment. \item \texttt{.noninteractive}: Setting this attribute to \texttt{True} (default: \texttt{False}) enables non-in\-ter\-act\-ive @@ -525,47 +474,90 @@ \subsubsection{Configuring and querying Davos}\label{subsec:config} \item \texttt{.pip\_executable}: This attribute's value specifies the path to the \texttt{pip} executable used to install smuggled - packages. The default is programmatically determined from the Python - environment and falls back to \texttt{sys.executable -m pip} if no + packages. The default is programmatically determined from the user's Python + environment and falls back to \texttt{sys.executable -m pip} if no executable can be found. +\item \texttt{.project}: This attribute's value is a \texttt{Project} instance representing the Davos project associated with the current notebook. + As described in Section~\ref{subsec:projects}, Davos projects serve to isolate packages installed by \texttt{smuggle} statements from the user's main Python environment, and the \texttt{Project} class provides an interface for inspecting and managing projects at runtime. + This attribute's default value is a notebook-specific project named for the absolute path to the notebook file. + To change the project used in the current notebook (e.g., in order to use the same project in multiple related notebooks), this attribute may be assigned a different \texttt{Project} instance or, for simplicity, the name of the desired project as a string or \texttt{pathlib.Path} (either of which will be converted to a \texttt{Project} on assignment). 
+ Alternatively, setting \texttt{davos.project} to \texttt{None} will disable project-based isolation for the current notebook and cause Davos to install any missing packages directly into the main Python environment. + This attribute can be reset to its default value using the top-level \texttt{use\_default\_project()} function (see Sec.~\ref{subsec:toplevel}). + For more information about Davos projects, see Section \ref{subsec:projects}. + \item \texttt{.suppress\_stdout}: If this attribute is set to \texttt{True} (default: \texttt{False}), Davos suppresses printed (console) outputs from both itself and the installer program. - This can be useful when smuggling packages that need to install many - dependencies and/or generate extensive output. However, if the installer - program throws an error, both its stdout and stderr streams will be + This can be useful when smuggling packages that require installing many + dependencies and/or generate extensive output when built from source + distributions. Note that if this option is enabled and the installer + program throws an error, both its stdout and stderr streams will still be displayed alongside the Python traceback to allow for debugging. -\item \texttt{.project}: \textcolor{red}{\textbf{TODO: fix this}} This attribute is a string that specifies the name of -the ``project'' associated with the current notebook. As described in -Section~\ref{subsec:projects}, a notebook's project determines where and how -any \texttt{smuggle}d dependencies are installed if they are not available in -the current runtime environment. By default, this attribute is named according -to the current notebook's absolute file path. However, the project name may be -customized to enable shared dependency installations across notebooks (see -Sec.~\ref{subsec:projects}). - \end{itemize} -\noindent Davos namespace also defines the \texttt{davos.configure()} function, -which allows setting multiple configuration options simultaneously. 
In addition -to the above configurable attributes, the \texttt{davos} object also includes -several read-only attributes that contain potentially useful information about -the current environment or Davos's behavior: +\noindent The attributes above can be modified directly or via the \texttt{davos.configure()} function, which allows setting multiple options simultaneously (see Sec.~\ref{subsec:toplevel} for more information or Sec.~\ref{sec:illustrative-example} for example usage). +In addition to these writeable options, the top-level \texttt{davos} module also provides several read-only attributes that can be displayed in the notebook or checked programmatically at runtime, and contain potentially useful information about the notebook environment or Davos's internal state: \begin{itemize} -\item \texttt{.environment}: This attribute's value is a string describing the notebook -environment Davos was imported into. As of the current version (0.2.0), this -attribute will be set to either \texttt{“IPython<7.0"}, \texttt{“IPython>=7.0”}, or \texttt{“Colaboratory”}. +\item \texttt{.all\_projects}: This attribute contains a list of all Davos projects that exist on the user's local system (see Sec.~\ref{subsec:projects} for more information about Davos projects). + Each item in this list is either a \texttt{Project} or \texttt{AbstractProject} instance. + \texttt{AbstractProject}s represent notebook-specific projects whose associated notebooks no longer exist. + They support all the same functionality as \texttt{Project} objects (including methods for inspecting, renaming, and deleting them) and serve primarily to help users identify and clean up extraneous projects left behind after deleting Davos-enhanced notebooks (e.g., see Sec.~\ref{subsec:toplevel}). + +\item \texttt{.environment}: This attribute's value is a string denoting the set of environment-dependent ``helper functions'' used by Davos in the current notebook. 
+ As described in Section \ref{sec:architecture}, Davos internally chooses between interchangeable implementations of certain core features based on various properties of the notebook's frontend and IPython kernel. + As of this writing, three unique combinations of helper functions are required to support existing notebook environments, ergo this attribute has three possible values: \texttt{"IPython<7.0"}, \texttt{"IPython>=7.0"}, or \texttt{"Colaboratory"}. + However, this attribute could take on additional values in the future, as new notebook interfaces are created and IPython's internals are updated, and additional versions of helper functions are added to Davos to support them. + +\item \texttt{.ipython\_shell}: This attribute contains the global IPython \texttt{InteractiveShell} instance underlying the notebook kernel session. - \item \texttt{.ipython\_shell}: This attribute contains the global IPython \texttt{InteractiveShell} instance underlying the notebook kernel session. +\item \texttt{.smuggled}: This attribute's value is a Python dictionary that functions as a cache of \texttt{smuggle} statements executed during the current notebook kernel session. + The dictionary's keys are names of smuggled packages, and its values are arguments passed to the installer program via onion comments. + Entries appear in order of the \texttt{smuggle} statements' execution. + +\end{itemize} + +\noindent The current values of all \texttt{davos} attributes may be viewed at once within a notebook by displaying the \texttt{davos.config} object. - \item \texttt{.smuggled}: This attribute is set to a Python dictionary that functions as a cache of any \texttt{smuggle} commands run during the current -session. The dictionary's keys are package names and the values are arguments passed via the corresponding smuggle statement's onion comment. 
- \item \texttt{.all\_projects}: This attribute contains a list of all local projects (i.e., projects with virtual environment directories located in \texttt{\$HOME/.davos/projects}). See Section~\ref{subsec:projects} for additional information about Davos projects. +\subsubsection{Other top-level Davos functions}\label{subsec:toplevel} + +The Davos package also provides a handful of functions available in the top-level \texttt{davos} namespace. +Some of these functions serve primarily as conveniences, while others provide additional functionality: + +\begin{itemize} + +\item \texttt{configure(**kwargs)}: This function provides an alternate way of assigning values to the writeable attributes listed in Section \ref{subsec:config} and can be used to configure multiple options at once (see Sec.~\ref{sec:illustrative-example} for example usage). + The function accepts attribute names as keyword-only arguments to which their desired values are passed. + If any of the options passed are incompatible (e.g., both \texttt{confirm\_install=True} and \texttt{noninteractive=True} are passed) or assignment to any of the specified attributes fails for any reason, none of the given options will be modified. + +\item \texttt{get\_project(project\_name, create=False)}: This function can be passed the name of a Davos project (\texttt{project\_name}) to get the \texttt{Project} or \texttt{AbstractProject} instance representing it. + The optional \texttt{create} argument determines the function's behavior when no project with the given name exists: if \texttt{create=False} (the default), the function will return \texttt{None}; if \texttt{create=True}, a project with the given name will be created and returned. + +\item \texttt{prune\_projects(yes=False)}: This function allows users to quickly ``clean up'' their local Davos projects by deleting notebook-specific projects whose corresponding notebooks no longer exist (i.e., \texttt{AbstractProject}s). 
+ As with standard virtual environments, periodically removing unused project directories can be useful for reclaiming disk space from dependencies of code that is no longer in use. + By default, this function will interactively display a list of all unused projects and allow the user to choose whether or not to delete each one. + Alternatively, passing \texttt{yes=True} will immediately remove all unused projects without prompting for confirmation. + Note that if Davos's non-interactive mode is enabled (see Sec.~\ref{subsec:config}), \texttt{yes=True} must be explicitly passed, otherwise the function will raise an exception. + This serves as a safeguard against accidentally deleting projects since non-interactive mode disables all user input and confirmation. + Also note that this function will not delete notebook-agnostic projects (i.e., manually created projects whose names are not notebook filepaths), as they are not linked to specific notebooks whose existence determines whether or not they are still needed. + These (and any) projects may be deleted individually by calling their \texttt{Project} objects' \texttt{.remove()} method. + +\item \texttt{require\_python(version\_spec, warn=False, extra\_msg=None, prereleases=None)}: Through \texttt{smuggle} statements and onion comments, Davos can automatically ensure that all Python packages needed to run a notebook are installed, and that the same versions of those packages are used no matter when or by whom the notebook is run. + However, because Davos operates at runtime, one thing it cannot do automatically is install and switch to a specific version of Python itself. + Distributing shared code along with a precise Python version for running it requires a heavier-weight solution, such as a Conda environment or Docker container (see Fig.~\ref{fig:code-sharing}). 
+ Yet a Davos-enhanced notebook may still \texttt{smuggle} certain packages that depend on users having a particular Python version or range of versions (e.g., even just within the standard library, the \texttt{dataclasses} module was first added in Python 3.7 \cite{Smit17} and at least 19 modules are slated for removal in Python 3.13 \cite{HeimCann19}). + The \texttt{davos.require\_python()} function can be added to the top of a Davos-enhanced notebook to communicate to users that the notebook's code should be run with a specific or constrained Python version (see Sec.~\ref{sec:illustrative-example} for example usage). + The function may be passed a version identifier (e.g., \texttt{"3.10.5"}) or any valid version specifier \cite{CoghStuf13} (e.g., \texttt{"\raisebox{0.5ex}{\texttildelow}=3.11"}, \texttt{">=3.9,<3.12"}, etc.) and will raise an exception if the user's Python version is incompatible. + Alternatively, a ``soft'' or suggested constraint can be imposed by passing \texttt{warn=True} to issue a warning rather than raise an error. + Additional information can be added to the default error/warning message (e.g., the specific reason for this requirement) via the \texttt{extra\_msg} argument, and the optional \texttt{prereleases} argument can be used to explicitly allow (\texttt{True}) or disallow (\texttt{False}) pre-release versions (by default, the policy is determined by the value of \texttt{version\_spec}). + +\item \texttt{use\_default\_project()}: By default, each Davos-enhanced notebook will create and use a notebook-specific project named based on its absolute path. + If a user manually changes the project used by the current notebook (i.e., by setting the value of the \texttt{davos.project} attribute; see Sec.~\ref{subsec:config}), this function can be called to switch back to using the notebook's default project and reset \texttt{davos.project} to its default value. 
+ See Section \ref{subsec:projects} for more information about Davos projects and Section \ref{sec:illustrative-example} for an illustrative example. \end{itemize} @@ -584,8 +576,8 @@ \subsection{Implementation details}\label{subsec:implementation} IPython preprocesses all executed code as plain text before it is sent to the Python compiler, in order to handle special constructs like -\texttt{\%magic} and \texttt{!shell} commands. Davos uses -this process to transform \texttt{smuggle} statements into +\texttt{!}-prefixed shell commands and \texttt{\%}-prefixed ``magic'' commands. Davos uses +this same process to invisibly transform \texttt{smuggle} statements into syntactically valid Python code. The Davos parser uses a regular expression to match lines of code containing \texttt{smuggle} statements (and, optionally, onion comments), extract relevant @@ -611,23 +603,21 @@ \subsection{Implementation details}\label{subsec:implementation} overwritten and no longer refers to the \texttt{smuggle()} function. It will also deregister the Davos parser from the set of input transformers run when each notebook cell is -executed. While the overhead added by the Davos parser is -minimal, this may be useful, for example, when optimizing or precisely -profiling code. +executed. \begin{figure}[tp] \centering \includegraphics[width=\textwidth]{figs/flow_chart} \caption{\small \textbf{\texttt{smuggle()} function algorithm.} At a high level, the \texttt{smuggle()} function may be conceptualized as -following two basic steps. First (left), Davos ensures that the -correct version of the desired package has been installed, carrying -out the installation automatically if needed. Second (right), -Davos imports the package and updates the current runtime environment.} + following two basic steps. First (left), Davos ensures that the + correct version of the desired package is available locally, installing + it automatically (into the notebook's project directory) if needed. 
Second (right), + Davos loads the package into the notebook and updates the current + runtime environment.} \label{fig:flow-chart} \end{figure} - \section{Illustrative Example}\label{sec:illustrative-example} \begin{figure}[tp] @@ -639,164 +629,202 @@ \section{Illustrative Example}\label{sec:illustrative-example} \label{fig:illustrative-example} \end{figure} -Across different versions of a given package, particular modules, functions, +%The example code throughout Section \ref{subsec:onion} illustrates how Davos is most typically used: +%By including a series of \texttt{smuggle} statements and onion comments with version specifiers or other options in an IPython notebook, researchers can share their code and its dependencies in a single file that can be easily run without any additional tools or setup, creates and manages its own isolated environment, automatically installs its required packages at runtime, and ensures that the package versions with which it is run do not change unexpectedly. +The example code throughout Section \ref{subsec:onion} illustrates how Davos is most typically used: a series of smuggle statements and onion comments with version specifiers or other options collectively describes and automatically constructs a reproducible environment for running the code that follows it. +When added to the top of a Jupyter notebook, this allows researchers to bundle their code and its dependencies into a single file that can be easily shared and run without any additional tools or setup, automatically installs its required packages at runtime, isolates them from the user's main Python environment, and ensures their versions do not change unexpectedly over time. +In this section, we have contrived a more complex scenario to highlight some of Davos's more advanced features, and illustrate how they may be used to handle certain challenges that can arise when writing, running, and sharing reproducible scientific code. 
+ +Across different versions of a given package, various modules, functions, and other objects may be updated, removed, renamed, or otherwise altered. In addition to changing the behaviors of active computations, these changes can render saved objects created using one version of a package incompatible with other versions of the same package. For example, the popular -\texttt{pandas}~\cite{McKi10} library used to include the \texttt{Panel} data -structure for storing 3-dimensional arrays. Since version 0.20.0, however, the -\texttt{Panel} class has been deprecated, and in version 0.25.0, it was removed +\texttt{pandas}~\cite{McKi10} library originally included the \texttt{Panel} data +structure for storing 3-dimensional arrays. In version 0.20.0, however, the +\texttt{Panel} class was deprecated, and in version 0.25.0, it was removed entirely. Suppose a user had a dataset stored in a \texttt{Panel} object (created using an older version of \texttt{pandas}) and had saved it to their disk (e.g., for later reuse or to share with other users) by serializing the \texttt{Panel} with Python's \texttt{pickle} protocol. The \texttt{pickle} -protocol is a popular built-in method of persisting data in Python, allowing +protocol is a popular built-in method of persisting data in Python that allows users to save, share, and load arbitrary objects. However, in order to -successfully ``unpickle'' (i.e., load and restore) a ``pickled'' (i.e., saved) -object, the object's class must be defined in and importable from the same -module as when it was saved. Thus, because of the \texttt{Panel} class's +successfully ``unpickle'' (i.e., load and restore) a ``pickled'' (i.e., previously saved) +object, that object's class must be defined in and importable from the same +module as it was when the object was originally saved. Thus, because of the \texttt{Panel} class's removal, the user's dataset could not be read by any version of \texttt{pandas} -from 0.25.0 or beyond. 
These incompatibilities are also not limited solely to +from 0.25.0 onward. These incompatibilities are also not limited solely to traditional forms of data. For example, saved model states and other objects may reference modules, functions, attributes, classes, or other objects that may not be identical (or even present) across all versions of their associated -package. +packages. The example provided in Figure~\ref{fig:illustrative-example} demonstrates how -the Davos package can be used to circumvent these incompatibilities by -carefully controlling which versions of each package are used in different -parts of the notebook. The example shows how a dataset and model that require +Davos can be used to circumvent these incompatibilities by +temporarily switching between different versions of the same package within a single runtime. +%carefully controlling which versions of each package are used in different parts of the notebook. +The example shows how a dataset and model that require now-incompatible components of the \texttt{pandas} and -\texttt{scikit-learn}~\cite{PedrEtal11} packages may be loaded in (using older +\texttt{scikit-learn}~\cite{PedrEtal11} libraries can be loaded in (using older versions of each package) and used alongside more recent versions of each package that provide new and improved functionality. When included at the top of a Jupyter notebook, the code in Figure~\ref{fig:illustrative-example} ensures that these objects will be loaded successfully and analyzed using the -same set of package versions, no matter when or by whom the notebook is run. +same set of package versions no matter when or by whom the notebook is run. -After installing and importing Davos (lines 1--2), we first \texttt{smuggle} two +After installing and importing Davos (lines 1--2), we first use the \texttt{davos.require\_python()} function to constrain the Python version used to run the notebook (see Sec.~\ref{subsec:toplevel}). 
+As described above, the example code in Figure \ref{fig:illustrative-example} loads two different versions of the \texttt{pandas} library: first, an older version needed to access a dataset saved in an outmoded format, then a newer one to use throughout the remainder of the notebook. +We therefore want to make sure upfront (in line 6) that the notebook's Python version falls within the range of versions that both of these two versions of \texttt{pandas} support. +%Line 6 therefore ensures upfront that the notebook's Python version falls within the overlap between the ranges of Python versions that these two versions of \texttt{pandas} support. +If it does not, the function in line 6 will raise an error that includes a message to this effect (lines 4--5). +\begin{center} +\includegraphics[width=0.9\textwidth]{figs/example1} +\end{center} + +Next, in lines 8--9, we \texttt{smuggle} two utilities for interacting with local files in the code below. The -\texttt{smuggle} statement in line 4 loads the \texttt{is\_file()} +\texttt{smuggle} statement in line 8 loads the \texttt{is\_file()} function from the Python standard library's \texttt{os.path} module. Standard library modules are included with all Python distributions, so this line is functionally equivalent to an \texttt{import} statement and does not need or benefit from an onion -comment. Line 5 loads the \texttt{joblib} package~\cite{Varo10}, -installing it first, if necessary. Since \texttt{joblib}'s I/O +comment (since there is no chance the module will need to be installed). +Line 9 then loads the \texttt{joblib} package~\cite{Varo10}, +installing it into the notebook's project directory if necessary. Since \texttt{joblib}'s I/O interface has historically remained stable and backwards-compatible -across releases, requiring that users have a particular exact version -installed would likely be unnecessarily restrictive. However, a -\textit{future} release might introduce some breaking change. 
The -onion comment in line 5 helps ensure the analysis notebook continues +across releases, requiring a particular exact version +would likely be unnecessarily restrictive. However, it is possible a +\textit{future} release could introduce some breaking change. The +onion comment in line 9 helps ensure that the analysis notebook will continue to run properly in the future by limiting allowable versions to those already released when the code was written: \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example1} +\includegraphics[width=0.9\textwidth]{figs/example2} \end{center} -Line 7 then uses the \texttt{davos.config} object to enable -Davos's \texttt{auto\_rerun} option before smuggling the next -two packages: \texttt{NumPy}~\cite{HarrEtal20} and +It is worth noting, however, that beyond illustrative purposes, the benefit of specifying only a maximum version for \texttt{joblib} rather than an exact version is relatively minor. +The main advantage to relaxing a version constraint in an onion comment (when a package's behavior does not differ meaningfully between versions) is that doing so increases the likelihood that a satisfactory version will already be available in the user's Python environment, and therefore Davos will not need to install a new copy in the notebook's project directory. +For large packages, this can be a worthwhile consideration; however, \texttt{joblib} is very lightweight---less than 0.5 MB pre-built, with no required dependencies. +Thus a more conservative approach that guarantees an exact version is used would also be reasonable in this case. + +Line 11 then enables +Davos's \texttt{auto\_rerun} option (see Sec.~\ref{subsec:config}) before smuggling the next +two packages: \texttt{NumPy} and \texttt{pandas}.
Because these packages rely heavily on custom C data -types, loading the particular versions from the onion comments may -require restarting the notebook kernel if different versions had been previously -imported during the same interpreter session (see -Sec.~\ref{subsec:config}). +types, loading the particular versions specified in their onion comments may +require restarting the notebook kernel if different versions were previously +imported during the same interpreter session---including internally by other packages. +Enabling \texttt{auto\_rerun} allows Davos to handle kernel restarts automatically and continue running the code seamlessly without user intervention. \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example2} +\includegraphics[width=0.9\textwidth]{figs/example3} \end{center} -Setting the \texttt{auto\_rerun} attribute to \texttt{True} is particularly useful -for managing the installation of \texttt{pandas} in the next -lines: +In the case of \texttt{NumPy}, whether or not a kernel restart is necessary will depend on the user's existing Python environment. +The \texttt{joblib} package has an optional dependency on \texttt{NumPy} for memoizing and parallelizing array operations, and will \texttt{import numpy} internally to enable these features if the package is available. +If the user already has \texttt{NumPy} installed in their Python environment when \texttt{joblib} is smuggled in line 9, their installed version is different from the one specified in the onion comment on line 12, and there were changes made to \texttt{NumPy}'s C extensions between those two versions, then Davos will automatically restart the kernel and re-run the lines above. +The newly smuggled version would then be used both in the notebook itself and by \texttt{joblib} internally. +% (Note that outside the context of an illustrative example, one could avoid a kernel restart here altogether simply by smuggling \texttt{NumPy} before \texttt{joblib}.) 
+ +The primary reason for enabling the \texttt{auto\_rerun} option, however, is to manage the installation of \texttt{pandas} in the next set of lines: \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example3} +\includegraphics[width=0.9\textwidth]{figs/example4} \end{center} -If we suppose that the data contained in \texttt{data-old.pkl} is +If we suppose that the ``\texttt{data-old.pkl}'' file contains a dataset stored in a pickled \texttt{Panel} object, then we must use a version of -\texttt{pandas} prior to 0.25.0 (i.e., the version in which the \texttt{Panel} -class was removed) to be able to load it in. Line 11 ensures -that an older version of \texttt{pandas} will be imported, enabling -the data to be read in (and, in line 13, written to a CSV -file, which is compatible with newer \texttt{pandas} versions). +\texttt{pandas} prior to v0.25.0 (i.e., the version in which the \texttt{Panel} +class was removed) to be able to read it. Line 15 ensures +that a sufficiently old version of \texttt{pandas} will be imported, enabling +the data to be successfully loaded in line 16 and (in line 17) written to a CSV +file, which can be read by any \texttt{pandas} version. Newer versions of \texttt{pandas} have brought substantial improvements -including better performance, bug fixes, and additional functionality. Although +including performance enhancements, bug fixes, and additional functionality. Although the original dataset had to be read in using an older version of the package, we can take advantage of these more recent updates by smuggling \texttt{pandas} -a second time on line 15 (whose onion comment specifies that version 1.3.5 -should be installed and loaded). Since a different version of \texttt{pandas} -had already been loaded by the Python interpreter (on line 11), the notebook -kernel must be restarted in order to replace the old version's custom C -extensions with those from the new version. 
The \texttt{auto\_rerun} flag set -on line 7 enables Davos to trigger this process automatically so that -the code can continue running without user intervention, and converting the -dataset to a CSV file in lines 10--13 ensures that the older version of -\texttt{pandas} does not need to be reinstalled. - -Next, line 17 uses the \texttt{davos.configure()} function to disable +a second time in line 19 (whose onion comment specifies that version 1.3.5 +should be installed and loaded). Since a different \texttt{pandas} version +has already been loaded by the Python interpreter (line 15) and there have been +substantial changes to the library (including its extension modules) +between that version and v1.3.5, the notebook +kernel must be restarted in order to fully unload the old version in favor of +the new one. +When Davos automatically does so and re-runs the code above, having now converted the dataset to a CSV file means the old version does not need to be reinstalled (line 14). + +Next, line 21 uses the \texttt{davos.configure()} function to disable the \texttt{auto\_rerun} option and simultaneously enable two other options: \texttt{suppress\_stdout} and \texttt{noninteractive}. With -these options enabled, lines 18--19 \texttt{smuggle} +these options enabled, lines 22--23 \texttt{smuggle} \texttt{TensorFlow}~\cite{AbadEtal15}, a powerful end-to-end platform for building and working with machine learning models, and \texttt{UMAP}~\cite{McInEtal18}, a package that implements a family -of related manifold learning techniques. The onion comment in line 19 +of related manifold learning techniques. The onion comment in line 23 also specifies that \texttt{UMAP} should be installed with the optional requirements needed for its ``plot'' and ``parametric\_umap'' features. Together, these two packages depend on 36 other unique -packages, most of which have dependencies of their own. And if many of +packages, most of which have dependencies of their own. 
If many of these are not already installed in the user's environment, lines -18--19 could take several minutes to run. Enabling the +22--23 could take several minutes to run. Enabling the \texttt{noninteractive} option ensures that the installation will continue automatically without user input during that time. Enabling \texttt{suppress\_stdout} also suppresses console outputs while installing these packages and their many dependencies to prevent other potentially important outputs from being buried. \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example4} +\includegraphics[width=0.9\textwidth]{figs/example5} \end{center} -After re-enabling these two options (line 20), we next \texttt{smuggle} +After re-enabling these two options (line 24), we next \texttt{smuggle} specific versions of three plotting packages: \texttt{Matplotlib}~\cite{Hunt07}, \texttt{seaborn}~\cite{Wask21}, and -\texttt{Quail}~\cite{HeusEtal17} (lines 22--24). Because the first two +\texttt{Quail}~\cite{HeusEtal17} (lines 26--28). Because the first two are requirements of \texttt{UMAP}'s optional ``plot'' feature, they -will have already been installed by line 19, though possibly as +will have already been installed (if necessary) by line 23, though possibly as different versions than those specified in the onion comments on lines -22 and 23. If the installed and specified versions are the same, these +26 and 27. If the installed and specified versions are the same, these \texttt{smuggle} statements will function like standard \texttt{import} -statements to load the packages into the notebook namespace. If they +statements to load the packages into the notebook's namespace. If they differ, Davos will download the requested versions in place -of the installed versions before doing so. +of the installed versions, ensuring that they are used both in the notebook itself and by \texttt{UMAP} internally.
\begin{center} -\includegraphics[width=0.9\textwidth]{figs/example5} +\includegraphics[width=0.9\textwidth]{figs/example6} \end{center} -Line 24 uses an onion comment to specify that \texttt{Quail} should be -installed directly from a specific GitHub commit (\texttt{6c847a4}). -This ability to load packages directly from GitHub repositories can -enable developers to more easily use forked or modified versions of other -packages in their notebooks, even if those versions have not been -officially released. - -In lines 26--29, we demonstrate another aspect of Davos's -functionality that supports more advanced installation scenarios. The -\texttt{ipywidgets}~\cite{FredEtal15} package provides an API for -creating various JavaScript widgets with Python code, and the \texttt{widgetsnbextension} package provides -the machinery needed by the notebook frontend to display them. +The onion comment in line 28 specifies that \texttt{Quail} should be +installed from a fork of its GitHub repository (\texttt{myfork}), in its state as of a specific commit (\texttt{6c847a4}). +This ability to load packages directly from remote (or local) Git repositories can +enable developers to more easily use forked or customized versions of other +packages in their code, even if those versions have not been +officially released. Targeting specific VCS references (e.g., commits or tags) can also provide even finer-grained control over smuggled package versions than is possible with traditional version specifiers. + +In lines 30--37, we demonstrate another aspect of Davos's functionality that supports more advanced installation scenarios. +%The \texttt{ipywidgets}~\cite{FredEtal15} package (also known as Jupyter Widgets) provides a Python API for creating various JavaScript widgets within a notebook, and the \texttt{widgetsnbextension} package provides the JavaScript machinery needed by the notebook frontend to display them.
+The \texttt{ipywidgets}~\cite{FredEtal15} package (also known as Jupyter Widgets) provides a Python API for creating interactive JavaScript widgets within a notebook. +It depends on the \texttt{widgetsnbextension} package, which provides the JavaScript machinery needed by the notebook frontend to display these widgets. +%A complication is that, while \texttt{ipywidgets} must be installed in a location that is accessible from the IPython kernel (i.e., the Python runtime of the notebook itself), \texttt{widgetsnbextension} must be made accessible to the Jupyter notebook server, which is a separate Python runtime. +%A complication is that, while \texttt{ipywidgets} must be installed in an environment accessible from the IPython kernel (i.e., importable into the notebook itself), \texttt{widgetsnbextension} must be installed in the environment that houses the Jupyter notebook server. +A complication is that \texttt{ipywidgets} must be installed in a location that is accessible from the IPython kernel (i.e., the Python runtime within the notebook itself), while \texttt{widgetsnbextension} must be installed in the environment that houses the Jupyter notebook server (a separate Python runtime that serves and manages the notebook frontend client). +In many basic setups, the IPython kernel and notebook server exist in the same environment. +However, a common ``advanced'' approach entails running the notebook server from a base environment, with additional environments each providing their own separate, interchangeable IPython kernels. + +Lines 30--37 account for both of these possibilities programmatically: \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example6} +\includegraphics[width=0.9\textwidth]{figs/example7} \end{center} -A complication is that \texttt{ipywidgets} must be installed in the -same environment as the IPython kernel, whereas -\texttt{widgetsnbextension} must be installed in the environment that -houses the Jupyter notebook server. 
In many basic setups, these two -environments are the same. However, a common ``advanced'' approach -entails running the notebook server from a base environment, with -additional environments each providing their own separate, -interchangeable IPython kernels. To accommodate this multi-environment -scenario, on lines 26 and 28, we use the \texttt{pip\_executable} option to control which environments each -package should be installed to. Once these two packages are installed -and imported, line 31 smuggles \texttt{tqdm}~\cite{daCoEtal22}, which -display progress bars to provide status updates for running code. In +First, in line 30, we set the \texttt{davos.project} attribute to \texttt{None} to temporarily +%disable Davos's project isolation system and +allow installing smuggled packages outside of the notebook's project directory. +As noted in Section~\ref{subsec:projects}, this is typically discouraged, as doing so can risk interfering with the user's Python environment if existing package versions are overwritten. +In this particular case, however, a combination of factors makes this relatively safe and inconsequential. +First, the package we need to install directly into the notebook server environment (\texttt{widgetsnbextension}) is smuggled without an accompanying onion comment (line 34), meaning that Davos will not replace any version the user may already have installed. +Second, the package has no dependencies of its own, so if Davos does install it, no other packages could potentially be installed or updated as a side effect. +Third, the package itself provides no functionality outside of rendering Jupyter widgets, so its presence would not alter any other code's expected behavior. + +Next, in lines 31--33, we change the \texttt{pip} executable Davos uses to install smuggled packages (see Sec.~\ref{subsec:config}), storing the default executable's path in a variable before doing so.
+When Davos's project system is disabled, using a \texttt{pip} executable from a particular Python environment will cause smuggled packages to be installed into (and subsequently loaded from) that environment. +The default \texttt{pip\_executable} will install packages into the environment used to run the IPython kernel. +Here, the new value assigned to \texttt{davos.pip\_executable} in line 33 is the output of running ``\texttt{command -v pip}'' as a \texttt{!}-prefixed IPython system shell command in line 32 (``\texttt{command -v}'' outputs the path to an executable, similar to ``\texttt{which}'' but more portable). +Since IPython system shell commands are always executed in the notebook server environment, this command's output will be the path to that environment's \texttt{pip} executable---which may or may not be different from the kernel environment's. + +After smuggling the \texttt{widgetsnbextension} package in line 34, we use the \texttt{davos.use\_default\_project()} function in line 35 to revert to installing packages into the notebook's project directory, restore the default value of \texttt{davos.pip\_executable} in line 36, and \texttt{smuggle} the specified version of \texttt{ipywidgets} in line 37. +With these two packages now installed +and imported, line 39 smuggles \texttt{tqdm}~\cite{daCoEtal22}, which +displays progress bars to provide status updates for running code. In Jupyter notebooks, the \texttt{tqdm.notebook} module can be imported to enable more aesthetically pleasing progress bars that are displayed via \texttt{ipywidgets}, if that package is installed and @@ -804,6 +832,8 @@ \section{Illustrative Example}\label{sec:illustrative-example} important to \texttt{smuggle} \texttt{tqdm} after ensuring the \texttt{ipywidgets} package was available. +\stoppedhere + Next, we load in the reformatted dataset (line 33) and pre-trained model (line 35) that we wish to use in our analysis.
In our hypothetical example, we can suppose that the model was provided as a @@ -813,11 +843,11 @@ \section{Illustrative Example}\label{sec:illustrative-example} word counts are passed to a topic model~\cite{BleiEtal03} using a pretrained \texttt{LatentDirichletAllocation} instance. \begin{center} -\includegraphics[width=0.9\textwidth]{figs/example7} +\includegraphics[width=0.9\textwidth]{figs/example8} \end{center} Let us suppose that the \texttt{Pipeline} object had been saved by its original creator using the \texttt{joblib} package, as -\texttt{scikit-learn}'s documentation recommends. Because +\texttt{scikit-learn}'s documentation recommends \cite{skle22}. Because \texttt{joblib} uses the \texttt{pickle} protocol internally, the ability to save and load pre-trained models is not guaranteed across different \texttt{scikit-learn} versions. For example, suppose that @@ -841,6 +871,8 @@ \section{Illustrative Example}\label{sec:illustrative-example} \textit{analyze} and manipulate the data and model output using the latest approaches and implementations. 
+\todo{mention notebook reproducibility, cell order, multiple package versions re: reviewer's comment; note importance of running lines 14--19 \& 38--40 in single cell} + \section{Impact} @@ -891,10 +923,12 @@ \section{Impact} research studies~\citep{MannEtal23a, OwenMann23, ZimaEtal23}, Davos is being used by both students and instructors in programming and methods courses such as Storytelling with Data~\cite{Mann21a} (an open course on data science, -visualization, and communication) and Laboratory in Psychological +visualization, and communication), Laboratory in Psychological Science~\cite{Mann22} (an open course on experimental and statistical methods -for psychology research) to simplify distributing lessons and submitting -assignments, as well as in online demos such as +for psychology research), and the Methods in Neuroscience at Dartmouth (MIND) +Computational Summer School~\cite{MIND23} (a week-long intensive course on +computational neuroscience methods) to simplify distributing lessons +and submitting assignments, as well as in online demos such as \texttt{abstract2paper}~\cite{Mann21b} (an example application of GPT-Neo~\cite{GaoEtal20, BlacEtal21}) to share ready-to-run code that installs dependencies automatically. The 2023 offering of Neuromatch @@ -974,6 +1008,8 @@ \subsection{Pitfalls and limitations} software would therefore need to use existing non-Davos approaches to managing those requirements. +\textcolor{red}{\textbf{TODO: add note about default/fallback project for non-traditional notebook interfaces}} + \section{Conclusions} The Davos package supports reproducible research by providing @@ -1014,7 +1050,11 @@ \section*{Acknowledgements} We acknowledge useful feedback and discussion from the students of JRM's \textit{Storytelling with Data} course (Winter, 2022 offering) -who used preliminary versions of our package in several assignments. 
+who used preliminary versions of our package in several assignments, +and the students of the Methods in Neuroscience at Dartmouth (MIND) +Computational Summer School (2023 offering) who used our package +during several workshops and tutorials. + \bibliographystyle{elsarticle-num} \bibliography{main} diff --git a/tests/test_core.ipynb b/tests/test_core.ipynb index baad2ff6..4bcc0c9c 100644 --- a/tests/test_core.ipynb +++ b/tests/test_core.ipynb @@ -2680,7 +2680,7 @@ "source": [ "def test_smuggle_davos_raises():\n", " \"\"\"trying to smuggle davos itself should raise an error\"\"\"\n", - " with raises(davos.core.exceptions.TheNightIsDarkAndFullOfTErrors):\n", + " with raises(davos.core.exceptions.TheNightIsDarkAndFullOfErrors):\n", " smuggle davos" ] },