From 045d8e31bc75ee266f70c21a4dbbe281fe21f84e Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Mon, 20 Jan 2025 13:53:18 +0000 Subject: [PATCH 1/2] rework into newer extension style --- nbgitpuller/__init__.py | 78 +------------------------------ nbgitpuller/_compat.py | 40 ---------------- nbgitpuller/application.py | 29 ++++++++++++ nbgitpuller/handlers.py | 11 +++-- nbgitpuller/pull.py | 8 ++-- nbgitpuller/templates/status.html | 2 +- tests/repohelpers.py | 2 +- 7 files changed, 43 insertions(+), 127 deletions(-) delete mode 100644 nbgitpuller/_compat.py create mode 100644 nbgitpuller/application.py diff --git a/nbgitpuller/__init__.py b/nbgitpuller/__init__.py index 47973f96..48ae2161 100644 --- a/nbgitpuller/__init__.py +++ b/nbgitpuller/__init__.py @@ -1,82 +1,8 @@ -from .version import __version__ # noqa -from .pull import GitPuller # noqa -from jupyter_server.utils import url_path_join -from tornado.web import StaticFileHandler -import os +from .application import NbGitPuller def _jupyter_server_extension_points(): - """ - This function is detected by `notebook` and `jupyter_server` because they - are explicitly configured to inspect the nbgitpuller module for it. That - explicit configuration is passed via setup.py's declared data_files. - - Returns a list of dictionaries with metadata describing where to find the - `_load_jupyter_server_extension` function. - """ return [{ 'module': 'nbgitpuller', + 'app': NbGitPuller }] - - -def _load_jupyter_server_extension(app): - """ - This function is a hook for `notebook` and `jupyter_server` that we use to - register additional endpoints to be handled by nbgitpuller. - - Note that as this function is used as a hook for both notebook and - jupyter_server, the argument passed may be a NotebookApp or a ServerApp. 
- - Related documentation: - - notebook: https://jupyter-notebook.readthedocs.io/en/stable/extending/handlers.htmland - - notebook: https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Distributing%20Jupyter%20Extensions%20as%20Python%20Packages.html#Example---Server-extension - - jupyter_server: https://jupyter-server.readthedocs.io/en/latest/developers/extensions.html - """ - # identify base handler by app class - # must do this before importing from .handlers - from ._compat import get_base_handler - - get_base_handler(app) - - from .handlers import ( - SyncHandler, - UIHandler, - LegacyInteractRedirectHandler, - LegacyGitSyncRedirectHandler, - ) - - web_app = app.web_app - base_url = url_path_join(web_app.settings['base_url'], 'git-pull') - handlers = [ - (url_path_join(base_url, 'api'), SyncHandler), - (base_url, UIHandler), - (url_path_join(web_app.settings['base_url'], 'git-sync'), LegacyGitSyncRedirectHandler), - (url_path_join(web_app.settings['base_url'], 'interact'), LegacyInteractRedirectHandler), - ( - url_path_join(base_url, 'static', '(.*)'), - StaticFileHandler, - {'path': os.path.join(os.path.dirname(__file__), 'static')} - ) - ] - # FIXME: See note on how to stop relying on settings to pass information: - # https://github.com/jupyterhub/nbgitpuller/pull/242#pullrequestreview-854968180 - # - web_app.settings['nbapp'] = app - web_app.add_handlers('.*', handlers) - - -# For compatibility with both notebook and jupyter_server, we define -# _jupyter_server_extension_paths alongside _jupyter_server_extension_points. -# -# "..._paths" is used by notebook and still supported by jupyter_server as of -# jupyter_server 1.13.3, but was renamed to "..._points" in jupyter_server -# 1.0.0. -# -_jupyter_server_extension_paths = _jupyter_server_extension_points - -# For compatibility with both notebook and jupyter_server, we define both -# load_jupyter_server_extension alongside _load_jupyter_server_extension. -# -# "load..." 
is used by notebook and "_load..." is used by jupyter_server. -# -load_jupyter_server_extension = _load_jupyter_server_extension diff --git a/nbgitpuller/_compat.py b/nbgitpuller/_compat.py deleted file mode 100644 index 036967c9..00000000 --- a/nbgitpuller/_compat.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Import base Handler classes from Jupyter Server or Notebook - -Must be called before importing .handlers to ensure the correct base classes -""" -import warnings - -_JupyterHandler = None - - -def get_base_handler(app=None): - """Get the base JupyterHandler class to use - - Inferred from app class (either jupyter_server or notebook app) - """ - global _JupyterHandler - if _JupyterHandler is not None: - return _JupyterHandler - if app is None: - warnings.warn( - "Guessing base JupyterHandler class. Specify an app to ensure the right JupyterHandler is used.", - stacklevel=2, - ) - from jupyter_server.base.handlers import JupyterHandler - return JupyterHandler - - top_modules = {cls.__module__.split(".", 1)[0] for cls in app.__class__.mro()} - if "jupyter_server" in top_modules: - from jupyter_server.base.handlers import JupyterHandler - - _JupyterHandler = JupyterHandler - return _JupyterHandler - if "notebook" in top_modules: - from notebook.base.handlers import IPythonHandler - - _JupyterHandler = IPythonHandler - return _JupyterHandler - - warnings.warn(f"Failed to detect base JupyterHandler class for {app}.", stacklevel=2) - from jupyter_server.base.handlers import JupyterHandler - return JupyterHandler diff --git a/nbgitpuller/application.py b/nbgitpuller/application.py new file mode 100644 index 00000000..409cec30 --- /dev/null +++ b/nbgitpuller/application.py @@ -0,0 +1,29 @@ +from .version import __version__ # noqa +from .pull import GitPuller # noqa +from jupyter_server.extension.application import ExtensionApp +import os + + +class NbGitPuller(ExtensionApp): + name = 'git-pull' + load_other_extensions = True + + static_paths = [ + 
os.path.join(os.path.dirname(__file__), 'static') + ] + + def initialize_handlers(self): + from .handlers import ( + SyncHandler, + UIHandler, + LegacyInteractRedirectHandler, + LegacyGitSyncRedirectHandler, + ) + + # Extend the self.handlers trait + self.handlers.extend([ + (rf'/{self.name}/api', SyncHandler), + (rf'/{self.name}', UIHandler), + (rf'/{self.name}/git-sync', LegacyGitSyncRedirectHandler), + (rf'/{self.name}/interact', LegacyInteractRedirectHandler), + ]) diff --git a/nbgitpuller/handlers.py b/nbgitpuller/handlers.py index d170d1cb..90949379 100644 --- a/nbgitpuller/handlers.py +++ b/nbgitpuller/handlers.py @@ -7,12 +7,11 @@ import os from queue import Queue, Empty import jinja2 +from jupyter_server.base.handlers import JupyterHandler +from jupyter_server.extension.handler import ExtensionHandlerMixin from .pull import GitPuller from .version import __version__ -from ._compat import get_base_handler - -JupyterHandler = get_base_handler() jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader( @@ -20,10 +19,12 @@ ), ) -class SyncHandler(JupyterHandler): +class SyncHandler(ExtensionHandlerMixin, JupyterHandler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.log.info(f'Config {self.config}') + # We use this lock to make sure that only one sync operation # can be happening at a time. Git doesn't like concurrent use! 
if 'git_lock' not in self.settings: @@ -84,7 +85,7 @@ async def get(self): self.set_header('content-type', 'text/event-stream') self.set_header('cache-control', 'no-cache') - gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp']) + gp = GitPuller(repo, repo_dir, branch, depth=depth, **self.config) q = Queue() diff --git a/nbgitpuller/pull.py b/nbgitpuller/pull.py index f5a7dab1..dd8aae16 100644 --- a/nbgitpuller/pull.py +++ b/nbgitpuller/pull.py @@ -69,18 +69,18 @@ def _depth_default(self): where the GitPuller class hadn't been loaded already.""" return int(os.environ.get('NBGITPULLER_DEPTH', 1)) - def __init__(self, git_url, repo_dir, **kwargs): + def __init__(self, git_url, repo_dir, branch, **kwargs): assert git_url self.git_url = git_url - self.branch_name = kwargs.pop("branch") + self.repo_dir = repo_dir + self.branch_name = branch if self.branch_name is None: self.branch_name = self.resolve_default_branch() elif not self.branch_exists(self.branch_name): raise ValueError(f"Branch: {self.branch_name} -- not found in repo: {self.git_url}") - self.repo_dir = repo_dir newargs = {k: v for k, v in kwargs.items() if v is not None} super(GitPuller, self).__init__(**newargs) @@ -361,7 +361,7 @@ def main(): for line in GitPuller( args.git_url, args.repo_dir, - branch=args.branch_name if args.branch_name else None + args.branch_name if args.branch_name else None ).pull(): print(line) diff --git a/nbgitpuller/templates/status.html b/nbgitpuller/templates/status.html index fd19756e..7439f73f 100644 --- a/nbgitpuller/templates/status.html +++ b/nbgitpuller/templates/status.html @@ -35,7 +35,7 @@ {% block script %} {{super()}} - + {% endblock %} {% block stylesheet %} diff --git a/tests/repohelpers.py b/tests/repohelpers.py index 98bfd110..93781240 100644 --- a/tests/repohelpers.py +++ b/tests/repohelpers.py @@ -8,7 +8,7 @@ from uuid import uuid4 from packaging.version import Version as V -from nbgitpuller import GitPuller +from 
nbgitpuller.pull import GitPuller class Repository: From 6f71925aac498df31653a1b0aa3fe9e887318edf Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Mon, 20 Jan 2025 13:56:49 +0000 Subject: [PATCH 2/2] autorun support --- README.md | 4 ++++ jupyter_git_pull_config.py | 34 ++++++++++++++++++++++++++++++++++ nbgitpuller/application.py | 29 +++++++++++++++++++++++++++++ nbgitpuller/pull.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 jupyter_git_pull_config.py diff --git a/README.md b/README.md index 7326b4fd..c9fe7119 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ information. pip install nbgitpuller ``` +### Configuration + +Copy `jupyter_git_pull_config.py` to one of your Jupyter configuration paths (as determined from `jupyter --paths`) and edit it to meet your needs. + ## Example This example shows how to use the [nbgitpuller link generator] diff --git a/jupyter_git_pull_config.py b/jupyter_git_pull_config.py new file mode 100644 index 00000000..3719c4db --- /dev/null +++ b/jupyter_git_pull_config.py @@ -0,0 +1,34 @@ +# May be set to a list of URLs described as Python regular expressions (using re.fullmatch()) +# where it is permitted to autorun scripts from the pulled project as a pre-initialisation +# step. +# +# WARNING: Enable this only if you understand and accept the risks of AUTORUN.INF. +# ---- +# c.NbGitPuller.autorun_allow = [ +# r'https://github\.com/org/name\.git', +# r'https://github\.com/org-two/name-two\.git' +# ] +# ---- +# +# To allow all sources (*not* recommended) use: +# ---- +# c.NbGitPuller.autorun_allow = True +# ---- +# +# The default is 'False' which means the autorun functionality is completely disabled +#c.NbGitPuller.autorun_allow = False + +# List of scripts to search for when attempting to autorun. The first match will +# be run with a single argument of 'init' or 'update' depending on what nbgitpuller +# is doing. 
+# ---- +# c.NbGitPuller.autorun_script = [ +# '.nbgitpuller.script', +# '.different.script' +# ] +# ---- +# +# The script must be executable and must be checked out on an 'exec' (i.e. not a 'noexec') mountpoint. +# +# The default is the empty list. +#c.NbGitPuller.autorun_script = [] diff --git a/nbgitpuller/application.py b/nbgitpuller/application.py index 409cec30..9b878b3c 100644 --- a/nbgitpuller/application.py +++ b/nbgitpuller/application.py @@ -1,6 +1,8 @@ from .version import __version__ # noqa from .pull import GitPuller # noqa from jupyter_server.extension.application import ExtensionApp +from traitlets import Bool, CRegExp, List, Unicode, Union +from traitlets.config import Configurable import os @@ -12,6 +14,33 @@ class NbGitPuller(ExtensionApp): os.path.join(os.path.dirname(__file__), 'static') ] + autorun_allow = Union( + [Bool(), List(CRegExp())], + default_value=False, + config=True, + help=""" + List of URLs described as Python regular expressions (using re.fullmatch()) where + it is permitted to autorun scripts from the pulled project as a pre-initialisation + step. Enable this only if you understand and accept the risks of AUTORUN.INF. + + When set to boolean True, all URLs are allowed, whilst False (default) autorun + is disabled completely. + """ + ) + + autorun_script = List( + Unicode(), + default_value=[], + config=True, + help=""" + List of scripts to search for when attempting to autorun. The first match will + be run with a single argument of 'init' or 'update' depending on what nbgitpuller + is doing. + + Enable this only if you understand and accept the risks of AUTORUN.INF. 
+ """ + ) + def initialize_handlers(self): from .handlers import ( SyncHandler, diff --git a/nbgitpuller/pull.py b/nbgitpuller/pull.py index dd8aae16..3d87ca3a 100644 --- a/nbgitpuller/pull.py +++ b/nbgitpuller/pull.py @@ -1,4 +1,5 @@ import os +import re import subprocess import logging import time @@ -81,6 +82,9 @@ def __init__(self, git_url, repo_dir, branch, **kwargs): elif not self.branch_exists(self.branch_name): raise ValueError(f"Branch: {self.branch_name} -- not found in repo: {self.git_url}") + self.autorun_allow = kwargs.pop('autorun_allow', False) + self.autorun_script = kwargs.pop('autorun_script', []) + newargs = {k: v for k, v in kwargs.items() if v is not None} super(GitPuller, self).__init__(**newargs) @@ -143,6 +147,30 @@ def pull(self): else: yield from self.update() + def autorun(self, operation="method"): + """ + Search for and execute the autorun script. + """ + + if not self.autorun_allow: + return + if not any(( re.fullmatch(pattern, self.git_url) for pattern in self.autorun_allow )): + logging.info('autorun skipped, URL does not match any rules') + return + + script = next(( s for s in self.autorun_script if os.access(os.path.join(self.repo_dir, s), os.X_OK)), None) + if not script: + logging.info('autorun skipped, no matching (executable) script') + return + + try: + for line in execute_cmd([ os.path.join(self.repo_dir, script), operation ], cwd=self.repo_dir, close_fds=True): + yield line + except subprocess.CalledProcessError: + m = f"Problem autorunning {script}" + logging.exception(m) + raise ValueError(m) + def initialize_repo(self): """ Clones repository @@ -154,6 +182,7 @@ def initialize_repo(self): clone_args.extend(['--branch', self.branch_name]) clone_args.extend(["--", self.git_url, self.repo_dir]) yield from execute_cmd(clone_args) + yield from self.autorun('init') logging.info('Repo {} initialized'.format(self.repo_dir)) def reset_deleted_files(self): @@ -343,6 +372,8 @@ def update(self): yield from self.ensure_lock() yield 
from self.merge() + yield from self.autorun('update') + def main(): """