diff --git a/docs/source/api/jupyter_server.services.kernelspecs.monitors.rst b/docs/source/api/jupyter_server.services.kernelspecs.monitors.rst new file mode 100644 index 0000000000..cfee291d10 --- /dev/null +++ b/docs/source/api/jupyter_server.services.kernelspecs.monitors.rst @@ -0,0 +1,25 @@ +jupyter\_server.services.kernelspecs.monitors package +===================================================== + +Submodules +---------- + + +.. automodule:: jupyter_server.services.kernelspecs.monitors.polling_monitor + :members: + :undoc-members: + :show-inheritance: + + +.. automodule:: jupyter_server.services.kernelspecs.monitors.watchdog_monitor + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: jupyter_server.services.kernelspecs.monitors + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/jupyter_server.services.kernelspecs.rst b/docs/source/api/jupyter_server.services.kernelspecs.rst index 3f210d0f55..7217dd6ef4 100644 --- a/docs/source/api/jupyter_server.services.kernelspecs.rst +++ b/docs/source/api/jupyter_server.services.kernelspecs.rst @@ -1,6 +1,14 @@ jupyter\_server.services.kernelspecs package ============================================ +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + jupyter_server.services.kernelspecs.monitors + Submodules ---------- @@ -10,6 +18,12 @@ Submodules :undoc-members: :show-inheritance: + +.. automodule:: jupyter_server.services.kernelspecs.kernelspec_cache + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index c28807b8e2..7ea18a19b8 100644 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -342,6 +342,10 @@ def terminal_manager(self): def kernel_spec_manager(self): return self.settings["kernel_spec_manager"] + @property + def kernel_spec_cache(self): + return self.settings["kernel_spec_cache"] + @property def config_manager(self): return self.settings["config_manager"] diff --git a/jupyter_server/kernelspecs/handlers.py b/jupyter_server/kernelspecs/handlers.py index 301973223a..cdb089036e 100644 --- a/jupyter_server/kernelspecs/handlers.py +++ b/jupyter_server/kernelspecs/handlers.py @@ -26,7 +26,7 @@ def initialize(self): @authorized async def get(self, kernel_name, path, include_body=True): """Get a kernelspec resource.""" - ksm = self.kernel_spec_manager + ksc = self.kernel_spec_cache if path.lower().endswith(".png"): self.set_header("Cache-Control", f"max-age={60*60*24*30}") ksm = self.kernel_spec_manager @@ -50,7 +50,7 @@ async def get(self, kernel_name, path, include_body=True): ) ) try: - kspec = await ensure_async(ksm.get_kernel_spec(kernel_name)) + kspec = await ensure_async(ksc.get_kernel_spec(kernel_name)) self.root = kspec.resource_dir except KeyError as e: raise web.HTTPError(404, "Kernel spec %s not found" % kernel_name) from e diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 9ecd875d65..0e7d4bad90 100644 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -108,6 +108,7 @@ AsyncMappingKernelManager, MappingKernelManager, ) +from jupyter_server.services.kernelspecs.kernelspec_cache import KernelSpecCache from jupyter_server.services.sessions.sessionmanager import SessionManager from jupyter_server.utils import ( check_pid, @@ -235,6 +236,7 @@ def __init__( authorizer=None, identity_provider=None, kernel_websocket_connection_class=None, + kernel_spec_cache=None, ): """Initialize a server web application.""" if identity_provider is None: @@ -272,6 +274,7 @@ def __init__( authorizer=authorizer, identity_provider=identity_provider, kernel_websocket_connection_class=kernel_websocket_connection_class, + kernel_spec_cache=kernel_spec_cache, ) handlers = self.init_handlers(default_services, settings) @@ -296,6 +299,7 @@ def init_settings( authorizer=None, identity_provider=None, kernel_websocket_connection_class=None, + kernel_spec_cache=None, ): """Initialize settings for the web application.""" _template_path = settings_overrides.get( @@ -373,6 +377,7 @@ def init_settings( "contents_manager": contents_manager, "session_manager": session_manager, "kernel_spec_manager": kernel_spec_manager, + "kernel_spec_cache": kernel_spec_cache, "config_manager": config_manager, "authorizer": authorizer, "identity_provider": identity_provider, @@ -1494,6 +1499,16 @@ def _default_session_manager_class(self): return "jupyter_server.gateway.managers.GatewaySessionManager" return SessionManager + kernel_spec_cache_class = Type( + default_value=KernelSpecCache, + klass=KernelSpecCache, + config=True, + help=""" + The kernel spec cache class to use. Must be a subclass + of `jupyter_server.services.kernelspecs.kernelspec_cache.KernelSpecCache`. + """, + ) + kernel_websocket_connection_class = Type( klass=BaseKernelWebsocketConnection, config=True, @@ -1892,6 +1907,11 @@ def init_configurables(self): kernel_manager=self.kernel_manager, contents_manager=self.contents_manager, ) + self.kernel_spec_cache = self.kernel_spec_cache_class( + parent=self, + log=self.log, + kernel_spec_manager=self.kernel_spec_manager, + ) self.config_manager = self.config_manager_class( parent=self, log=self.log, @@ -2053,6 +2073,7 @@ def init_webapp(self): authorizer=self.authorizer, identity_provider=self.identity_provider, kernel_websocket_connection_class=self.kernel_websocket_connection_class, + kernel_spec_cache=self.kernel_spec_cache, ) if self.certfile: self.ssl_options["certfile"] = self.certfile diff --git a/jupyter_server/services/kernelspecs/handlers.py b/jupyter_server/services/kernelspecs/handlers.py index e1ed186fa7..2a01846b55 100644 --- a/jupyter_server/services/kernelspecs/handlers.py +++ b/jupyter_server/services/kernelspecs/handlers.py @@ -62,12 +62,12 @@ class MainKernelSpecHandler(KernelSpecsAPIHandler): @authorized async def get(self): """Get the list of kernel specs.""" - ksm = self.kernel_spec_manager + ksc = self.kernel_spec_cache km = self.kernel_manager model = {} model["default"] = km.default_kernel_name model["kernelspecs"] = specs = {} - kspecs = await ensure_async(ksm.get_all_specs()) + kspecs = await ensure_async(ksc.get_all_specs()) for kernel_name, kernel_info in kspecs.items(): try: if is_kernelspec_model(kernel_info): @@ -94,10 +94,10 @@ class KernelSpecHandler(KernelSpecsAPIHandler): @authorized async def get(self, kernel_name): """Get a kernel spec model.""" - ksm = self.kernel_spec_manager + ksc = self.kernel_spec_cache kernel_name = url_unescape(kernel_name) try: - spec = await ensure_async(ksm.get_kernel_spec(kernel_name)) + spec = await ensure_async(ksc.get_kernel_spec(kernel_name)) except KeyError as e: raise web.HTTPError(404, "Kernel spec %s not found" % kernel_name) from e if is_kernelspec_model(spec): diff --git a/jupyter_server/services/kernelspecs/kernelspec_cache.py b/jupyter_server/services/kernelspecs/kernelspec_cache.py new file mode 100644 index 0000000000..754ce9c3ab --- /dev/null +++ b/jupyter_server/services/kernelspecs/kernelspec_cache.py @@ -0,0 +1,259 @@ +"""Cache handling for kernel specs.""" +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + + +import os +import sys +from abc import ABC, ABCMeta, abstractmethod +from typing import Any, Dict, Optional, Union + +# See compatibility note on `group` keyword in https://docs.python.org/3/library/importlib.metadata.html#entry-points +if sys.version_info < (3, 10): # pragma: no cover + from importlib_metadata import EntryPoint, entry_points +else: # pragma: no cover + from importlib.metadata import EntryPoint, entry_points + +from jupyter_client.kernelspec import KernelSpec, KernelSpecManager +from traitlets.config import LoggingConfigurable +from traitlets.traitlets import CBool, Instance, Unicode, default + +from jupyter_server.utils import ensure_async + +# Simplify the typing. Cache items are essentially dictionaries of strings +# to either strings or dictionaries. The items themselves are indexed by +# the kernel_name (case-insensitive). +CacheItemType = Dict[str, Union[str, Dict]] + + +class KernelSpecCache(LoggingConfigurable): + """The primary (singleton) instance for managing KernelSpecs. + + This class contains the configured KernelSpecManager instance upon + which it uses to populate the cache (when enabled) or as a pass-thru + (when disabled). + + Note that the KernelSpecManager returns different formats from methods + get_all_specs() and get_kernel_spec(). The format in which cache entries + are stored is that of the get_all_specs() results. As a result, some + conversion between formats is necessary, depending on which method is called. + """ + + cache_enabled_env = "JUPYTER_KERNELSPEC_CACHE_ENABLED" + cache_enabled = CBool( + config=True, + help="""Enable Kernel Specification caching. (JUPYTER_KERNELSPEC_CACHE_ENABLED env var)""", + ) + + @default("cache_enabled") + def _cache_enabled_default(self): + return os.getenv(self.cache_enabled_env, "false").lower() in ("true", "1") + + kernel_spec_manager = Instance("jupyter_client.kernelspec.KernelSpecManager") + + monitor_name_env = "JUPYTER_KERNELSPEC_MONITOR_NAME" + monitor_name = Unicode( + help="""The name of the entry_point used to monitor changes to kernelspecs. +(JUPYTER_KERNELSPEC_MONITOR_NAME env var)""", + ).tag(config=True) + + @default("monitor_name") + def _monitor_name_default(self): + return os.getenv(self.monitor_name_env, "polling-monitor") + + # The kernelspec cache consists of a dictionary mapping the kernel name to the actual + # kernelspec data (CacheItemType). + cache_items: Dict[str, CacheItemType] + cache_misses: int + + def __init__(self, kernel_spec_manager: KernelSpecManager, **kwargs: Any) -> None: + """Initialize the cache.""" + super().__init__(**kwargs) + self.kernel_spec_manager = kernel_spec_manager + self.kernel_spec_monitor = None + # initialize cache vars + self.cache_items = {} + self.cache_misses = 0 + if self.cache_enabled: + # Remove configurable traits that have no bearing on monitors + kwargs.pop("cache_enabled", None) + kwargs.pop("monitor_name", None) + self.kernel_spec_monitor = KernelSpecMonitorBase.create_instance(self, **kwargs) + self.kernel_spec_monitor.initialize() + + def __del__(self): + if self.kernel_spec_monitor is not None: + self.kernel_spec_monitor.destroy() + self.kernel_spec_monitor = None + + async def get_kernel_spec(self, kernel_name: str) -> Optional[KernelSpec]: + """Get the named kernel specification. + + This method is equivalent to calling KernelSpecManager.get_kernel_spec(). If + caching is enabled, it will pull the item from the cache. If no item is + returned (as will be the case if caching is disabled) it will defer to the + currently configured KernelSpecManager. If an item is returned (and caching + is enabled), it will be added to the cache. + """ + kernelspec = self.get_item(kernel_name) + if not kernelspec: + kernelspec = await ensure_async(self.kernel_spec_manager.get_kernel_spec(kernel_name)) + if kernelspec: + self.put_item(kernel_name, kernelspec) + return kernelspec + + async def get_all_specs(self) -> Dict[str, CacheItemType]: + """Get all available kernel specifications. + + This method is equivalent to calling KernelSpecManager.get_all_specs(). If + caching is enabled, it will pull all items from the cache. If no items are + returned (as will be the case if caching is disabled) it will defer to the + currently configured KernelSpecManager. If items are returned (and caching + is enabled), they will be added to the cache. + + Note that the return type of this method is not a dictionary or list of + KernelSpec instances, but rather a dictionary of kernel-name to kernel-info + dictionaries are returned - as is the case with the respective return values + of the KernelSpecManager methods. + """ + kernelspecs = self.get_all_items() + if not kernelspecs: + kernelspecs = await ensure_async(self.kernel_spec_manager.get_all_specs()) + if kernelspecs: + self.put_all_items(kernelspecs) + return kernelspecs + + # Cache-related methods + def get_item(self, kernel_name: str) -> Optional[KernelSpec]: + """Retrieves a named kernel specification from the cache. + + If cache is disabled or the item is not in the cache, None is returned; + otherwise, a KernelSpec instance of the item is returned. + """ + kernelspec: Optional[KernelSpec] = None + if self.cache_enabled: + cache_item = self.cache_items.get(kernel_name.lower()) + if cache_item: # Convert to KernelSpec + # In certain conditions, like when the kernelspec is fetched prior to its removal from the cache, + # we can encounter a FileNotFoundError. In those cases, treat as a cache miss as well. + try: + kernelspec = KernelSpecCache.cache_item_to_kernel_spec(cache_item) + except FileNotFoundError: + pass + if not kernelspec: + self.cache_misses += 1 + self.log.debug( + "Cache miss ({misses}) for kernelspec: {kernel_name}".format( + misses=self.cache_misses, kernel_name=kernel_name + ) + ) + return kernelspec + + def get_all_items(self) -> Dict: # type is Dict[str, CacheItemType] + """Retrieves all kernel specification from the cache. + + If cache is disabled or no items are in the cache, an empty dictionary is returned; + otherwise, a dictionary of kernel-name to specifications (kernel infos) are returned. + """ + items = {} + if self.cache_enabled: + for kernel_name in self.cache_items: + cache_item = self.cache_items.get(kernel_name) + items[kernel_name] = cache_item + if not items: + self.cache_misses += 1 + return items + + def put_item(self, kernel_name: str, cache_item: Union[KernelSpec, CacheItemType]) -> None: + """Adds or updates a kernel specification in the cache. + + This method can take either a KernelSpec (if called directly from the `get_kernel_spec()` + method, or a CacheItemItem (if called from a cache-related method) as that is the type + in which the cache items are stored. + """ + if self.cache_enabled: + self.log.info(f"KernelSpecCache: adding/updating kernelspec: {kernel_name}") + if type(cache_item) is KernelSpec: + cache_item = KernelSpecCache.kernel_spec_to_cache_item(cache_item) + self.cache_items[kernel_name.lower()] = cache_item # type: ignore + + def put_all_items(self, kernelspecs: Dict[str, Union[KernelSpec, CacheItemType]]) -> None: + """Adds or updates a dictionary of kernel specification in the cache.""" + for kernel_name, cache_item in kernelspecs.items(): + self.put_item(kernel_name, cache_item) + + def remove_item(self, kernel_name: str) -> Optional[CacheItemType]: + """Removes the cache item corresponding to kernel_name from the cache.""" + cache_item = None + if self.cache_enabled and kernel_name.lower() in self.cache_items: + cache_item = self.cache_items.pop(kernel_name.lower()) + self.log.info(f"KernelSpecCache: removed kernelspec: {kernel_name}") + return cache_item + + def remove_all_items(self) -> None: + """Removes all items from the cache.""" + if self.cache_enabled: + self.cache_items.clear() + self.log.info("KernelSpecCache: all items removed from cache") + + @staticmethod + def kernel_spec_to_cache_item(kernelspec: KernelSpec) -> CacheItemType: + """Converts a KernelSpec instance to a CacheItemType for storage into the cache.""" + cache_item = {"spec": kernelspec.to_dict(), "resource_dir": kernelspec.resource_dir} + return cache_item + + @staticmethod + def cache_item_to_kernel_spec(cache_item: CacheItemType) -> KernelSpec: + """Converts a CacheItemType to a KernelSpec instance for user consumption.""" + assert type(cache_item["spec"]) is dict + kernel_spec = KernelSpec(resource_dir=cache_item["resource_dir"], **cache_item["spec"]) + return kernel_spec + + +class KernelSpecMonitorMeta(ABCMeta, type(LoggingConfigurable)): # type: ignore + pass + + +class KernelSpecMonitorBase( # type:ignore[misc] + ABC, LoggingConfigurable, metaclass=KernelSpecMonitorMeta +): + GROUP_NAME = "jupyter_server.kernelspec_monitors" + + @classmethod + def create_instance( + cls, kernel_spec_cache: KernelSpecCache, **kwargs: Any + ) -> "KernelSpecMonitorBase": + """Creates an instance of the monitor class configured on the KernelSpecCache instance.""" + + kernel_spec_cache = kernel_spec_cache + entry_point_name = kernel_spec_cache.monitor_name + eps = entry_points(group=KernelSpecMonitorBase.GROUP_NAME, name=entry_point_name) + if eps: + ep: EntryPoint = eps[entry_point_name] + monitor_class = ep.load() + monitor_instance: KernelSpecMonitorBase = monitor_class(kernel_spec_cache, **kwargs) + if not isinstance(monitor_instance, KernelSpecMonitorBase): + msg = ( + f"Entrypoint '{kernel_spec_cache.monitor_name}' of " + f"group '{KernelSpecMonitorBase.GROUP_NAME}' is not a " + f"subclass of '{KernelSpecMonitorBase.__name__}'" + ) + raise RuntimeError(msg) + else: + msg = ( + f"Entrypoint '{kernel_spec_cache.monitor_name}' of " + f"group '{KernelSpecMonitorBase.GROUP_NAME}' cannot be located." + ) + raise RuntimeError(msg) + + return monitor_instance + + @abstractmethod + def initialize(self) -> None: + """Initializes the monitor.""" + pass + + @abstractmethod + def destroy(self) -> None: + """Destroys the monitor.""" + pass diff --git a/jupyter_server/services/kernelspecs/monitors/__init__.py b/jupyter_server/services/kernelspecs/monitors/__init__.py new file mode 100644 index 0000000000..54ae7aaafe --- /dev/null +++ b/jupyter_server/services/kernelspecs/monitors/__init__.py @@ -0,0 +1,2 @@ +from .watchdog_monitor import KernelSpecWatchdogMonitor # noqa +from .polling_monitor import KernelSpecPollingMonitor # noqa diff --git a/jupyter_server/services/kernelspecs/monitors/polling_monitor.py b/jupyter_server/services/kernelspecs/monitors/polling_monitor.py new file mode 100644 index 0000000000..ab4bf896f2 --- /dev/null +++ b/jupyter_server/services/kernelspecs/monitors/polling_monitor.py @@ -0,0 +1,94 @@ +"""KernelSpec watchdog monitor used by KernelspecCache.""" +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import json +import os +from hashlib import md5 +from typing import Any, Dict + +from overrides import overrides +from traitlets.traitlets import Float, default + +from ..kernelspec_cache import KernelSpecCache, KernelSpecMonitorBase + + +class KernelSpecPollingMonitor(KernelSpecMonitorBase): # type:ignore[misc] + """Polling monitor that uses a periodic poll period to reload the kernelspec cache.""" + + interval_env = "JUPYTER_POLLING_MONITOR_INTERVAL" + interval = Float( + config=True, + help="""The interval (in seconds) at which kernelspecs are updated in the cache. +(JUPYTER_POLLING_MONITOR_INTERVAL env var)""", + ) + + @default("interval") + def _interval_default(self): + return float(os.getenv(self.interval_env, "30.0")) + + _pcallback = None + + # Keep track of hash values for each entry placed into the cache. This will lessen + # the churn and noise when publishing events + hash_values: Dict[str, str] + + def __init__(self, kernel_spec_cache: KernelSpecCache, **kwargs: Any): + """Initialize the handler.""" + super().__init__(**kwargs) + self.kernel_spec_cache: KernelSpecCache = kernel_spec_cache + self.kernel_spec_manager = self.kernel_spec_cache.kernel_spec_manager + self.hash_values = {} + self.log.info(f"Starting {self.__class__.__name__} with interval: {self.interval} ...") + + @overrides + def initialize(self) -> None: + """Initializes the cache and starts the periodic poller.""" + if self.kernel_spec_cache.cache_enabled: + self.poll() + self.start() + + @overrides + def destroy(self) -> None: + self.stop() + + def poll(self): + diff_kernelspecs = {} + kernelspecs = self.kernel_spec_manager.get_all_specs() + for kernel_name, entry in kernelspecs.items(): + hash_val = md5(json.dumps(entry).encode("utf-8")).hexdigest() + cached_hash_val = self.hash_values.get(kernel_name, "") + if hash_val != cached_hash_val: + diff_kernelspecs[kernel_name] = entry + self.hash_values[kernel_name] = hash_val + + self.log.debug( + f"{self.__class__.__name__} num fetched: {len(kernelspecs.keys())}, " + f"num cached: {len(diff_kernelspecs.keys())}" + ) + self.kernel_spec_cache.put_all_items(diff_kernelspecs) + + # Determine items to remove by calculating what kernelspec names are in the previous + # set and not in the current set + current_set: set = set(kernelspecs.keys()) + previous_set: set = set(self.hash_values.keys()) + to_be_removed = previous_set.difference(current_set) + for kernel_name in to_be_removed: + self.hash_values.pop(kernel_name, None) + self.kernel_spec_cache.remove_item(kernel_name) + + def start(self): + """Start the polling of the kernel.""" + if self._pcallback is None: + from tornado.ioloop import PeriodicCallback + + self._pcallback = PeriodicCallback( + self.poll, + 1000 * self.interval, + ) + self._pcallback.start() + + def stop(self): + """Stop the kernel polling.""" + if self._pcallback is not None: + self._pcallback.stop() + self._pcallback = None diff --git a/jupyter_server/services/kernelspecs/monitors/watchdog_monitor.py b/jupyter_server/services/kernelspecs/monitors/watchdog_monitor.py new file mode 100644 index 0000000000..108868aadb --- /dev/null +++ b/jupyter_server/services/kernelspecs/monitors/watchdog_monitor.py @@ -0,0 +1,131 @@ +"""KernelSpec watchdog monitor used by KernelspecCache.""" +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import os +from typing import Any, Set, Tuple + +from overrides import overrides +from watchdog.events import FileSystemEventHandler +from watchdog.observers import Observer + +from ..kernelspec_cache import KernelSpecCache, KernelSpecMonitorBase + + +class KernelSpecWatchdogMonitor(KernelSpecMonitorBase): # type:ignore[misc] + """Watchdog handler that filters on specific files deemed representative of a kernel specification.""" + + def __init__(self, kernel_spec_cache: KernelSpecCache, **kwargs: Any): + """Initialize the handler.""" + super().__init__(**kwargs) + self.kernel_spec_cache: KernelSpecCache = kernel_spec_cache + self.kernel_spec_manager = self.kernel_spec_cache.kernel_spec_manager + self.observed_dirs: Set[str] = set() # Tracks which directories are being watched + self.observer: Any = None + + @overrides + def initialize(self) -> None: + """Initializes the cache and starts the observer.""" + + if self.kernel_spec_cache.cache_enabled: + self.observer = Observer() + kernelspecs = self.kernel_spec_manager.get_all_specs() + self.kernel_spec_cache.put_all_items(kernelspecs) + # Following adds, see if any of the manager's kernel dirs are not observed and add them + for kernel_dir in self.kernel_spec_manager.kernel_dirs: + if kernel_dir not in self.observed_dirs: + if os.path.exists(kernel_dir): + self.log.info( + "KernelSpecCache: observing directory: {kernel_dir}".format( + kernel_dir=kernel_dir + ) + ) + self.observed_dirs.add(kernel_dir) + self.observer.schedule(WatchDogHandler(self), kernel_dir, recursive=True) + else: + self.log.warning( + "KernelSpecCache: kernel_dir '{kernel_dir}' does not exist" + " and will not be observed.".format(kernel_dir=kernel_dir) + ) + self.observer.start() + + @overrides + def destroy(self) -> None: + self.observer = None + + +class WatchDogHandler(FileSystemEventHandler): + # Events related to these files trigger the management of the KernelSpec cache. Should we find + # other files qualify as indicators of a kernel specification's state (like perhaps detached parameter + # files in the future) should be added to this list - at which time it should become configurable. + watched_files = ["kernel.json"] + + def __init__(self, monitor: "KernelSpecWatchdogMonitor", **kwargs: Any): + """Initialize the handler.""" + super().__init__(**kwargs) + self.kernel_spec_cache = monitor.kernel_spec_cache + self.log = monitor.kernel_spec_cache.log + + def dispatch(self, event): + """Dispatches events pertaining to kernelspecs to the appropriate methods. + + The primary purpose of this method is to ensure the action is occurring against + a file in the list of watched files and adds some additional attributes to + the event instance to make the actual event handling method easier. + """ + + if os.path.basename(event.src_path) in self.watched_files: + super().dispatch(event) + + def on_created(self, event): + """Fires when a watched file is created. + + This will trigger a call to the configured KernelSpecManager to fetch the instance + associated with the created file, which is then added to the cache. + """ + resource_dir, kernel_name = WatchDogHandler._extract_info(event.src_path) + try: + kernelspec = self.kernel_spec_cache.kernel_spec_manager.get_kernel_spec(kernel_name) + self.kernel_spec_cache.put_item(kernel_name, kernelspec) + except Exception as e: + self.log.warning( + f"The following exception occurred creating cache entry for: {resource_dir} - continuing... ({e})" + ) + + def on_deleted(self, event): + """Fires when a watched file is deleted, triggering a removal of the corresponding item from the cache.""" + _, kernel_name = WatchDogHandler._extract_info(event.src_path) + self.kernel_spec_cache.remove_item(kernel_name) + + def on_modified(self, event): + """Fires when a watched file is modified. + + This will trigger a call to the configured KernelSpecManager to fetch the instance + associated with the modified file, which is then replaced in the cache. + """ + resource_dir, kernel_name = WatchDogHandler._extract_info(event.src_path) + try: + kernelspec = self.kernel_spec_cache.kernel_spec_manager.get_kernel_spec(kernel_name) + self.kernel_spec_cache.put_item(kernel_name, kernelspec) + except Exception as e: + self.log.warning( + f"The following exception occurred updating cache entry for: {resource_dir} - continuing... ({e})" + ) + + def on_moved(self, event): + """Fires when a watched file is moved. + + This will trigger the update of the existing cached item, replacing its resource_dir entry + with that of the new destination. + """ + _, src_kernel_name = WatchDogHandler._extract_info(event.src_path) + dest_resource_dir, dest_kernel_name = WatchDogHandler._extract_info(event.dest_path) + cache_item = self.kernel_spec_cache.remove_item(src_kernel_name) + if cache_item is not None: + cache_item["resource_dir"] = dest_resource_dir + self.kernel_spec_cache.put_item(dest_kernel_name, cache_item) + + @staticmethod + def _extract_info(dir_name: str) -> Tuple[str, str]: + """Extracts the resource directory and kernel_name from the given dir_name.""" + resource_dir: str = os.path.dirname(dir_name) # includes kernel_name + return resource_dir, os.path.basename(resource_dir) diff --git a/pyproject.toml b/pyproject.toml index c495d7acc4..02af86c854 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,8 @@ dependencies = [ "traitlets>=5.6.0", "websocket-client", "jupyter_events>=0.6.0", - "overrides" + "overrides", + "importlib_metadata>=4.8.3;python_version<\"3.10\"", ] [project.urls] @@ -63,7 +64,9 @@ test = [ "pytest-jupyter[server]>=0.4", "pytest>=7.0", "requests", - "pre-commit" + "pre-commit", + # needed to test kernelspec monitors + "watchdog" ] docs = [ "ipykernel", @@ -83,12 +86,21 @@ docs = [ "tornado", # workaround for an unknown downstream library that is now # missing typing_extensions - "typing_extensions" + "typing_extensions", + # required to build api docs + "watchdog" +] +watchdog-monitor = [ + "watchdog" ] [project.scripts] jupyter-server = "jupyter_server.serverapp:main" +[project.entry-points."jupyter_server.kernelspec_monitors"] +polling-monitor = "jupyter_server.services.kernelspecs.monitors:KernelSpecPollingMonitor" +watchdog-monitor = "jupyter_server.services.kernelspecs.monitors:KernelSpecWatchdogMonitor" + [tool.hatch.envs.docs] features = ["docs"] [tool.hatch.envs.docs.scripts] @@ -256,6 +268,8 @@ unfixable = [ # S603 `subprocess` call: check for execution of untrusted input "jupyter_server/services/contents/filemanager.py" = ["S603", "S607"] "tests/unix_sockets/test_serverapp_integration.py" = ["S603", "S607"] +# S324 Probable use of insecure hash functions in `hashlib`: `md5` (ok - used as checksum for kernelspec cache) +"jupyter_server/services/kernelspecs/monitors/polling_monitor.py" = ["S324"] [tool.pytest.ini_options] addopts = "-raXs --durations 10 --color=yes --doctest-modules" diff --git a/tests/services/kernelspecs/test_kernelspec_cache.py b/tests/services/kernelspecs/test_kernelspec_cache.py new file mode 100644 index 0000000000..5f27d357a9 --- /dev/null +++ b/tests/services/kernelspecs/test_kernelspec_cache.py @@ -0,0 +1,164 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +"""Tests for KernelSpecCache.""" + +import asyncio +import json +import shutil +import sys +from pathlib import Path + +import pytest +from jupyter_client.kernelspec import NoSuchKernel +from traitlets.config import Config + +from jupyter_server.services.kernelspecs.kernelspec_cache import KernelSpecCache + +kernelspec_json = { + "argv": ["cat", "{connection_file}"], + "display_name": "Test kernel: {kernel_name}", +} + + +def _install_kernelspec(kernels_dir: Path, kernel_name: str) -> Path: + """install a sample kernel in a kernels directory""" + kernelspec_dir = kernels_dir / kernel_name + kernelspec_dir.mkdir(parents=True) + json_file = Path(kernelspec_dir) / "kernel.json" + named_json = kernelspec_json.copy() + named_json["display_name"] = str(named_json["display_name"]).format(kernel_name=kernel_name) + with open(str(json_file), "w") as f: + json.dump(named_json, f) + return kernelspec_dir + + +def _modify_kernelspec(kernelspec_dir: str, kernel_name: str) -> None: + json_file = Path(kernelspec_dir) / "kernel.json" + kernel_json = kernelspec_json.copy() + kernel_json["display_name"] = f"{kernel_name} modified!" + with open(str(json_file), "w") as f: + json.dump(kernel_json, f) + + +@pytest.fixture +def other_kernelspec_location(jp_env_jupyter_path): + other_location = Path(jp_env_jupyter_path) / "kernels" + other_location.mkdir() + return other_location + + +@pytest.fixture +def setup_kernelspecs(jp_environ, jp_kernel_dir): + # Only populate factory info + _install_kernelspec(jp_kernel_dir, "test1") + _install_kernelspec(jp_kernel_dir, "test2") + _install_kernelspec(jp_kernel_dir, "test3") + + +MONITORS = ["watchdog-monitor", "polling-monitor"] + + +@pytest.fixture(params=MONITORS) +def kernel_spec_cache( + jp_environ, setup_kernelspecs, request, is_enabled, jp_configurable_serverapp +): + config = Config( + { + "ServerApp": { + "KernelSpecManager": { + "ensure_native_kernel": False, + }, + "KernelSpecCache": { + "cache_enabled": is_enabled, + "monitor_name": request.param, + }, + } + } + ) + # Increase polling frequency to avoid long test delays + if request.param == "polling-monitor" and is_enabled: + config["ServerApp"]["KernelSpecPollingMonitor"]["interval"] = 1.0 + + app = jp_configurable_serverapp(config=config) + yield app.kernel_spec_cache + app.kernel_spec_cache = None + app.clear_instance() + + +def get_delay_factor(kernel_spec_cache: KernelSpecCache) -> float: + if kernel_spec_cache.cache_enabled: + if kernel_spec_cache.monitor_name == "polling-monitor": + return 2.0 + elif kernel_spec_cache.monitor_name == "watchdog-monitor": + # watchdog on Windows appears to be a bit slower + return 2.0 if sys.platform.startswith("win") else 1.0 + return 0.5 + + +@pytest.fixture(params=[False, True]) # Add types as needed +def is_enabled(request): + return request.param + + +async def test_get_all_specs(kernel_spec_cache): + kspecs = await kernel_spec_cache.get_all_specs() + assert len(kspecs) == 4 # The 3 we create, plus the echo kernel that jupyter_core adds + + +async def test_get_named_spec(kernel_spec_cache): + kspec = await kernel_spec_cache.get_kernel_spec("test2") + assert kspec.display_name == "Test kernel: test2" + + +async def test_get_modified_spec(kernel_spec_cache): + kspec = await kernel_spec_cache.get_kernel_spec("test2") + assert kspec.display_name == "Test kernel: test2" + + # Modify entry + _modify_kernelspec(kspec.resource_dir, "test2") + await asyncio.sleep(get_delay_factor(kernel_spec_cache)) # sleep to allow cache to update item + kspec = await kernel_spec_cache.get_kernel_spec("test2") + assert kspec.display_name == "test2 modified!" + + +async def test_add_spec(kernel_spec_cache, jp_kernel_dir, other_kernelspec_location): + with pytest.raises(NoSuchKernel): + await kernel_spec_cache.get_kernel_spec("added") # this will increment cache_miss + + _install_kernelspec(other_kernelspec_location, "added") + # this will increment cache_miss prior to load + kspec = await kernel_spec_cache.get_kernel_spec("added") + + assert kspec.display_name == "Test kernel: added" + # Cache misses should be 2, one for prior to adding the spec, the other after discovering its addition + assert kernel_spec_cache.cache_misses == (2 if kernel_spec_cache.cache_enabled else 0) + + # Add another to an existing observed directory, no cache miss here + _install_kernelspec(jp_kernel_dir, "added2") + await asyncio.sleep( + get_delay_factor(kernel_spec_cache) + ) # sleep to allow cache to add item (no cache miss in this case) + kspec = await kernel_spec_cache.get_kernel_spec("added2") + + assert kspec.display_name == "Test kernel: added2" + assert kernel_spec_cache.cache_misses == (2 if kernel_spec_cache.cache_enabled else 0) + + +async def test_remove_spec(kernel_spec_cache): + kspec = await kernel_spec_cache.get_kernel_spec("test2") + assert kspec.display_name == "Test kernel: test2" + + assert kernel_spec_cache.cache_misses == 0 + shutil.rmtree(kspec.resource_dir) + await asyncio.sleep(get_delay_factor(kernel_spec_cache)) # sleep to allow cache to remove item + with pytest.raises(NoSuchKernel): + await kernel_spec_cache.get_kernel_spec("test2") + + assert kernel_spec_cache.cache_misses == (1 if kernel_spec_cache.cache_enabled else 0) + + +async def test_get_missing(kernel_spec_cache): + with pytest.raises(NoSuchKernel): + await kernel_spec_cache.get_kernel_spec("missing") + + assert kernel_spec_cache.cache_misses == (1 if kernel_spec_cache.cache_enabled else 0)