From a8d86040e0b54fb0b5f29e8a6235b22f898e3ab4 Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Wed, 19 Apr 2023 14:22:31 -0400
Subject: [PATCH] release/v3.2.1 (#174)

* Small changes, documentation updates

* Fixed print_all_processors_metadata importing modules at cli initialization
---
 .github/workflows/ci.yml                      |  4 +-
 .../biomedicus_client/{pipeline => }/_run.py  |  4 +-
 .../src/biomedicus_client/cli.py              | 15 +++++---
 .../{pipeline => }/default_pipeline.py        | 38 +++++++++----------
 .../pipeline_confs/__init__.py                | 21 ++++++++++
 .../biomedicus_default_pipeline.yml           |  0
 .../rtf_to_text_pipeline.yml                  |  0
 .../scaleout_pipeline.yml}                    |  0
 .../{pipeline => }/rtf_to_text.py             | 25 ++++++------
 .../{pipeline => }/sources.py                 | 10 ++++-
 pyproject.toml                                |  6 ++-
 python/biomedicus/cli.py                      | 12 ++++--
 .../biomedicus/deployment/confs}/__init__.py  | 11 ++++--
 .../biomedicus_deploy.yml}                    |  0
 .../rtf_to_text_deploy.yml}                   |  0
 .../scaleout_deploy.yml}                      |  0
 .../deployment/default_deployment.py          | 28 +++++++-------
 python/biomedicus/deployment/rtf_to_text.py   | 29 +++++++-------
 .../biomedicus/examples/tutorial/__init__.py  |  0
 .../examples/tutorial/medications.py          | 20 ++++++++++
 .../examples/{ => tutorial}/sql_pipeline.py   |  5 ++-
 .../{ => tutorial}/sql_pipeline_rtf.py        | 30 ++++++++-------
 .../{ => tutorial}/sql_pipeline_rtf_only.py   | 32 +++++++++-------
 .../print_all_processors_metadata.py          |  6 +--
 python/tests/scaleout/test_scaleout.py        |  4 +-
 tools/docker/Dockerfile                       |  2 +-
 26 files changed, 191 insertions(+), 111 deletions(-)
 rename biomedicus_client/src/biomedicus_client/{pipeline => }/_run.py (95%)
 rename biomedicus_client/src/biomedicus_client/{pipeline => }/default_pipeline.py (76%)
 create mode 100644 biomedicus_client/src/biomedicus_client/pipeline_confs/__init__.py
 rename biomedicus_client/src/biomedicus_client/{pipeline => pipeline_confs}/biomedicus_default_pipeline.yml (100%)
 rename biomedicus_client/src/biomedicus_client/{pipeline => pipeline_confs}/rtf_to_text_pipeline.yml (100%)
 rename biomedicus_client/src/biomedicus_client/{pipeline/scaleout_pipeline_config.yml => pipeline_confs/scaleout_pipeline.yml} (100%)
 rename biomedicus_client/src/biomedicus_client/{pipeline => }/rtf_to_text.py (83%)
 rename biomedicus_client/src/biomedicus_client/{pipeline => }/sources.py (95%)
 rename {biomedicus_client/src/biomedicus_client/pipeline => python/biomedicus/deployment/confs}/__init__.py (56%)
 rename python/biomedicus/deployment/{biomedicus_deploy_config.yml => confs/biomedicus_deploy.yml} (100%)
 rename python/biomedicus/deployment/{rtf_to_text_deploy_config.yml => confs/rtf_to_text_deploy.yml} (100%)
 rename python/biomedicus/deployment/{scaleout_deploy_config.yml => confs/scaleout_deploy.yml} (100%)
 create mode 100644 python/biomedicus/examples/tutorial/__init__.py
 create mode 100644 python/biomedicus/examples/tutorial/medications.py
 rename python/biomedicus/examples/{ => tutorial}/sql_pipeline.py (96%)
 rename python/biomedicus/examples/{ => tutorial}/sql_pipeline_rtf.py (66%)
 rename python/biomedicus/examples/{ => tutorial}/sql_pipeline_rtf_only.py (66%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 456a49a4..26d7fde5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,14 +44,14 @@ jobs:
         pip install flake8 pytest
         pip install git+https://github.com/nlpie/mtap@main#egg=mtap
         pip install ./biomedicus_client
-        pip install .[test] --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install .[test,stanza] --extra-index-url https://download.pytorch.org/whl/cpu
     - name: Install dependencies (release)
       if: ${{ startsWith(github.head_ref, 'release') && success() }}
       run: |
         python -m pip install --upgrade pip setuptools wheel
         pip install flake8 pytest
         SETUPTOOLS_SCM_PRETEND_VERSION=${GITHUB_HEAD_REF##*/} pip install ./biomedicus_client
-        pip install .[test] --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install .[test,stanza] --extra-index-url https://download.pytorch.org/whl/cpu
     - name: Lint with flake8
       run: |
         pip install flake8
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/_run.py b/biomedicus_client/src/biomedicus_client/_run.py
similarity index 95%
rename from biomedicus_client/src/biomedicus_client/pipeline/_run.py
rename to biomedicus_client/src/biomedicus_client/_run.py
index 2eaf7294..dc4dc842 100644
--- a/biomedicus_client/src/biomedicus_client/pipeline/_run.py
+++ b/biomedicus_client/src/biomedicus_client/_run.py
@@ -16,12 +16,12 @@
 from argparse import ArgumentParser
 from typing import List
 
+from biomedicus_client.sources import WatcherSource, rtf_source, RtfHandler, TxtHandler
 from mtap import events_client
 from mtap.pipeline import FilesInDirectoryProcessingSource
 
+from biomedicus_client import default_pipeline
 from biomedicus_client.cli_tools import Command
-from biomedicus_client.pipeline import default_pipeline
-from biomedicus_client.pipeline.sources import WatcherSource, RtfHandler, rtf_source, TxtHandler
 
 
 class RunCommand(Command):
diff --git a/biomedicus_client/src/biomedicus_client/cli.py b/biomedicus_client/src/biomedicus_client/cli.py
index 890b2051..3168bdb9 100644
--- a/biomedicus_client/src/biomedicus_client/cli.py
+++ b/biomedicus_client/src/biomedicus_client/cli.py
@@ -15,13 +15,18 @@
 
 import logging
 
+from biomedicus_client import pipeline_confs
+from biomedicus_client._run import RunCommand
 from biomedicus_client.cli_tools import create_parser, WriteConfigsCommand
-from biomedicus_client.pipeline import RunCommand, default_pipeline, rtf_to_text
+from biomedicus_client.rtf_to_text import RunRtfToTextCommand
+
+__all__ = ('main',)
+
 
 CLIENT_CONFIGS = {
-    'pipeline': default_pipeline.default_pipeline_config,
-    'scaleout_pipeline': default_pipeline.scaleout_pipeline_config,
-    'rtf_only_pipeline': rtf_to_text.default_rtf_to_text_pipeline_config
+    'pipeline': pipeline_confs.DEFAULT,
+    'scaleout_pipeline': pipeline_confs.SCALEOUT,
+    'rtf_only_pipeline': pipeline_confs.RTF_TO_TEXT
 }
 
 
@@ -29,7 +34,7 @@ def main(args=None):
     parser = create_parser(
         WriteConfigsCommand(CLIENT_CONFIGS),
         RunCommand(),
-        rtf_to_text.RunRtfToTextCommand()
+        RunRtfToTextCommand()
     )
     conf = parser.parse_args(args)
     logging.basicConfig(level=conf.log_level)
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/default_pipeline.py b/biomedicus_client/src/biomedicus_client/default_pipeline.py
similarity index 76%
rename from biomedicus_client/src/biomedicus_client/pipeline/default_pipeline.py
rename to biomedicus_client/src/biomedicus_client/default_pipeline.py
index b3a36723..66074f26 100644
--- a/biomedicus_client/src/biomedicus_client/pipeline/default_pipeline.py
+++ b/biomedicus_client/src/biomedicus_client/default_pipeline.py
@@ -17,17 +17,16 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from importlib_resources import files
+from importlib_resources import as_file
 from mtap import Pipeline, LocalProcessor, RemoteProcessor
 from mtap.serialization import SerializerRegistry, SerializationProcessor
 
-__all__ = ['default_pipeline_config', 'scaleout_pipeline_config', 'create', 'from_args', 'argument_parser']
+__all__ = ['create', 'from_args', 'argument_parser']
 
-default_pipeline_config = files('biomedicus_client.pipeline').joinpath('biomedicus_default_pipeline.yml')
-scaleout_pipeline_config = files('biomedicus_client.pipeline').joinpath('scaleout_pipeline_config.yml')
+from biomedicus_client import pipeline_confs
 
 
-def create(config: Optional[Union[str, Path]] = None,
+def create(config: Optional[Union[str, bytes, Path]] = None,
            *, events_addresses: Optional[str] = None,
            rtf: bool = False,
            rtf_address: str = "localhost:50200",
@@ -39,27 +38,26 @@ def create(config: Optional[Union[str, Path]] = None,
     """The biomedicus default pipeline for processing clinical documents.
 
     Args
-        config (Optional[Union[str, Path]]): A path to an MTAP pipeline configuration YAML file to
-            use instead of the default.
+        config: A path to an MTAP pipeline configuration YAML file to use instead of the default.
 
     Keyword Args
-        events_addresses (Optional[str]): The address (or addresses, comma separated) for the
-            events service.
-        rtf (bool): Whether to include the rtf processor at the start of the pipeline. The rtf
-            processor will convert RTF data stored in the "rtf" Binary on the event to the
-            "plaintext" Document.
-        rtf_address (str): The address of the remote rtf processor.
-        serializer (Optional[str]): An optional serializer (examples: 'json', 'yml', 'pickle').
-        output_directory (Optional[Path]): Where the serializer should output the serialized files.
-        address (Optional[str]): An optional address to use for all processors.
+        events_addresses: The address (or addresses, comma separated) for the events service.
+        rtf: Whether to include the rtf processor at the start of the pipeline. The rtf processor will convert RTF data
+            stored in the "rtf" Binary on the event to the "plaintext" Document.
+        rtf_address: The address of the remote rtf processor.
+        serializer: An optional serializer (examples: 'json', 'yml', 'pickle').
+        output_directory: Where the serializer should output the serialized files.
+        address: An optional address to use for all processors.
 
     Returns
         Pipeline
 
     """
     if config is None:
-        config = default_pipeline_config
-    pipeline = Pipeline.from_yaml_file(config)
+        with as_file(pipeline_confs.DEFAULT) as config:
+            pipeline = Pipeline.from_yaml_file(config)
+    else:
+        pipeline = Pipeline.from_yaml_file(config)
 
     if events_addresses is not None:
         pipeline.events_address = events_addresses
@@ -73,6 +71,9 @@ def create(config: Optional[Union[str, Path]] = None,
         pipeline.append(ser_comp)
 
     if rtf:
+        if rtf_address is None:
+            rtf_address = 'localhost:50200'
+
         rtf_processor = RemoteProcessor(processor_name='biomedicus-rtf',
                                         address=rtf_address,
                                         params={'output_document_name': 'plaintext'})
@@ -126,7 +127,6 @@ def argument_parser() -> ArgumentParser:
     )
     parser.add_argument(
         '--rtf-address',
-        default="localhost:50200",
         help="The address (or addresses, comma separated) for the rtf to text converter processor."
     )
     parser.add_argument(
diff --git a/biomedicus_client/src/biomedicus_client/pipeline_confs/__init__.py b/biomedicus_client/src/biomedicus_client/pipeline_confs/__init__.py
new file mode 100644
index 00000000..6b7c7fd7
--- /dev/null
+++ b/biomedicus_client/src/biomedicus_client/pipeline_confs/__init__.py
@@ -0,0 +1,21 @@
+#  Copyright 2023 Regents of the University of Minnesota.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Provides importlib_resources Traversable objects for built-in pipeline configuration files."""
+from importlib_resources import files
+
+__all__ = ('DEFAULT', 'SCALEOUT', 'RTF_TO_TEXT')
+
+DEFAULT = files(__name__).joinpath("biomedicus_default_pipeline.yml")
+SCALEOUT = files(__name__).joinpath("scaleout_pipeline.yml")
+RTF_TO_TEXT = files(__name__).joinpath("rtf_to_text_pipeline.yml")
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/biomedicus_default_pipeline.yml b/biomedicus_client/src/biomedicus_client/pipeline_confs/biomedicus_default_pipeline.yml
similarity index 100%
rename from biomedicus_client/src/biomedicus_client/pipeline/biomedicus_default_pipeline.yml
rename to biomedicus_client/src/biomedicus_client/pipeline_confs/biomedicus_default_pipeline.yml
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/rtf_to_text_pipeline.yml b/biomedicus_client/src/biomedicus_client/pipeline_confs/rtf_to_text_pipeline.yml
similarity index 100%
rename from biomedicus_client/src/biomedicus_client/pipeline/rtf_to_text_pipeline.yml
rename to biomedicus_client/src/biomedicus_client/pipeline_confs/rtf_to_text_pipeline.yml
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/scaleout_pipeline_config.yml b/biomedicus_client/src/biomedicus_client/pipeline_confs/scaleout_pipeline.yml
similarity index 100%
rename from biomedicus_client/src/biomedicus_client/pipeline/scaleout_pipeline_config.yml
rename to biomedicus_client/src/biomedicus_client/pipeline_confs/scaleout_pipeline.yml
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/rtf_to_text.py b/biomedicus_client/src/biomedicus_client/rtf_to_text.py
similarity index 83%
rename from biomedicus_client/src/biomedicus_client/pipeline/rtf_to_text.py
rename to biomedicus_client/src/biomedicus_client/rtf_to_text.py
index 05a77c95..04ad18b6 100644
--- a/biomedicus_client/src/biomedicus_client/pipeline/rtf_to_text.py
+++ b/biomedicus_client/src/biomedicus_client/rtf_to_text.py
@@ -15,19 +15,17 @@
 
 from argparse import ArgumentParser, Namespace
 from os import PathLike
-
-from importlib_resources import files
 from pathlib import Path
 from typing import Union, Optional, List
 
+from biomedicus_client.sources import rtf_source
+from importlib_resources import as_file
 from mtap import Pipeline, LocalProcessor, EventProcessor, processor, events_client
 
+from biomedicus_client import pipeline_confs
 from biomedicus_client.cli_tools import Command
-from biomedicus_client.pipeline.sources import rtf_source
-
-__all__ = ['default_rtf_to_text_pipeline_config', 'create', 'from_args', 'argument_parser', 'RunRtfToTextCommand']
 
-default_rtf_to_text_pipeline_config = files('biomedicus_client.pipeline').joinpath('rtf_to_text_pipeline.yml')
+__all__ = ['create', 'from_args', 'argument_parser', 'RunRtfToTextCommand']
 
 
 @processor('write-plaintext')
@@ -57,8 +55,10 @@ def create(config: Optional[Union[str, PathLike]] = None,
 
     """
     if config is None:
-        config = default_rtf_to_text_pipeline_config
-    pipeline = Pipeline.from_yaml_file(config)
+        with as_file(pipeline_confs.RTF_TO_TEXT) as config:
+            pipeline = Pipeline.from_yaml_file(config)
+    else:
+        pipeline = Pipeline.from_yaml_file(config)
 
     if events_addresses is not None:
         pipeline.events_address = events_addresses
@@ -78,18 +78,15 @@ def argument_parser():
 
     """
     parser = ArgumentParser(add_help=False)
-    parser.add_argument('--config', default=None,
-                        help='Path to the pipeline configuration file.')
+    parser.add_argument('--config', default=None, help='Path to the pipeline configuration file.')
     parser.add_argument('--output_directory', '-o', default='output', help="The output directory to write txt out.")
-    parser.add_argument('--events-addresses', default=None,
-                        help="The address for the events service.")
+    parser.add_argument('--events-addresses', default=None, help="The address for the events service.")
     return parser
 
 
 def from_args(args: Namespace) -> Pipeline:
     if not isinstance(args, Namespace):
-        raise ValueError('"args" parameter should be the parsed arguments from '
-                         '"rtf_to_text.argument_parser()"')
+        raise ValueError('"args" parameter should be the parsed arguments from "rtf_to_text.argument_parser()"')
     return create(**vars(args))
 
 
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/sources.py b/biomedicus_client/src/biomedicus_client/sources.py
similarity index 95%
rename from biomedicus_client/src/biomedicus_client/pipeline/sources.py
rename to biomedicus_client/src/biomedicus_client/sources.py
index e0d26b0a..30aec765 100644
--- a/biomedicus_client/src/biomedicus_client/pipeline/sources.py
+++ b/biomedicus_client/src/biomedicus_client/sources.py
@@ -16,14 +16,22 @@
 import fnmatch
 import time
 from pathlib import Path
+from typing import Generator, Iterator
 
 from mtap import Event
 from mtap.pipeline import ProcessingSource
 from mtap.types import EventsClient
 from watchdog.events import FileSystemEventHandler, FileSystemEvent
 
+__all__ = [
+    'rtf_source',
+    'RtfHandler',
+    'TxtHandler',
+    'WatcherSource'
+]
 
-def rtf_source(input_directory: Path, extension_glob: str, events_client: EventsClient):
+
+def rtf_source(input_directory: Path, extension_glob: str, events_client: EventsClient) -> Iterator[Event]:
     input_directory = Path(input_directory)
     for path in input_directory.rglob(extension_glob):
         with path.open('rb', errors=None) as f:
diff --git a/pyproject.toml b/pyproject.toml
index 4a0ad383..ba9db3a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,12 +34,11 @@ classifiers = [
     'Topic :: Text Processing :: Linguistic'
 ]
 dependencies = [
-    "biomedicus_client==3.2.0",  # We get mtap, tqdm, and importlib_resources transitively from the client
+    "biomedicus_client==3.2.1",  # We get mtap, tqdm, and importlib_resources transitively from the client
     "numpy==1.24.2",
     "pyyaml==6.0",
     "regex==2023.3.23",
     "torch==2.0.0",
-    "stanza==1.5.0",
     "requests==2.28.2",
     "watchdog==3.0.0",
 ]
@@ -52,6 +51,9 @@ test = [
 docs = [
     "sphinx==6.1.3",
 ]
+stanza = [
+    "stanza==1.5.0",
+]
 
 [project.scripts]
 b9 = "biomedicus.cli:main"
diff --git a/python/biomedicus/cli.py b/python/biomedicus/cli.py
index f53a2407..e46175b5 100644
--- a/python/biomedicus/cli.py
+++ b/python/biomedicus/cli.py
@@ -16,8 +16,9 @@
 import logging
 
 from biomedicus.deployment import (
-    default_deployment,
     DownloadDataCommand,
+    confs as deployment_confs,
+    default_deployment,
     rtf_to_text
 )
 from biomedicus.java_support import RunJavaCommand
@@ -25,10 +26,13 @@
 from biomedicus_client import cli_tools
 from biomedicus_client.cli_tools import WriteConfigsCommand
 
+__all__ = ('main',)
+
+
 SERVER_CONFIGS = {
-    'deploy': default_deployment.deployment_config,
-    'scaleout_deploy': default_deployment.scaleout_deploy_config,
-    'rtf_to_text': rtf_to_text.deployment_config
+    'deploy': deployment_confs.DEFAULT,
+    'scaleout_deploy': deployment_confs.SCALEOUT,
+    'rtf_to_text': deployment_confs.RTF_TO_TEXT
 }
 
 
diff --git a/biomedicus_client/src/biomedicus_client/pipeline/__init__.py b/python/biomedicus/deployment/confs/__init__.py
similarity index 56%
rename from biomedicus_client/src/biomedicus_client/pipeline/__init__.py
rename to python/biomedicus/deployment/confs/__init__.py
index b98b7ae1..88be1016 100644
--- a/biomedicus_client/src/biomedicus_client/pipeline/__init__.py
+++ b/python/biomedicus/deployment/confs/__init__.py
@@ -1,4 +1,4 @@
-#  Copyright 2022 Regents of the University of Minnesota.
+#  Copyright 2023 Regents of the University of Minnesota.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -11,6 +11,11 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-"""Support for running biomedicus pipelines."""
+"""Provides importlib_resources Traversable objects for built-in deployment configuration files."""
+from importlib_resources import files
 
-from biomedicus_client.pipeline._run import RunCommand
+__all__ = ('DEFAULT', 'SCALEOUT', 'RTF_TO_TEXT')
+
+DEFAULT = files(__name__).joinpath("biomedicus_deploy.yml")
+SCALEOUT = files(__name__).joinpath("scaleout_deploy.yml")
+RTF_TO_TEXT = files(__name__).joinpath("rtf_to_text_deploy.yml")
diff --git a/python/biomedicus/deployment/biomedicus_deploy_config.yml b/python/biomedicus/deployment/confs/biomedicus_deploy.yml
similarity index 100%
rename from python/biomedicus/deployment/biomedicus_deploy_config.yml
rename to python/biomedicus/deployment/confs/biomedicus_deploy.yml
diff --git a/python/biomedicus/deployment/rtf_to_text_deploy_config.yml b/python/biomedicus/deployment/confs/rtf_to_text_deploy.yml
similarity index 100%
rename from python/biomedicus/deployment/rtf_to_text_deploy_config.yml
rename to python/biomedicus/deployment/confs/rtf_to_text_deploy.yml
diff --git a/python/biomedicus/deployment/scaleout_deploy_config.yml b/python/biomedicus/deployment/confs/scaleout_deploy.yml
similarity index 100%
rename from python/biomedicus/deployment/scaleout_deploy_config.yml
rename to python/biomedicus/deployment/confs/scaleout_deploy.yml
diff --git a/python/biomedicus/deployment/default_deployment.py b/python/biomedicus/deployment/default_deployment.py
index 985b8abb..af94a7e5 100644
--- a/python/biomedicus/deployment/default_deployment.py
+++ b/python/biomedicus/deployment/default_deployment.py
@@ -12,45 +12,48 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-import logging
 from argparse import ArgumentParser
 from contextlib import contextmanager
 from typing import List, Optional, ContextManager
 
-from importlib_resources import files
+from importlib_resources import as_file
 from mtap.deployment import Deployment
 
+from biomedicus.deployment import confs
 from biomedicus.deployment._data_downloading import check_data
 from biomedicus.java_support import attach_biomedicus_jar
 from biomedicus_client.cli_tools import Command
 
-logger = logging.getLogger(__name__)
-
-deployment_config = files('biomedicus.deployment').joinpath('biomedicus_deploy_config.yml')
-scaleout_deploy_config = files('biomedicus.deployment').joinpath('scaleout_deploy_config.yml')
-
 
 @contextmanager
-def create_deployment(offline: bool = False,
+def create_deployment(config: Optional[str] = None,
+                      offline: bool = False,
                       download_data: bool = False,
                       noninteractive: bool = False,
-                      config: Optional[str] = None,
                       log_level: Optional[str] = None,
                       jvm_classpath: Optional[str] = None,
                       rtf: bool = False,
                       host: Optional[str] = None,
                       startup_timeout: Optional[float] = None,
                       **_) -> ContextManager[Deployment]:
-    config = config if config is not None else deployment_config
-    log_level = log_level if log_level is not None else 'INFO'
     if not offline:
         check_data(download_data, noninteractive=noninteractive)
-    deployment = Deployment.from_yaml_file(config)
+
+    if config is None:
+        with as_file(confs.DEFAULT) as config:
+            deployment = Deployment.from_yaml_file(config)
+    else:
+        deployment = Deployment.from_yaml_file(config)
+
     if host is not None:
         deployment.global_settings.host = host
+
+    log_level = log_level if log_level is not None else 'INFO'
     deployment.global_settings.log_level = log_level
+
     startup_timeout = startup_timeout or 30
     deployment.shared_processor_config.startup_timeout = startup_timeout
+
     with attach_biomedicus_jar(
         deployment.shared_processor_config.java_classpath,
         jvm_classpath
@@ -72,7 +75,6 @@ def argument_parser():
     parser = ArgumentParser(add_help=False)
     parser.add_argument(
         '--config',
-        default=deployment_config,
         help='A path to a deployment configuration file to use instead of the'
              'default deployment configuration.'
     )
diff --git a/python/biomedicus/deployment/rtf_to_text.py b/python/biomedicus/deployment/rtf_to_text.py
index a5fe7ea7..aa8a19dc 100644
--- a/python/biomedicus/deployment/rtf_to_text.py
+++ b/python/biomedicus/deployment/rtf_to_text.py
@@ -13,31 +13,34 @@
 #  limitations under the License.
 from argparse import ArgumentParser, Namespace
 from contextlib import contextmanager
+from importlib_resources import as_file
 from typing import Optional, List, ContextManager
 
-from importlib_resources import files
 from mtap.deployment import Deployment
 
+from biomedicus.deployment import confs
 from biomedicus.java_support import attach_biomedicus_jar
 from biomedicus_client.cli_tools import Command
 
-deployment_config = files('biomedicus.deployment').joinpath('rtf_to_text_deploy_config.yml')
-
 
 @contextmanager
-def create_deployment(config_file: Optional[str] = None,
+def create_deployment(config: Optional[str] = None,
                       jvm_classpath: Optional[str] = None,
                       log_level: Optional[str] = None,
                       startup_timeout: Optional[float] = None,
                       **_) -> ContextManager[Deployment]:
-    if config_file is None:
-        config_file = deployment_config
-    if log_level is None:
-        log_level = 'INFO'
-    deployment = Deployment.from_yaml_file(config_file)
+    if config is None:
+        with as_file(confs.RTF_TO_TEXT) as config:
+            deployment = Deployment.from_yaml_file(config)
+    else:
+        deployment = Deployment.from_yaml_file(config)
+
+    log_level = 'INFO' if log_level is None else log_level
     deployment.global_settings.log_level = log_level
-    if startup_timeout is not None:
-        deployment.shared_processor_config.startup_timeout = startup_timeout
+
+    startup_timeout = startup_timeout or 30
+    deployment.shared_processor_config.startup_timeout = startup_timeout
+
     with attach_biomedicus_jar(
         deployment.shared_processor_config.java_classpath,
         jvm_classpath
@@ -56,7 +59,6 @@ def argument_parser() -> ArgumentParser:
         '--config',
         help='A path to a deployment configuration file to use instead of the'
              'default deployment configuration.',
-        default=deployment_config
     )
     parser.add_argument(
         '--jvm-classpath',
@@ -67,7 +69,8 @@ def argument_parser() -> ArgumentParser:
         help="The log level for pipeline runners."
     )
     parser.add_argument(
-        '--startup-timeout', type=float, default=10,
+        '--startup-timeout',
+        type=float,
         help="The timeout (in seconds) for individual processor services to deploy before failure."
     )
     return parser
diff --git a/python/biomedicus/examples/tutorial/__init__.py b/python/biomedicus/examples/tutorial/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/python/biomedicus/examples/tutorial/medications.py b/python/biomedicus/examples/tutorial/medications.py
new file mode 100644
index 00000000..80af6ae6
--- /dev/null
+++ b/python/biomedicus/examples/tutorial/medications.py
@@ -0,0 +1,20 @@
+from mtap import DocumentProcessor, run_processor
+
+
+class MedicationsProcessor(DocumentProcessor):
+    def process_document(self, document, params):
+        sentences = document.labels['sentences']
+        umls_concepts = document.labels['umls_concepts']
+        with document.get_labeler('medication_sentences') as MedicationSentence:
+            for sentence in sentences:
+                medication_concepts = []
+                for concept in umls_concepts.inside(sentence):
+                    if concept.tui == 'T121':
+                        medication_concepts.append(concept)
+                if len(medication_concepts) > 0:
+                    MedicationSentence(sentence.start_index, sentence.end_index,
+                                       concepts=medication_concepts)
+
+
+if __name__ == '__main__':
+    run_processor(MedicationsProcessor())
diff --git a/python/biomedicus/examples/sql_pipeline.py b/python/biomedicus/examples/tutorial/sql_pipeline.py
similarity index 96%
rename from python/biomedicus/examples/sql_pipeline.py
rename to python/biomedicus/examples/tutorial/sql_pipeline.py
index 7f70756b..2cbb1e4a 100644
--- a/python/biomedicus/examples/sql_pipeline.py
+++ b/python/biomedicus/examples/tutorial/sql_pipeline.py
@@ -18,9 +18,10 @@
 from argparse import ArgumentParser
 import sqlite3
 
-from biomedicus_client.pipeline import default_pipeline
 from mtap import Event, events_client
 
+from biomedicus_client import default_pipeline
+
 if __name__ == '__main__':
     parser = ArgumentParser(add_help=True, parents=[default_pipeline.argument_parser()])
     parser.add_argument('input_file')
@@ -31,12 +32,14 @@
         con = sqlite3.connect(args.input_file)
         cur = con.cursor()
 
+
         def source():
             for name, text in cur.execute("SELECT NAME, TEXT FROM DOCUMENTS"):
                 with Event(event_id=name, client=events) as e:
                     doc = e.create_document('plaintext', text)
                     yield doc
 
+
         count, = next(cur.execute("SELECT COUNT(*) FROM DOCUMENTS"))
         times = pipeline.run_multithread(source(), total=count)
         times.print()
diff --git a/python/biomedicus/examples/sql_pipeline_rtf.py b/python/biomedicus/examples/tutorial/sql_pipeline_rtf.py
similarity index 66%
rename from python/biomedicus/examples/sql_pipeline_rtf.py
rename to python/biomedicus/examples/tutorial/sql_pipeline_rtf.py
index e9abe350..95048e75 100644
--- a/python/biomedicus/examples/sql_pipeline_rtf.py
+++ b/python/biomedicus/examples/tutorial/sql_pipeline_rtf.py
@@ -18,37 +18,41 @@
 from argparse import ArgumentParser
 import sqlite3
 
-from biomedicus_client.pipeline import default_pipeline
 from mtap import Event, events_client
 
+from biomedicus_client import default_pipeline
+
 if __name__ == '__main__':
     parser = ArgumentParser(add_help=True, parents=[default_pipeline.argument_parser()])
     parser.add_argument('input_file')
     args = parser.parse_args()
     args.rtf = True  # Toggles --rtf flag always on.
-    # Can also skip parsing arguments and programmatically create the pipeline, see :func:`default_pipeline.create`.
+    # Can also skip parsing arguments and programmatically create the pipeline,
+    # see :func:`default_pipeline.create`.
     pipeline = default_pipeline.from_args(args)
     with events_client(pipeline.events_address) as events:
         con = sqlite3.connect(args.input_file)
         cur = con.cursor()
 
         def source():
-            # Note I recommended that RTF documents be stored as BLOBs since most databases do not support
-            # storing text in the standard Windows-1252 encoding of rtf documents.
-            # (RTF documents can actually use different encodings specified by a keyword like \ansicpg1252
-            # at the beginning of the document, but this is uncommon).
-            # If you are storing RTF documents ensure that they are initially read from file using the correct
-            # encoding [i.e. open('file.rtf', 'r', encoding='cp1252')] before storing in the database,
-            # so that special characters are preserved.
+            # Note: it is recommended that RTF documents be stored as BLOBs since most
+            # databases do not support storing text in the standard Windows-1252
+            # encoding of RTF documents. (RTF documents can actually use different
+            # encodings specified by a keyword like \ansicpg1252 at the beginning of
+            # the document, but this is uncommon).
+            # If you are storing RTF documents, ensure that they are initially read from
+            # file using the correct encoding [e.g. open('file.rtf', 'r', encoding='cp1252')]
+            # before storing in the database, so that special characters are preserved.
             for name, text in cur.execute("SELECT NAME, TEXT FROM DOCUMENTS"):
                 with Event(event_id=name, client=events) as e:
-                    e.binaries['rtf'] = text  # or "e.binaries['rtf'] = text.encode('cp1252')" in TEXT column case
+                    e.binaries['rtf'] = text
+                    # or "e.binaries['rtf'] = text.encode('cp1252')" in TEXT column case
                     yield e
 
         count, = next(cur.execute("SELECT COUNT(*) FROM DOCUMENTS"))
-        # Here we're adding the params since we're calling the pipeline with a source that provides Events rather
-        # than documents. This param will tell DocumentProcessors which document they need to process after the
-        # rtf converter creates that document.
+        # Here we're adding the params since we're calling the pipeline with a source that
+        # provides Events rather than documents. This param will tell DocumentProcessors
+        # which document they need to process after the rtf converter creates that document.
         times = pipeline.run_multithread(source(), params={'document_name': 'plaintext'}, total=count)
         times.print()
         con.close()
diff --git a/python/biomedicus/examples/sql_pipeline_rtf_only.py b/python/biomedicus/examples/tutorial/sql_pipeline_rtf_only.py
similarity index 66%
rename from python/biomedicus/examples/sql_pipeline_rtf_only.py
rename to python/biomedicus/examples/tutorial/sql_pipeline_rtf_only.py
index 39d15cc0..25362d65 100644
--- a/python/biomedicus/examples/sql_pipeline_rtf_only.py
+++ b/python/biomedicus/examples/tutorial/sql_pipeline_rtf_only.py
@@ -18,37 +18,43 @@
 from argparse import ArgumentParser
 import sqlite3
 
-from biomedicus_client.pipeline import rtf_to_text
 from mtap import Event, events_client
 
+from biomedicus_client import rtf_to_text
+
 if __name__ == '__main__':
     parser = ArgumentParser(add_help=True, parents=[rtf_to_text.argument_parser()])
     parser.add_argument('input_file')
     args = parser.parse_args()
     args.rtf = True  # Toggles --rtf flag always on.
-    # Can also skip parsing arguments and programmatically create the pipeline, see :func:`rtf_to_text.create`.
+    # Can also skip parsing arguments and programmatically create the pipeline,
+    # see :func:`rtf_to_text.create`.
     pipeline = rtf_to_text.from_args(args)
     with events_client(pipeline.events_address) as events:
         con = sqlite3.connect(args.input_file)
         cur = con.cursor()
 
+
         def source():
-            # Note I recommended that RTF documents be stored as BLOBs since most databases do not support
-            # storing text in the standard Windows-1252 encoding of rtf documents.
-            # (RTF documents can actually use different encodings specified by a keyword like \ansicpg1252
-            # at the beginning of the document, but this is uncommon).
-            # If you are storing RTF documents ensure that they are initially read from file using the correct
-            # encoding [i.e. open('file.rtf', 'r', encoding='cp1252')] before storing in the database,
-            # so that special characters are preserved.
+            # Note: it is recommended that RTF documents be stored as BLOBs, since most
+            # databases do not support storing text in the standard Windows-1252
+            # encoding of rtf documents. (RTF documents can actually use different
+            # encodings specified by a keyword like \ansicpg1252 at the beginning of
+            # the document, but this is uncommon).
+            # If you are storing RTF documents, ensure that they are initially read from
+            # file using the correct encoding [e.g. open('file.rtf', 'r', encoding='cp1252')]
+            # before storing in the database, so that special characters are preserved.
             for name, text in cur.execute("SELECT NAME, TEXT FROM DOCUMENTS"):
                 with Event(event_id=name, client=events) as e:
-                    e.binaries['rtf'] = text  # or "e.binaries['rtf'] = text.encode('cp1252')" in TEXT column case
+                    e.binaries['rtf'] = text
+                    # For a TEXT column, use: e.binaries['rtf'] = text.encode('cp1252')
                     yield e
 
+
         count, = next(cur.execute("SELECT COUNT(*) FROM DOCUMENTS"))
-        # Here we're adding the params since we're calling the pipeline with a source that provides Events rather
-        # than documents. This param will tell DocumentProcessors which document they need to process after the
-        # rtf converter creates that document.
+        # Here we're adding the params since we're calling the pipeline with a source that
+        # provides Events rather than documents. This param will tell DocumentProcessors
+        # which document they need to process after the rtf converter creates that document.
         times = pipeline.run_multithread(source(), params={'document_name': 'plaintext'}, total=count)
         times.print()
         con.close()
diff --git a/python/biomedicus/utilities/print_all_processors_metadata.py b/python/biomedicus/utilities/print_all_processors_metadata.py
index 12a74f8b..1ba3b9bf 100644
--- a/python/biomedicus/utilities/print_all_processors_metadata.py
+++ b/python/biomedicus/utilities/print_all_processors_metadata.py
@@ -18,14 +18,14 @@
 from tempfile import NamedTemporaryFile
 from typing import Optional
 
-from biomedicus.dependencies.stanza_parser import StanzaParser
 from biomedicus.java_support import run_java
-from biomedicus.negation.negex import NegexProcessor
-from biomedicus.sentences.bi_lstm import SentenceProcessor
 from biomedicus_client.cli_tools import Command
 
 
 def print_processor_meta(output_file: Optional[str] = None):
+    from biomedicus.dependencies.stanza_parser import StanzaParser
+    from biomedicus.negation.negex import NegexProcessor
+    from biomedicus.sentences.bi_lstm import SentenceProcessor
     if output_file is None:
         output_file = "processors.yaml"
     if os.path.isdir(output_file):
diff --git a/python/tests/scaleout/test_scaleout.py b/python/tests/scaleout/test_scaleout.py
index 5bafd32b..bb9d507d 100644
--- a/python/tests/scaleout/test_scaleout.py
+++ b/python/tests/scaleout/test_scaleout.py
@@ -35,7 +35,7 @@ def listen(p, e=None):
             deploy = Popen([sys.executable, "-m", "biomedicus", "deploy",
                             "--noninteractive",
                             "--log-level", "DEBUG",
-                            "--config", os.path.join(tmpdir, "scaleout_deploy_config.yml"),
+                            "--config", os.path.join(tmpdir, "scaleout_deploy.yml"),
                             "--startup-timeout", str(processor_timeout)],
                            stdout=PIPE, stderr=STDOUT)
             deploy_event = Event()
@@ -49,7 +49,7 @@ def listen(p, e=None):
             output_folder = os.path.join(tmpdir, "output")
             process = run([sys.executable, "-m", "biomedicus_client", "run",
                            "--log-level", "DEBUG",
-                           "--config", os.path.join(tmpdir, "scaleout_pipeline_config.yml"),
+                           "--config", os.path.join(tmpdir, "scaleout_pipeline.yml"),
                            os.fspath(input_folder),
                            "-o", output_folder])
             assert process.returncode == 0
diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile
index 5cb9960d..31cd9aa0 100644
--- a/tools/docker/Dockerfile
+++ b/tools/docker/Dockerfile
@@ -22,7 +22,7 @@ RUN useradd -d /biomedicus -ms /bin/bash biomedicus
 USER biomedicus
 WORKDIR /biomedicus
 
-RUN b9 download-data --with-stanza
+RUN b9 download-data
 
 EXPOSE 50000-51000
 ENTRYPOINT ["b9", "deploy", "--host", "0.0.0.0"]