Skip to content

Commit

Permalink
release/v3.2.1 (#174)
Browse files — browse the repository at this point in the history
* Small changes, documentation updates

* Fixed print_all_processors_metadata importing modules at cli initialization
  • Loading branch information
benknoll-umn authored Apr 19, 2023
1 parent f312c43 commit a8d8604
Show file tree
Hide file tree
Showing 26 changed files with 191 additions and 111 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ jobs:
pip install flake8 pytest
pip install git+https://github.com/nlpie/mtap@main#egg=mtap
pip install ./biomedicus_client
pip install .[test] --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[test,stanza] --extra-index-url https://download.pytorch.org/whl/cpu
- name: Install dependencies (release)
if: ${{ startsWith(github.head_ref, 'release') && success() }}
run: |
python -m pip install --upgrade pip setuptools wheel
pip install flake8 pytest
SETUPTOOLS_SCM_PRETEND_VERSION=${GITHUB_HEAD_REF##*/} pip install ./biomedicus_client
pip install .[test] --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[test,stanza] --extra-index-url https://download.pytorch.org/whl/cpu
- name: Lint with flake8
run: |
pip install flake8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
from argparse import ArgumentParser
from typing import List

from biomedicus_client.sources import WatcherSource, rtf_source, RtfHandler, TxtHandler
from mtap import events_client
from mtap.pipeline import FilesInDirectoryProcessingSource

from biomedicus_client import default_pipeline
from biomedicus_client.cli_tools import Command
from biomedicus_client.pipeline import default_pipeline
from biomedicus_client.pipeline.sources import WatcherSource, RtfHandler, rtf_source, TxtHandler


class RunCommand(Command):
Expand Down
15 changes: 10 additions & 5 deletions biomedicus_client/src/biomedicus_client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,26 @@

import logging

from biomedicus_client import pipeline_confs
from biomedicus_client._run import RunCommand
from biomedicus_client.cli_tools import create_parser, WriteConfigsCommand
from biomedicus_client.pipeline import RunCommand, default_pipeline, rtf_to_text
from biomedicus_client.rtf_to_text import RunRtfToTextCommand

__all__ = ('main',)


CLIENT_CONFIGS = {
'pipeline': default_pipeline.default_pipeline_config,
'scaleout_pipeline': default_pipeline.scaleout_pipeline_config,
'rtf_only_pipeline': rtf_to_text.default_rtf_to_text_pipeline_config
'pipeline': pipeline_confs.DEFAULT,
'scaleout_pipeline': pipeline_confs.SCALEOUT,
'rtf_only_pipeline': pipeline_confs.RTF_TO_TEXT
}


def main(args=None):
parser = create_parser(
WriteConfigsCommand(CLIENT_CONFIGS),
RunCommand(),
rtf_to_text.RunRtfToTextCommand()
RunRtfToTextCommand()
)
conf = parser.parse_args(args)
logging.basicConfig(level=conf.log_level)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,16 @@
from pathlib import Path
from typing import Optional, Union

from importlib_resources import files
from importlib_resources import as_file
from mtap import Pipeline, LocalProcessor, RemoteProcessor
from mtap.serialization import SerializerRegistry, SerializationProcessor

__all__ = ['default_pipeline_config', 'scaleout_pipeline_config', 'create', 'from_args', 'argument_parser']
__all__ = ['create', 'from_args', 'argument_parser']

default_pipeline_config = files('biomedicus_client.pipeline').joinpath('biomedicus_default_pipeline.yml')
scaleout_pipeline_config = files('biomedicus_client.pipeline').joinpath('scaleout_pipeline_config.yml')
from biomedicus_client import pipeline_confs


def create(config: Optional[Union[str, Path]] = None,
def create(config: Optional[Union[str, bytes, Path]] = None,
*, events_addresses: Optional[str] = None,
rtf: bool = False,
rtf_address: str = "localhost:50200",
Expand All @@ -39,27 +38,26 @@ def create(config: Optional[Union[str, Path]] = None,
"""The biomedicus default pipeline for processing clinical documents.
Args
config (Optional[Union[str, Path]]): A path to an MTAP pipeline configuration YAML file to
use instead of the default.
config: A path to an MTAP pipeline configuration YAML file to use instead of the default.
Keyword Args
events_addresses (Optional[str]): The address (or addresses, comma separated) for the
events service.
rtf (bool): Whether to include the rtf processor at the start of the pipeline. The rtf
processor will convert RTF data stored in the "rtf" Binary on the event to the
"plaintext" Document.
rtf_address (str): The address of the remote rtf processor.
serializer (Optional[str]): An optional serializer (examples: 'json', 'yml', 'pickle').
output_directory (Optional[Path]): Where the serializer should output the serialized files.
address (Optional[str]): An optional address to use for all processors.
events_addresses: The address (or addresses, comma separated) for the events service.
rtf: Whether to include the rtf processor at the start of the pipeline. The rtf processor will convert RTF data
stored in the "rtf" Binary on the event to the "plaintext" Document.
rtf_address: The address of the remote rtf processor.
serializer: An optional serializer (examples: 'json', 'yml', 'pickle').
output_directory: Where the serializer should output the serialized files.
address: An optional address to use for all processors.
Returns
Pipeline
"""
if config is None:
config = default_pipeline_config
pipeline = Pipeline.from_yaml_file(config)
with as_file(pipeline_confs.DEFAULT) as config:
pipeline = Pipeline.from_yaml_file(config)
else:
pipeline = Pipeline.from_yaml_file(config)

if events_addresses is not None:
pipeline.events_address = events_addresses
Expand All @@ -73,6 +71,9 @@ def create(config: Optional[Union[str, Path]] = None,
pipeline.append(ser_comp)

if rtf:
if rtf_address is None:
rtf_address = 'localhost:50200'

rtf_processor = RemoteProcessor(processor_name='biomedicus-rtf',
address=rtf_address,
params={'output_document_name': 'plaintext'})
Expand Down Expand Up @@ -126,7 +127,6 @@ def argument_parser() -> ArgumentParser:
)
parser.add_argument(
'--rtf-address',
default="localhost:50200",
help="The address (or addresses, comma separated) for the rtf to text converter processor."
)
parser.add_argument(
Expand Down
21 changes: 21 additions & 0 deletions biomedicus_client/src/biomedicus_client/pipeline_confs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2023 Regents of the University of Minnesota.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides importlib_resources Traversable objects for built-in pipeline configuration files."""
from importlib_resources import files

__all__ = ('DEFAULT', 'SCALEOUT', 'RTF_TO_TEXT')

# Resolve this package's resource root once; every constant below is a
# Traversable for one of the built-in pipeline configuration YAML files.
_PACKAGE_ROOT = files(__name__)

# Default client pipeline configuration.
DEFAULT = _PACKAGE_ROOT.joinpath("biomedicus_default_pipeline.yml")
# Scale-out variant of the pipeline configuration.
SCALEOUT = _PACKAGE_ROOT.joinpath("scaleout_pipeline.yml")
# RTF-to-text pipeline configuration.
RTF_TO_TEXT = _PACKAGE_ROOT.joinpath("rtf_to_text_pipeline.yml")
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,17 @@

from argparse import ArgumentParser, Namespace
from os import PathLike

from importlib_resources import files
from pathlib import Path
from typing import Union, Optional, List

from biomedicus_client.sources import rtf_source
from importlib_resources import as_file
from mtap import Pipeline, LocalProcessor, EventProcessor, processor, events_client

from biomedicus_client import pipeline_confs
from biomedicus_client.cli_tools import Command
from biomedicus_client.pipeline.sources import rtf_source

__all__ = ['default_rtf_to_text_pipeline_config', 'create', 'from_args', 'argument_parser', 'RunRtfToTextCommand']

default_rtf_to_text_pipeline_config = files('biomedicus_client.pipeline').joinpath('rtf_to_text_pipeline.yml')
__all__ = ['create', 'from_args', 'argument_parser', 'RunRtfToTextCommand']


@processor('write-plaintext')
Expand Down Expand Up @@ -57,8 +55,10 @@ def create(config: Optional[Union[str, PathLike]] = None,
"""
if config is None:
config = default_rtf_to_text_pipeline_config
pipeline = Pipeline.from_yaml_file(config)
with as_file(pipeline_confs.RTF_TO_TEXT) as config:
pipeline = Pipeline.from_yaml_file(config)
else:
pipeline = Pipeline.from_yaml_file(config)

if events_addresses is not None:
pipeline.events_address = events_addresses
Expand All @@ -78,18 +78,15 @@ def argument_parser():
"""
parser = ArgumentParser(add_help=False)
parser.add_argument('--config', default=None,
help='Path to the pipeline configuration file.')
parser.add_argument('--config', default=None, help='Path to the pipeline configuration file.')
parser.add_argument('--output_directory', '-o', default='output', help="The output directory to write txt out.")
parser.add_argument('--events-addresses', default=None,
help="The address for the events service.")
parser.add_argument('--events-addresses', default=None, help="The address for the events service.")
return parser


def from_args(args: Namespace) -> Pipeline:
if not isinstance(args, Namespace):
raise ValueError('"args" parameter should be the parsed arguments from '
'"rtf_to_text.argument_parser()"')
raise ValueError('"args" parameter should be the parsed arguments from "rtf_to_text.argument_parser()"')
return create(**vars(args))


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,22 @@
import fnmatch
import time
from pathlib import Path
from typing import Generator, Iterator

from mtap import Event
from mtap.pipeline import ProcessingSource
from mtap.types import EventsClient
from watchdog.events import FileSystemEventHandler, FileSystemEvent

__all__ = [
'rtf_source',
'RtfHandler',
'TxtHandler',
'WatcherSource'
]

def rtf_source(input_directory: Path, extension_glob: str, events_client: EventsClient):

def rtf_source(input_directory: Path, extension_glob: str, events_client: EventsClient) -> Iterator[Event]:
input_directory = Path(input_directory)
for path in input_directory.rglob(extension_glob):
with path.open('rb', errors=None) as f:
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,11 @@ classifiers = [
'Topic :: Text Processing :: Linguistic'
]
dependencies = [
"biomedicus_client==3.2.0", # We get mtap, tqdm, and importlib_resources transitively from the client
"biomedicus_client==3.2.1", # We get mtap, tqdm, and importlib_resources transitively from the client
"numpy==1.24.2",
"pyyaml==6.0",
"regex==2023.3.23",
"torch==2.0.0",
"stanza==1.5.0",
"requests==2.28.2",
"watchdog==3.0.0",
]
Expand All @@ -52,6 +51,9 @@ test = [
docs = [
"sphinx==6.1.3",
]
stanza = [
"stanza==1.5.0",
]

[project.scripts]
b9 = "biomedicus.cli:main"
Expand Down
12 changes: 8 additions & 4 deletions python/biomedicus/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,23 @@
import logging

from biomedicus.deployment import (
default_deployment,
DownloadDataCommand,
confs as deployment_confs,
default_deployment,
rtf_to_text
)
from biomedicus.java_support import RunJavaCommand
from biomedicus.utilities.print_all_processors_metadata import PrintProcessorMetaCommand
from biomedicus_client import cli_tools
from biomedicus_client.cli_tools import WriteConfigsCommand

__all__ = ('main',)


SERVER_CONFIGS = {
'deploy': default_deployment.deployment_config,
'scaleout_deploy': default_deployment.scaleout_deploy_config,
'rtf_to_text': rtf_to_text.deployment_config
'deploy': deployment_confs.DEFAULT,
'scaleout_deploy': deployment_confs.SCALEOUT,
'rtf_to_text': deployment_confs.RTF_TO_TEXT
}


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 Regents of the University of Minnesota.
# Copyright 2023 Regents of the University of Minnesota.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Support for running biomedicus pipelines."""
"""Provides importlib_resources Traversable objects for built-in deployment configuration files."""
from importlib_resources import files

from biomedicus_client.pipeline._run import RunCommand
__all__ = ('DEFAULT', 'SCALEOUT', 'RTF_TO_TEXT')

# Resolve this package's resource root once; every constant below is a
# Traversable for one of the built-in deployment configuration YAML files.
_PACKAGE_ROOT = files(__name__)

# Default deployment configuration.
DEFAULT = _PACKAGE_ROOT.joinpath("biomedicus_deploy.yml")
# Scale-out deployment configuration.
SCALEOUT = _PACKAGE_ROOT.joinpath("scaleout_deploy.yml")
# RTF-to-text deployment configuration.
RTF_TO_TEXT = _PACKAGE_ROOT.joinpath("rtf_to_text_deploy.yml")
28 changes: 15 additions & 13 deletions python/biomedicus/deployment/default_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,45 +12,48 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from argparse import ArgumentParser
from contextlib import contextmanager
from typing import List, Optional, ContextManager

from importlib_resources import files
from importlib_resources import as_file
from mtap.deployment import Deployment

from biomedicus.deployment import confs
from biomedicus.deployment._data_downloading import check_data
from biomedicus.java_support import attach_biomedicus_jar
from biomedicus_client.cli_tools import Command

logger = logging.getLogger(__name__)

deployment_config = files('biomedicus.deployment').joinpath('biomedicus_deploy_config.yml')
scaleout_deploy_config = files('biomedicus.deployment').joinpath('scaleout_deploy_config.yml')


@contextmanager
def create_deployment(offline: bool = False,
def create_deployment(config: Optional[str] = None,
offline: bool = False,
download_data: bool = False,
noninteractive: bool = False,
config: Optional[str] = None,
log_level: Optional[str] = None,
jvm_classpath: Optional[str] = None,
rtf: bool = False,
host: Optional[str] = None,
startup_timeout: Optional[float] = None,
**_) -> ContextManager[Deployment]:
config = config if config is not None else deployment_config
log_level = log_level if log_level is not None else 'INFO'
if not offline:
check_data(download_data, noninteractive=noninteractive)
deployment = Deployment.from_yaml_file(config)

if config is None:
with as_file(confs.DEFAULT) as config:
deployment = Deployment.from_yaml_file(config)
else:
deployment = Deployment.from_yaml_file(config)

if host is not None:
deployment.global_settings.host = host

log_level = log_level if log_level is not None else 'INFO'
deployment.global_settings.log_level = log_level

startup_timeout = startup_timeout or 30
deployment.shared_processor_config.startup_timeout = startup_timeout

with attach_biomedicus_jar(
deployment.shared_processor_config.java_classpath,
jvm_classpath
Expand All @@ -72,7 +75,6 @@ def argument_parser():
parser = ArgumentParser(add_help=False)
parser.add_argument(
'--config',
default=deployment_config,
help='A path to a deployment configuration file to use instead of the'
'default deployment configuration.'
)
Expand Down
Loading

0 comments on commit a8d8604

Please sign in to comment.