Skip to content

Commit

Permalink
Merge branch 'david-cpu-only-docs' into david-contrib-conda-yaml-1660
Browse files Browse the repository at this point in the history
  • Loading branch information
dagardner-nv committed Oct 19, 2024
2 parents ccd288e + 77c5afc commit b9d5e07
Show file tree
Hide file tree
Showing 62 changed files with 732 additions and 316 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci_pipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ jobs:
test:
name: Test
runs-on: linux-amd64-gpu-v100-latest-1
timeout-minutes: 60
# Consider lowering this back down to 60 minutes per https://github.com/nv-morpheus/Morpheus/issues/1948
timeout-minutes: 90
container:
credentials:
username: '$oauthtoken'
Expand Down
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@
r'^http://$',
r'^https://$',
r'https://(platform\.)?openai.com',
r'https://code.visualstudio.com'
]

# Add any paths that contain templates here, relative to this directory.
Expand Down
2 changes: 1 addition & 1 deletion examples/gnn_fraud_detection_pipeline/stages/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def load_model(model_dir: str,
n_layers=hyperparameters['n_layers'],
embedding_size=hyperparameters['embedding_size'],
target=hyperparameters['target_node']).to(device)
model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt')))
model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pt'), weights_only=False))

return model, graph, hyperparameters

Expand Down
2 changes: 1 addition & 1 deletion examples/llm/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import logging

import pymilvus
from langchain.embeddings import HuggingFaceEmbeddings # pylint: disable=no-name-in-module
from langchain_community.embeddings import HuggingFaceEmbeddings

from morpheus_llm.llm.services.llm_service import LLMService
from morpheus_llm.llm.services.nemo_llm_service import NeMoLLMService
Expand Down
2 changes: 1 addition & 1 deletion examples/llm/vdb_upload/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

# pylint: disable=no-name-in-module
from langchain.document_loaders.rss import RSSFeedLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.milvus import Milvus
from langchain_community.embeddings import HuggingFaceEmbeddings

from examples.llm.vdb_upload.vdb_utils import DEFAULT_RSS_URLS
from morpheus.utils.logging_timer import log_time
Expand Down
11 changes: 4 additions & 7 deletions examples/llm/vdb_upload/module/content_extractor_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
import pypdfium2 as libpdfium
from docx import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel # pylint: disable=no-name-in-module
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import ValidationError
from pydantic import field_validator
Expand All @@ -43,9 +44,7 @@ class CSVConverterSchema(BaseModel):
chunk_overlap: int = 102 # Example default value
chunk_size: int = 1024
text_column_names: List[str]

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


class ContentExtractorSchema(BaseModel):
Expand All @@ -54,6 +53,7 @@ class ContentExtractorSchema(BaseModel):
chunk_size: int = 512
converters_meta: Dict[str, Dict] = Field(default_factory=dict)
num_threads: int = 10
model_config = ConfigDict(extra='forbid')

@field_validator('converters_meta', mode="before")
@classmethod
Expand All @@ -66,9 +66,6 @@ def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]:
validated_meta[key] = value
return validated_meta

class Config:
extra = "forbid"


logger = logging.getLogger(__name__)

Expand Down
5 changes: 2 additions & 3 deletions examples/llm/vdb_upload/module/file_source_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import mrc
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import ValidationError

Expand Down Expand Up @@ -48,9 +49,7 @@ class FileSourcePipeSchema(BaseModel):
vdb_resource_name: str
watch: bool = False # Flag to watch file changes
watch_interval: float = -5.0 # Interval to watch file changes

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


FileSourcePipeLoaderFactory = ModuleLoaderFactory("file_source_pipe", "morpheus_examples_llm", FileSourcePipeSchema)
Expand Down
9 changes: 4 additions & 5 deletions examples/llm/vdb_upload/module/rss_source_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@

import mrc
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import ValidationError
from pydantic import validator
from pydantic import field_validator

from morpheus.modules.general.monitor import MonitorLoaderFactory
from morpheus.modules.input.rss_source import RSSSourceLoaderFactory
Expand Down Expand Up @@ -52,8 +53,9 @@ class RSSSourcePipeSchema(BaseModel):
strip_markup: bool = True
vdb_resource_name: str
web_scraper_config: Optional[Dict[Any, Any]] = None
model_config = ConfigDict(extra='forbid')

@validator('feed_input', pre=True)
@field_validator('feed_input')
def validate_feed_input(cls, to_validate): # pylint: disable=no-self-argument
if isinstance(to_validate, str):
return [to_validate]
Expand All @@ -63,9 +65,6 @@ def validate_feed_input(cls, to_validate): # pylint: disable=no-self-argument

raise ValueError('feed_input must be a string or a list of strings')

class Config:
extra = "forbid"


RSSSourcePipeLoaderFactory = ModuleLoaderFactory("rss_source_pipe", "morpheus_examples_llm", RSSSourcePipeSchema)

Expand Down
9 changes: 3 additions & 6 deletions examples/llm/vdb_upload/module/schema_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import mrc
import mrc.core.operators as ops
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import ValidationError

Expand All @@ -39,16 +40,12 @@ class ColumnTransformSchema(BaseModel):
dtype: str
op_type: str
from_: Optional[str] = Field(None, alias="from")

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


class SchemaTransformSchema(BaseModel):
schema_transform_config: Dict[str, Dict[str, Any]] = Field(default_factory=dict)

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


SchemaTransformLoaderFactory = ModuleLoaderFactory("schema_transform", "morpheus_examples_llm", SchemaTransformSchema)
Expand Down
5 changes: 2 additions & 3 deletions examples/llm/vdb_upload/module/vdb_resource_tagging_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import mrc
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import ValidationError

from morpheus.messages import ControlMessage
Expand All @@ -27,9 +28,7 @@

class VDBResourceTaggingSchema(BaseModel):
vdb_resource_name: str

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


VDBResourceTaggingLoaderFactory = ModuleLoaderFactory("vdb_resource_tagging",
Expand Down
7 changes: 3 additions & 4 deletions examples/llm/vdb_upload/module/web_scraper_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import requests_cache
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel # pylint: disable=no-name-in-module
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import ValidationError

import cudf
Expand All @@ -41,9 +42,7 @@ class WebScraperSchema(BaseModel):
enable_cache: bool = False
cache_path: str = "./.cache/http/RSSDownloadStage.sqlite"
cache_dir: str = "./.cache/llm/rss"

class Config:
extra = "forbid"
model_config = ConfigDict(extra='forbid')


WebScraperLoaderFactory = ModuleLoaderFactory("web_scraper", "morpheus_examples_llm", WebScraperSchema)
Expand Down
5 changes: 3 additions & 2 deletions examples/ransomware_detection/stages/create_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

import cudf

from common.data_models import FeatureConfig # pylint: disable=no-name-in-module
from common.feature_extractor import FeatureExtractor # pylint: disable=no-name-in-module
from morpheus.cli.register_stage import register_stage
from morpheus.config import Config
from morpheus.config import PipelineModes
Expand All @@ -30,6 +28,9 @@
from morpheus.pipeline.control_message_stage import ControlMessageStage
from morpheus.pipeline.preallocator_mixin import PreallocatorMixin

from common.data_models import FeatureConfig # pylint: disable=no-name-in-module # isort: skip
from common.feature_extractor import FeatureExtractor # pylint: disable=no-name-in-module # isort: skip


@register_stage("create-features", modes=[PipelineModes.FIL])
class CreateFeaturesRWStage(PreallocatorMixin, ControlMessageStage):
Expand Down
3 changes: 2 additions & 1 deletion examples/ransomware_detection/stages/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import mrc
import pandas as pd

from common.data_models import SnapshotData # pylint: disable=no-name-in-module
from morpheus.cli.register_stage import register_stage
from morpheus.common import TypeId
from morpheus.config import Config
Expand All @@ -27,6 +26,8 @@
from morpheus.messages import InferenceMemoryFIL
from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage

from common.data_models import SnapshotData # pylint: disable=no-name-in-module #isort:skip


@register_stage("ransomware-preprocess", modes=[PipelineModes.FIL])
class PreprocessingRWStage(PreprocessBaseStage):
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ filterwarnings = [

testpaths = ["tests"]

addopts = "--benchmark-disable"
# Don't run the benchmarks by default, don't search for tests in the tests/_utils directory which will trigger false
# alarms
addopts = "--benchmark-disable --ignore=tests/_utils"

asyncio_mode = "auto"

Expand Down
9 changes: 7 additions & 2 deletions python/morpheus/morpheus/controllers/file_to_df_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from morpheus.utils.column_info import PreparedDFInfo
from morpheus.utils.column_info import process_dataframe
from morpheus.utils.downloader import Downloader
from morpheus.utils.downloader import DownloadMethods

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -103,6 +104,9 @@ class FileToDFController:
Directory where cache will be stored.
timestamp_column_name : str
Name of the timestamp column.
download_method : typing.Union[DownloadMethods, str], optional, default = DownloadMethods.DASK_THREAD
The download method to use, if the `MORPHEUS_FILE_DOWNLOAD_TYPE` environment variable is set, it takes
presedence.
"""

def __init__(self,
Expand All @@ -111,7 +115,8 @@ def __init__(self,
file_type: FileTypes,
parser_kwargs: dict,
cache_dir: str,
timestamp_column_name: str):
timestamp_column_name: str,
download_method: typing.Union[DownloadMethods, str] = DownloadMethods.DASK_THREAD):

self._schema = schema
self._file_type = file_type
Expand All @@ -120,7 +125,7 @@ def __init__(self,
self._cache_dir = os.path.join(cache_dir, "file_cache")
self._timestamp_column_name = timestamp_column_name

self._downloader = Downloader()
self._downloader = Downloader(download_method=download_method)

def _get_or_create_dataframe_from_batch(
self, file_object_batch: typing.Tuple[fsspec.core.OpenFiles, int]) -> typing.Tuple[pd.DataFrame, bool]:
Expand Down
3 changes: 1 addition & 2 deletions python/morpheus/morpheus/controllers/monitor_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

logger = logging.getLogger(__name__)

SupportedTypes = typing.Union[DataFrameType, MessageMeta, ControlMessage, list]


class MonitorController:
"""
Expand Down Expand Up @@ -59,6 +57,7 @@ class MonitorController:
Custom implementation of tqdm if required.
"""

SupportedTypes = typing.Union[DataFrameType, MessageMeta, ControlMessage, list]
controller_count: int = 0

def __init__(self,
Expand Down
4 changes: 2 additions & 2 deletions python/morpheus/morpheus/controllers/rss_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import requests_cache

from morpheus.messages import MessageMeta
from morpheus.utils.type_aliases import DataFrameModule
from morpheus.utils.type_aliases import DataFrameType
from morpheus.utils.type_aliases import DataFrameTypeStr
from morpheus.utils.type_utils import get_df_class

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -107,7 +107,7 @@ def __init__(self,
stop_after: int = 0,
interval_secs: float = 600,
should_stop_fn: Callable[[], bool] = None,
df_type: DataFrameTypeStr = "cudf"):
df_type: DataFrameModule = "cudf"):
if IMPORT_EXCEPTION is not None:
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION

Expand Down
Loading

0 comments on commit b9d5e07

Please sign in to comment.