From 0d2bfba63a44e85ad1fbd5345ceb8513b2a29fe1 Mon Sep 17 00:00:00 2001
From: Eli Fajardo
Date: Fri, 18 Oct 2024 17:36:49 -0400
Subject: [PATCH] benchmark updates/fixes

---
 .../dev_cuda-125_arch-x86_64.yaml             |  1 +
 .../examples_cuda-125_arch-x86_64.yaml        |  2 +-
 .../benchmarks/test_bench_e2e_dfp_pipeline.py |  6 +--
 tests/benchmarks/README.md                    | 34 +++++------------
 tests/benchmarks/e2e_test_configs.json        |  2 +-
 .../test_bench_completion_pipeline.py         |  3 ++
 tests/benchmarks/test_bench_e2e_pipelines.py  | 37 +++----------------
 .../test_bench_rag_standalone_pipeline.py     |  4 +-
 .../test_bench_vdb_upload_pipeline.py         |  4 +-
 9 files changed, 29 insertions(+), 64 deletions(-)

diff --git a/conda/environments/dev_cuda-125_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml
index af599fb7de..8122c9c64e 100644
--- a/conda/environments/dev_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml
@@ -73,6 +73,7 @@ dependencies:
 - pydantic
 - pylibcudf=24.10
 - pylint=3.0.3
+- pynvml=11.4.1
 - pypdfium2=4.30
 - pytest-asyncio
 - pytest-benchmark=4.0
diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml
index ffcae28e4a..b97f031d91 100644
--- a/conda/environments/examples_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml
@@ -54,7 +54,6 @@ dependencies:
 - requests-toolbelt=1.0
 - s3fs
 - scikit-learn=1.3.2
-- sentence-transformers=2.7
 - sqlalchemy<2.0
 - tqdm=4
 - transformers=4.36.2
@@ -77,5 +76,6 @@ dependencies:
   - milvus==2.3.5
   - nemollm==0.3.5
   - pymilvus==2.3.6
+  - sentence-transformers==2.7
   - torch==2.4.0+cu124
 name: examples_cuda-125_arch-x86_64
diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py
index 051e3b7f25..6376349bc7 100644
--- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py
+++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py
@@ -40,7 +40,7 @@
 from morpheus.stages.postprocess.serialize_stage import SerializeStage
 from morpheus.utils.column_info import DataFrameInputSchema
 from morpheus.utils.file_utils import date_extractor
-from morpheus.utils.logger import configure_logging
+from morpheus.utils.logger import set_log_level
 from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage
 from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage
 from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage
@@ -105,7 +105,7 @@ def dfp_training_pipeline_stages(pipe_config: Config,
                                  filenames: typing.List[str],
                                  reuse_cache=False):
 
-    configure_logging(log_level=logger.level)
+    set_log_level(log_level=logger.level)
 
     pipeline = LinearPipeline(pipe_config)
     pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames))
@@ -157,7 +157,7 @@ def dfp_inference_pipeline_stages(pipe_config: Config,
                                   output_filepath: str,
                                   reuse_cache=False):
 
-    configure_logging(log_level=logger.level)
+    set_log_level(log_level=logger.level)
 
     pipeline = LinearPipeline(pipe_config)
     pipeline.set_source(MultiFileSource(pipe_config, filenames=filenames))
diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
index 148dbb3d44..b5d0bf826a 100644
--- a/tests/benchmarks/README.md
+++ b/tests/benchmarks/README.md
@@ -48,32 +48,20 @@ Once Triton server finishes starting up, it will display the status of all loade
 
 ### Set up Morpheus Dev Container
 
-If you don't already have the Morpheus Dev container, run the following to build it:
-```bash
-./docker/build_container_dev.sh
-```
-
-Now run the container:
-```bash
-./docker/run_container_dev.sh
-```
+Follow the instructions [here](../../docs/source/developer_guide/contributing.md#build-in-docker-container) to set up your development environment in either a Docker container or Conda environment.
 
-Note that Morpheus containers are tagged by date. By default, `run_container_dev.sh` will try to use current date as tag. Therefore, if you are trying to run a container that was not built on the current date, you must set the `DOCKER_IMAGE_TAG` environment variable. For example,
-```bash
-DOCKER_IMAGE_TAG=dev-221003 ./docker/run_container_dev.sh
-```
+##### Install the required dependencies
 
-In the `/workspace` directory of the container, run the following to compile Morpheus:
+Run the following in the `/workspace` directory of your dev container:
 ```bash
-./scripts/compile.sh
+mamba env update \
+  -n ${CONDA_DEFAULT_ENV} \
+  --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml
 ```
 
-Now install Morpheus:
-```bash
-pip install -e /workspace
-```
+##### Fetch input data for benchmarks
 
-Fetch input data for benchmarks:
+Run the following in the `/workspace` directory of your dev container:
 ```bash
 ./scripts/fetch_data.py fetch validation
 ```
@@ -124,7 +112,6 @@ The `test_bench_e2e_pipelines.py` script contains several benchmarks within it.
 - `test_sid_nlp_e2e`
 - `test_abp_fil_e2e`
 - `test_phishing_nlp_e2e`
-- `test_cloudtrail_ae_e2e`
 
 For example, to run E2E benchmarks on the SID NLP workflow:
 ```bash
@@ -138,11 +125,10 @@ pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on --benchmark-w
 
 The console output should look like this:
 ```
---------------------------------------------------------------------------------- benchmark: 4 tests --------------------------------------------------------------------------------
+--------------------------------------------------------------------------------- benchmark: 3 tests --------------------------------------------------------------------------------
 Name (time in s)            Min             Max             Mean          StdDev           Median             IQR          Outliers     OPS         Rounds  Iterations
 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 test_sid_nlp_e2e         1.8907 (1.0)    1.9817 (1.0)    1.9400 (1.0)    0.0325 (2.12)   1.9438 (1.0)    0.0297 (1.21)    2;0  0.5155 (1.0)       5           1
-test_cloudtrail_ae_e2e   3.3403 (1.77)   3.3769 (1.70)   3.3626 (1.73)   0.0153 (1.0)    3.3668 (1.73)   0.0245 (1.0)     1;0  0.2974 (0.58)      5           1
 test_abp_fil_e2e         5.1271 (2.71)   5.3044 (2.68)   5.2083 (2.68)   0.0856 (5.59)   5.1862 (2.67)   0.1653 (6.75)    1;0  0.1920 (0.37)      5           1
 test_phishing_nlp_e2e    5.6629 (3.00)   6.0987 (3.08)   5.8835 (3.03)   0.1697 (11.08)  5.8988 (3.03)   0.2584 (10.55)   2;0  0.1700 (0.33)      5           1
 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -193,7 +179,7 @@ Additional benchmark stats for each workflow:
 
 ### Production DFP E2E Benchmarks
 
-Note that the `test_cloudtrail_ae_e2e` benchmarks measure performance of a pipeline built using [Starter DFP](../../examples/digital_fingerprinting/starter/README.md) stages. Separate benchmark tests are also provided to measure performance of the example [Production DFP](../../examples/digital_fingerprinting/production/README.md) pipelines. More information about running those benchmarks can be found [here](../../examples/digital_fingerprinting/production/morpheus/benchmarks/README.md).
+Separate benchmark tests are provided to measure performance of the example [Production DFP](../../examples/digital_fingerprinting/production/README.md) pipelines. More information about running those benchmarks can be found [here](../../examples/digital_fingerprinting/production/morpheus/benchmarks/README.md).
 
 You can use the same Dev container created here to run the Production DFP benchmarks. You would just need to install additional dependencies as follows:
 
diff --git a/tests/benchmarks/e2e_test_configs.json b/tests/benchmarks/e2e_test_configs.json
index eae85c1deb..cda83d3af5 100644
--- a/tests/benchmarks/e2e_test_configs.json
+++ b/tests/benchmarks/e2e_test_configs.json
@@ -1,5 +1,5 @@
 {
-    "triton_server_url": "localhost:8001",
+    "triton_server_url": "localhost:8000",
     "test_sid_nlp_e2e": {
         "file_path": "../../models/datasets/validation-data/sid-validation-data.csv",
         "repeat": 10,
diff --git a/tests/benchmarks/test_bench_completion_pipeline.py b/tests/benchmarks/test_bench_completion_pipeline.py
index 20f921d228..9ec4d99616 100644
--- a/tests/benchmarks/test_bench_completion_pipeline.py
+++ b/tests/benchmarks/test_bench_completion_pipeline.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import collections.abc
+import os
 import typing
 
 import pytest
@@ -82,4 +83,6 @@ def test_completion_pipe(benchmark: collections.abc.Callable[[collections.abc.Ca
                          config: Config,
                          dataset: DatasetManager,
                          llm_service_cls: type[LLMService]):
+    if llm_service_cls == OpenAIChatService:
+        os.environ.update({"OPENAI_API_KEY": "test_api_key"})
     benchmark(_run_pipeline, config, llm_service_cls, source_df=dataset["countries.csv"])
diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py
index e99e7bbc07..b12d07a2de 100644
--- a/tests/benchmarks/test_bench_e2e_pipelines.py
+++ b/tests/benchmarks/test_bench_e2e_pipelines.py
@@ -21,7 +21,6 @@
 
 from _utils import TEST_DIRS
 from morpheus.config import Config
-from morpheus.config import ConfigAutoEncoder
 from morpheus.config import ConfigFIL
 from morpheus.config import CppConfig
 from morpheus.config import PipelineModes
@@ -41,7 +40,7 @@
 from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage
 from morpheus.stages.preprocess.train_ae_stage import TrainAEStage
 from morpheus.utils.file_utils import load_labels_file
-from morpheus.utils.logger import configure_logging
+from morpheus.utils.logger import set_log_level
 
 E2E_CONFIG_FILE = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json")
 with open(E2E_CONFIG_FILE, 'r', encoding='UTF-8') as f:
@@ -50,7 +49,7 @@
 
 def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_file, model_name):
 
-    configure_logging(log_level=logging.INFO)
+    set_log_level(log_level=logging.DEBUG)
 
     pipeline = LinearPipeline(config)
     pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat))
@@ -77,7 +76,7 @@ def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_fil
 
 def fil_pipeline(config: Config, input_file, repeat, output_file, model_name):
 
-    configure_logging(log_level=logging.INFO)
+    set_log_level(log_level=logging.DEBUG)
 
     pipeline = LinearPipeline(config)
     pipeline.set_source(FileSourceStage(config, filename=input_file, repeat=repeat))
@@ -99,7 +98,8 @@ def fil_pipeline(config: Config, input_file, repeat, output_file, model_name):
 
 def ae_pipeline(config: Config, input_glob, repeat, train_data_glob, output_file):
 
-    configure_logging(log_level=logging.INFO)
+    set_log_level(log_level=logging.DEBUG)
+
     pipeline = LinearPipeline(config)
     pipeline.set_source(CloudTrailSourceStage(config, input_glob=input_glob, max_files=200, repeat=repeat))
     pipeline.add_stage(
@@ -196,30 +196,3 @@ def test_phishing_nlp_e2e(benchmark, tmp_path):
     model_name = "phishing-bert-onnx"
 
     benchmark(nlp_pipeline, config, input_filepath, repeat, vocab_filepath, output_filepath, model_name)
-
-
-@pytest.mark.benchmark
-def test_cloudtrail_ae_e2e(benchmark, tmp_path):
-
-    config = Config()
-    config.mode = PipelineModes.AE
-    config.num_threads = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["num_threads"]
-    config.pipeline_batch_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["pipeline_batch_size"]
-    config.model_max_batch_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["model_max_batch_size"]
-    config.feature_length = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["feature_length"]
-    config.edge_buffer_size = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["edge_buffer_size"]
-    config.class_labels = ["reconstruct_loss", "zscore"]
-
-    config.ae = ConfigAutoEncoder()
-    config.ae.userid_column_name = "userIdentityaccountId"
-    config.ae.userid_filter = "Account-123456789"
-    ae_cols_filepath = os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt')
-    config.ae.feature_columns = load_labels_file(ae_cols_filepath)
-    CppConfig.set_should_use_cpp(False)
-
-    input_glob = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["input_glob_path"]
-    repeat = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["repeat"]
-    train_glob = E2E_TEST_CONFIGS["test_cloudtrail_ae_e2e"]["train_glob_path"]
-    output_filepath = os.path.join(tmp_path, "cloudtrail_ae_e2e_output.csv")
-
-    benchmark(ae_pipeline, config, input_glob, repeat, train_glob, output_filepath)
diff --git a/tests/benchmarks/test_bench_rag_standalone_pipeline.py b/tests/benchmarks/test_bench_rag_standalone_pipeline.py
index 8f531326a8..1b8a7b65f3 100644
--- a/tests/benchmarks/test_bench_rag_standalone_pipeline.py
+++ b/tests/benchmarks/test_bench_rag_standalone_pipeline.py
@@ -135,10 +135,12 @@ def test_rag_standalone_pipe(benchmark: collections.abc.Callable[[collections.ab
                              repeat_count: int,
                              import_mod: types.ModuleType,
                              llm_service_name: str):
+    if llm_service_name=="openai":
+        os.environ.update({"OPENAI_API_KEY": "test_api_key"})
     collection_name = f"test_bench_rag_standalone_pipe_{llm_service_name}"
     populate_milvus(milvus_server_uri=milvus_server_uri,
                     collection_name=collection_name,
-                    resource_kwargs=import_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE),
+                    resource_kwargs=import_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE),
                     df=dataset["service/milvus_rss_data.json"],
                     overwrite=True)
 
diff --git a/tests/benchmarks/test_bench_vdb_upload_pipeline.py b/tests/benchmarks/test_bench_vdb_upload_pipeline.py
index f7864fb779..90e3321164 100644
--- a/tests/benchmarks/test_bench_vdb_upload_pipeline.py
+++ b/tests/benchmarks/test_bench_vdb_upload_pipeline.py
@@ -79,7 +79,7 @@ def _run_pipeline(config: Config,
     pipe.add_stage(
         WriteToVectorDBStage(config,
                              resource_name=collection_name,
-                             resource_kwargs=utils_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE),
+                             resource_kwargs=utils_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE),
                              recreate=True,
                              service="milvus",
                              uri=milvus_server_uri))
@@ -92,7 +92,8 @@ def _run_pipeline(config: Config,
 @pytest.mark.benchmark
 @pytest.mark.import_mod([
     os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py'),
-    os.path.join(TEST_DIRS.examples_dir, 'llm/common/web_scraper_stage.py'),
+    os.path.join(TEST_DIRS.examples_dir, 'llm/vdb_upload/module/web_scraper_stage.py'),
 ])
 @mock.patch('feedparser.http.get')
 @mock.patch('requests.Session')
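
A minimal usage sketch, not part of the patch itself: it shows how the updated benchmark prerequisites from the patched `tests/benchmarks/README.md` might be exercised after applying this change set in a Morpheus checkout. The saved patch file name below is hypothetical, and only the pytest flags that actually appear in the patched README are shown; see `tests/benchmarks/README.md` for the full benchmark invocation.

```bash
# Apply the patch (hypothetical local file name for this change set)
git am 0001-benchmark-updates-fixes.patch

# From the /workspace directory of the dev container, per the updated README:
# pull in the new/updated pins (pynvml, sentence-transformers) and fetch data
mamba env update \
  -n ${CONDA_DEFAULT_ENV} \
  --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml
./scripts/fetch_data.py fetch validation

# Run one of the remaining E2E benchmarks (SID NLP workflow)
cd tests/benchmarks
pytest -s --run_benchmark --benchmark-enable --benchmark-warmup=on \
    test_bench_e2e_pipelines.py::test_sid_nlp_e2e
```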