Skip to content
This repository was archived by the owner on Jun 3, 2025. It is now read-only.

Commit f4f22cc

Browse files
authored
Merge branch 'release/1.7' into cherry-pick-sparsezoo-legacy-analyze-name-change
2 parents d1afb3a + 7e7715a commit f4f22cc

File tree

4 files changed

+35
-5
lines changed

4 files changed

+35
-5
lines changed

src/deepsparse/benchmark/benchmark_pipeline.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,11 @@ def _clear_measurements():
346346
"Generated no batch timings, try extending benchmark time with '--time'"
347347
)
348348

349+
if SupportedTasks.is_text_generation(task) or SupportedTasks.is_code_generation(
350+
task
351+
):
352+
kwargs.pop("middleware_manager")
353+
349354
return batch_times, total_run_time, num_streams
350355

351356

src/deepsparse/evaluation/cli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
##########
5959
Example command for evaluating a quantized MPT model from SparseZoo using the Deepsparse Engine.
6060
The evaluation will be run using `lm-evaluation-harness` on `hellaswag` and `gsm8k` datasets:
61-
deepsparse.eval zoo:mpt-7b-mpt_pretrain-base_quantized \
61+
deepsparse.evaluate zoo:mpt-7b-mpt_pretrain-base_quantized \
6262
--dataset hellaswag \
6363
--dataset gsm8k \
6464
--integration lm-evaluation-harness \
@@ -173,6 +173,14 @@ def main(
173173
metrics,
174174
integration_args,
175175
):
176+
"""
177+
Evaluate MODEL_PATH on the various evaluation integrations
178+
179+
- MODEL_PATH can be path to an ONNX model, local directory
180+
containing ONNX model (including all the auxiliary files)
181+
or a SparseZoo stub
182+
183+
"""
176184
# join datasets to a list if multiple datasets are passed
177185
datasets = list(dataset) if not isinstance(dataset, str) else dataset
178186
# format kwargs to a dict

src/deepsparse/evaluation/integrations/lm_evaluation_harness.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
from deepsparse import Pipeline
2626
from deepsparse.evaluation.registry import EvaluationRegistry
2727
from deepsparse.evaluation.results import Dataset, Evaluation, Metric, Result
28-
from deepsparse.evaluation.utils import LM_EVALUATION_HARNESS
28+
from deepsparse.evaluation.utils import (
29+
LM_EVALUATION_HARNESS,
30+
LM_EVALUATION_HARNESS_ALIASES,
31+
)
2932
from deepsparse.utils.data import numpy_log_softmax
3033
from lm_eval import evaluator, tasks, utils
3134
from lm_eval.api.instance import Instance
@@ -39,7 +42,9 @@
3942
__all__ = ["integration_eval"]
4043

4144

42-
@EvaluationRegistry.register(name=LM_EVALUATION_HARNESS, alias="lm-eval-harness")
45+
@EvaluationRegistry.register(
46+
name=LM_EVALUATION_HARNESS, alias=LM_EVALUATION_HARNESS_ALIASES
47+
)
4348
def integration_eval(
4449
pipeline: Pipeline,
4550
datasets: Union[List[str], str],
@@ -150,6 +155,10 @@ def max_length(self) -> int:
150155
def max_gen_toks(self) -> int:
151156
return self._max_gen_toks
152157

158+
@property
159+
def model(self) -> Pipeline:
160+
return self.pipeline
161+
153162
def loglikelihood(self, requests) -> List[Tuple[float, bool]]:
154163
"""
155164
Copied directly from

src/deepsparse/evaluation/utils.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from deepsparse import Pipeline
2020
from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE
21+
from sparsezoo.utils.registry import standardize_lookup_name
2122

2223

2324
__all__ = [
@@ -29,20 +30,27 @@
2930
_LOGGER = logging.getLogger(__name__)
3031

3132
LM_EVALUATION_HARNESS = "lm-evaluation-harness"
33+
LM_EVALUATION_HARNESS_ALIASES = ["lm-eval-harness", "lm-eval"]
3234
PERPLEXITY = "perplexity"
3335

3436

3537
def potentially_check_dependency_import(integration_name: str) -> bool:
3638
"""
3739
Check if the `integration_name` requires importing a dependency.
40+
Checking involves comparing the `integration_name` to the known
41+
integrations (e.g. 'lm-evaluation-harness') or their aliases.
3842
If so, check if the dependency is installed and return True if it is.
3943
Otherwise, return False.
4044
41-
:param integration_name: The name of the integration to check
45+
:param integration_name: The name of the integration to check. The name
46+
is standardized using `standardize_lookup_name` before checking.
4247
:return: True if the dependency is installed, False otherwise
4348
"""
49+
integration_name = standardize_lookup_name(integration_name)
4450

45-
if integration_name == LM_EVALUATION_HARNESS:
51+
if integration_name == LM_EVALUATION_HARNESS or any(
52+
integration_name == alias for alias in LM_EVALUATION_HARNESS_ALIASES
53+
):
4654
from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness
4755

4856
try_import_lm_evaluation_harness()

0 commit comments

Comments (0)