10 changes: 10 additions & 0 deletions src/google/adk/evaluation/eval_metrics.py
@@ -14,6 +14,7 @@

from __future__ import annotations

import abc
from enum import Enum
from typing import Optional
from typing import Union
@@ -362,3 +363,12 @@ class MetricInfo(EvalBaseModel):
metric_value_info: MetricValueInfo = Field(
description="Information on the nature of values supported by the metric."
)


class MetricInfoProvider(abc.ABC):
"""Interface for providing MetricInfo."""

@abc.abstractmethod
def get_metric_info(self) -> MetricInfo:
"""Returns MetricInfo for a given metric."""
raise NotImplementedError
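
For orientation, a concrete provider only has to implement get_metric_info(). The sketch below is illustrative rather than part of this change: the class name, metric name, and description are hypothetical, and it assumes the models above are importable from google.adk.evaluation.eval_metrics.

from google.adk.evaluation.eval_metrics import Interval
from google.adk.evaluation.eval_metrics import MetricInfo
from google.adk.evaluation.eval_metrics import MetricInfoProvider
from google.adk.evaluation.eval_metrics import MetricValueInfo


class MyCustomMetricInfoProvider(MetricInfoProvider):
  """Hypothetical provider for a custom [0, 1]-valued metric."""

  def get_metric_info(self) -> MetricInfo:
    # The metric name and description are placeholders for illustration.
    return MetricInfo(
        metric_name="my_custom_metric",
        description="Example custom metric; values closer to 1 are better.",
        metric_value_info=MetricValueInfo(
            interval=Interval(min_value=0.0, max_value=1.0)
        ),
    )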
18 changes: 0 additions & 18 deletions src/google/adk/evaluation/final_response_match_v1.py
@@ -23,10 +23,6 @@
from .eval_case import ConversationScenario
from .eval_case import Invocation
from .eval_metrics import EvalMetric
from .eval_metrics import Interval
from .eval_metrics import MetricInfo
from .eval_metrics import MetricValueInfo
from .eval_metrics import PrebuiltMetrics
from .evaluator import EvalStatus
from .evaluator import EvaluationResult
from .evaluator import Evaluator
@@ -42,20 +38,6 @@ class RougeEvaluator(Evaluator):
def __init__(self, eval_metric: EvalMetric):
self._eval_metric = eval_metric

@staticmethod
def get_metric_info() -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.RESPONSE_MATCH_SCORE.value,
description=(
"This metric evaluates if the agent's final response matches a"
" golden/expected final response using Rouge_1 metric. Value range"
" for this metric is [0,1], with values closer to 1 more desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)

@override
def evaluate_invocations(
self,
18 changes: 0 additions & 18 deletions src/google/adk/evaluation/final_response_match_v2.py
@@ -26,11 +26,7 @@
from .eval_case import Invocation
from .eval_metrics import EvalMetric
from .eval_metrics import EvalStatus
from .eval_metrics import Interval
from .eval_metrics import LlmAsAJudgeCriterion
from .eval_metrics import MetricInfo
from .eval_metrics import MetricValueInfo
from .eval_metrics import PrebuiltMetrics
from .evaluator import EvaluationResult
from .evaluator import PerInvocationResult
from .llm_as_judge import AutoRaterScore
@@ -154,20 +150,6 @@ def __init__(
)
self._auto_rater_prompt_template = _FINAL_RESPONSE_MATCH_V2_PROMPT

@staticmethod
def get_metric_info() -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.FINAL_RESPONSE_MATCH_V2.value,
description=(
"This metric evaluates if the agent's final response matches a"
" golden/expected final response using LLM as a judge. Value range"
" for this metric is [0,1], with values closer to 1 more desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)

@override
def format_auto_rater_prompt(
self,
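
Call sites that previously used the removed staticmethod would now fetch the same MetricInfo from the provider module introduced later in this change; a minimal sketch, assuming such external callers exist:

# Before this change (staticmethod removed above):
# info = FinalResponseMatchV2Evaluator.get_metric_info()

# After this change, the metadata comes from the provider module.
from google.adk.evaluation.metric_info_providers import FinalResponseMatchV2EvaluatorMetricInfoProvider

info = FinalResponseMatchV2EvaluatorMetricInfoProvider().get_metric_info()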
19 changes: 0 additions & 19 deletions src/google/adk/evaluation/hallucinations_v1.py
@@ -40,10 +40,6 @@
from .eval_case import InvocationEvents
from .eval_metrics import EvalMetric
from .eval_metrics import HallucinationsCriterion
from .eval_metrics import Interval
from .eval_metrics import MetricInfo
from .eval_metrics import MetricValueInfo
from .eval_metrics import PrebuiltMetrics
from .evaluator import EvalStatus
from .evaluator import EvaluationResult
from .evaluator import Evaluator
@@ -310,21 +306,6 @@ def _setup_auto_rater(self) -> BaseLlm:
llm_class = llm_registry.resolve(model_id)
return llm_class(model=model_id)

@staticmethod
def get_metric_info() -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.HALLUCINATIONS_V1.value,
description=(
"This metric assesses whether a model response contains any false,"
" contradictory, or unsupported claims using a LLM as judge. Value"
" range for this metric is [0,1], with values closer to 1 more"
" desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)

def _create_context_for_step(
self,
app_details: Optional[AppDetails],
30 changes: 19 additions & 11 deletions src/google/adk/evaluation/metric_evaluator_registry.py
@@ -24,6 +24,14 @@
from .evaluator import Evaluator
from .final_response_match_v2 import FinalResponseMatchV2Evaluator
from .hallucinations_v1 import HallucinationsV1Evaluator
+from .metric_info_providers import FinalResponseMatchV2EvaluatorMetricInfoProvider
+from .metric_info_providers import HallucinationsV1EvaluatorMetricInfoProvider
+from .metric_info_providers import PerTurnUserSimulatorQualityV1MetricInfoProvider
+from .metric_info_providers import ResponseEvaluatorMetricInfoProvider
+from .metric_info_providers import RubricBasedFinalResponseQualityV1EvaluatorMetricInfoProvider
+from .metric_info_providers import RubricBasedToolUseV1EvaluatorMetricInfoProvider
+from .metric_info_providers import SafetyEvaluatorV1MetricInfoProvider
+from .metric_info_providers import TrajectoryEvaluatorMetricInfoProvider
from .response_evaluator import ResponseEvaluator
from .rubric_based_final_response_quality_v1 import RubricBasedFinalResponseQualityV1Evaluator
from .rubric_based_tool_use_quality_v1 import RubricBasedToolUseV1Evaluator
@@ -91,44 +99,44 @@ def _get_default_metric_evaluator_registry() -> MetricEvaluatorRegistry:
metric_evaluator_registry = MetricEvaluatorRegistry()

metric_evaluator_registry.register_evaluator(
-      metric_info=TrajectoryEvaluator.get_metric_info(),
+      metric_info=TrajectoryEvaluatorMetricInfoProvider().get_metric_info(),
evaluator=TrajectoryEvaluator,
)

metric_evaluator_registry.register_evaluator(
-      metric_info=ResponseEvaluator.get_metric_info(
+      metric_info=ResponseEvaluatorMetricInfoProvider(
PrebuiltMetrics.RESPONSE_EVALUATION_SCORE.value
-      ),
+      ).get_metric_info(),
evaluator=ResponseEvaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=ResponseEvaluator.get_metric_info(
+      metric_info=ResponseEvaluatorMetricInfoProvider(
PrebuiltMetrics.RESPONSE_MATCH_SCORE.value
-      ),
+      ).get_metric_info(),
evaluator=ResponseEvaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=SafetyEvaluatorV1.get_metric_info(),
+      metric_info=SafetyEvaluatorV1MetricInfoProvider().get_metric_info(),
evaluator=SafetyEvaluatorV1,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=FinalResponseMatchV2Evaluator.get_metric_info(),
+      metric_info=FinalResponseMatchV2EvaluatorMetricInfoProvider().get_metric_info(),
evaluator=FinalResponseMatchV2Evaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=RubricBasedFinalResponseQualityV1Evaluator.get_metric_info(),
+      metric_info=RubricBasedFinalResponseQualityV1EvaluatorMetricInfoProvider().get_metric_info(),
evaluator=RubricBasedFinalResponseQualityV1Evaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=HallucinationsV1Evaluator.get_metric_info(),
+      metric_info=HallucinationsV1EvaluatorMetricInfoProvider().get_metric_info(),
evaluator=HallucinationsV1Evaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=RubricBasedToolUseV1Evaluator.get_metric_info(),
+      metric_info=RubricBasedToolUseV1EvaluatorMetricInfoProvider().get_metric_info(),
evaluator=RubricBasedToolUseV1Evaluator,
)
metric_evaluator_registry.register_evaluator(
-      metric_info=PerTurnUserSimulatorQualityV1.get_metric_info(),
+      metric_info=PerTurnUserSimulatorQualityV1MetricInfoProvider().get_metric_info(),
evaluator=PerTurnUserSimulatorQualityV1,
)
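
For illustration, the same registration pattern should also work outside the default helper; a minimal sketch, assuming MetricEvaluatorRegistry can be constructed standalone and reusing only classes that appear in this diff:

from google.adk.evaluation.eval_metrics import PrebuiltMetrics
from google.adk.evaluation.metric_evaluator_registry import MetricEvaluatorRegistry
from google.adk.evaluation.metric_info_providers import ResponseEvaluatorMetricInfoProvider
from google.adk.evaluation.response_evaluator import ResponseEvaluator

# Build a registry and register ResponseEvaluator for the response-match
# metric, mirroring _get_default_metric_evaluator_registry above.
registry = MetricEvaluatorRegistry()
registry.register_evaluator(
    metric_info=ResponseEvaluatorMetricInfoProvider(
        PrebuiltMetrics.RESPONSE_MATCH_SCORE.value
    ).get_metric_info(),
    evaluator=ResponseEvaluator,
)

This keeps evaluator classes free of registry metadata: the provider owns the MetricInfo, and the registry simply pairs the two at registration time.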

185 changes: 185 additions & 0 deletions src/google/adk/evaluation/metric_info_providers.py
@@ -0,0 +1,185 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from .eval_metrics import Interval
from .eval_metrics import MetricInfo
from .eval_metrics import MetricInfoProvider
from .eval_metrics import MetricValueInfo
from .eval_metrics import PrebuiltMetrics


class TrajectoryEvaluatorMetricInfoProvider(MetricInfoProvider):
"""Metric info provider for TrajectoryEvaluator."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.TOOL_TRAJECTORY_AVG_SCORE.value,
description=(
"This metric compares two tool call trajectories (expected vs."
" actual) for the same user interaction. It performs an exact match"
" on the tool name and arguments for each step in the trajectory."
" A score of 1.0 indicates a perfect match, while 0.0 indicates a"
" mismatch. Higher values are better."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class ResponseEvaluatorMetricInfoProvider(MetricInfoProvider):
"""Metric info provider for ResponseEvaluator."""

def __init__(self, metric_name: str):
self._metric_name = metric_name

def get_metric_info(self) -> MetricInfo:
"""Returns MetricInfo for the given metric name."""
if PrebuiltMetrics.RESPONSE_EVALUATION_SCORE.value == self._metric_name:
return MetricInfo(
metric_name=PrebuiltMetrics.RESPONSE_EVALUATION_SCORE.value,
description=(
"This metric evaluates how coherent agent's response was. Value"
" range of this metric is [1,5], with values closer to 5 more"
" desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=1.0, max_value=5.0)
),
)
elif PrebuiltMetrics.RESPONSE_MATCH_SCORE.value == self._metric_name:
return MetricInfo(
metric_name=PrebuiltMetrics.RESPONSE_MATCH_SCORE.value,
description=(
"This metric evaluates if the agent's final response matches a"
" golden/expected final response using Rouge_1 metric. Value"
" range for this metric is [0,1], with values closer to 1 more"
" desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)
else:
raise ValueError(f"`{self._metric_name}` is not supported.")


class SafetyEvaluatorV1MetricInfoProvider(MetricInfoProvider):
"""Metric info provider for SafetyEvaluatorV1."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.SAFETY_V1.value,
description=(
"This metric evaluates the safety (harmlessness) of an Agent's"
" Response. Value range of the metric is [0, 1], with values closer"
" to 1 to be more desirable (safe)."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class FinalResponseMatchV2EvaluatorMetricInfoProvider(MetricInfoProvider):
"""Metric info provider for FinalResponseMatchV2Evaluator."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.FINAL_RESPONSE_MATCH_V2.value,
description=(
"This metric evaluates if the agent's final response matches a"
" golden/expected final response using LLM as a judge. Value range"
" for this metric is [0,1], with values closer to 1 more desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class RubricBasedFinalResponseQualityV1EvaluatorMetricInfoProvider(
MetricInfoProvider
):
"""Metric info provider for RubricBasedFinalResponseQualityV1Evaluator."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.RUBRIC_BASED_FINAL_RESPONSE_QUALITY_V1.value,
description=(
"This metric assess if the agent's final response against a set of"
" rubrics using LLM as a judge. Value range for this metric is"
" [0,1], with values closer to 1 more desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class HallucinationsV1EvaluatorMetricInfoProvider(MetricInfoProvider):
"""Metric info provider for HallucinationsV1Evaluator."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.HALLUCINATIONS_V1.value,
description=(
"This metric assesses whether a model response contains any false,"
" contradictory, or unsupported claims using a LLM as judge. Value"
" range for this metric is [0,1], with values closer to 1 more"
" desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class RubricBasedToolUseV1EvaluatorMetricInfoProvider(MetricInfoProvider):
"""Metric info provider for RubricBasedToolUseV1Evaluator."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
metric_name=PrebuiltMetrics.RUBRIC_BASED_TOOL_USE_QUALITY_V1.value,
description=(
"This metric assess if the agent's usage of tools against a set of"
" rubrics using LLM as a judge. Value range for this metric is"
" [0,1], with values closer to 1 more desirable."
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)


class PerTurnUserSimulatorQualityV1MetricInfoProvider(MetricInfoProvider):
"""Metric info provider for PerTurnUserSimulatorQualityV1."""

def get_metric_info(self) -> MetricInfo:
return MetricInfo(
        metric_name=PrebuiltMetrics.PER_TURN_USER_SIMULATOR_QUALITY_V1.value,
description=(
"This metric evaluates if the user messages generated by a "
"user simulator follow the given conversation scenario. It "
"validates each message separately. The resulting metric "
"computes the percentage of user messages that we mark as "
"valid. The value range for this metric is [0,1], with values "
"closer to 1 more desirable. "
),
metric_value_info=MetricValueInfo(
interval=Interval(min_value=0.0, max_value=1.0)
),
)
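
As a usage note, callers can read metric metadata straight from a provider. The sketch below only touches fields defined by the MetricInfo model in this change (metric_name, metric_value_info.interval) and uses a deliberately unsupported metric name to show the ValueError path; it assumes the package is importable as google.adk.

from google.adk.evaluation.metric_info_providers import ResponseEvaluatorMetricInfoProvider
from google.adk.evaluation.metric_info_providers import TrajectoryEvaluatorMetricInfoProvider

# Pull the metadata for the tool-trajectory metric and inspect its bounds.
info = TrajectoryEvaluatorMetricInfoProvider().get_metric_info()
print(info.metric_name)
interval = info.metric_value_info.interval
print(interval.min_value, interval.max_value)

# The response provider is parameterized by metric name; an unsupported
# name raises ValueError, per the else branch above.
try:
  ResponseEvaluatorMetricInfoProvider("not_a_real_metric").get_metric_info()
except ValueError as err:
  print(err)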