diff --git a/contributing/samples/bigquery/agent.py b/contributing/samples/bigquery/agent.py
index 56a7367c8d..2389b25f47 100644
--- a/contributing/samples/bigquery/agent.py
+++ b/contributing/samples/bigquery/agent.py
@@ -71,7 +71,7 @@
 # The variable name `root_agent` determines what your root agent is for the
 # debug CLI
 root_agent = LlmAgent(
-    model="gemini-2.0-flash",
+    model="gemini-2.5-flash",
     name=BIGQUERY_AGENT_NAME,
     description=(
         "Agent to answer questions about BigQuery data and models and execute"
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
index 9984580244..4aa39dce9c 100644
--- a/src/google/adk/cli/cli_tools_click.py
+++ b/src/google/adk/cli/cli_tools_click.py
@@ -36,7 +36,6 @@
 from . import cli_deploy
 from .. import version
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
-from ..sessions.migration import migration_runner
 from .cli import run_cli
 from .fast_api import get_fast_api_app
 from .utils import envs
@@ -1589,6 +1588,8 @@ def cli_migrate_session(
   """Migrates a session database to the latest schema version."""
   logs.setup_adk_logger(getattr(logging, log_level.upper()))
   try:
+    from ..sessions.migration import migration_runner
+
     migration_runner.upgrade(source_db_url, dest_db_url)
     click.secho("Migration check and upgrade process finished.", fg="green")
   except Exception as e:
diff --git a/src/google/adk/evaluation/eval_config.py b/src/google/adk/evaluation/eval_config.py
index 13b2e92274..92b61ac57c 100644
--- a/src/google/adk/evaluation/eval_config.py
+++ b/src/google/adk/evaluation/eval_config.py
@@ -23,7 +23,9 @@
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
+from pydantic import model_validator
 
+from ..agents.common_configs import CodeConfig
 from ..evaluation.eval_metrics import EvalMetric
 from .eval_metrics import BaseCriterion
 from .eval_metrics import Threshold
@@ -72,11 +74,46 @@ class EvalConfig(BaseModel):
 """,
   )
 
+  custom_metrics: Optional[dict[str, CodeConfig]] = Field(
+      default=None,
+      description="""A dictionary mapping custom metric names to CodeConfig
+objects, which specify the path to the function for each custom metric.
+
+If a metric name in `criteria` is also present in `custom_metrics`, the
+corresponding `CodeConfig`'s `name` field will be used to locate the custom
+metric implementation. The `name` field should contain the fully qualified
+path to the custom metric function, e.g., `my.custom.metrics.metric_function`.
+
+Example:
+{
+  "criteria": {
+    "my_custom_metric": 0.5
+  },
+  "custom_metrics": {
+    "my_custom_metric": {
+      "name": "path.to.my.custom.metric.function"
+    }
+  }
+}
+""",
+  )
+
   user_simulator_config: Optional[BaseUserSimulatorConfig] = Field(
       default=None,
       description="Config to be used by the user simulator.",
   )
 
+  @model_validator(mode="after")
+  def check_custom_metrics_code_config_args(self) -> "EvalConfig":
+    if self.custom_metrics:
+      for metric_name, metric_config in self.custom_metrics.items():
+        if metric_config.args:
+          raise ValueError(
+              f"args field in CodeConfig for custom metric '{metric_name}' is"
+              " not supported."
+          )
+    return self
+
 
 _DEFAULT_EVAL_CONFIG = EvalConfig(
     criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
@@ -106,12 +143,20 @@ def get_eval_metrics_from_config(eval_config: EvalConfig) -> list[EvalMetric]:
   eval_metric_list = []
   if eval_config.criteria:
     for metric_name, criterion in eval_config.criteria.items():
+      custom_function_path = None
+      if (
+          eval_config.custom_metrics
+          and metric_name in eval_config.custom_metrics
+      ):
+        custom_function_path = eval_config.custom_metrics[metric_name].name
+
       if isinstance(criterion, float):
         eval_metric_list.append(
             EvalMetric(
                 metric_name=metric_name,
                 threshold=criterion,
                 criterion=BaseCriterion(threshold=criterion),
+                custom_function_path=custom_function_path,
             )
         )
       elif isinstance(criterion, BaseCriterion):
@@ -120,6 +165,7 @@ def get_eval_metrics_from_config(eval_config: EvalConfig) -> list[EvalMetric]:
                 metric_name=metric_name,
                 threshold=criterion.threshold,
                 criterion=criterion,
+                custom_function_path=custom_function_path,
             )
         )
       else:
diff --git a/src/google/adk/evaluation/eval_metrics.py b/src/google/adk/evaluation/eval_metrics.py
index f81059fb9d..3047922c3f 100644
--- a/src/google/adk/evaluation/eval_metrics.py
+++ b/src/google/adk/evaluation/eval_metrics.py
@@ -279,6 +279,11 @@ class EvalMetric(EvalBaseModel):
       default=None, description="""Evaluation criterion used by the metric."""
   )
 
+  custom_function_path: Optional[str] = Field(
+      default=None,
+      description="""Path to custom function, if this is a custom metric.""",
+  )
+
 
 class EvalMetricResultDetails(EvalBaseModel):
   rubric_scores: Optional[list[RubricScore]] = Field(
diff --git a/tests/unittests/evaluation/test_eval_config.py b/tests/unittests/evaluation/test_eval_config.py
index a1f9c8af0a..fd1a7938eb 100644
--- a/tests/unittests/evaluation/test_eval_config.py
+++ b/tests/unittests/evaluation/test_eval_config.py
@@ -20,6 +20,7 @@
 from google.adk.evaluation.eval_config import get_evaluation_criteria_or_default
 from google.adk.evaluation.eval_rubrics import Rubric
 from google.adk.evaluation.eval_rubrics import RubricContent
+import pytest
 
 
 def test_get_evaluation_criteria_or_default_returns_default():
@@ -99,6 +100,42 @@ def test_get_eval_metrics_from_config():
   assert eval_metrics[3].criterion.rubrics[0] == rubric_1
 
 
+def test_get_eval_metrics_from_config_with_custom_metrics():
+  eval_config = EvalConfig(
+      criteria={
+          "custom_metric_1": 1.0,
+          "custom_metric_2": {
+              "threshold": 0.5,
+          },
+      },
+      custom_metrics={
+          "custom_metric_1": {"name": "path/to/custom/metric_1"},
+          "custom_metric_2": {"name": "path/to/custom/metric_2"},
+      },
+  )
+  eval_metrics = get_eval_metrics_from_config(eval_config)
+
+  assert len(eval_metrics) == 2
+  assert eval_metrics[0].metric_name == "custom_metric_1"
+  assert eval_metrics[0].threshold == 1.0
+  assert eval_metrics[0].criterion.threshold == 1.0
+  assert eval_metrics[0].custom_function_path == "path/to/custom/metric_1"
+  assert eval_metrics[1].metric_name == "custom_metric_2"
+  assert eval_metrics[1].threshold == 0.5
+  assert eval_metrics[1].criterion.threshold == 0.5
+  assert eval_metrics[1].custom_function_path == "path/to/custom/metric_2"
+
+
+def test_custom_metric_code_config_with_args_raises_error():
+  with pytest.raises(ValueError):
+    eval_config = EvalConfig(
+        criteria={"custom_metric": 1.0},
+        custom_metrics={
+            "custom_metric": {"name": "name", "args": [{"value": 1}]}
+        },
+    )
+
+
 def test_get_eval_metrics_from_config_empty_criteria():
   eval_config = EvalConfig(criteria={})
   eval_metrics = get_eval_metrics_from_config(eval_config)
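Reviewer note: a minimal usage sketch of the new `custom_metrics` field, following the shape shown in the added `EvalConfig` docstring and the new test. The metric name `latency_score`, the module path `my_metrics.latency_score`, and the 0.7 threshold are illustrative only, not part of this change.

    # Usage sketch only -- not part of the diff above. Names and paths are hypothetical.
    from google.adk.evaluation.eval_config import EvalConfig
    from google.adk.evaluation.eval_config import get_eval_metrics_from_config

    config = EvalConfig(
        criteria={"latency_score": 0.7},
        # Pydantic coerces the plain dict into a CodeConfig, as in the new unit test.
        custom_metrics={"latency_score": {"name": "my_metrics.latency_score"}},
    )

    # Each EvalMetric produced for a name that also appears in `custom_metrics`
    # now carries the path to the custom implementation.
    metrics = get_eval_metrics_from_config(config)
    assert metrics[0].custom_function_path == "my_metrics.latency_score"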