diff --git a/fiddler_utils/testing.py b/fiddler_utils/testing.py index 47815f91..9d80171c 100644 --- a/fiddler_utils/testing.py +++ b/fiddler_utils/testing.py @@ -47,6 +47,7 @@ import time import random import string +import requests try: import fiddler as fdl @@ -519,3 +520,112 @@ def cleanup_orphaned_test_metrics(model_id: str, prefix: str = "__test_") -> int logger.info(f"Cleanup complete: deleted {deleted_count}/{len(orphaned)} metrics") return deleted_count + +def query_custom_metric( + metric: fdl.CustomMetric, + model: fdl.Model, + url: str, + token: str, + start_time: str = "", + end_time: str = "", + baseline_name: str = "PRODUCTION", +) -> Optional[Dict[str, Any]]: + """Query a custom metric from Fiddler API for a specific time range or baseline. + + Args: + metric: The custom metric object (must have .id and .name attributes) + model: The model object (must have .id attribute) + url: Base URL for Fiddler API (e.g., "https://myorg.fiddler.ai") + token: API authentication token + start_time: Start time in format "YYYY-MM-DD HH:MM:SS" (required for PRODUCTION) + end_time: End time in format "YYYY-MM-DD HH:MM:SS" (required for PRODUCTION) + baseline_name: Name of the baseline dataset to query, or "PRODUCTION" for time range + (default: "PRODUCTION") + + Returns: + Dictionary containing the metric data, or None if the query failed or returned no data + + Raises: + None: All errors are logged and None is returned + + Example: + ```python + # Query production data for a time range + result = query_custom_metric( + metric=my_metric, + model=model, + url="https://myorg.fiddler.ai", + token=api_token, + start_time="2025-01-01 00:00:00", + end_time="2025-01-31 23:59:59" + ) + + # Query a baseline dataset + result = query_custom_metric( + metric=my_metric, + model=model, + url="https://myorg.fiddler.ai", + token=api_token, + baseline_name="baseline_v1" + ) + ``` + """ + json_request = { + "model_id": str(model.id), + "env_type": "PRODUCTION", + "metrics": [ + { + "id": 
str(metric.id) + } + ], + } + + # Handle baseline dataset vs production time range + if baseline_name != "PRODUCTION": + try: + dataset = fdl.Dataset.from_name(name=baseline_name, model_id=model.id) + json_request["env_id"] = str(dataset.id) + except Exception as e: + logger.error(f"Failed to get dataset '{baseline_name}': {e}") + return None + else: + # For PRODUCTION, time range is required + if not start_time or not end_time: + logger.error( + "Start time and end time are required when querying PRODUCTION dataset. " + f"Got start_time='{start_time}', end_time='{end_time}'" + ) + return None + json_request["time_range"] = { + "start_time": start_time, + "end_time": end_time, + } + + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {token}', + } + + response = requests.post( + f'{url}/v3/analytics/metrics', + headers=headers, + json=json_request + ) + + if not response or response.status_code != 200: + logger.error( + f"Failed to query metric '{metric.name}'. 
" + f"Status code: {response.status_code if response else 'N/A'}" + ) + if response: + logger.debug(f"Response: {response.text}") + return None + + result_data = response.json().get('data', []) + + if not result_data: + logger.error(f"No data returned for metric '{metric.name}'") + return None + + return result_data[0] + diff --git a/misc-utils/README.md b/misc-utils/README.md index e113ae82..21e8a7d8 100644 --- a/misc-utils/README.md +++ b/misc-utils/README.md @@ -583,6 +583,7 @@ These notebooks demonstrate practical solutions for common Fiddler administrativ - `validate_and_preview_metric()` - Complete validation workflow - `batch_test_metrics()` - Test multiple definitions efficiently - `cleanup_orphaned_test_metrics()` - Remove leftover test metrics +- `query_custom_metric()` - Get the value of custom metric on previously published data **Use cases:** - **Cross-model migration:** Copy custom metrics/segments between models with different schemas diff --git a/misc-utils/fql_utilities_demo.ipynb b/misc-utils/fql_utilities_demo.ipynb index 099ce4f6..3e115a3c 100644 --- a/misc-utils/fql_utilities_demo.ipynb +++ b/misc-utils/fql_utilities_demo.ipynb @@ -101,12 +101,12 @@ "from typing import Dict, Set\n", "\n", "import fiddler as fdl\n", - "\n", + "# Add parent directory to path to import fiddler_utils\n", + "sys.path.insert(0, \"..\")\n", "from fiddler_utils import fql\n", "from fiddler_utils.connection import get_or_init\n", "\n", - "# Add parent directory to path to import fiddler_utils\n", - "sys.path.insert(0, \"..\")\n", + "\n", "\n", "print(\"✓ Imports successful\")\n" ] @@ -1653,6 +1653,7 @@ " test_metric_definition,\n", " validate_and_preview_metric,\n", " validate_metric_syntax_local,\n", + " query_custom_metric,\n", ")\n", "\n", "print(\"✓ FQL testing utilities imported\")" @@ -1788,6 +1789,100 @@ " print(\"Local validation cannot test tp(), fp(), jsd(), etc.\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6.3.1 Query the 
Value of the FQL Custom Metric\n", + "\n", + "This step requires that data has already been published into Fiddler (whether pre-production or production data)\n", + "\n", + "Once a custom metric is created, you can query Fiddler's metric API to get the value of the custom metric and compare to expected ground truth (if available or can calculate externally for validation).\n", + "\n", + "You can iterate through many defined custom metrics (or variations of them) and compare with expected values to quickly update/refine FQL calculations against previously published pre-production or production data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " # Let's use the first metric from the list and peek its definition\n", + " # This can be any custom metric that's been created/validated for syntax above\n", + "if URL and TOKEN and model:\n", + " custom_metrics = list(fdl.CustomMetric.list(model_id=model.id))\n", + " metric = custom_metrics[0]\n", + " print(\"Metric Name: \", metric.name)\n", + " print(\"Metric Definition: \", metric.definition)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# OPTION 1: Use pre-production or baseline dataset for evaluation\n", + "baseline_name = 'bank_churn_baseline2' # Or replace with your baseline name \n", + "\n", + "# Call function to query the value of the custom metric for pre-production data\n", + "result = query_custom_metric(\n", + " metric=metric,\n", + " model=model,\n", + " url=URL,\n", + " token=TOKEN,\n", + " baseline_name=baseline_name,\n", + ")\n", + "\n", + "print(f\"Evaluating custom metric on pre-production baseline: \\\"{baseline_name}\\\"\")\n", + "print(\"Metric Name: \", metric.name)\n", + "print(\"Metric Value: \", result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# OPTION 2: Use a time range for previously 
published production data\n", + "from datetime import datetime, timedelta\n", + "\n", + "start_time_published_data = str(datetime.now() - timedelta(days=7))\n", + "end_time_published_data = str(datetime.now() - timedelta(days=6))\n", + "print(\"Start of Time of Published Data: \", start_time_published_data)\n", + "print(\"End of Time of Published Data: \", end_time_published_data)\n", + "\n", + "# Call function to query the value of the custom metric for time window of production data\n", + "result = query_custom_metric(\n", + " metric=metric,\n", + " model=model,\n", + " url=URL,\n", + " token=TOKEN,\n", + " start_time=start_time_published_data,\n", + " end_time=end_time_published_data,\n", + ")\n", + "\n", + "print(\"Metric Name: \", metric.name)\n", + "print(\"Metric Value: \", result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If you have an expected or ground truth value, you can compare the result to it\n", + "# and iterate through many defined custom metrics to quickly refine/update FQL calculations\n", + "\n", + "expected_value = 0.92 # Replace with your expected value\n", + "print(\"Expected Value: \", round(expected_value, 2))\n", + "print(\"Calculated Metric Value: \", round(result, 2))\n", + "print(\"Difference: \", round(abs(result - expected_value), 2))" + ] + }, { "cell_type": "markdown", "id": "cell-81",