From 5857ba997b38b834e8100a7c9ceadf81d042fc47 Mon Sep 17 00:00:00 2001 From: Sait Cakmak Date: Fri, 7 Mar 2025 10:47:16 -0800 Subject: [PATCH] Fix data filtering in Experiment.to_df() (#3478) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/3478 Because the dataframe wasn't being filtered by the trial index, if the same arm appeared in multiple trials, the lookup could match more than one row, `.item()` would raise a ValueError, and the observed mean would simply be thrown away (reported as None). Reviewed By: mgarrard Differential Revision: D70742871 fbshipit-source-id: 157bacd08df8ee6b5c1983de9e9eab6a05af5f5f --- ax/core/experiment.py | 7 +++++-- ax/core/tests/test_experiment.py | 25 ++++++++++--------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/ax/core/experiment.py b/ax/core/experiment.py index f0279b1901b..f7898722f99 100644 --- a/ax/core/experiment.py +++ b/ax/core/experiment.py @@ -1856,10 +1856,13 @@ def to_df(self, omit_empty_columns: bool = True) -> pd.DataFrame: for metric in self.metrics.keys(): try: observed_means[metric] = data_df[ - (data_df["arm_name"] == arm.name) + (data_df["trial_index"] == index) + & (data_df["arm_name"] == arm.name) & (data_df["metric_name"] == metric) ]["mean"].item() - except ValueError: + except (ValueError, KeyError): + # ValueError if there is no row for the (trial, arm, metric). + # KeyError if the df is empty and missing one of the columns. 
observed_means[metric] = None # Find the arm's associated generation method from the trial via the diff --git a/ax/core/tests/test_experiment.py b/ax/core/tests/test_experiment.py index 6abdfa4817d..3ba5bf499a6 100644 --- a/ax/core/tests/test_experiment.py +++ b/ax/core/tests/test_experiment.py @@ -1326,24 +1326,19 @@ def test_to_df(self) -> None: experiment = get_experiment_with_observations( observations=[[1.0, 2.0], [3.0, 4.0]] ) + experiment.new_trial(generator_run=experiment.trials[0].generator_runs[0]) df = experiment.to_df() - xs = [ - experiment.trials[0].arms[0].parameters["x"], - experiment.trials[1].arms[0].parameters["x"], - ] - ys = [ - experiment.trials[0].arms[0].parameters["y"], - experiment.trials[1].arms[0].parameters["y"], - ] + xs = [experiment.trials[i].arms[0].parameters["x"] for i in range(3)] + ys = [experiment.trials[i].arms[0].parameters["y"] for i in range(3)] expected_df = pd.DataFrame.from_dict( { - "trial_index": [0, 1], - "arm_name": ["0_0", "1_0"], - "trial_status": ["COMPLETED", "COMPLETED"], - "generation_method": ["Sobol", "Sobol"], - "name": ["0", "1"], # the metadata - "m1": [1.0, 3.0], - "m2": [2.0, 4.0], + "trial_index": [0, 1, 2], + "arm_name": ["0_0", "1_0", "0_0"], + "trial_status": ["COMPLETED", "COMPLETED", "CANDIDATE"], + "generation_method": ["Sobol", "Sobol", "Sobol"], + "name": ["0", "1", None], # the metadata + "m1": [1.0, 3.0, None], + "m2": [2.0, 4.0, None], "x": xs, "y": ys, }