Skip to content

Commit

Permalink
Fix data filtering in Experiment.to_df() (#3478)
Browse files: browse the repository at this point in the history
Summary:
Pull Request resolved: #3478

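Because the dataframe wasn't being filtered by the trial index, an arm that appeared in multiple trials could match more than one row per metric. The `.item()` call then raised a ValueError, which was caught, so the observed mean was set to None and the data was simply thrown away.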

Reviewed By: mgarrard

Differential Revision: D70742871

fbshipit-source-id: 157bacd08df8ee6b5c1983de9e9eab6a05af5f5f
  • Loading branch information
saitcakmak authored and facebook-github-bot committed Mar 7, 2025
1 parent 22c32a8 commit 5857ba9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 17 deletions.
7 changes: 5 additions & 2 deletions ax/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,10 +1856,13 @@ def to_df(self, omit_empty_columns: bool = True) -> pd.DataFrame:
for metric in self.metrics.keys():
try:
observed_means[metric] = data_df[
(data_df["arm_name"] == arm.name)
(data_df["trial_index"] == index)
& (data_df["arm_name"] == arm.name)
& (data_df["metric_name"] == metric)
]["mean"].item()
except ValueError:
except (ValueError, KeyError):
# ValueError if the (trial, arm, metric) filter does not select exactly
# one row (no matching row, or more than one), since .item() needs one value.
# KeyError if the df is empty and missing one of the columns.
observed_means[metric] = None

# Find the arm's associated generation method from the trial via the
Expand Down
25 changes: 10 additions & 15 deletions ax/core/tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1326,24 +1326,19 @@ def test_to_df(self) -> None:
experiment = get_experiment_with_observations(
observations=[[1.0, 2.0], [3.0, 4.0]]
)
experiment.new_trial(generator_run=experiment.trials[0].generator_runs[0])
df = experiment.to_df()
xs = [
experiment.trials[0].arms[0].parameters["x"],
experiment.trials[1].arms[0].parameters["x"],
]
ys = [
experiment.trials[0].arms[0].parameters["y"],
experiment.trials[1].arms[0].parameters["y"],
]
xs = [experiment.trials[i].arms[0].parameters["x"] for i in range(3)]
ys = [experiment.trials[i].arms[0].parameters["y"] for i in range(3)]
expected_df = pd.DataFrame.from_dict(
{
"trial_index": [0, 1],
"arm_name": ["0_0", "1_0"],
"trial_status": ["COMPLETED", "COMPLETED"],
"generation_method": ["Sobol", "Sobol"],
"name": ["0", "1"], # the metadata
"m1": [1.0, 3.0],
"m2": [2.0, 4.0],
"trial_index": [0, 1, 2],
"arm_name": ["0_0", "1_0", "0_0"],
"trial_status": ["COMPLETED", "COMPLETED", "CANDIDATE"],
"generation_method": ["Sobol", "Sobol", "Sobol"],
"name": ["0", "1", None], # the metadata
"m1": [1.0, 3.0, None],
"m2": [2.0, 4.0, None],
"x": xs,
"y": ys,
}
Expand Down

0 comments on commit 5857ba9

Please sign in to comment.