From ea2bcdf217475734deee4460d09957da658a7a2d Mon Sep 17 00:00:00 2001 From: Weichen Li Date: Fri, 22 Aug 2025 22:41:29 -0500 Subject: [PATCH] fix metric calculation caused by mismatched pair comparisons --- scripts/benchmark_eval_analysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/benchmark_eval_analysis.py b/scripts/benchmark_eval_analysis.py index fe2d220c..d40170ee 100644 --- a/scripts/benchmark_eval_analysis.py +++ b/scripts/benchmark_eval_analysis.py @@ -47,6 +47,7 @@ def patch(eval_results, dataset): "runtime": -1.0, "runtime_stats": {} } + eval_results = dict(sorted(eval_results.items(), key=lambda x: int(x[0]))) return eval_results def analyze_greedy_eval(run_name, hardware, baseline, level):