From 113b5f6e09102e3c596b95604688694e13e5204d Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 3 Oct 2024 23:14:37 +0000 Subject: [PATCH 1/4] chore: add bigframes backtrace benchmark support. --- noxfile.py | 11 +++++ scripts/run_and_publish_benchmark.py | 62 +++++++++++++++++++++++----- tests/benchmark/utils.py | 16 +++++-- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/noxfile.py b/noxfile.py index 92f8acad7f..4abd4c6040 100644 --- a/noxfile.py +++ b/noxfile.py @@ -887,9 +887,19 @@ def benchmark(session: nox.Session): "all benchmarks are run." ), ) + parser.add_argument( + "-v", + "--bigframe-version", + type=str, + default=None, + help="Specify the version of bigframes to test against.", + ) args = parser.parse_args(session.posargs) + if args.bigframe_version: + session.install(f"bigframes=={args.bigframe_version}") + benchmark_script_list: List[pathlib.Path] = [] if args.benchmark_filter: for filter_item in args.benchmark_filter: @@ -922,6 +932,7 @@ def benchmark(session: nox.Session): f"--publish-benchmarks={base_path}", f"--iterations={args.iterations}", f"--output-csv={args.output_csv}", + f"--backtrace={args.bigframe_version is not None}", ) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 8b55493770..25c7bae2c4 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -26,6 +26,7 @@ import numpy as np import pandas as pd import pandas_gbq +import requests LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -245,25 +246,52 @@ def geometric_mean_excluding_zeros(data): return round(np.exp(log_data.mean()), 1) -def get_repository_status(): +def get_pypi_release_time(package_name, version): + """ + Fetch the release time of a specific version of a package from PyPI. 
+ """ + url = f"https://pypi.org/pypi/{package_name}/{version}/json" + response = requests.get(url) + + if response.status_code == 200: + data = response.json() + release_time = data["urls"][0]["upload_time_iso_8601"] + return release_time + else: + raise ValueError( + f"Failed to retrieve package info for {package_name} version {version}" + ) + + +def get_repository_status(backtrace: bool = False): current_directory = os.getcwd() subprocess.run( ["git", "config", "--global", "--add", "safe.directory", current_directory], check=True, ) - git_hash = subprocess.check_output( - ["git", "rev-parse", "--short", "HEAD"], text=True - ).strip() - bigframes_version = subprocess.check_output( - ["python", "-c", "import bigframes; print(bigframes.__version__)"], text=True - ).strip() + with tempfile.TemporaryDirectory() as tmpdirname: + bigframes_version = subprocess.check_output( + ["python", "-c", "import bigframes; print(bigframes.__version__)"], + text=True, + cwd=tmpdirname, + ).strip() + + if backtrace: + git_hash = "benchmark_backtrace" + benchmark_start_time = get_pypi_release_time("bigframes", bigframes_version) + else: + git_hash = subprocess.check_output( + ["git", "rev-parse", "--short", "HEAD"], text=True + ).strip() + benchmark_start_time = datetime.datetime.now().isoformat() + release_version = ( f"{bigframes_version}dev{datetime.datetime.now().strftime('%Y%m%d')}+{git_hash}" ) return { - "benchmark_start_time": datetime.datetime.now().isoformat(), + "benchmark_start_time": benchmark_start_time, "git_hash": git_hash, "bigframes_version": bigframes_version, "release_version": release_version, @@ -302,14 +330,16 @@ def find_config(start_path): return None -def publish_to_bigquery(dataframe, notebook, project_name="bigframes-metrics"): +def publish_to_bigquery( + dataframe, notebook, project_name="bigframes-metrics", backtrace=False +): bigquery_table = ( f"{project_name}.benchmark_report.notebook_benchmark" if notebook else 
f"{project_name}.benchmark_report.benchmark" ) - repo_status = get_repository_status() + repo_status = get_repository_status(backtrace) for idx, col in enumerate(repo_status.keys()): dataframe.insert(idx, col, repo_status[col]) @@ -420,6 +450,14 @@ def parse_arguments(): help="Determines whether to output results to a CSV file. If no location is provided, a temporary location is automatically generated.", ) + parser.add_argument( + "--backtrace", + type=str, + choices=["True", "False"], + default="False", + help="Specify whether to perform backtrace benchmarking. Use 'True' to enable or 'False' to disable it.", + ) + return parser.parse_args() @@ -450,7 +488,9 @@ def main(): # The 'BENCHMARK_AND_PUBLISH' environment variable should be set to 'true' only # in specific Kokoro sessions. if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": - publish_to_bigquery(benchmark_metrics, args.notebook) + publish_to_bigquery( + benchmark_metrics, args.notebook, backtrace=(args.backtrace == "True") + ) # If the 'GCLOUD_BENCH_PUBLISH_PROJECT' environment variable is set, publish the # benchmark metrics to a specified BigQuery table in the provided project. This is # intended for local testing where the default behavior is not to publish results. diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 887d54dba2..7cc5207d6b 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
import argparse +import inspect import time import bigframes @@ -94,9 +95,18 @@ def _str_to_bool(value): def _initialize_session(ordered: bool): - context = bigframes.BigQueryOptions( - location="US", ordering_mode="strict" if ordered else "partial" - ) + options_signature = inspect.signature(bigframes.BigQueryOptions.__init__) + if "ordering_mode" in options_signature.parameters: + context = bigframes.BigQueryOptions( + location="US", ordering_mode="strict" if ordered else "partial" + ) + # Older versions of bigframes + elif "_strictly_ordered" in options_signature.parameters: + context = bigframes.BigQueryOptions(location="US", _strictly_ordered=ordered) # type: ignore + elif not ordered: + raise ValueError("Unordered mode not supported") + else: + context = bigframes.BigQueryOptions(location="US") session = bigframes.Session(context=context) print(f"Initialized {'ordered' if ordered else 'unordered'} session.") return session From 73e9f8b26bc1adca01e6158e8f4be39647cf14ea Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Fri, 4 Oct 2024 00:44:49 +0000 Subject: [PATCH 2/4] add column of error traceback. 
--- scripts/run_and_publish_benchmark.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 25c7bae2c4..37a78ccda4 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -21,6 +21,7 @@ import subprocess import sys import tempfile +import traceback from typing import Dict, List, Tuple, Union import numpy as np @@ -74,8 +75,10 @@ def run_benchmark_subprocess(args, log_env_name_var, file_path=None, region=None if file.suffix != ".backup": print(f"Benchmark failed, deleting: {file}") file.unlink() + error_file = directory / f"{pathlib.Path(file_path).name}.error" - error_file.touch() + with error_file.open("w") as f: + f.write(traceback.format_exc()) def collect_benchmark_result( @@ -84,7 +87,7 @@ def collect_benchmark_result( """Generate a DataFrame report on HTTP queries, bytes processed, slot time and execution time from log files.""" path = pathlib.Path(benchmark_path) try: - results_dict: Dict[str, List[Union[int, float, None]]] = {} + results_dict: Dict[str, List[Union[int, float, str, None]]] = {} bytes_files = sorted(path.rglob("*.bytesprocessed")) millis_files = sorted(path.rglob("*.slotmillis")) bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds")) @@ -101,6 +104,18 @@ def collect_benchmark_result( "Mismatch in the number of report files for bytes, millis, and seconds." 
) + for error_file in error_files: + filename = error_file.relative_to(path).with_suffix("") + with open(error_file, "r") as file: + results_dict[str(filename)] = [ + None, + None, + None, + None, + None, + file.read().strip(), + ] + for idx in range(len(bytes_files)): bytes_file = bytes_files[idx] millis_file = millis_files[idx] @@ -143,6 +158,7 @@ def collect_benchmark_result( total_slot_millis, local_seconds, bq_seconds, + None, ] finally: for files_to_remove in ( @@ -161,6 +177,7 @@ def collect_benchmark_result( "Slot_Millis", "Local_Execution_Time_Sec", "BigQuery_Execution_Time_Sec", + "Error", ] benchmark_metrics = pd.DataFrame.from_dict( From 587a32fd304ee6249aa1f64702055c809cf356de Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Fri, 4 Oct 2024 19:35:28 +0000 Subject: [PATCH 3/4] update comment --- scripts/run_and_publish_benchmark.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 37a78ccda4..20b6b0996a 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -296,6 +296,9 @@ def get_repository_status(backtrace: bool = False): if backtrace: git_hash = "benchmark_backtrace" + # We use the PyPI release time for backtrace benchmarks to align with the version's + # release date. This ensures that combining backtrace data with regular benchmark + # results won't affect time-based analysis. benchmark_start_time = get_pypi_release_time("bigframes", bigframes_version) else: git_hash = subprocess.check_output( From e150ae88457e0fd3006fa40642dbeaf5df3529f5 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 10 Oct 2024 00:31:07 +0000 Subject: [PATCH 4/4] import local third-party benchmarks. 
--- tests/benchmark/db_benchmark/groupby/q1.py | 4 +++- tests/benchmark/db_benchmark/groupby/q10.py | 4 +++- tests/benchmark/db_benchmark/groupby/q2.py | 4 +++- tests/benchmark/db_benchmark/groupby/q3.py | 4 +++- tests/benchmark/db_benchmark/groupby/q4.py | 4 +++- tests/benchmark/db_benchmark/groupby/q5.py | 4 +++- tests/benchmark/db_benchmark/groupby/q6.py | 4 +++- tests/benchmark/db_benchmark/groupby/q7.py | 4 +++- tests/benchmark/db_benchmark/groupby/q8.py | 4 +++- tests/benchmark/db_benchmark/join/q1.py | 5 +++-- tests/benchmark/db_benchmark/join/q2.py | 5 +++-- tests/benchmark/db_benchmark/join/q3.py | 5 +++-- tests/benchmark/db_benchmark/join/q4.py | 5 +++-- tests/benchmark/db_benchmark/join/q5.py | 5 +++-- tests/benchmark/db_benchmark/sort/q1.py | 4 +++- tests/benchmark/tpch/q1.py | 2 +- tests/benchmark/tpch/q10.py | 2 +- tests/benchmark/tpch/q11.py | 2 +- tests/benchmark/tpch/q12.py | 2 +- tests/benchmark/tpch/q13.py | 2 +- tests/benchmark/tpch/q14.py | 2 +- tests/benchmark/tpch/q15.py | 2 +- tests/benchmark/tpch/q16.py | 2 +- tests/benchmark/tpch/q17.py | 2 +- tests/benchmark/tpch/q18.py | 2 +- tests/benchmark/tpch/q19.py | 2 +- tests/benchmark/tpch/q2.py | 2 +- tests/benchmark/tpch/q20.py | 2 +- tests/benchmark/tpch/q21.py | 2 +- tests/benchmark/tpch/q22.py | 2 +- tests/benchmark/tpch/q3.py | 2 +- tests/benchmark/tpch/q4.py | 2 +- tests/benchmark/tpch/q5.py | 2 +- tests/benchmark/tpch/q6.py | 2 +- tests/benchmark/tpch/q7.py | 2 +- tests/benchmark/tpch/q8.py | 2 +- tests/benchmark/tpch/q9.py | 2 +- tests/benchmark/utils.py | 23 +++++++++++++++++++++ 38 files changed, 90 insertions(+), 42 deletions(-) diff --git a/tests/benchmark/db_benchmark/groupby/q1.py b/tests/benchmark/db_benchmark/groupby/q1.py index dc86817908..8215941171 100644 --- a/tests/benchmark/db_benchmark/groupby/q1.py +++ b/tests/benchmark/db_benchmark/groupby/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as 
vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q1, diff --git a/tests/benchmark/db_benchmark/groupby/q10.py b/tests/benchmark/db_benchmark/groupby/q10.py index 99d28e2f9a..cba6794f80 100644 --- a/tests/benchmark/db_benchmark/groupby/q10.py +++ b/tests/benchmark/db_benchmark/groupby/q10.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q10, diff --git a/tests/benchmark/db_benchmark/groupby/q2.py b/tests/benchmark/db_benchmark/groupby/q2.py index b06a4189fe..a4df1256eb 100644 --- a/tests/benchmark/db_benchmark/groupby/q2.py +++ b/tests/benchmark/db_benchmark/groupby/q2.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q2, diff --git a/tests/benchmark/db_benchmark/groupby/q3.py b/tests/benchmark/db_benchmark/groupby/q3.py index 
d66dd7b39d..2a88c24c87 100644 --- a/tests/benchmark/db_benchmark/groupby/q3.py +++ b/tests/benchmark/db_benchmark/groupby/q3.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q3, diff --git a/tests/benchmark/db_benchmark/groupby/q4.py b/tests/benchmark/db_benchmark/groupby/q4.py index 6c72069a53..883407579b 100644 --- a/tests/benchmark/db_benchmark/groupby/q4.py +++ b/tests/benchmark/db_benchmark/groupby/q4.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q4, diff --git a/tests/benchmark/db_benchmark/groupby/q5.py b/tests/benchmark/db_benchmark/groupby/q5.py index 3e6db9783e..4c26a14303 100644 --- a/tests/benchmark/db_benchmark/groupby/q5.py +++ b/tests/benchmark/db_benchmark/groupby/q5.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + 
"bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q5, diff --git a/tests/benchmark/db_benchmark/groupby/q6.py b/tests/benchmark/db_benchmark/groupby/q6.py index f763280b5b..e0b3416731 100644 --- a/tests/benchmark/db_benchmark/groupby/q6.py +++ b/tests/benchmark/db_benchmark/groupby/q6.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q6, diff --git a/tests/benchmark/db_benchmark/groupby/q7.py b/tests/benchmark/db_benchmark/groupby/q7.py index 4e7f2d58b6..2c27a9a9da 100644 --- a/tests/benchmark/db_benchmark/groupby/q7.py +++ b/tests/benchmark/db_benchmark/groupby/q7.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q7, diff --git a/tests/benchmark/db_benchmark/groupby/q8.py b/tests/benchmark/db_benchmark/groupby/q8.py index 75d5dcaa0c..89399b5484 100644 --- a/tests/benchmark/db_benchmark/groupby/q8.py +++ b/tests/benchmark/db_benchmark/groupby/q8.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as 
vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q8, diff --git a/tests/benchmark/db_benchmark/join/q1.py b/tests/benchmark/db_benchmark/join/q1.py index 4ca0ee3389..275662bfa2 100644 --- a/tests/benchmark/db_benchmark/join/q1.py +++ b/tests/benchmark/db_benchmark/join/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q1, diff --git a/tests/benchmark/db_benchmark/join/q2.py b/tests/benchmark/db_benchmark/join/q2.py index 19efd6fbf2..910463e3b9 100644 --- a/tests/benchmark/db_benchmark/join/q2.py +++ b/tests/benchmark/db_benchmark/join/q2.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q2, diff --git a/tests/benchmark/db_benchmark/join/q3.py b/tests/benchmark/db_benchmark/join/q3.py index d0a931bfb2..fb28fe0841 100644 --- 
a/tests/benchmark/db_benchmark/join/q3.py +++ b/tests/benchmark/db_benchmark/join/q3.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q3, diff --git a/tests/benchmark/db_benchmark/join/q4.py b/tests/benchmark/db_benchmark/join/q4.py index ebd7c461d0..37da03f0f9 100644 --- a/tests/benchmark/db_benchmark/join/q4.py +++ b/tests/benchmark/db_benchmark/join/q4.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q4, diff --git a/tests/benchmark/db_benchmark/join/q5.py b/tests/benchmark/db_benchmark/join/q5.py index 7114acd408..0867befee5 100644 --- a/tests/benchmark/db_benchmark/join/q5.py +++ b/tests/benchmark/db_benchmark/join/q5.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) 
utils.get_execution_time( vendored_dbbenchmark_join_queries.q5, diff --git a/tests/benchmark/db_benchmark/sort/q1.py b/tests/benchmark/db_benchmark/sort/q1.py index 5f6c404443..772d577b7f 100644 --- a/tests/benchmark/db_benchmark/sort/q1.py +++ b/tests/benchmark/db_benchmark/sort/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.sort_queries as vendored_dbbenchmark_sort_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_sort_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.sort_queries" + ) utils.get_execution_time( vendored_dbbenchmark_sort_queries.q1, diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py index a672103931..b21894cffa 100644 --- a/tests/benchmark/tpch/q1.py +++ b/tests/benchmark/tpch/q1.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q1 = utils.import_local_module("bigframes_vendored.tpch.queries.q1") utils.get_execution_time( vendored_tpch_q1.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py index d468a90156..69fc8d0806 100644 --- a/tests/benchmark/tpch/q10.py +++ b/tests/benchmark/tpch/q10.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q10 as vendored_tpch_q10 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q10 = utils.import_local_module("bigframes_vendored.tpch.queries.q10") utils.get_execution_time( vendored_tpch_q10.q, 
current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py index dbf3fd94de..1f8eb1acdb 100644 --- a/tests/benchmark/tpch/q11.py +++ b/tests/benchmark/tpch/q11.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q11 = utils.import_local_module("bigframes_vendored.tpch.queries.q11") utils.get_execution_time( vendored_tpch_q11.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py index 57774457ae..a2ec72ec1a 100644 --- a/tests/benchmark/tpch/q12.py +++ b/tests/benchmark/tpch/q12.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q12 = utils.import_local_module("bigframes_vendored.tpch.queries.q12") utils.get_execution_time( vendored_tpch_q12.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q13.py b/tests/benchmark/tpch/q13.py index a7f2780e4b..366ee171ac 100644 --- a/tests/benchmark/tpch/q13.py +++ b/tests/benchmark/tpch/q13.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q13 = utils.import_local_module("bigframes_vendored.tpch.queries.q13") utils.get_execution_time( vendored_tpch_q13.q, current_path, suffix, project_id, dataset_id, session diff --git 
a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py index e9599f3bd8..94115b9a3a 100644 --- a/tests/benchmark/tpch/q14.py +++ b/tests/benchmark/tpch/q14.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q14 = utils.import_local_module("bigframes_vendored.tpch.queries.q14") utils.get_execution_time( vendored_tpch_q14.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py index ff200384a8..907e5620d2 100644 --- a/tests/benchmark/tpch/q15.py +++ b/tests/benchmark/tpch/q15.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q15 = utils.import_local_module("bigframes_vendored.tpch.queries.q15") utils.get_execution_time( vendored_tpch_q15.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py index 69fc1b9523..7505df1a96 100644 --- a/tests/benchmark/tpch/q16.py +++ b/tests/benchmark/tpch/q16.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q16 = utils.import_local_module("bigframes_vendored.tpch.queries.q16") utils.get_execution_time( vendored_tpch_q16.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py index 
14707f4a93..d4142e2857 100644 --- a/tests/benchmark/tpch/q17.py +++ b/tests/benchmark/tpch/q17.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q17 = utils.import_local_module("bigframes_vendored.tpch.queries.q17") utils.get_execution_time( vendored_tpch_q17.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py index 54cf0d0432..32f9915ce4 100644 --- a/tests/benchmark/tpch/q18.py +++ b/tests/benchmark/tpch/q18.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q18 = utils.import_local_module("bigframes_vendored.tpch.queries.q18") utils.get_execution_time( vendored_tpch_q18.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py index 1ec44391ff..9bfa6c9755 100644 --- a/tests/benchmark/tpch/q19.py +++ b/tests/benchmark/tpch/q19.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q19 = utils.import_local_module("bigframes_vendored.tpch.queries.q19") utils.get_execution_time( vendored_tpch_q19.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q2.py b/tests/benchmark/tpch/q2.py index da8064b400..fdb0160391 100644 --- a/tests/benchmark/tpch/q2.py +++ 
b/tests/benchmark/tpch/q2.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q2 = utils.import_local_module("bigframes_vendored.tpch.queries.q2") utils.get_execution_time( vendored_tpch_q2.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py index 33e4f72ef6..7aee554f09 100644 --- a/tests/benchmark/tpch/q20.py +++ b/tests/benchmark/tpch/q20.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q20 = utils.import_local_module("bigframes_vendored.tpch.queries.q20") utils.get_execution_time( vendored_tpch_q20.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py index f73f87725f..0e151bfbbc 100644 --- a/tests/benchmark/tpch/q21.py +++ b/tests/benchmark/tpch/q21.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q21 = utils.import_local_module("bigframes_vendored.tpch.queries.q21") utils.get_execution_time( vendored_tpch_q21.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 0a6f6d923c..8828bfed19 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils 
as utils -import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q22 = utils.import_local_module("bigframes_vendored.tpch.queries.q22") utils.get_execution_time( vendored_tpch_q22.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 92322eea21..361755a168 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q3 = utils.import_local_module("bigframes_vendored.tpch.queries.q3") utils.get_execution_time( vendored_tpch_q3.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index 2d6931d6b1..6029767755 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q4 = utils.import_local_module("bigframes_vendored.tpch.queries.q4") utils.get_execution_time( vendored_tpch_q4.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index e8fd83e193..3362101f6e 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": 
project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q5 = utils.import_local_module("bigframes_vendored.tpch.queries.q5") utils.get_execution_time( vendored_tpch_q5.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index 152d6c663e..64b9f674bd 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q6 = utils.import_local_module("bigframes_vendored.tpch.queries.q6") utils.get_execution_time( vendored_tpch_q6.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 1c3e455e1c..edd00df334 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q7 as vendored_tpch_q7 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q7 = utils.import_local_module("bigframes_vendored.tpch.queries.q7") utils.get_execution_time( vendored_tpch_q7.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q8.py b/tests/benchmark/tpch/q8.py index 8d23194834..b6c0e9ff93 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = 
def import_local_module(module_name, base_path=None):
    """
    Dynamically import the latest benchmark scripts from a local directory.

    This lets the current repository's benchmark scripts be executed against
    arbitrary (e.g. older, pip-installed) versions of the libraries under
    test, so results are always produced by the most up-to-date scripts.

    Args:
        module_name: Dotted module path, e.g. "bigframes_vendored.tpch.queries.q2".
        base_path: Directory containing the module tree. Defaults to
            ``<current working directory>/third_party``, resolved at *call*
            time. (A ``pathlib.Path.cwd()`` expression in the signature would
            be frozen at import time instead — the CWD when utils.py is first
            imported — which silently breaks callers that chdir before use.)

    Returns:
        The freshly executed module object.

    Raises:
        ImportError: If the module file does not exist under ``base_path`` or
            an import spec cannot be created for it.
    """
    # Resolve the default here so the CWD is sampled when the function runs.
    if base_path is None:
        base_path = pathlib.Path.cwd() / "third_party"

    relative_path = pathlib.Path(*module_name.split("."))
    module_file_path = base_path / relative_path.with_suffix(".py")

    # Fail fast with a clear error instead of a FileNotFoundError deep
    # inside exec_module when the file is missing.
    if not module_file_path.is_file():
        raise ImportError(
            f"Cannot load module {module_name}: no file at {module_file_path}"
        )

    spec = importlib.util.spec_from_file_location(module_name, module_file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {module_name} from {base_path}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    return module