From 113b5f6e09102e3c596b95604688694e13e5204d Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 3 Oct 2024 23:14:37 +0000 Subject: [PATCH 1/4] chore: add bigframes backtrace benchmark support. --- noxfile.py | 11 +++++ scripts/run_and_publish_benchmark.py | 62 +++++++++++++++++++++++----- tests/benchmark/utils.py | 16 +++++-- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/noxfile.py b/noxfile.py index 92f8acad7f..4abd4c6040 100644 --- a/noxfile.py +++ b/noxfile.py @@ -887,9 +887,19 @@ def benchmark(session: nox.Session): "all benchmarks are run." ), ) + parser.add_argument( + "-v", + "--bigframe-version", + type=str, + default=None, + help="Specify the version of bigframes to test against.", + ) args = parser.parse_args(session.posargs) + if args.bigframe_version: + session.install(f"bigframes=={args.bigframe_version}") + benchmark_script_list: List[pathlib.Path] = [] if args.benchmark_filter: for filter_item in args.benchmark_filter: @@ -922,6 +932,7 @@ def benchmark(session: nox.Session): f"--publish-benchmarks={base_path}", f"--iterations={args.iterations}", f"--output-csv={args.output_csv}", + f"--backtrace={args.bigframe_version is not None}", ) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 8b55493770..25c7bae2c4 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -26,6 +26,7 @@ import numpy as np import pandas as pd import pandas_gbq +import requests LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -245,25 +246,52 @@ def geometric_mean_excluding_zeros(data): return round(np.exp(log_data.mean()), 1) -def get_repository_status(): +def get_pypi_release_time(package_name, version): + """ + Fetch the release time of a specific version of a package from PyPI. 
+ """ + url = f"https://pypi.org/pypi/{package_name}/{version}/json" + response = requests.get(url) + + if response.status_code == 200: + data = response.json() + release_time = data["urls"][0]["upload_time_iso_8601"] + return release_time + else: + raise ValueError( + f"Failed to retrieve package info for {package_name} version {version}" + ) + + +def get_repository_status(backtrace: bool = False): current_directory = os.getcwd() subprocess.run( ["git", "config", "--global", "--add", "safe.directory", current_directory], check=True, ) - git_hash = subprocess.check_output( - ["git", "rev-parse", "--short", "HEAD"], text=True - ).strip() - bigframes_version = subprocess.check_output( - ["python", "-c", "import bigframes; print(bigframes.__version__)"], text=True - ).strip() + with tempfile.TemporaryDirectory() as tmpdirname: + bigframes_version = subprocess.check_output( + ["python", "-c", "import bigframes; print(bigframes.__version__)"], + text=True, + cwd=tmpdirname, + ).strip() + + if backtrace: + git_hash = "benchmark_backtrace" + benchmark_start_time = get_pypi_release_time("bigframes", bigframes_version) + else: + git_hash = subprocess.check_output( + ["git", "rev-parse", "--short", "HEAD"], text=True + ).strip() + benchmark_start_time = datetime.datetime.now().isoformat() + release_version = ( f"{bigframes_version}dev{datetime.datetime.now().strftime('%Y%m%d')}+{git_hash}" ) return { - "benchmark_start_time": datetime.datetime.now().isoformat(), + "benchmark_start_time": benchmark_start_time, "git_hash": git_hash, "bigframes_version": bigframes_version, "release_version": release_version, @@ -302,14 +330,16 @@ def find_config(start_path): return None -def publish_to_bigquery(dataframe, notebook, project_name="bigframes-metrics"): +def publish_to_bigquery( + dataframe, notebook, project_name="bigframes-metrics", backtrace=False +): bigquery_table = ( f"{project_name}.benchmark_report.notebook_benchmark" if notebook else 
f"{project_name}.benchmark_report.benchmark" ) - repo_status = get_repository_status() + repo_status = get_repository_status(backtrace) for idx, col in enumerate(repo_status.keys()): dataframe.insert(idx, col, repo_status[col]) @@ -420,6 +450,14 @@ def parse_arguments(): help="Determines whether to output results to a CSV file. If no location is provided, a temporary location is automatically generated.", ) + parser.add_argument( + "--backtrace", + type=str, + choices=["True", "False"], + default="False", + help="Specify whether to perform backtrace benchmarking. Use 'True' to enable or 'False' to disable it.", + ) + return parser.parse_args() @@ -450,7 +488,9 @@ def main(): # The 'BENCHMARK_AND_PUBLISH' environment variable should be set to 'true' only # in specific Kokoro sessions. if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": - publish_to_bigquery(benchmark_metrics, args.notebook) + publish_to_bigquery( + benchmark_metrics, args.notebook, backtrace=(args.backtrace == "True") + ) # If the 'GCLOUD_BENCH_PUBLISH_PROJECT' environment variable is set, publish the # benchmark metrics to a specified BigQuery table in the provided project. This is # intended for local testing where the default behavior is not to publish results. diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 887d54dba2..7cc5207d6b 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
import argparse +import inspect import time import bigframes @@ -94,9 +95,18 @@ def _str_to_bool(value): def _initialize_session(ordered: bool): - context = bigframes.BigQueryOptions( - location="US", ordering_mode="strict" if ordered else "partial" - ) + options_signature = inspect.signature(bigframes.BigQueryOptions.__init__) + if "ordering_mode" in options_signature.parameters: + context = bigframes.BigQueryOptions( + location="US", ordering_mode="strict" if ordered else "partial" + ) + # Older versions of bigframes + elif "_strictly_ordered" in options_signature.parameters: + context = bigframes.BigQueryOptions(location="US", _strictly_ordered=ordered) # type: ignore + elif not ordered: + raise ValueError("Unordered mode not supported") + else: + context = bigframes.BigQueryOptions(location="US") session = bigframes.Session(context=context) print(f"Initialized {'ordered' if ordered else 'unordered'} session.") return session From 73e9f8b26bc1adca01e6158e8f4be39647cf14ea Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Fri, 4 Oct 2024 00:44:49 +0000 Subject: [PATCH 2/4] add column of error traceback. 
--- scripts/run_and_publish_benchmark.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 25c7bae2c4..37a78ccda4 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -21,6 +21,7 @@ import subprocess import sys import tempfile +import traceback from typing import Dict, List, Tuple, Union import numpy as np @@ -74,8 +75,10 @@ def run_benchmark_subprocess(args, log_env_name_var, file_path=None, region=None if file.suffix != ".backup": print(f"Benchmark failed, deleting: {file}") file.unlink() + error_file = directory / f"{pathlib.Path(file_path).name}.error" - error_file.touch() + with error_file.open("w") as f: + f.write(traceback.format_exc()) def collect_benchmark_result( @@ -84,7 +87,7 @@ def collect_benchmark_result( """Generate a DataFrame report on HTTP queries, bytes processed, slot time and execution time from log files.""" path = pathlib.Path(benchmark_path) try: - results_dict: Dict[str, List[Union[int, float, None]]] = {} + results_dict: Dict[str, List[Union[int, float, str, None]]] = {} bytes_files = sorted(path.rglob("*.bytesprocessed")) millis_files = sorted(path.rglob("*.slotmillis")) bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds")) @@ -101,6 +104,18 @@ def collect_benchmark_result( "Mismatch in the number of report files for bytes, millis, and seconds." 
) + for error_file in error_files: + filename = error_file.relative_to(path).with_suffix("") + with open(error_file, "r") as file: + results_dict[str(filename)] = [ + None, + None, + None, + None, + None, + file.read().strip(), + ] + for idx in range(len(bytes_files)): bytes_file = bytes_files[idx] millis_file = millis_files[idx] @@ -143,6 +158,7 @@ def collect_benchmark_result( total_slot_millis, local_seconds, bq_seconds, + None, ] finally: for files_to_remove in ( @@ -161,6 +177,7 @@ def collect_benchmark_result( "Slot_Millis", "Local_Execution_Time_Sec", "BigQuery_Execution_Time_Sec", + "Error", ] benchmark_metrics = pd.DataFrame.from_dict( From 587a32fd304ee6249aa1f64702055c809cf356de Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Fri, 4 Oct 2024 19:35:28 +0000 Subject: [PATCH 3/4] update comment --- scripts/run_and_publish_benchmark.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 37a78ccda4..20b6b0996a 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -296,6 +296,9 @@ def get_repository_status(backtrace: bool = False): if backtrace: git_hash = "benchmark_backtrace" + # We use the PyPI release time for backtrace benchmarks to align with the version's + # release date. This ensures that combining backtrace data with regular benchmark + # results won't affect time-based analysis. benchmark_start_time = get_pypi_release_time("bigframes", bigframes_version) else: git_hash = subprocess.check_output( From e150ae88457e0fd3006fa40642dbeaf5df3529f5 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 10 Oct 2024 00:31:07 +0000 Subject: [PATCH 4/4] import local third-party benchmarks. 
--- tests/benchmark/db_benchmark/groupby/q1.py | 4 +++- tests/benchmark/db_benchmark/groupby/q10.py | 4 +++- tests/benchmark/db_benchmark/groupby/q2.py | 4 +++- tests/benchmark/db_benchmark/groupby/q3.py | 4 +++- tests/benchmark/db_benchmark/groupby/q4.py | 4 +++- tests/benchmark/db_benchmark/groupby/q5.py | 4 +++- tests/benchmark/db_benchmark/groupby/q6.py | 4 +++- tests/benchmark/db_benchmark/groupby/q7.py | 4 +++- tests/benchmark/db_benchmark/groupby/q8.py | 4 +++- tests/benchmark/db_benchmark/join/q1.py | 5 +++-- tests/benchmark/db_benchmark/join/q2.py | 5 +++-- tests/benchmark/db_benchmark/join/q3.py | 5 +++-- tests/benchmark/db_benchmark/join/q4.py | 5 +++-- tests/benchmark/db_benchmark/join/q5.py | 5 +++-- tests/benchmark/db_benchmark/sort/q1.py | 4 +++- tests/benchmark/tpch/q1.py | 2 +- tests/benchmark/tpch/q10.py | 2 +- tests/benchmark/tpch/q11.py | 2 +- tests/benchmark/tpch/q12.py | 2 +- tests/benchmark/tpch/q13.py | 2 +- tests/benchmark/tpch/q14.py | 2 +- tests/benchmark/tpch/q15.py | 2 +- tests/benchmark/tpch/q16.py | 2 +- tests/benchmark/tpch/q17.py | 2 +- tests/benchmark/tpch/q18.py | 2 +- tests/benchmark/tpch/q19.py | 2 +- tests/benchmark/tpch/q2.py | 2 +- tests/benchmark/tpch/q20.py | 2 +- tests/benchmark/tpch/q21.py | 2 +- tests/benchmark/tpch/q22.py | 2 +- tests/benchmark/tpch/q3.py | 2 +- tests/benchmark/tpch/q4.py | 2 +- tests/benchmark/tpch/q5.py | 2 +- tests/benchmark/tpch/q6.py | 2 +- tests/benchmark/tpch/q7.py | 2 +- tests/benchmark/tpch/q8.py | 2 +- tests/benchmark/tpch/q9.py | 2 +- tests/benchmark/utils.py | 23 +++++++++++++++++++++ 38 files changed, 90 insertions(+), 42 deletions(-) diff --git a/tests/benchmark/db_benchmark/groupby/q1.py b/tests/benchmark/db_benchmark/groupby/q1.py index dc86817908..8215941171 100644 --- a/tests/benchmark/db_benchmark/groupby/q1.py +++ b/tests/benchmark/db_benchmark/groupby/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as 
vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q1, diff --git a/tests/benchmark/db_benchmark/groupby/q10.py b/tests/benchmark/db_benchmark/groupby/q10.py index 99d28e2f9a..cba6794f80 100644 --- a/tests/benchmark/db_benchmark/groupby/q10.py +++ b/tests/benchmark/db_benchmark/groupby/q10.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q10, diff --git a/tests/benchmark/db_benchmark/groupby/q2.py b/tests/benchmark/db_benchmark/groupby/q2.py index b06a4189fe..a4df1256eb 100644 --- a/tests/benchmark/db_benchmark/groupby/q2.py +++ b/tests/benchmark/db_benchmark/groupby/q2.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q2, diff --git a/tests/benchmark/db_benchmark/groupby/q3.py b/tests/benchmark/db_benchmark/groupby/q3.py index 
d66dd7b39d..2a88c24c87 100644 --- a/tests/benchmark/db_benchmark/groupby/q3.py +++ b/tests/benchmark/db_benchmark/groupby/q3.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q3, diff --git a/tests/benchmark/db_benchmark/groupby/q4.py b/tests/benchmark/db_benchmark/groupby/q4.py index 6c72069a53..883407579b 100644 --- a/tests/benchmark/db_benchmark/groupby/q4.py +++ b/tests/benchmark/db_benchmark/groupby/q4.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q4, diff --git a/tests/benchmark/db_benchmark/groupby/q5.py b/tests/benchmark/db_benchmark/groupby/q5.py index 3e6db9783e..4c26a14303 100644 --- a/tests/benchmark/db_benchmark/groupby/q5.py +++ b/tests/benchmark/db_benchmark/groupby/q5.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + 
"bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q5, diff --git a/tests/benchmark/db_benchmark/groupby/q6.py b/tests/benchmark/db_benchmark/groupby/q6.py index f763280b5b..e0b3416731 100644 --- a/tests/benchmark/db_benchmark/groupby/q6.py +++ b/tests/benchmark/db_benchmark/groupby/q6.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q6, diff --git a/tests/benchmark/db_benchmark/groupby/q7.py b/tests/benchmark/db_benchmark/groupby/q7.py index 4e7f2d58b6..2c27a9a9da 100644 --- a/tests/benchmark/db_benchmark/groupby/q7.py +++ b/tests/benchmark/db_benchmark/groupby/q7.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q7, diff --git a/tests/benchmark/db_benchmark/groupby/q8.py b/tests/benchmark/db_benchmark/groupby/q8.py index 75d5dcaa0c..89399b5484 100644 --- a/tests/benchmark/db_benchmark/groupby/q8.py +++ b/tests/benchmark/db_benchmark/groupby/q8.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.groupby_queries as 
vendored_dbbenchmark_groupby_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_groupby_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.groupby_queries" + ) utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q8, diff --git a/tests/benchmark/db_benchmark/join/q1.py b/tests/benchmark/db_benchmark/join/q1.py index 4ca0ee3389..275662bfa2 100644 --- a/tests/benchmark/db_benchmark/join/q1.py +++ b/tests/benchmark/db_benchmark/join/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q1, diff --git a/tests/benchmark/db_benchmark/join/q2.py b/tests/benchmark/db_benchmark/join/q2.py index 19efd6fbf2..910463e3b9 100644 --- a/tests/benchmark/db_benchmark/join/q2.py +++ b/tests/benchmark/db_benchmark/join/q2.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q2, diff --git a/tests/benchmark/db_benchmark/join/q3.py b/tests/benchmark/db_benchmark/join/q3.py index d0a931bfb2..fb28fe0841 100644 --- 
a/tests/benchmark/db_benchmark/join/q3.py +++ b/tests/benchmark/db_benchmark/join/q3.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q3, diff --git a/tests/benchmark/db_benchmark/join/q4.py b/tests/benchmark/db_benchmark/join/q4.py index ebd7c461d0..37da03f0f9 100644 --- a/tests/benchmark/db_benchmark/join/q4.py +++ b/tests/benchmark/db_benchmark/join/q4.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) utils.get_execution_time( vendored_dbbenchmark_join_queries.q4, diff --git a/tests/benchmark/db_benchmark/join/q5.py b/tests/benchmark/db_benchmark/join/q5.py index 7114acd408..0867befee5 100644 --- a/tests/benchmark/db_benchmark/join/q5.py +++ b/tests/benchmark/db_benchmark/join/q5.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": ( @@ -25,8 +24,10 @@ session, suffix, ) = utils.get_configuration(include_table_id=True) - current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_join_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.join_queries" + ) 
utils.get_execution_time( vendored_dbbenchmark_join_queries.q5, diff --git a/tests/benchmark/db_benchmark/sort/q1.py b/tests/benchmark/db_benchmark/sort/q1.py index 5f6c404443..772d577b7f 100644 --- a/tests/benchmark/db_benchmark/sort/q1.py +++ b/tests/benchmark/db_benchmark/sort/q1.py @@ -15,7 +15,6 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.db_benchmark.sort_queries as vendored_dbbenchmark_sort_queries if __name__ == "__main__": ( @@ -26,6 +25,9 @@ suffix, ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() + vendored_dbbenchmark_sort_queries = utils.import_local_module( + "bigframes_vendored.db_benchmark.sort_queries" + ) utils.get_execution_time( vendored_dbbenchmark_sort_queries.q1, diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py index a672103931..b21894cffa 100644 --- a/tests/benchmark/tpch/q1.py +++ b/tests/benchmark/tpch/q1.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q1 = utils.import_local_module("bigframes_vendored.tpch.queries.q1") utils.get_execution_time( vendored_tpch_q1.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py index d468a90156..69fc8d0806 100644 --- a/tests/benchmark/tpch/q10.py +++ b/tests/benchmark/tpch/q10.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q10 as vendored_tpch_q10 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q10 = utils.import_local_module("bigframes_vendored.tpch.queries.q10") utils.get_execution_time( vendored_tpch_q10.q, 
current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py index dbf3fd94de..1f8eb1acdb 100644 --- a/tests/benchmark/tpch/q11.py +++ b/tests/benchmark/tpch/q11.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q11 = utils.import_local_module("bigframes_vendored.tpch.queries.q11") utils.get_execution_time( vendored_tpch_q11.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py index 57774457ae..a2ec72ec1a 100644 --- a/tests/benchmark/tpch/q12.py +++ b/tests/benchmark/tpch/q12.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q12 = utils.import_local_module("bigframes_vendored.tpch.queries.q12") utils.get_execution_time( vendored_tpch_q12.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q13.py b/tests/benchmark/tpch/q13.py index a7f2780e4b..366ee171ac 100644 --- a/tests/benchmark/tpch/q13.py +++ b/tests/benchmark/tpch/q13.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q13 = utils.import_local_module("bigframes_vendored.tpch.queries.q13") utils.get_execution_time( vendored_tpch_q13.q, current_path, suffix, project_id, dataset_id, session diff --git 
a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py index e9599f3bd8..94115b9a3a 100644 --- a/tests/benchmark/tpch/q14.py +++ b/tests/benchmark/tpch/q14.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q14 = utils.import_local_module("bigframes_vendored.tpch.queries.q14") utils.get_execution_time( vendored_tpch_q14.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py index ff200384a8..907e5620d2 100644 --- a/tests/benchmark/tpch/q15.py +++ b/tests/benchmark/tpch/q15.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q15 = utils.import_local_module("bigframes_vendored.tpch.queries.q15") utils.get_execution_time( vendored_tpch_q15.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py index 69fc1b9523..7505df1a96 100644 --- a/tests/benchmark/tpch/q16.py +++ b/tests/benchmark/tpch/q16.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q16 = utils.import_local_module("bigframes_vendored.tpch.queries.q16") utils.get_execution_time( vendored_tpch_q16.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py index 
14707f4a93..d4142e2857 100644 --- a/tests/benchmark/tpch/q17.py +++ b/tests/benchmark/tpch/q17.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q17 = utils.import_local_module("bigframes_vendored.tpch.queries.q17") utils.get_execution_time( vendored_tpch_q17.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py index 54cf0d0432..32f9915ce4 100644 --- a/tests/benchmark/tpch/q18.py +++ b/tests/benchmark/tpch/q18.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q18 = utils.import_local_module("bigframes_vendored.tpch.queries.q18") utils.get_execution_time( vendored_tpch_q18.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py index 1ec44391ff..9bfa6c9755 100644 --- a/tests/benchmark/tpch/q19.py +++ b/tests/benchmark/tpch/q19.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q19 = utils.import_local_module("bigframes_vendored.tpch.queries.q19") utils.get_execution_time( vendored_tpch_q19.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q2.py b/tests/benchmark/tpch/q2.py index da8064b400..fdb0160391 100644 --- a/tests/benchmark/tpch/q2.py +++ 
b/tests/benchmark/tpch/q2.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q2 = utils.import_local_module("bigframes_vendored.tpch.queries.q2") utils.get_execution_time( vendored_tpch_q2.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py index 33e4f72ef6..7aee554f09 100644 --- a/tests/benchmark/tpch/q20.py +++ b/tests/benchmark/tpch/q20.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q20 = utils.import_local_module("bigframes_vendored.tpch.queries.q20") utils.get_execution_time( vendored_tpch_q20.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py index f73f87725f..0e151bfbbc 100644 --- a/tests/benchmark/tpch/q21.py +++ b/tests/benchmark/tpch/q21.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q21 = utils.import_local_module("bigframes_vendored.tpch.queries.q21") utils.get_execution_time( vendored_tpch_q21.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 0a6f6d923c..8828bfed19 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils 
as utils -import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q22 = utils.import_local_module("bigframes_vendored.tpch.queries.q22") utils.get_execution_time( vendored_tpch_q22.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 92322eea21..361755a168 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q3 = utils.import_local_module("bigframes_vendored.tpch.queries.q3") utils.get_execution_time( vendored_tpch_q3.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index 2d6931d6b1..6029767755 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q4 = utils.import_local_module("bigframes_vendored.tpch.queries.q4") utils.get_execution_time( vendored_tpch_q4.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index e8fd83e193..3362101f6e 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": 
project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q5 = utils.import_local_module("bigframes_vendored.tpch.queries.q5") utils.get_execution_time( vendored_tpch_q5.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index 152d6c663e..64b9f674bd 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q6 = utils.import_local_module("bigframes_vendored.tpch.queries.q6") utils.get_execution_time( vendored_tpch_q6.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 1c3e455e1c..edd00df334 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q7 as vendored_tpch_q7 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() + vendored_tpch_q7 = utils.import_local_module("bigframes_vendored.tpch.queries.q7") utils.get_execution_time( vendored_tpch_q7.q, current_path, suffix, project_id, dataset_id, session diff --git a/tests/benchmark/tpch/q8.py b/tests/benchmark/tpch/q8.py index 8d23194834..b6c0e9ff93 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -14,11 +14,11 @@ import pathlib import benchmark.utils as utils -import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": project_id, dataset_id, session, suffix = utils.get_configuration() current_path = 
def import_local_module(module_name, base_path=None):
    """
    Dynamically import the latest benchmark scripts from a local directory.

    This lets the current repository's benchmark scripts be executed against
    arbitrary (e.g. older, pip-installed) versions of the libraries under
    test, so results are always produced by the most up-to-date scripts.

    Args:
        module_name: Dotted module path, e.g. "bigframes_vendored.tpch.queries.q2".
        base_path: Directory containing the module tree. Defaults to
            ``<current working directory>/third_party``, resolved at *call*
            time. (A ``pathlib.Path.cwd()`` expression in the signature would
            be frozen at import time instead — the CWD when utils.py is first
            imported — which silently breaks callers that chdir before use.)

    Returns:
        The freshly executed module object.

    Raises:
        ImportError: If the module file does not exist under ``base_path`` or
            an import spec cannot be created for it.
    """
    # Resolve the default here so the CWD is sampled when the function runs.
    if base_path is None:
        base_path = pathlib.Path.cwd() / "third_party"

    relative_path = pathlib.Path(*module_name.split("."))
    module_file_path = base_path / relative_path.with_suffix(".py")

    # Fail fast with a clear error instead of a FileNotFoundError deep
    # inside exec_module when the file is missing.
    if not module_file_path.is_file():
        raise ImportError(
            f"Cannot load module {module_name}: no file at {module_file_path}"
        )

    spec = importlib.util.spec_from_file_location(module_name, module_file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {module_name} from {base_path}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    return module