diff --git a/benchmark/src/bench_overlap_parallel.py b/benchmark/src/bench_overlap_parallel.py
new file mode 100755
index 0000000..decd9c8
--- /dev/null
+++ b/benchmark/src/bench_overlap_parallel.py
@@ -0,0 +1,215 @@
+"""Benchmark overlap scalability of polars-bio across partition counts.
+
+For each enabled entry in ``test_cases`` the script times ``pb.overlap`` once per
+value of ``test_threads`` (set as ``datafusion.execution.target_partitions``) and
+times the pyranges join only for the single-partition pass.  Per-case timings are
+printed as a Rich table and written to ``results/<case name>.json``.
+
+The ``BENCH_DATA_ROOT`` environment variable must be set.
+"""
+
+import json
+import os
+import timeit
+
+import bioframe as bf
+import numpy as np
+import pandas as pd
+import pyranges as pr
+import pyranges1 as pr1
+from rich import print
+from rich.box import MARKDOWN
+from rich.table import Table
+
+import polars_bio as pb
+
+BENCH_DATA_ROOT = os.getenv("BENCH_DATA_ROOT")
+
+if BENCH_DATA_ROOT is None:
+    raise ValueError("BENCH_DATA_ROOT is not set")
+
+pb.ctx.set_option("datafusion.optimizer.repartition_joins", "true")
+
+columns = ("contig", "pos_start", "pos_end")
+
+test_threads = [1, 2, 4, 8, 16]
+
+# Dataset pairs to benchmark; uncomment an entry to enable it.
+test_cases = [
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/fBrain-DS14718/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/exons/*.parquet",
+    #     "name": "1-2",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/exons/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/ex-anno/*.parquet",
+    #     "name": "2-7",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/fBrain-DS14718/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+    #     "name": "1-0",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/ex-anno/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+    #     "name": "7-0",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/ex-anno/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainOrnAna1/*.parquet",
+    #     "name": "7-3",
+    # },
+    {
+        "df_path_1": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+        "df_path_2": f"{BENCH_DATA_ROOT}/ex-rna/*.parquet",
+        "name": "0-8",
+    },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/chainVicPac2/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/ex-rna/*.parquet",
+    #     "name": "4-8",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/ex-anno/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/ex-rna/*.parquet",
+    #     "name": "7-8",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/chainOrnAna1/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+    #     "name": "3-0",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainVicPac2/*.parquet",
+    #     "name": "0-4",
+    # },
+    # {
+    #     "df_path_1": f"{BENCH_DATA_ROOT}/chainRn4/*.parquet",
+    #     "df_path_2": f"{BENCH_DATA_ROOT}/chainXenTro3Link/*.parquet",
+    #     "name": "0-5",
+    # },
+]
+
+
+def df2pr0(df):
+    """Build a pyranges PyRanges from df's contig/pos_start/pos_end columns."""
+    return pr.PyRanges(
+        chromosomes=df.contig,
+        starts=df.pos_start,
+        ends=df.pos_end,
+    )
+
+
+def df2pr1(df):
+    """Build a pyranges1 PyRanges from df's contig/pos_start/pos_end columns."""
+    return pr1.PyRanges(
+        {
+            "Chromosome": df.contig,
+            "Start": df.pos_start,
+            "End": df.pos_end,
+        }
+    )
+
+
+def polars_bio(df_path_1, df_path_2):
+    """Run pb.overlap on both inputs, collect and count it; used for timing only."""
+    pb.overlap(df_path_1, df_path_2, col1=columns, col2=columns).collect().count()
+
+
+def pyranges0(df_1_pr0, df_2_pr0):
+    """Join two pyranges objects and take the length; used for timing only."""
+    len(df_1_pr0.join(df_2_pr0))
+
+
+functions = [
+    pyranges0,
+    polars_bio,
+]
+
+num_repeats = 3
+num_executions = 3
+
+# Every speedup is reported relative to the single-partition polars_bio run.
+baseline_name = f"{polars_bio.__name__}-1"
+
+os.makedirs("results", exist_ok=True)
+
+for t in test_cases:
+    results = []
+    df_1 = pd.read_parquet(t["df_path_1"].replace("*.parquet", ""), engine="pyarrow")
+    df_2 = pd.read_parquet(t["df_path_2"].replace("*.parquet", ""), engine="pyarrow")
+    df_1_pr0 = df2pr0(df_1)
+    df_2_pr0 = df2pr0(df_2)
+    for p in test_threads:
+        pb.ctx.set_option("datafusion.execution.target_partitions", str(p))
+        for func in functions:
+            if func is polars_bio:
+                args = (t["df_path_1"], t["df_path_2"])
+            elif func is pyranges0 and p == 1:
+                # The partition option is set on polars-bio's context only, so
+                # the pyranges join is timed once instead of once per p.
+                args = (df_1_pr0, df_2_pr0)
+            else:
+                continue
+            print(f"Running {func.__name__}...")
+            times = timeit.repeat(
+                lambda func=func, args=args: func(*args),
+                repeat=num_repeats,
+                number=num_executions,
+            )
+            # timeit returns the total for `number` executions; store per-run time.
+            per_run_times = [total / num_executions for total in times]
+            results.append(
+                {
+                    "name": f"{func.__name__}-{p}",
+                    "min": min(per_run_times),
+                    "max": max(per_run_times),
+                    "mean": float(np.mean(per_run_times)),
+                }
+            )
+
+    baseline_mean = next(
+        (r["mean"] for r in results if r["name"] == baseline_name), None
+    )
+    if baseline_mean is None:
+        raise RuntimeError(f"baseline run {baseline_name!r} was not measured")
+    for result in results:
+        result["speedup"] = baseline_mean / result["mean"]
+
+    # Create Rich table
+    table = Table(title="Benchmark Results", box=MARKDOWN)
+    table.add_column("Library", justify="left", style="cyan", no_wrap=True)
+    table.add_column("Min (s)", justify="right", style="green")
+    table.add_column("Max (s)", justify="right", style="green")
+    table.add_column("Mean (s)", justify="right", style="green")
+    table.add_column("Speedup", justify="right", style="magenta")
+
+    # Add rows to the table
+    for result in results:
+        table.add_row(
+            result["name"],
+            f"{result['min']:.6f}",
+            f"{result['max']:.6f}",
+            f"{result['mean']:.6f}",
+            f"{result['speedup']:.2f}x",
+        )
+
+    # Count the overlapping pairs with bioframe to record the output size.
+    benchmark_results = {
+        "inputs": {
+            "df_1_num": len(df_1),
+            "df_2_num": len(df_2),
+        },
+        "output_num": len(
+            bf.overlap(df_1, df_2, cols1=columns, cols2=columns, how="inner")
+        ),
+        "results": results,
+    }
+    print(t["name"])
+    print(json.dumps(benchmark_results, indent=4))
+    with open(f"results/{t['name']}.json", "w") as f:
+        json.dump(benchmark_results, f)
+    print(table)
diff --git a/docs/performance.md b/docs/performance.md index e891f3f..a3a6eb4 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -378,6 +378,12 @@ ##### L-size (7-8), output: 307, 184,634 +| Library | Min (s) | Max (s) | Mean (s) | Speedup | +|------------|-----------|-----------|-----------|-----------| +| bioframe | 51.923368 | 52.840132 | 52.354141 | 0.14x | +| polars_bio | 6.604371 | 7.975253 | 7.151908 | **1.00x** | +| pyranges0 | 41.702499 | 42.557826 | 42.027393 | **0.17x** | +| pyranges1 | 73.713501 | 76.161131 | 74.770918 | 0.10x | ### Google Axion @@ -407,12 +413,74 @@ ### Parallel execution
and scalability +#### Intel + +- cpu architecture: `x86_64` +- cpu name: `INTEL(R) XEON(R) PLATINUM 8581C CPU @ 2.30GHz` +- cpu cores: `16` +- memory: `118 GB` +- kernel: `#27~22.04.1-Ubuntu SMP Tue Jul 16 23:03:39 UTC 2024` +- system: `Linux` +- os-release: `Linux-6.5.0-1025-gcp-x86_64-with-glibc2.35` +- python: `3.12.8` +- polars-bio: `0.3.0` + +#### 0-8 (input: 2,350,965 and 9,944,559, output: 164,196,784) + +##### Apple Silicon +| Library | Min (s) | Max (s) | Mean (s) | Speedup | +|---------------|----------|----------|----------|-----------| +| pyranges0-1 | 9.331440 | 9.399316 | 9.358115 | 0.31x | +| polars_bio-1 | 2.810053 | 3.163260 | 2.935647 | **1.00x** | +| polars_bio-2 | 1.353191 | 1.422477 | 1.376621 | 2.13x | +| polars_bio-4 | 1.020456 | 1.029563 | 1.024929 | 2.86x | +| polars_bio-8 | 0.734393 | 0.738268 | 0.735762 | **3.99x** | + + + +##### Intel +| Library | Min (s) | Max (s) | Mean (s) | Speedup | +|---------------|-----------|-----------|-----------|-----------| +| pyranges0-1 | 22.856168 | 23.086879 | 22.958235 | 0.27x | +| polars_bio-1 | 5.935124 | 6.694116 | 6.203911 | **1.00x** | +| polars_bio-2 | 3.763082 | 3.913454 | 3.815991 | 1.63x | +| polars_bio-4 | 2.331916 | 2.358274 | 2.342218 | 2.65x | +| polars_bio-8 | 1.317331 | 1.326317 | 1.322318 | **4.69x** | + + + +#### 2-5 (input: 438,694 and 50,980,975, output: 52,395,369) + +##### Apple Silicon +| Library | Min (s) | Max (s) | Mean (s) | Speedup | +|---------------|-----------|-----------|-----------|-----------| +| pyranges0-1 | 11.836572 | 12.033881 | 11.943536 | 0.41x | +| polars_bio-1 | 4.878542 | 4.944363 | 4.912092 | **1.00x** | +| polars_bio-2 | 3.109014 | 3.113733 | 3.111639 | 1.58x | +| polars_bio-4 | 1.928374 | 1.944733 | 1.935807 | 2.54x | +| polars_bio-8 | 1.319147 | 1.334540 | 1.324507 | 3.71x | +| polars_bio-16 | 0.751453 | 0.758128 | 0.754517 | **6.51x** | + + +#### 2-6 (input: 438,694 and 128,186,542, output: 116,300,901) + +| Library | Min (s) | Max (s) | Mean (s) | Speedup 
| +|---------------|-----------|-----------|-----------|-----------| +| pyranges0-1 | 29.674772 | 31.891295 | 30.546541 | 0.37x | +| polars_bio-1 | 11.379310 | 11.423765 | 11.399042 | **1.00x** | +| polars_bio-2 | 7.134765 | 7.209546 | 7.163538 | 1.59x | +| polars_bio-4 | 4.409859 | 4.462592 | 4.429911 | 2.57x | +| polars_bio-8 | 3.069381 | 3.080261 | 3.073801 | 3.71x | +| polars_bio-16 | 1.698058 | 1.736596 | 1.717683 | **6.64x** | + + ### Native, Pandas, Polars performance comparison ## How to run the benchmarks ```bash poetry env use python3.12 poetry update +poetry shell RUSTFLAGS="-Ctarget-cpu=native" maturin develop --release -m Cargo.toml -poetry run python benchmark/src/bench_overlap.py +python benchmark/src/bench_overlap.py ``` \ No newline at end of file