6 changes: 6 additions & 0 deletions pyperf/__main__.py
@@ -113,6 +113,9 @@ def cpu_affinity(cmd):
cmd.add_argument("--table-format", type=str, default="rest",
choices=["rest", "md"],
help="Format of table rendering")
cmd.add_argument("--extra-metadata", type=str,
help="Comma-separated metadata keys to include in comparison output")

input_filenames(cmd)

# stats
@@ -389,6 +392,9 @@ def cmd_compare_to(args):
from pyperf._compare import compare_suites, CompareError

data = load_benchmarks(args)
if getattr(args, "extra_metadata", None):
display_title("Benchmark")
print()
if data.get_nsuite() < 2:
print("ERROR: need at least two benchmark files")
sys.exit(1)
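
For orientation, a minimal sketch (not part of the PR) of how the raw flag value is consumed: argparse stores the comma-separated string as-is, and CompareSuites in pyperf/_compare.py below splits it into individual metadata keys.

```python
# Illustrative only: "os, cpu" stands in for a user-supplied --extra-metadata value.
raw = "os, cpu"
keys = [key.strip() for key in raw.split(",")] if raw else []
assert keys == ["os", "cpu"]
```
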
85 changes: 76 additions & 9 deletions pyperf/_compare.py
@@ -54,7 +54,7 @@ def get_tags_for_result(result):


class CompareResult:
def __init__(self, ref, changed, min_speed=None):
def __init__(self, ref, changed, min_speed=None, extra_metadata=None):
# CompareData object
self.ref = ref
# CompareData object
@@ -63,6 +63,7 @@ def __init__(self, ref, changed, min_speed=None):
self._significant = None
self._t_score = None
self._norm_mean = None
self.extra_metadata = extra_metadata or []

def __repr__(self):
return '<CompareResult ref=%r changed=%r>' % (self.ref, self.changed)
@@ -110,21 +111,37 @@ def oneliner(self, verbose=True, show_name=True, check_significant=True):

ref_text = format_result_value(self.ref.benchmark)
chg_text = format_result_value(self.changed.benchmark)

if verbose:
if show_name:
ref_text = "[%s] %s" % (self.ref.name, ref_text)
chg_text = "[%s] %s" % (self.changed.name, chg_text)
if (self.ref.benchmark.get_nvalue() > 1
or self.changed.benchmark.get_nvalue() > 1):

if (self.ref.benchmark.get_nvalue() > 1
or self.changed.benchmark.get_nvalue() > 1):
text = "Mean +- std dev: %s -> %s" % (ref_text, chg_text)
else:
text = "%s -> %s" % (ref_text, chg_text)
else:
text = "%s -> %s" % (ref_text, chg_text)

# normalized mean
text = "%s: %s" % (text, format_normalized_mean(self.norm_mean))
return text

# Extra metadata support
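# e.g. --extra-metadata=os,cpu would append something like
# " [os=linux, os=linux, cpu=amd, cpu=intel]" (illustrative values;
# each key contributes the ref value followed by the changed value)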
if self.extra_metadata:
ref_meta = self.ref.benchmark.get_metadata()
chg_meta = self.changed.benchmark.get_metadata()
meta_parts = []
for key in self.extra_metadata:
if key in ref_meta:
meta_parts.append(f"{key}={ref_meta[key]}")
if key in chg_meta:
meta_parts.append(f"{key}={chg_meta[key]}")
if meta_parts:
text += " [" + ", ".join(meta_parts) + "]"
return text

def format(self, verbose=True, show_name=True):
text = self.oneliner(show_name=show_name, check_significant=False)
lines = [text]
@@ -225,14 +242,27 @@ class CompareError(Exception):
class CompareSuites:
def __init__(self, benchmarks, args):
self.benchmarks = benchmarks

self.extra_metadata = getattr(args, "extra_metadata", None)
if self.extra_metadata:
self.extra_metadata = [
key.strip() for key in self.extra_metadata.split(",")
]
else:
self.extra_metadata = []
self.table = args.table
self.table_format = args.table_format
self.min_speed = args.min_speed
self.group_by_speed = args.group_by_speed
self.verbose = args.verbose
self.quiet = args.quiet

grouped_by_name = self.benchmarks.group_by_name()
if not grouped_by_name:
raise CompareError("Benchmark suites have no benchmark in common")
@@ -262,7 +292,8 @@ def compare_benchmarks(self, name, benchmarks):

for item in benchmarks[1:]:
changed = CompareData(item.filename, item.benchmark)
result = CompareResult(ref, changed, min_speed)
result = CompareResult(ref, changed, min_speed,
extra_metadata=self.extra_metadata)
results.append(result)

return results
@@ -280,46 +311,82 @@ def sort_key(results):

self.all_results.sort(key=sort_key)

# Build Headers
# Structure: [Benchmark] [Ref Name] [Ref Meta...] [Changed Name] [Changed Meta...] ...
headers = ['Benchmark', self.all_results[0][0].ref.name]

# Add Reference Metadata Headers
for key in self.extra_metadata:
headers.append(key)

# Add Changed Metadata Headers
for item in self.all_results[0]:
headers.append(item.changed.name)
for key in self.extra_metadata:
headers.append(key)

all_norm_means = [[] for _ in range(len(headers[2:]))]
# Initialize storage for geometric mean calculation
# We assume 1 normalized mean per changed benchmark
num_changed_benchmarks = len(self.all_results[0])
all_norm_means = [[] for _ in range(num_changed_benchmarks)]

rows = []
not_significant = []
for results in all_results:
row = [results.name]

# Reference Data
ref_bench = results[0].ref.benchmark
text = ref_bench.format_value(ref_bench.mean())
row.append(text)

# Reference Metadata Values
for key in self.extra_metadata:
value = ref_bench.get_metadata().get(key, "-")
row.append(str(value))

significants = []
for index, result in enumerate(results):
bench = result.changed.benchmark
significant = result.significant

# Comparison Result
if significant:
text = format_normalized_mean(result.norm_mean)
if not self.quiet:
text = "%s: %s" % (bench.format_value(bench.mean()), text)
else:
text = "not significant"

significants.append(significant)
all_norm_means[index].append(result.norm_mean)
row.append(text)

# Changed Metadata Values
for key in self.extra_metadata:
value = bench.get_metadata().get(key, "-")
row.append(str(value))

if any(significants):
rows.append(row)
else:
not_significant.append(results.name)

# Geometric Mean Row
# only compute the geometric mean if there is at least two benchmarks
# and if at least one is signicant.
# and if at least one is significant (which means rows is not empty)
if len(all_norm_means[0]) > 1 and rows:
row = ['Geometric mean', '(ref)']

# Empty slots for Reference Metadata (to align columns)
for _ in self.extra_metadata:
row.append('')

for norm_means in all_norm_means:
row.append(format_geometric_mean(norm_means))
# Empty slots for Changed Metadata (to align columns)
for _ in self.extra_metadata:
row.append('')
rows.append(row)

if rows:
@@ -491,4 +558,4 @@ def timeit_compare_benchs(name1, bench1, name2, bench2, args):
print(line)
else:
line = compare.oneliner()
print(line)
print(line)
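
To make the table layout above concrete, a minimal sketch (illustrative names and values, not output captured from the PR) of what the header/row assembly produces for --extra-metadata=os,cpu with one reference file and one changed file:

```python
# Column layout: benchmark name, ref mean, ref metadata..., changed result, changed metadata...
headers = ["Benchmark", "ref", "os", "cpu", "changed", "os", "cpu"]
row = ["bench", "1.00 sec", "linux", "amd", "1.05 sec: 1.05x slower", "linux", "intel"]
```
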
90 changes: 90 additions & 0 deletions pyperf/tests/test_extra_metadata.py
@@ -0,0 +1,90 @@
import json
import os
import subprocess
import sys
import uuid
import pyperf


def create_temp_benchmark(tmpdir, data):
Reviewer comment (Member):

I would prefer to reuse one of the existing benchmark files in pyperf/tests:

mult_list_py36.json
mult_list_py36_tags.json
mult_list_py37.json
mult_list_py37_tags.json
mult_list_py38.json
telco.json
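
A minimal sketch of that suggestion (assuming the bundled mult_list_py36.json / mult_list_py37.json files record a python_version metadata entry, and reusing the run_command() helper defined further down in this file):

```python
import os.path

TESTDIR = os.path.dirname(__file__)


def test_compare_to_with_extra_metadata_fixtures():
    # Hypothetical variant that reuses bundled benchmark files instead of
    # generating temporary ones.
    ref = os.path.join(TESTDIR, "mult_list_py36.json")
    changed = os.path.join(TESTDIR, "mult_list_py37.json")
    stdout, stderr = run_command([
        "compare_to", "--extra-metadata=python_version", ref, changed,
    ])
    assert stderr == ""
    assert "python_version" in stdout
```
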

"""
Create a valid pyperf JSON benchmark file.

pyperf requires the structure:
{
"version": "1.0",
"benchmarks": [
{
"metadata": {...},
"runs": [...]
}
]
}
"""

# pyperf requires a benchmark name + unit
metadata = {
"name": "test_bench",
"unit": "second"
}
metadata.update(data.get("metadata", {}))

benchmark = {
"metadata": metadata,
"runs": data.get("runs", [])
}

suite = {
"version": "1.0",
"benchmarks": [benchmark]
}

path = os.path.join(tmpdir, f"bench_{uuid.uuid4().hex}.json")
with open(path, "w", encoding="utf-8") as f:
json.dump(suite, f)

return path


def run_command(cmd):
Reviewer comment (Member):

pyperf/tests/test_perf_cli.py already has a run_command() method.

proc = subprocess.Popen(
[sys.executable, "-m", "pyperf"] + cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
stdout, stderr = proc.communicate()
return stdout, stderr


def test_compare_to_with_extra_metadata(tmpdir):
Reviewer comment (Member):

Can you add a test to pyperf/tests/test_perf_cli.py instead?

# 1. Create benchmark files with metadata
bench1 = create_temp_benchmark(tmpdir, {
"metadata": {"os": "linux", "cpu": "amd"},
"runs": [{"values": [1.0]}]
})

bench2 = create_temp_benchmark(tmpdir, {
"metadata": {"os": "linux", "cpu": "intel"},
"runs": [{"values": [1.0]}]
})

# 2. Run compare_to
cmd = [
"compare_to",
"--extra-metadata=os,cpu",
bench1,
bench2,
]

stdout, stderr = run_command(cmd)

# 3. Assertions
assert stderr == ""
assert "os" in stdout
assert "cpu" in stdout
assert "linux" in stdout
assert "amd" in stdout
assert "intel" in stdout
assert "Benchmark" in stdout
7 changes: 7 additions & 0 deletions pyperf/tests/test_perf_cli.py
@@ -742,6 +742,13 @@ def test_hook(self):
assert metadata.get("_test_hook", 0) > 0
assert metadata.get("hooks", None) == "_test_hook"

def test_compare_to_extra_metadata(self):
ref_result = self.create_bench((1.0,), metadata={'name': 'bench', 'os': 'linux', 'cpu': 'amd'})
changed_result = self.create_bench((1.0,), metadata={'name': 'bench', 'os': 'linux', 'cpu': 'intel'})
stdout = self.compare('compare_to', ref_result, changed_result, '--extra-metadata=os,cpu')
self.assertIn('os=linux', stdout)
self.assertIn('cpu=amd', stdout)
self.assertIn('cpu=intel', stdout)

class TestConvert(BaseTestCase, unittest.TestCase):
def test_stdout(self):