Merge pull request #235 from oqc-community/feature/lc/benchmarking_workflow

Performance regression checks: workflow
lcauser-oqc authored Nov 18, 2024
2 parents 320ea30 + 015a90c commit 11349b3
Showing 6 changed files with 277 additions and 30 deletions.
124 changes: 124 additions & 0 deletions .github/workflows/benchmarking-regression.yml
@@ -0,0 +1,124 @@
name: Performance regression tests

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

permissions:
  contents: write
  actions: write
  pull-requests: write

run-name: Performance regression tests from ${{ github.ref }}

jobs:
  benchmarking:
    name: Benchmarking
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python: ["3.10"]
    defaults:
      run:
        shell: bash

    steps:
      - name: Set up Python 3.x
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true

      - name: Load cached venv
        id: cached-pip-wheels
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      - uses: actions/checkout@v4
        with:
          ref: main
      - name: Install dependencies
        run: poetry install --sync

      - name: Benchmarking on main
        run: poetry run pytest benchmarks/run.py --benchmark-only --benchmark-save="benchmark"

      - uses: actions/checkout@v4
        with:
          clean: false

      - name: Install dependencies
        run: poetry install --sync

      - name: Benchmarking on branch
        run: poetry run pytest benchmarks/run.py --benchmark-only --benchmark-save="benchmark" --benchmark-compare --benchmark-compare-fail=min:50%

      - name: Generate report
        id: generate-report
        if: always()
        shell: bash
        run: |
          poetry run python benchmarks/generate_report.py "benchmark" ".benchmarks/summary.md"
          SUMMARY=$(cat .benchmarks/summary.md)
          echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
          {
            echo 'markdown-summary<<EOF'
            echo "$SUMMARY"
            echo EOF
          } >> "$GITHUB_OUTPUT"

      - name: Update PR
        if: github.event_name == 'pull_request' && always()
        uses: actions/github-script@v7
        env:
          SUMMARY: ${{ steps.generate-report.outputs.markdown-summary }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const maxGitHubBodyCharacters = 65536;
            const output = process.env.SUMMARY + `
            *Pusher: @${{ github.actor }}, Action: \`${{ github.event_name }}\`, Workflow: \`${{ github.workflow }}\`*`;
            const {data: comments} = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.payload.number,
            })
            const botComment = comments.find(
              comment => comment.user.id === 41898282 &&
              comment.body.includes("Performance Regression Tests")
            )
            if (context.payload.pull_request.head.repo.full_name !== 'oqc-community/qat') {
              console.log('Not attempting to write comment on fork.');
            } else {
              if (botComment) {
                await github.rest.issues.updateComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  comment_id: botComment.id,
                  body: output
                })
              } else {
                await github.rest.issues.createComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  issue_number: context.payload.number,
                  body: output
                })
              }
            }
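
The workflow above benchmarks `main` first, then the PR branch, and lets pytest-benchmark fail the job if any benchmark's minimum time regresses by more than 50%. As a rough illustration only, the same two-pass comparison could be reproduced locally with a sketch like the one below (assumptions: a checkout of the repository root on a feature branch, with git, poetry and the project's dev dependencies available).

```python
import subprocess


def run(cmd: list[str]) -> None:
    # Echo each command and stop at the first failure.
    print("+", " ".join(cmd))
    subprocess.run(cmd, check=True)


# 1. Benchmark main; pytest-benchmark saves the results as .benchmarks/<env>/0001_benchmark.json.
run(["git", "checkout", "main"])
run(["poetry", "install", "--sync"])
run(["poetry", "run", "pytest", "benchmarks/run.py",
     "--benchmark-only", "--benchmark-save=benchmark"])

# 2. Benchmark the feature branch (saved as 0002_benchmark.json) and compare against the
#    previous run, failing if any minimum time regresses by more than 50%.
run(["git", "checkout", "-"])
run(["poetry", "install", "--sync"])
run(["poetry", "run", "pytest", "benchmarks/run.py",
     "--benchmark-only", "--benchmark-save=benchmark",
     "--benchmark-compare", "--benchmark-compare-fail=min:50%"])

# 3. Render the markdown summary that the CI job posts to the pull request.
run(["poetry", "run", "python", "benchmarks/generate_report.py",
     "benchmark", ".benchmarks/summary.md"])
```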
142 changes: 142 additions & 0 deletions benchmarks/generate_report.py
@@ -0,0 +1,142 @@
import argparse
import json
import os
from pathlib import Path

import numpy as np
from jinja2 import Environment, FileSystemLoader, select_autoescape


def get_directory(dir):
    """
    The save directory for benchmarks depends on the environment: determine it.
    """
    subdir = [name for name in os.listdir(dir) if os.path.isdir(dir + name)]
    return dir + subdir[0]


def round_sf(x, sf=4):
    """
    Round x to sf significant figures.
    """
    return np.round(x, -int(np.floor(np.log10(abs(x)))) + sf - 1)


def compare_tests(
    warn_threshold=1.2,
    fail_threshold=1.5,
    improve_threshold=0.9,
    benchmark_name="benchmark",
    return_successes=False,
    return_improvements=True,
    dir=".benchmarks/",
):
    """
    Generate a dictionary of tests that contains the key information for the report.
    """
    # load in the two benchmarks
    dir = get_directory(dir)
    with open(f"{dir}/0001_{benchmark_name}.json", "r") as f:
        benchmark_before = json.load(f)["benchmarks"]
    with open(f"{dir}/0002_{benchmark_name}.json", "r") as f:
        benchmark_after = json.load(f)["benchmarks"]

    # compare the benchmarks
    benchmarks = {}
    for bm_after in benchmark_after:
        name = bm_after["name"]
        data = {
            "min_after": round_sf(bm_after["stats"]["min"]),
            "min_before": "-",
            "rel_diff": "-",
            "outcome": "success",
        }
        for bm_before in benchmark_before:
            if bm_before["name"] == name:
                data["min_before"] = round_sf(bm_before["stats"]["min"])
                data["rel_diff"] = round_sf(
                    bm_after["stats"]["min"] / bm_before["stats"]["min"]
                )
                if data["rel_diff"] > fail_threshold:
                    data["outcome"] = "fail"
                elif data["rel_diff"] > warn_threshold:
                    data["outcome"] = "warning"
                elif data["rel_diff"] < improve_threshold:
                    data["outcome"] = "improvement"
                break

        # if not warn/fail, decide if it should be added
        if not (data["outcome"] == "success" and return_successes == False) and not (
            data["outcome"] == "improvement" and return_improvements == False
        ):
            benchmarks[name] = data
    return benchmarks


def create_report(
    benchmarks,
    input_path="benchmarks/report_template.md",
    output_path=".benchmarks/summary.md",
):
    """
    Create the report using the template
    """
    env = Environment(loader=FileSystemLoader("."), autoescape=select_autoescape())
    template = env.get_template(input_path)
    summary = template.render(tests=benchmarks)
    Path(output_path).write_text(summary)


def get_args():
    """
    Allow arguments to be passed to the program.
    """

    parser = argparse.ArgumentParser(
        prog="Performance regression report",
        description="Generates a report that compares performance between two benchmarks.",
    )
    parser.add_argument("benchmark_name")
    parser.add_argument("output_path")
    parser.add_argument(
        "--template",
        default="benchmarks/report_template.md",
        type=str,
        help="Path to the template file",
    )
    parser.add_argument(
        "--warn_threshold", default=1.2, type=float, help="Slow-down needed to warn"
    )
    parser.add_argument(
        "--fail_threshold", default=1.5, type=float, help="Slow-down needed to fail"
    )
    parser.add_argument(
        "--improvement",
        default=True,
        action=argparse.BooleanOptionalAction,  # accepts --improvement / --no-improvement
        help="Should performance improvements be shown",
    )
    parser.add_argument(
        "--improvement_threshold",
        default=0.9,
        type=float,
        help="The speed-up needed to report an improvement",
    )
    parser.add_argument(
        "--success",
        default=False,
        action=argparse.BooleanOptionalAction,
        help="Should successes be shown (tests that do not give a performance regression)",
    )

    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()
    report = compare_tests(
        args.warn_threshold,
        args.fail_threshold,
        args.improvement_threshold,
        args.benchmark_name,
        args.success,
        args.improvement,
    )
    create_report(report, input_path=args.template, output_path=args.output_path)
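
For programmatic use, the comparison and rendering above can also be driven directly from Python. A minimal sketch, assuming the `benchmarks` directory is importable from the repository root and that two saved runs (`0001_benchmark.json` and `0002_benchmark.json`) already exist under `.benchmarks/`:

```python
from benchmarks.generate_report import compare_tests, create_report

# Compare the two saved pytest-benchmark runs using the script's default thresholds:
# warn at a 1.2x slow-down, fail at 1.5x, and flag improvements faster than 0.9x.
results = compare_tests(
    warn_threshold=1.2,
    fail_threshold=1.5,
    improve_threshold=0.9,
    benchmark_name="benchmark",
    return_successes=False,
    return_improvements=True,
)

# Render the Jinja2 template into the markdown summary consumed by the workflow.
# Note: create_report loads templates relative to the current working directory,
# so this must run from the repository root.
create_report(
    results,
    input_path="benchmarks/report_template.md",
    output_path=".benchmarks/summary.md",
)
```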
8 changes: 8 additions & 0 deletions benchmarks/report_template.md
@@ -0,0 +1,8 @@
#### Performance Regression Tests
Performance changes detected in the following benchmarks: {% if not tests %} none {% else %}
| Test | Main Exec Time (s) | PR Exec Time (s) | Slow-down | Status |
| ------- | ------ | ------ | ------ | ------ |
{% for name, test in tests.items() -%}
| {{name}} | {{ test.min_before }} | {{test.min_after}} | {{test.rel_diff}}x | {% if test.outcome == "success" %} :white_check_mark: {% elif test.outcome == "warning" %} :warning: {% elif test.outcome == "improvement" %} :rocket: {% else %} :x: {% endif %} |
{% endfor %}
{% endif %}
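
To preview what the template renders, a small sketch that feeds it a single entry in the shape produced by `compare_tests` (the test name and timings here are purely hypothetical; jinja2 must be installed and the snippet run from the repository root):

```python
from jinja2 import Environment, FileSystemLoader, select_autoescape

# Hypothetical entry shaped like the output of compare_tests().
tests = {
    "test_example_benchmark": {
        "min_before": 0.1234,
        "min_after": 0.1975,
        "rel_diff": 1.6,
        "outcome": "fail",
    }
}

# Mirrors create_report(): load the template relative to the current directory and render it.
env = Environment(loader=FileSystemLoader("."), autoescape=select_autoescape())
template = env.get_template("benchmarks/report_template.md")
print(template.render(tests=tests))
```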
17 changes: 0 additions & 17 deletions benchmarks/run.py
@@ -1,5 +1,3 @@
-import time
-
 import pytest
 
 from benchmarks.utils.models import get_mock_live_hardware
@@ -57,18 +55,3 @@ def run():
 
     benchmark(run)
     assert True
-
-
-@pytest.mark.benchmark(disable_gc=True, max_time=2, min_rounds=10)
-def test_regression_report_warn(benchmark):
-    benchmark(time.sleep, 0.1)
-
-
-@pytest.mark.benchmark(disable_gc=True, max_time=2, min_rounds=10)
-def test_regression_report_fail(benchmark):
-    benchmark(time.sleep, 0.1)
-
-
-@pytest.mark.benchmark(disable_gc=True, max_time=2, min_rounds=10)
-def test_regression_report_improvement(benchmark):
-    benchmark(time.sleep, 0.1)
15 changes: 2 additions & 13 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -60,6 +60,7 @@ isort = "^5.13.2"
 docplex = "^2.21.207"
 pre-commit = "^3.2.0"
 pytest-benchmark = "^4.0.0"
+jinja2 = "^3.1.4"
 
 [tool.poetry.group.licenses]
 optional = true
