6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
@@ -17,9 +17,9 @@ jobs:
           python-version: "3.11"
           cache: pip
       - name: Install dependencies
-        run: python -m pip install .[test]
-      - name: Run lints
-        run: ./lint.sh
+        run: python -m pip install .[test] pre-commit
+      - name: Run pre-commit
+        run: pre-commit run --all-files
 
   test:
     runs-on: ubuntu-latest
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: e05c5c0818279e5ac248ac9e954431ba58865e61 # frozen: v0.15.7
+    hooks:
+      - id: ruff-check
+        args: [--exit-non-zero-on-fix]
+      - id: ruff-format
+        args: [--exit-non-zero-on-fix]
+
+  - repo: local
+    hooks:
+      - id: pyright
+        name: pyright
+        entry: python -m pyright bench_runner
+        language: system
+        pass_filenames: false
+        types: [python]
+        stages: [pre-commit]
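A note on the config above: the pyright hook is declared with `repo: local` and `language: system`, so pre-commit does not manage a pyright install of its own; `python -m pyright` must resolve in whatever environment runs the hook (in CI it is presumably pulled in by `pip install .[test]`). A minimal local sketch, assuming pyright ships with the project's test extras:

```sh
# Sketch only; package extras and hook ids are taken from the diff above.
pip install .[test] pre-commit
pre-commit install                    # register the git pre-commit hook
pre-commit run pyright --all-files    # exercise just the local pyright hook
```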
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,19 @@
 ## Unreleased
 
+### Migrate to ruff and add pre-commit
+
+`black` and `flake8` have been replaced with `ruff` (lint + format). A
+`.pre-commit-config.yaml` is now included so contributors can install
+pre-commit hooks to catch lint, format, and `pyright` type errors before
+pushing. CI runs the same checks via `pre-commit run --all-files`.
+
+To set up locally:
+
+```sh
+pip install pre-commit
+pre-commit install
+```
+
 ### Bugfixes
 
 #### Use PGO on weekly builds
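As the changelog entry notes, this replaces the old `./lint.sh` step end to end: the same checks run locally via the hooks and in CI via `pre-commit run --all-files`. A rough sketch of the equivalent direct commands (assuming ruff's settings live in the repository config, e.g. `pyproject.toml`):

```sh
# Illustrative equivalents of the replaced tools; paths are assumptions.
ruff check .                  # linting, in place of flake8
ruff format .                 # formatting, in place of black
pre-commit run --all-files    # everything CI checks, including pyright
```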
4 changes: 3 additions & 1 deletion bench_runner/__main__.py
@@ -33,7 +33,9 @@
 
         init()
     except Exception:
-        sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")
+        sys.stderr.write(
+            "pytest-cov: Failed to setup subprocess coverage."
+        )
 
 command = len(sys.argv) >= 2 and sys.argv[1] or ""
 
25 changes: 18 additions & 7 deletions bench_runner/config.py
@@ -65,12 +65,16 @@ def __post_init__(self):
 class Config:
     bases: Bases
     runners: dict[str, mrunners.Runner]
-    publish_mirror: PublishMirror = dataclasses.field(default_factory=PublishMirror)
+    publish_mirror: PublishMirror = dataclasses.field(
+        default_factory=PublishMirror
+    )
     benchmarks: Benchmarks = dataclasses.field(default_factory=Benchmarks)
     notify: Notify = dataclasses.field(default_factory=Notify)
     longitudinal_plot: mplot.LongitudinalPlotConfig | None = None
     flag_effect_plot: mplot.FlagEffectPlotConfig | None = None
-    benchmark_longitudinal_plot: mplot.BenchmarkLongitudinalPlotConfig | None = None
+    benchmark_longitudinal_plot: (
+        mplot.BenchmarkLongitudinalPlotConfig | None
+    ) = None
     weekly: dict[str, Weekly] = dataclasses.field(default_factory=dict)
 
     def __post_init__(self):
@@ -82,7 +86,8 @@ def __post_init__(self):
             )
         self.runners = {
             name: mrunners.Runner(
-                nickname=name, **runner  # pyright: ignore[reportCallIssue]
+                nickname=name,
+                **runner,  # pyright: ignore[reportCallIssue]
             )
             for name, runner in self.runners.items()
         }
@@ -97,13 +102,19 @@ def __post_init__(self):
                 **self.longitudinal_plot
             )
         if isinstance(self.flag_effect_plot, dict):
-            self.flag_effect_plot = mplot.FlagEffectPlotConfig(**self.flag_effect_plot)
+            self.flag_effect_plot = mplot.FlagEffectPlotConfig(
+                **self.flag_effect_plot
+            )
         if isinstance(self.benchmark_longitudinal_plot, dict):
-            self.benchmark_longitudinal_plot = mplot.BenchmarkLongitudinalPlotConfig(
-                **self.benchmark_longitudinal_plot
+            self.benchmark_longitudinal_plot = (
+                mplot.BenchmarkLongitudinalPlotConfig(
+                    **self.benchmark_longitudinal_plot
+                )
             )
         if len(self.weekly) == 0:
-            self.weekly = {"default": Weekly(runners=list(self.runners.keys()))}
+            self.weekly = {
+                "default": Weekly(runners=list(self.runners.keys()))
+            }
         else:
             self.weekly = {
                 name: Weekly(**weekly)  # pyright: ignore[reportCallIssue]
4 changes: 3 additions & 1 deletion bench_runner/flags.py
@@ -27,7 +27,9 @@ class Flag:
 def parse_flags(flag_str: str | None) -> list[str]:
     if flag_str is None:
         return []
-    flags = [flag.strip() for flag in flag_str.split(",") if flag.strip() != ""]
+    flags = [
+        flag.strip() for flag in flag_str.split(",") if flag.strip() != ""
+    ]
     internal_flags = []
     for flag in flags:
         if flag not in FLAG_MAPPING:
4 changes: 3 additions & 1 deletion bench_runner/gh.py
@@ -49,7 +49,9 @@ def benchmark(
         raise ValueError(f"machine must be one of {machines}")
 
     if not (benchmark_base is None or isinstance(benchmark_base, bool)):
-        raise TypeError(f"benchmark_base must be bool, got {type(benchmark_base)}")
+        raise TypeError(
+            f"benchmark_base must be bool, got {type(benchmark_base)}"
+        )
 
     if flags is None:
         flags = []
15 changes: 13 additions & 2 deletions bench_runner/git.py
@@ -37,7 +37,14 @@ def get_log(
     n_args = [] if n < 1 else ["-n", str(n)]
 
     return subprocess.check_output(
-        ["git", "log", f"--pretty=format:{format}", *n_args, *ref_args, *extra],
+        [
+            "git",
+            "log",
+            f"--pretty=format:{format}",
+            *n_args,
+            *ref_args,
+            *extra,
+        ],
         encoding="utf-8",
         cwd=dirname,
     ).strip()
@@ -144,7 +151,11 @@ def clone(
 
     dirname = Path(dirname)
     if dirname.is_dir():
-        if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch:
+        if (
+            is_hash
+            and (dirname / ".git").is_dir()
+            and get_git_hash(dirname) == branch
+        ):
             # This is a git repo, and the hash matches
             return
         util.smart_rmtree(dirname)
4 changes: 3 additions & 1 deletion bench_runner/hpt.py
@@ -342,6 +342,8 @@ def make_report(ref: PathLike, head: PathLike, alpha=0.1):
     for reli in [0.9, 0.95, 0.99]:
         ret = maxspeedup(reli, better, alpha, mtx_a, mtx_b)
         if ret > 0:
-            result.write(f"- {reli:.0%} likely to have a {effect} of {ret:.2f}x\n")
+            result.write(
+                f"- {reli:.0%} likely to have a {effect} of {ret:.2f}x\n"
+            )
 
     return result.getvalue()
63 changes: 43 additions & 20 deletions bench_runner/plot.py
@@ -142,7 +142,9 @@ def plot_diff(
     names.append("ALL")
     axs.set_yticks(np.arange(len(names)) + 1, names)
     axs.set_ylim(0, len(names) + 1)
-    axs.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True)
+    axs.tick_params(
+        axis="x", bottom=True, top=True, labelbottom=True, labeltop=True
+    )
     axs.xaxis.set_major_formatter(formatter)
     xlim = axs.get_xlim()
     if xlim[0] > 0.75 and xlim[1] < 1.25:
@@ -226,7 +228,9 @@ class Subplot:
 
     def __post_init__(self):
         if not util.valid_version(self.base):
-            raise RuntimeError(f"Invalid base '{self.base}' in `longitudinal_plot`")
+            raise RuntimeError(
+                f"Invalid base '{self.base}' in `longitudinal_plot`"
+            )
         if (
             not util.valid_version(self.version)
             or len(self.version.split(".")) != 2
@@ -248,9 +252,9 @@ def __post_init__(self):
 def longitudinal_plot(
     results: Iterable[result.Result],
     output_filename: PathLike,
-    getter: Callable[
-        [result.BenchmarkComparison], float | None
-    ] = lambda r: r.geometric_mean_float,
+    getter: Callable[[result.BenchmarkComparison], float | None] = lambda r: (
+        r.geometric_mean_float
+    ),
     differences: tuple[str, str] = ("slower", "faster"),
     title="Performance improvement by configuration",
 ):
@@ -297,7 +301,9 @@ def get_comparison_value(ref, r, base):
     for subcfg, ax in zip(all_cfg, axs):
         version = [int(x) for x in subcfg.version.split(".")]
         ver_results = [
-            r for r in results if list(r.parsed_version.release[0:2]) == version
+            r
+            for r in results
+            if list(r.parsed_version.release[0:2]) == version
         ]
         if subcfg.runners:
             cfg_runners = [r for r in runners if r.nickname in subcfg.runners]
@@ -333,14 +339,17 @@ def get_comparison_value(ref, r, base):
                 continue
 
             runner_results.sort(
-                key=lambda x: datetime.datetime.fromisoformat(x.commit_datetime)
+                key=lambda x: datetime.datetime.fromisoformat(
+                    x.commit_datetime
+                )
             )
             dates = [
                 datetime.datetime.fromisoformat(x.commit_datetime)
                 for x in runner_results
             ]
             changes = [
-                get_comparison_value(ref, r, subcfg.base) for r in runner_results
+                get_comparison_value(ref, r, subcfg.base)
+                for r in runner_results
             ]
 
             if any(x is not None for x in changes):
@@ -359,7 +368,9 @@ def get_comparison_value(ref, r, base):
                 annotations = set()
                 for r, date, change in zip(runner_results, dates, changes):
                     micro = get_micro_version(r.version)
-                    if micro not in annotations and not r.version.endswith("+"):
+                    if micro not in annotations and not r.version.endswith(
+                        "+"
+                    ):
                         annotations.add(micro)
                         text = ax.annotate(
                             micro,
@@ -368,7 +379,9 @@ def get_comparison_value(ref, r, base):
                             xytext=(-3, 15),
                             textcoords="offset points",
                             rotation=90,
-                            arrowprops=dict(arrowstyle="-", connectionstyle="arc"),
+                            arrowprops=dict(
+                                arrowstyle="-", connectionstyle="arc"
+                            ),
                         )
                         text.set_color("#888")
                         text.set_size(8)
@@ -442,9 +455,9 @@ def __post_init__(self):
 def flag_effect_plot(
     results: Iterable[result.Result],
     output_filename: PathLike,
-    getter: Callable[
-        [result.BenchmarkComparison], float | None
-    ] = lambda r: r.geometric_mean_float,
+    getter: Callable[[result.BenchmarkComparison], float | None] = lambda r: (
+        r.geometric_mean_float
+    ),
     differences: tuple[str, str] = ("slower", "faster"),
     title="Performance improvement by configuration",
 ):
@@ -465,7 +478,9 @@ def get_comparison_value(ref, r, force_valid):
             return data[key]
         else:
             value = getter(
-                result.BenchmarkComparison(ref, r, "default", force_valid=force_valid)
+                result.BenchmarkComparison(
+                    ref, r, "default", force_valid=force_valid
+                )
             )
             data[key] = value
             return value
@@ -496,11 +511,10 @@ def get_comparison_value(ref, r, force_valid):
             ] = r
 
     for subplot, ax in zip(subplots, axs):
-
         ax.set_title(f"Effect of {subplot.name}")
         version = tuple(int(x) for x in subplot.version.split("."))
         assert len(version) == 2, (
-            "Version config in {subplot.name}" " should only be major.minor"
+            "Version config in {subplot.name} should only be major.minor"
         )
 
         for runner in cfg.runners.values():
@@ -573,7 +587,10 @@ def __post_init__(self):
             raise RuntimeError(
                 f"Invalid base '{self.base}' in `benchmark_longitudinal_plot`"
             )
-        if not util.valid_version(self.version) or len(self.version.split(".")) != 2:
+        if (
+            not util.valid_version(self.version)
+            or len(self.version.split(".")) != 2
+        ):
             raise RuntimeError(
                 f"Invalid version '{self.version}' in `benchmark_longitudinal_plot`"
             )
@@ -599,7 +616,9 @@ def benchmark_longitudinal_plot(
     else:
         cache = {}
 
-    results = [r for r in results if r.fork == "python" and r.nickname in cfg.runners]
+    results = [
+        r for r in results if r.fork == "python" and r.nickname in cfg.runners
+    ]
 
     base = None
     for r in results:
@@ -638,7 +657,9 @@ def benchmark_longitudinal_plot(
     # Exclude any benchmarks where we don't have enough data to make a
     # meaningful plot
     by_benchmark = {
-        k: v for k, v in by_benchmark.items() if any(len(x) > 2 for x in v.values())
+        k: v
+        for k, v in by_benchmark.items()
+        if any(len(x) > 2 for x in v.values())
     }
 
     fig, axs = plt.subplots(
@@ -650,7 +671,9 @@ def benchmark_longitudinal_plot(
     if len(by_benchmark) == 1:
         axs = [axs]
 
-    plt.suptitle(f"Performance change by benchmark on {cfg.version} vs. {cfg.base}")
+    plt.suptitle(
+        f"Performance change by benchmark on {cfg.version} vs. {cfg.base}"
+    )
 
     first = True
     for (benchmark, runners), ax in zip(sorted(by_benchmark.items()), axs):