Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Benchmarks

This directory contains performance benchmarks for k-wave-python. These are not standard examples: they are intended to measure runtime and memory behavior and can become expensive as the grid size grows.

## 3D Solver Scaling Benchmark

`benchmark.py` ports MATLAB k-Wave's `benchmark.m`. It runs `kspaceFirstOrder` on a sequence of 3D grids with increasing sizes, averages runtime over repeated runs, and saves partial results after each run.

The default benchmark uses:

- heterogeneous absorbing medium
- smoothed initial pressure ball source
- binary sensor mask built from 100 Cartesian points on a sphere
- 1000 time steps
- 3 averages per grid size
- grid sizes based on MATLAB's original scale arrays, starting at `32 x 32 x 32`

By default, this can run for a long time and may stop once memory limits are reached.

## Usage

Run a small smoke benchmark:

```bash
uv run benchmarks/benchmark.py --max-cases 1 --num-averages 1 --number-time-points 20
```

Run the default CPU benchmark:

```bash
uv run benchmarks/benchmark.py
```

Run with single-precision arrays:

```bash
uv run benchmarks/benchmark.py --data-cast single
```

Run on the Python GPU backend:

```bash
uv run benchmarks/benchmark.py --device gpu
```

Choose an output file:

```bash
uv run benchmarks/benchmark.py --output-path benchmark_data.json
```

## Output

The benchmark writes a JSON file containing:

- `comp_size`: total grid points for each completed grid size
- `comp_time`: rolling average elapsed seconds for each grid size
- `options`: benchmark settings and environment metadata
- `output_path`: path to the JSON output file
- `error_reached`: whether the benchmark stopped after an exception
- `error_message`: exception message, if any
- `mem_usage`: optional process peak memory estimate when `--report-mem-usage` is set

Partial results are saved after each run so completed timings are preserved if a later grid fails.
1 change: 1 addition & 0 deletions benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Performance benchmarks ported from MATLAB k-Wave."""
181 changes: 181 additions & 0 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""
k-Wave 3D Performance Benchmark

Ported from: k-Wave/benchmark.m

Runs a sequence of 3D initial-value simulations with increasing grid sizes and
records average execution time for each grid.
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from time import perf_counter
from typing import Any, Callable

from benchmarks.helpers import (
BenchmarkOptions,
ChildPeakMemorySampler,
PeakMemorySampler,
build_case,
current_memory_bytes,
default_output_path,
grid_sizes,
options_payload,
rolling_average,
save_results,
store_case_result,
validate_memory_bytes,
)
from kwave.kspaceFirstOrder import kspaceFirstOrder


def run(
options: BenchmarkOptions = BenchmarkOptions(),
*,
backend: str = "python",
device: str = "cpu",
max_cases: int | None = None,
output_path: str | Path | None = None,
quiet: bool = True,
solver: Callable[..., Any] | None = None,
timer: Callable[[], float] = perf_counter,
memory_reader: Callable[[], float] = current_memory_bytes,
memory_sampling_interval: float = 0.05,
mem_sampler_factory: Callable[[], Any] | None = None,
) -> dict[str, Any]:
solver = kspaceFirstOrder if solver is None else solver
cases = grid_sizes(options)
if max_cases is not None:
if max_cases <= 0:
raise ValueError("max_cases must be positive")
cases = cases[:max_cases]

# Sampler picks itself based on backend: cpp runs the simulation in a
# subprocess, so Python-process RSS sampling is meaningless — use
# getrusage(RUSAGE_CHILDREN) instead. Override via mem_sampler_factory
# for tests.
def _default_sampler_factory() -> Any:
if backend == "cpp":
return ChildPeakMemorySampler()
return PeakMemorySampler(memory_reader, memory_sampling_interval)

sampler_factory = mem_sampler_factory or _default_sampler_factory

path = default_output_path(options) if output_path is None else Path(output_path)
result: dict[str, Any] = {
"comp_size": [],
"comp_time": [],
"options": options_payload(options, backend, device),
"output_path": str(path),
"error_reached": False,
"error_message": "",
}
if options.report_mem_usage:
# Probe early so unsupported combinations (e.g. Windows + cpp) fail
# before any output file is written.
try:
sampler_factory()
except NotImplementedError as exc:
raise ValueError(str(exc)) from exc
if backend != "cpp":
# For python backend, also probe the reader so platform-missing
# /proc or ps surfaces here rather than mid-simulation.
try:
validate_memory_bytes(memory_reader())
except Exception as exc:
raise ValueError("report_mem_usage is not supported on this platform") from exc
result["mem_usage"] = []

for case_index, (nx, ny, nz, scale) in enumerate(cases):
loop_time = 0.0
loop_mem_usage = 0.0
try:
kgrid, medium, source, sensor = build_case(options, nx, ny, nz, scale)
for loop_num in range(1, options.num_averages + 1):
if options.report_mem_usage:
with sampler_factory() as memory_sampler:
start = timer()
solver(
kgrid,
medium,
source,
sensor,
backend=backend,
device=device,
quiet=quiet,
pml_size=options.pml_size,
pml_inside=options.pml_inside,
smooth_p0=False,
)
elapsed_time = timer() - start
loop_mem_usage = rolling_average(loop_mem_usage, memory_sampler.peak_bytes, loop_num)
else:
start = timer()
solver(
kgrid,
medium,
source,
sensor,
backend=backend,
device=device,
quiet=quiet,
pml_size=options.pml_size,
pml_inside=options.pml_inside,
smooth_p0=False,
)
elapsed_time = timer() - start
loop_time = rolling_average(loop_time, elapsed_time, loop_num)
store_case_result(result, case_index, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
save_results(path, result)
except Exception as exc:
result["error_reached"] = True
result["error_message"] = str(exc)
save_results(path, result)
break

return result


def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.")
parser.add_argument("--data-cast", choices=("off", "single"), default="off")
parser.add_argument("--backend", choices=("python", "cpp"), default="python")
parser.add_argument("--device", choices=("cpu", "gpu"), default="cpu")
parser.add_argument("--max-cases", type=int, default=None)
parser.add_argument("--num-averages", type=int, default=3)
parser.add_argument("--number-time-points", type=int, default=1000)
parser.add_argument("--output-path", type=Path, default=None)
parser.add_argument("--report-mem-usage", action="store_true")
parser.add_argument("--verbose", action="store_true")
return parser.parse_args()


def main() -> int:
args = _parse_args()
benchmark_options = BenchmarkOptions(
data_cast=args.data_cast,
num_averages=args.num_averages,
number_time_points=args.number_time_points,
report_mem_usage=args.report_mem_usage,
)
result = run(
benchmark_options,
backend=args.backend,
device=args.device,
max_cases=args.max_cases,
output_path=args.output_path,
quiet=not args.verbose,
)
print(f"Benchmark results saved to {result['output_path']}")
if result["error_reached"]:
print("Memory limit reached or error encountered, exiting benchmark. Error message:", file=sys.stderr)
print(f" {result['error_message']}", file=sys.stderr)
return 1
return 0


if __name__ == "__main__":
raise SystemExit(main())
Loading
Loading