waltsims · waltsims · May 14, 2026 · May 14, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,64 @@
+# Benchmarks
+
+This directory contains performance benchmarks for k-wave-python. These are not standard examples: they are intended to measure runtime and memory behavior and can become expensive as the grid size grows.
+
+## 3D Solver Scaling Benchmark
+
+`benchmark.py` ports MATLAB k-Wave's `benchmark.m`. It runs `kspaceFirstOrder` on a sequence of 3D grids with increasing sizes, averages runtime over repeated runs, and saves partial results after each run.
+
+The default benchmark uses:
+
+- heterogeneous absorbing medium
+- smoothed initial pressure ball source
+- binary sensor mask built from 100 Cartesian points on a sphere
+- 1000 time steps
+- 3 averages per grid size
+- grid sizes based on MATLAB's original scale arrays, starting at `32 x 32 x 32`
+
+By default, this can run for a long time and may stop once memory limits are reached.
+
+## Usage
+
+Run a small smoke benchmark:
+
+```bash
+uv run benchmarks/benchmark.py --max-cases 1 --num-averages 1 --number-time-points 20
+```
+
+Run the default CPU benchmark:
+
+```bash
+uv run benchmarks/benchmark.py
+```
+
+Run with single-precision arrays:
+
+```bash
+uv run benchmarks/benchmark.py --data-cast single
+```
+
+Run on the Python GPU backend:
+
+```bash
+uv run benchmarks/benchmark.py --device gpu
+```
+
+Choose an output file:
+
+```bash
+uv run benchmarks/benchmark.py --output-path benchmark_data.json
+```
+
+## Output
+
+The benchmark writes a JSON file containing:
+
+- `comp_size`: total grid points for each completed grid size
+- `comp_time`: rolling average elapsed seconds for each grid size
+- `options`: benchmark settings and environment metadata
+- `output_path`: path to the JSON output file
+- `error_reached`: whether the benchmark stopped after an exception
+- `error_message`: exception message, if any
+- `mem_usage`: optional process peak memory estimate when `--report-mem-usage` is set
+
+Partial results are saved after each run so completed timings are preserved if a later grid fails.
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Performance benchmarks ported from MATLAB k-Wave."""
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
@@ -0,0 +1,175 @@
+"""
+k-Wave 3D Performance Benchmark
+
+Ported from: k-Wave/benchmark.m
+
+Runs a sequence of 3D initial-value simulations with increasing grid sizes and
+records average execution time for each grid.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+from time import perf_counter
+from typing import Any, Callable
+
+from benchmarks.helpers import (
+    BenchmarkOptions,
+    ChildPeakMemorySampler,
+    PeakMemorySampler,
+    build_case,
+    current_memory_bytes,
+    default_output_path,
+    grid_sizes,
+    options_payload,
+    rolling_average,
+    save_results,
+    store_case_result,
+    validate_memory_bytes,
+)
+from kwave.kspaceFirstOrder import kspaceFirstOrder
+
+
+def run(
+    options: BenchmarkOptions = BenchmarkOptions(),
+    *,
+    backend: str = "python",
+    device: str = "cpu",
+    max_cases: int | None = None,
+    output_path: str | Path | None = None,
+    quiet: bool = True,
+    solver: Callable[..., Any] | None = None,
+    timer: Callable[[], float] = perf_counter,
+    memory_reader: Callable[[], float] = current_memory_bytes,
+    memory_sampling_interval: float = 0.05,
+) -> dict[str, Any]:
+    solver = kspaceFirstOrder if solver is None else solver
+    cases = grid_sizes(options)
+    if max_cases is not None:
+        if max_cases <= 0:
+            raise ValueError("max_cases must be positive")
+        cases = cases[:max_cases]
+
+    path = default_output_path(options) if output_path is None else Path(output_path)
+    result: dict[str, Any] = {
+        "comp_size": [],
+        "comp_time": [],
+        "options": options_payload(options, backend, device),
+        "output_path": str(path),
+        "error_reached": False,
+        "error_message": "",
+    }
+    if options.report_mem_usage:
+        # Probe early so unsupported combinations (e.g. Windows + cpp) fail
+        # before any output file is written. cpp probes the sampler class
+        # (Windows refusal happens in __init__); python probes the reader
+        # so platform-missing /proc or ps surfaces here, not mid-simulation.
+        try:
+            if backend == "cpp":
+                ChildPeakMemorySampler()
+            else:
+                validate_memory_bytes(memory_reader())
+        except NotImplementedError as exc:
+            raise ValueError(str(exc)) from exc
+        except Exception as exc:
+            raise ValueError("report_mem_usage is not supported on this platform") from exc
+        result["mem_usage"] = []
+
+    for case_index, (nx, ny, nz, scale) in enumerate(cases):
+        loop_time = 0.0
+        loop_mem_usage = 0.0
+        try:
+            kgrid, medium, source, sensor = build_case(options, nx, ny, nz, scale)
+            # cpp backend: ChildPeakMemorySampler's baseline must be captured
+            # once per grid (ru_maxrss is cumulative — re-baselining per
+            # iteration would silently zero the delta). Python backend's
+            # PeakMemorySampler is per-iteration by design.
+            grid_sampler = ChildPeakMemorySampler() if (options.report_mem_usage and backend == "cpp") else None
+            for loop_num in range(1, options.num_averages + 1):
+                if options.report_mem_usage:
+                    memory_cm = grid_sampler if grid_sampler is not None else PeakMemorySampler(memory_reader, memory_sampling_interval)
+                    with memory_cm as memory_sampler:
+                        start = timer()
+                        solver(
+                            kgrid,
+                            medium,
+                            source,
+                            sensor,
+                            backend=backend,
+                            device=device,
+                            quiet=quiet,
+                            pml_size=options.pml_size,
+                            pml_inside=options.pml_inside,
+                            smooth_p0=False,
+                        )
+                        elapsed_time = timer() - start
+                    loop_mem_usage = rolling_average(loop_mem_usage, memory_sampler.peak_bytes, loop_num)
+                else:
+                    start = timer()
+                    solver(
+                        kgrid,
+                        medium,
+                        source,
+                        sensor,
+                        backend=backend,
+                        device=device,
+                        quiet=quiet,
+                        pml_size=options.pml_size,
+                        pml_inside=options.pml_inside,
+                        smooth_p0=False,
+                    )
+                    elapsed_time = timer() - start
+                loop_time = rolling_average(loop_time, elapsed_time, loop_num)
+                store_case_result(result, case_index, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
+                save_results(path, result)
+        except Exception as exc:
+            result["error_reached"] = True
+            result["error_message"] = str(exc)
+            save_results(path, result)
+            break
+
+    return result
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.")
+    parser.add_argument("--data-cast", choices=("off", "single"), default="off")
+    parser.add_argument("--backend", choices=("python", "cpp"), default="python")
+    parser.add_argument("--device", choices=("cpu", "gpu"), default="cpu")
+    parser.add_argument("--max-cases", type=int, default=None)
+    parser.add_argument("--num-averages", type=int, default=3)
+    parser.add_argument("--number-time-points", type=int, default=1000)
+    parser.add_argument("--output-path", type=Path, default=None)
+    parser.add_argument("--report-mem-usage", action="store_true")
+    parser.add_argument("--verbose", action="store_true")
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = _parse_args()
+    benchmark_options = BenchmarkOptions(
+        data_cast=args.data_cast,
+        num_averages=args.num_averages,
+        number_time_points=args.number_time_points,
+        report_mem_usage=args.report_mem_usage,
+    )
+    result = run(
+        benchmark_options,
+        backend=args.backend,
+        device=args.device,
+        max_cases=args.max_cases,
+        output_path=args.output_path,
+        quiet=not args.verbose,
+    )
+    print(f"Benchmark results saved to {result['output_path']}")
+    if result["error_reached"]:
+        print("Memory limit reached or error encountered, exiting benchmark. Error message:", file=sys.stderr)
+        print(f"  {result['error_message']}", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Performance benchmarks ported from MATLAB k-Wave."""