diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..9878f6a5 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,64 @@ +# Benchmarks + +This directory contains performance benchmarks for k-wave-python. These are not standard examples: they are intended to measure runtime and memory behavior and can become expensive as the grid size grows. + +## 3D Solver Scaling Benchmark + +`benchmark.py` ports MATLAB k-Wave's `benchmark.m`. It runs `kspaceFirstOrder` on a sequence of 3D grids with increasing sizes, averages runtime over repeated runs, and saves partial results after each run. + +The default benchmark uses: + +- heterogeneous absorbing medium +- smoothed initial pressure ball source +- binary sensor mask built from 100 Cartesian points on a sphere +- 1000 time steps +- 3 averages per grid size +- grid sizes based on MATLAB's original scale arrays, starting at `32 x 32 x 32` + +By default, this can run for a long time and may stop once memory limits are reached. + +## Usage + +Run a small smoke benchmark: + +```bash +uv run benchmarks/benchmark.py --max-cases 1 --num-averages 1 --number-time-points 20 +``` + +Run the default CPU benchmark: + +```bash +uv run benchmarks/benchmark.py +``` + +Run with single-precision arrays: + +```bash +uv run benchmarks/benchmark.py --data-cast single +``` + +Run on the Python GPU backend: + +```bash +uv run benchmarks/benchmark.py --device gpu +``` + +Choose an output file: + +```bash +uv run benchmarks/benchmark.py --output-path benchmark_data.json +``` + +## Output + +The benchmark writes a JSON file containing: + +- `comp_size`: total grid points for each completed grid size +- `comp_time`: rolling average elapsed seconds for each grid size +- `options`: benchmark settings and environment metadata +- `output_path`: path to the JSON output file +- `error_reached`: whether the benchmark stopped after an exception +- `error_message`: exception message, if any +- `mem_usage`: optional process peak memory estimate when `--report-mem-usage` is set + +Partial results are saved after each run so completed timings are preserved if a later grid fails. diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..522d2cce --- /dev/null +++ b/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Performance benchmarks ported from MATLAB k-Wave.""" diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py new file mode 100644 index 00000000..7594dd5b --- /dev/null +++ b/benchmarks/benchmark.py @@ -0,0 +1,181 @@ +""" +k-Wave 3D Performance Benchmark + +Ported from: k-Wave/benchmark.m + +Runs a sequence of 3D initial-value simulations with increasing grid sizes and +records average execution time for each grid. +""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from time import perf_counter +from typing import Any, Callable + +from benchmarks.helpers import ( + BenchmarkOptions, + ChildPeakMemorySampler, + PeakMemorySampler, + build_case, + current_memory_bytes, + default_output_path, + grid_sizes, + options_payload, + rolling_average, + save_results, + store_case_result, + validate_memory_bytes, +) +from kwave.kspaceFirstOrder import kspaceFirstOrder + + +def run( + options: BenchmarkOptions = BenchmarkOptions(), + *, + backend: str = "python", + device: str = "cpu", + max_cases: int | None = None, + output_path: str | Path | None = None, + quiet: bool = True, + solver: Callable[..., Any] | None = None, + timer: Callable[[], float] = perf_counter, + memory_reader: Callable[[], float] = current_memory_bytes, + memory_sampling_interval: float = 0.05, + mem_sampler_factory: Callable[[], Any] | None = None, +) -> dict[str, Any]: + solver = kspaceFirstOrder if solver is None else solver + cases = grid_sizes(options) + if max_cases is not None: + if max_cases <= 0: + raise ValueError("max_cases must be positive") + cases = cases[:max_cases] + + # Sampler picks itself based on backend: cpp runs the simulation in a + # subprocess, so Python-process RSS sampling is meaningless — use + # getrusage(RUSAGE_CHILDREN) instead. Override via mem_sampler_factory + # for tests. + def _default_sampler_factory() -> Any: + if backend == "cpp": + return ChildPeakMemorySampler() + return PeakMemorySampler(memory_reader, memory_sampling_interval) + + sampler_factory = mem_sampler_factory or _default_sampler_factory + + path = default_output_path(options) if output_path is None else Path(output_path) + result: dict[str, Any] = { + "comp_size": [], + "comp_time": [], + "options": options_payload(options, backend, device), + "output_path": str(path), + "error_reached": False, + "error_message": "", + } + if options.report_mem_usage: + # Probe early so unsupported combinations (e.g. Windows + cpp) fail + # before any output file is written. + try: + sampler_factory() + except NotImplementedError as exc: + raise ValueError(str(exc)) from exc + if backend != "cpp": + # For python backend, also probe the reader so platform-missing + # /proc or ps surfaces here rather than mid-simulation. + try: + validate_memory_bytes(memory_reader()) + except Exception as exc: + raise ValueError("report_mem_usage is not supported on this platform") from exc + result["mem_usage"] = [] + + for case_index, (nx, ny, nz, scale) in enumerate(cases): + loop_time = 0.0 + loop_mem_usage = 0.0 + try: + kgrid, medium, source, sensor = build_case(options, nx, ny, nz, scale) + for loop_num in range(1, options.num_averages + 1): + if options.report_mem_usage: + with sampler_factory() as memory_sampler: + start = timer() + solver( + kgrid, + medium, + source, + sensor, + backend=backend, + device=device, + quiet=quiet, + pml_size=options.pml_size, + pml_inside=options.pml_inside, + smooth_p0=False, + ) + elapsed_time = timer() - start + loop_mem_usage = rolling_average(loop_mem_usage, memory_sampler.peak_bytes, loop_num) + else: + start = timer() + solver( + kgrid, + medium, + source, + sensor, + backend=backend, + device=device, + quiet=quiet, + pml_size=options.pml_size, + pml_inside=options.pml_inside, + smooth_p0=False, + ) + elapsed_time = timer() - start + loop_time = rolling_average(loop_time, elapsed_time, loop_num) + store_case_result(result, case_index, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage) + save_results(path, result) + except Exception as exc: + result["error_reached"] = True + result["error_message"] = str(exc) + save_results(path, result) + break + + return result + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.") + parser.add_argument("--data-cast", choices=("off", "single"), default="off") + parser.add_argument("--backend", choices=("python", "cpp"), default="python") + parser.add_argument("--device", choices=("cpu", "gpu"), default="cpu") + parser.add_argument("--max-cases", type=int, default=None) + parser.add_argument("--num-averages", type=int, default=3) + parser.add_argument("--number-time-points", type=int, default=1000) + parser.add_argument("--output-path", type=Path, default=None) + parser.add_argument("--report-mem-usage", action="store_true") + parser.add_argument("--verbose", action="store_true") + return parser.parse_args() + + +def main() -> int: + args = _parse_args() + benchmark_options = BenchmarkOptions( + data_cast=args.data_cast, + num_averages=args.num_averages, + number_time_points=args.number_time_points, + report_mem_usage=args.report_mem_usage, + ) + result = run( + benchmark_options, + backend=args.backend, + device=args.device, + max_cases=args.max_cases, + output_path=args.output_path, + quiet=not args.verbose, + ) + print(f"Benchmark results saved to {result['output_path']}") + if result["error_reached"]: + print("Memory limit reached or error encountered, exiting benchmark. Error message:", file=sys.stderr) + print(f" {result['error_message']}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/helpers.py b/benchmarks/helpers.py new file mode 100644 index 00000000..9290d320 --- /dev/null +++ b/benchmarks/helpers.py @@ -0,0 +1,357 @@ +from __future__ import annotations + +import json +import math +import os +import platform +import subprocess +import threading +from dataclasses import asdict, dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Callable + +import numpy as np + +import kwave +from kwave.data import Vector +from kwave.kgrid import kWaveGrid +from kwave.kmedium import kWaveMedium +from kwave.ksensor import kSensor +from kwave.ksource import kSource +from kwave.utils.conversion import cart2grid +from kwave.utils.filters import smooth +from kwave.utils.mapgen import make_ball, make_cart_sphere + + +@dataclass(frozen=True) +class BenchmarkOptions: + data_cast: str = "off" + heterogeneous_media: bool = True + absorbing_media: bool = True + nonlinear_media: bool = False + binary_sensor_mask: bool = True + number_sensor_points: int = 100 + number_time_points: int = 1000 + num_averages: int = 3 + start_size: int = 32 + x_scale_array: tuple[int, ...] = (1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16) + y_scale_array: tuple[int, ...] = (1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16) + z_scale_array: tuple[int, ...] = (1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8) + domain_size: float = 22e-3 + sensor_radius: float = 10e-3 + pml_size: int = 10 + pml_inside: bool = True + report_mem_usage: bool = False + + def __post_init__(self) -> None: + if self.data_cast not in {"off", "single"}: + raise ValueError("data_cast must be 'off' or 'single'. Use device='gpu' to run on a GPU.") + if not (len(self.x_scale_array) == len(self.y_scale_array) == len(self.z_scale_array)): + raise ValueError("scale arrays must have the same length") + if self.number_time_points <= 0 or self.num_averages <= 0: + raise ValueError("number_time_points and num_averages must be positive") + if self.start_size <= 0: + raise ValueError("start_size must be positive") + if self.number_sensor_points <= 1: + raise ValueError("number_sensor_points must be greater than 1") + if self.domain_size <= 0: + raise ValueError("domain_size must be positive") + if self.sensor_radius <= 0: + raise ValueError("sensor_radius must be positive") + if self.pml_size < 0: + raise ValueError("pml_size must be non-negative") + + @property + def dtype(self) -> type[np.floating[Any]]: + return np.float32 if self.data_cast == "single" else np.float64 + + +def grid_sizes(options: BenchmarkOptions = BenchmarkOptions()) -> list[tuple[int, int, int, int]]: + return [ + ( + options.start_size * xscale, + options.start_size * yscale, + options.start_size * zscale, + min(xscale, yscale, zscale), + ) + for xscale, yscale, zscale in zip(options.x_scale_array, options.y_scale_array, options.z_scale_array) + ] + + +def build_case(options: BenchmarkOptions, nx: int, ny: int, nz: int, scale: int) -> tuple[kWaveGrid, kWaveMedium, kSource, kSensor]: + dtype = options.dtype + dx = options.domain_size / nx + dy = options.domain_size / ny + dz = options.domain_size / nz + kgrid = kWaveGrid(Vector([nx, ny, nz]), Vector([dx, dy, dz])) + + c0 = dtype(1500) + rho0 = dtype(1000) + alpha_coeff = dtype(0.75) + alpha_power = dtype(1.5) + + if options.heterogeneous_media: + sound_speed = c0 * np.ones((nx, ny, nz), dtype=dtype) + # MATLAB: sound_speed(1:Nx/4, :, :) → Python: [:nx//4, :, :] (head slice, no -1). + sound_speed[: nx // 4, :, :] = c0 * dtype(1.2) + density = rho0 * np.ones((nx, ny, nz), dtype=dtype) + # MATLAB: density(:, Ny/4:end, :) → Python: [:, ny//4-1:, :] (tail slice — the + # -1 converts MATLAB's 1-indexed inclusive start to Python's 0-indexed start). + # max(..., 0) guards tiny grids where ny//4 - 1 would be negative. + density[:, max(ny // 4 - 1, 0) :, :] = rho0 * dtype(1.2) + else: + sound_speed = np.array(c0, dtype=dtype) + density = np.array(rho0, dtype=dtype) + + medium = kWaveMedium(sound_speed=sound_speed, density=density) + if options.absorbing_media: + medium.alpha_coeff = alpha_coeff + medium.alpha_power = alpha_power + if options.nonlinear_media: + medium.BonA = dtype(6) + + source = kSource() + # make_ball treats the supplied center as 1-indexed internally (mapgen.py), + # so passing nx//2 (= MATLAB's Nx/2 1-indexed value) yields the same centroid. + source.p0 = dtype(10) * make_ball(Vector([nx, ny, nz]), Vector([nx // 2, ny // 2, nz // 2]), 2 * scale) + # smooth() upcasts to float64 via the FFT path even when given float32; + # the trailing .astype(dtype) restores user-requested precision. + source.p0 = smooth(source.p0.astype(dtype, copy=False), restore_max=True).astype(dtype, copy=False) + + sensor_mask = make_cart_sphere(options.sensor_radius, options.number_sensor_points) + if options.binary_sensor_mask: + sensor_mask, _, _ = cart2grid(kgrid, sensor_mask, order="C") + sensor_mask = sensor_mask.astype(bool) + sensor = kSensor(mask=sensor_mask) + sensor.record = ["p"] + + kgrid.makeTime(np.max(np.asarray(medium.sound_speed))) + kgrid.setTime(options.number_time_points, kgrid.dt) + + return kgrid, medium, source, sensor + + +def default_output_path(options: BenchmarkOptions) -> Path: + computer_name = platform.node() or "unknown-computer" + date = datetime.now().strftime("%Y%m%d-%H%M%S") + return Path(f"benchmark_data-{computer_name}-{options.data_cast}-{date}.json") + + +def rolling_average(previous_average: float, new_value: float, count: int) -> float: + return (previous_average * (count - 1) + new_value) / count + + +def store_case_result( + result: dict[str, Any], case_index: int, comp_size: int, comp_time: float, mem_usage: float, report_mem_usage: bool +) -> None: + if case_index == len(result["comp_size"]): + result["comp_size"].append(comp_size) + result["comp_time"].append(comp_time) + if report_mem_usage: + result["mem_usage"].append(mem_usage) + return + + result["comp_size"][case_index] = comp_size + result["comp_time"][case_index] = comp_time + if report_mem_usage: + result["mem_usage"][case_index] = mem_usage + + +def save_results(path: Path, result: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "comp_size": [int(size) for size in result["comp_size"]], + "comp_time": [float(time) for time in result["comp_time"]], + "options": result["options"], + "output_path": result["output_path"], + "error_reached": bool(result["error_reached"]), + "error_message": result["error_message"], + } + if "mem_usage" in result: + payload["mem_usage"] = [float(usage) for usage in result["mem_usage"]] + path.write_text(json.dumps(payload, indent=2, allow_nan=False) + "\n") + + +def options_payload(options: BenchmarkOptions, backend: str, device: str) -> dict[str, Any]: + payload = asdict(options) + payload.update( + { + "backend": backend, + "device": device, + "computer_name": platform.node(), + "python_version": platform.python_version(), + "platform": platform.platform(), + "kwave_python_version": kwave.__version__, + } + ) + return payload + + +def validate_memory_bytes(value: float) -> float: + memory_bytes = float(value) + if not math.isfinite(memory_bytes) or memory_bytes < 0: + raise ValueError("memory usage must be a finite non-negative value") + return memory_bytes + + +def _linux_current_memory_bytes() -> float: + pages = int(Path("/proc/self/statm").read_text().split()[1]) + return float(pages * os.sysconf("SC_PAGE_SIZE")) + + +def _ps_current_memory_bytes() -> float: + completed = subprocess.run( + ["ps", "-o", "rss=", "-p", str(os.getpid())], + check=True, + capture_output=True, + text=True, + ) + return float(int(completed.stdout.strip()) * 1024) + + +def _windows_current_memory_bytes() -> float: + import ctypes + from ctypes import wintypes + + class ProcessMemoryCounters(ctypes.Structure): + _fields_ = [ + ("cb", wintypes.DWORD), + ("PageFaultCount", wintypes.DWORD), + ("PeakWorkingSetSize", ctypes.c_size_t), + ("WorkingSetSize", ctypes.c_size_t), + ("QuotaPeakPagedPoolUsage", ctypes.c_size_t), + ("QuotaPagedPoolUsage", ctypes.c_size_t), + ("QuotaPeakNonPagedPoolUsage", ctypes.c_size_t), + ("QuotaNonPagedPoolUsage", ctypes.c_size_t), + ("PagefileUsage", ctypes.c_size_t), + ("PeakPagefileUsage", ctypes.c_size_t), + ] + + counters = ProcessMemoryCounters() + counters.cb = ctypes.sizeof(ProcessMemoryCounters) + handle = ctypes.windll.kernel32.GetCurrentProcess() + if not ctypes.windll.psapi.GetProcessMemoryInfo(handle, ctypes.byref(counters), counters.cb): + raise RuntimeError("could not read process memory usage") + return float(counters.WorkingSetSize) + + +def current_memory_bytes() -> float: + """Return the current resident-set-size of THIS Python process, in bytes. + + Note: this returns the *current* RSS at the moment of the call, not a + historical peak (despite ``ru_maxrss``-style fields existing on most + platforms). Use ``PeakMemorySampler`` to track peak-over-time, or + ``ChildPeakMemorySampler`` for subprocess (``backend="cpp"``) measurement. + """ + system = platform.system().lower() + if system == "linux": + try: + return validate_memory_bytes(_linux_current_memory_bytes()) + except (OSError, ValueError, IndexError): + return validate_memory_bytes(_ps_current_memory_bytes()) + if system == "darwin": + return validate_memory_bytes(_ps_current_memory_bytes()) + if system == "windows": + return validate_memory_bytes(_windows_current_memory_bytes()) + + raise RuntimeError("report_mem_usage is not supported on this platform") + + +# Back-compat alias — the old name is misleading but kept so external callers +# (and any pre-merge benchmark JSON tooling) don't break. +peak_memory_bytes = current_memory_bytes + + +class ChildPeakMemorySampler: + """Capture the peak RSS of child processes spawned during the ``with`` block. + + Use this for ``backend="cpp"`` benchmark runs where the simulation work + happens in a subprocess — ``PeakMemorySampler`` only sees the Python + parent's RSS and reports a number that has nothing to do with the C++ + process's actual footprint. + + Implementation: ``resource.getrusage(RUSAGE_CHILDREN).ru_maxrss`` is the + cumulative max across all reaped children, so we record before/after and + return the increment. On Linux ``ru_maxrss`` is reported in kilobytes; on + macOS it's in bytes (per BSD historical convention). Windows has no + equivalent in ``resource`` — the sampler raises ``NotImplementedError`` + on Windows so the caller can refuse the combination cleanly. + """ + + def __init__(self) -> None: + if platform.system().lower() == "windows": + raise NotImplementedError( + "ChildPeakMemorySampler is not supported on Windows " + "(resource.RUSAGE_CHILDREN is POSIX-only). " + "Use backend='python' to measure simulation memory on Windows." + ) + self._before = 0 + self._after = 0 + + def __enter__(self) -> ChildPeakMemorySampler: + import resource # POSIX-only; gated by the Windows check above + + self._before = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss + return self + + def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: Any) -> None: + import resource + + self._after = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss + + @property + def peak_bytes(self) -> float: + delta = max(0, self._after - self._before) + # ru_maxrss units differ by platform (Linux: KB, macOS: bytes). + if platform.system().lower() == "linux": + return float(delta * 1024) + return float(delta) + + +class PeakMemorySampler: + def __init__(self, reader: Callable[[], float] = current_memory_bytes, interval: float = 0.05): + self._reader = reader + self._interval = interval + self._peak_bytes = 0.0 + self._error: Exception | None = None + self._lock = threading.Lock() + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + + @property + def peak_bytes(self) -> float: + with self._lock: + return self._peak_bytes + + def __enter__(self) -> PeakMemorySampler: + self._record_sample() + if self._interval > 0: + self._thread = threading.Thread(target=self._sample_until_stopped, daemon=True) + self._thread.start() + return self + + def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: Any) -> None: + self._stop_event.set() + if self._thread is not None: + self._thread.join() + try: + self._record_sample() + except Exception as exc: + if exc_type is None: + raise exc + if exc_type is None and self._error is not None: + raise self._error + + def _sample_until_stopped(self) -> None: + while not self._stop_event.wait(self._interval): + try: + self._record_sample() + except Exception as exc: + self._error = exc + self._stop_event.set() + + def _record_sample(self) -> None: + memory_bytes = validate_memory_bytes(self._reader()) + with self._lock: + self._peak_bytes = max(self._peak_bytes, memory_bytes) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 00000000..69ce5d77 --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,266 @@ +import json +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pytest + +from benchmarks.benchmark import BenchmarkOptions, build_case, grid_sizes, run +from benchmarks.helpers import ChildPeakMemorySampler, current_memory_bytes, peak_memory_bytes + + +def small_options(**overrides): + values = { + "start_size": 8, + "x_scale_array": (1,), + "y_scale_array": (1,), + "z_scale_array": (1,), + "number_sensor_points": 8, + "number_time_points": 5, + "num_averages": 1, + "sensor_radius": 1e-3, + } + values.update(overrides) + return BenchmarkOptions(**values) + + +def test_grid_sizes_match_matlab_scale_arrays(): + sizes = grid_sizes(BenchmarkOptions()) + + assert sizes[0] == (32, 32, 32, 1) + assert sizes[-1] == (512, 512, 256, 8) + assert len(sizes) == 12 + + +def test_build_case_matches_benchmark_defaults_for_small_grid(): + options = small_options() + + kgrid, medium, source, sensor = build_case(options, 8, 8, 8, 1) + + assert tuple(kgrid.N) == (8, 8, 8) + assert np.isclose(kgrid.dx, options.domain_size / 8) + assert kgrid.Nt == options.number_time_points + assert medium.sound_speed.shape == (8, 8, 8) + assert np.all(medium.sound_speed[:2, :, :] == 1800) + assert np.all(medium.sound_speed[2:, :, :] == 1500) + assert np.all(medium.density[:, :1, :] == 1000) + assert np.all(medium.density[:, 1:, :] == 1200) + assert medium.alpha_coeff == pytest.approx(0.75) + assert medium.alpha_power == pytest.approx(1.5) + assert source.p0.shape == (8, 8, 8) + assert np.max(source.p0) == pytest.approx(10) + assert sensor.mask.shape == (8, 8, 8) + assert sensor.mask.dtype == bool + assert 0 < np.count_nonzero(sensor.mask) <= options.number_sensor_points + assert sensor.record == ["p"] + + +def test_single_data_cast_uses_float32_arrays(): + options = small_options(data_cast="single") + + _, medium, source, _ = build_case(options, 8, 8, 8, 1) + + assert medium.sound_speed.dtype == np.float32 + assert medium.density.dtype == np.float32 + assert source.p0.dtype == np.float32 + + +def test_run_aggregates_timings_and_saves_json_file(tmp_path: Path): + options = small_options(num_averages=2) + output_path = tmp_path / "benchmark.json" + times = iter([0.0, 1.0, 1.0, 3.0]) + calls = [] + + def fake_solver(kgrid, medium, source, sensor, **kwargs): + calls.append((kgrid, medium, source, sensor, kwargs)) + return {"p": np.zeros((1, 1))} + + result = run(options, max_cases=1, output_path=output_path, solver=fake_solver, timer=lambda: next(times)) + + assert len(calls) == 2 + assert calls[0][4]["pml_size"] == options.pml_size + assert calls[0][4]["pml_inside"] is True + assert calls[0][4]["smooth_p0"] is False + assert result["comp_size"] == [8 * 8 * 8] + assert result["comp_time"] == [pytest.approx(1.5)] + assert result["output_path"] == str(output_path) + assert result["error_reached"] is False + assert output_path.exists() + + saved = json.loads(output_path.read_text()) + assert saved["comp_size"] == [8 * 8 * 8] + assert saved["comp_time"] == [pytest.approx(1.5)] + assert saved["output_path"] == str(output_path) + assert saved["options"]["start_size"] == 8 + + +def test_run_keeps_distinct_cases_with_the_same_total_grid_points(tmp_path: Path): + options = small_options( + x_scale_array=(1, 2), + y_scale_array=(2, 1), + z_scale_array=(1, 1), + ) + output_path = tmp_path / "benchmark.json" + times = iter([0.0, 1.0, 1.0, 3.0]) + shapes = [] + + def fake_solver(kgrid, *args, **kwargs): + shapes.append(tuple(kgrid.N)) + return {"p": np.zeros((1, 1))} + + result = run(options, output_path=output_path, solver=fake_solver, timer=lambda: next(times)) + + assert shapes == [(8, 16, 8), (16, 8, 8)] + assert result["comp_size"] == [8 * 16 * 8, 16 * 8 * 8] + assert result["comp_time"] == [pytest.approx(1.0), pytest.approx(2.0)] + + saved = json.loads(output_path.read_text()) + assert saved["comp_size"] == [8 * 16 * 8, 16 * 8 * 8] + assert saved["comp_time"] == [pytest.approx(1.0), pytest.approx(2.0)] + + +def test_start_size_must_be_positive(): + with pytest.raises(ValueError, match="start_size must be positive"): + small_options(start_size=0) + + +def test_run_reports_memory_usage_and_saves_valid_json(tmp_path: Path): + options = small_options(report_mem_usage=True, num_averages=2) + output_path = tmp_path / "benchmark.json" + times = iter([0.0, 1.0, 1.0, 3.0]) + memory_samples = iter([512.0, 1024.0, 2048.0, 4096.0, 8192.0]) + + def fake_solver(*args, **kwargs): + return {"p": np.zeros((1, 1))} + + result = run( + options, + max_cases=1, + output_path=output_path, + solver=fake_solver, + timer=lambda: next(times), + memory_reader=lambda: next(memory_samples), + memory_sampling_interval=0, + ) + + assert result["mem_usage"] == [pytest.approx(5120.0)] + + saved = json.loads(output_path.read_text()) + assert saved["mem_usage"] == [pytest.approx(5120.0)] + + +def test_report_memory_usage_fails_before_writing_when_unavailable(tmp_path: Path): + options = small_options(report_mem_usage=True) + output_path = tmp_path / "benchmark.json" + + def unsupported_memory_reader(): + raise RuntimeError("memory unavailable") + + with pytest.raises(ValueError, match="report_mem_usage is not supported"): + run(options, max_cases=1, output_path=output_path, memory_reader=unsupported_memory_reader) + + assert not output_path.exists() + + +def test_peak_memory_bytes_is_back_compat_alias_for_current_memory_bytes(): + # The old name was misleading (Linux/macOS/Windows readers return current + # RSS, not historical peak). Keep the alias so external benchmark tooling + # that imported the old name doesn't break. + assert peak_memory_bytes is current_memory_bytes + + +@pytest.mark.parametrize( + "kwargs, expected_match", + [ + ({"domain_size": 0}, "domain_size must be positive"), + ({"domain_size": -1e-3}, "domain_size must be positive"), + ({"sensor_radius": 0}, "sensor_radius must be positive"), + ({"sensor_radius": -1}, "sensor_radius must be positive"), + ({"pml_size": -1}, "pml_size must be non-negative"), + ], +) +def test_options_post_init_rejects_invalid_geometry(kwargs, expected_match): + with pytest.raises(ValueError, match=expected_match): + small_options(**kwargs) + + +def test_cpp_backend_uses_child_sampler_via_factory(tmp_path: Path): + """For backend='cpp', the subprocess sampler factory is used (not PeakMemorySampler).""" + options = small_options(report_mem_usage=True, num_averages=2) + output_path = tmp_path / "benchmark.json" + times = iter([0.0, 1.0, 1.0, 3.0]) + + sampler_calls = [] + + class FakeChildSampler: + def __enter__(self): + sampler_calls.append("enter") + return self + + def __exit__(self, *exc): + sampler_calls.append("exit") + + @property + def peak_bytes(self) -> float: + return float(1024 * len(sampler_calls)) # arbitrary but deterministic + + def fake_solver(*args, **kwargs): + return {"p": np.zeros((1, 1))} + + result = run( + options, + backend="cpp", + max_cases=1, + output_path=output_path, + solver=fake_solver, + timer=lambda: next(times), + mem_sampler_factory=FakeChildSampler, + ) + + # Two loop iterations × (enter, exit). The early-startup probe just + # constructs a sampler to verify the factory works; it doesn't enter. + assert sampler_calls == ["enter", "exit", "enter", "exit"] + assert "mem_usage" in result and len(result["mem_usage"]) == 1 + + +def test_cpp_backend_reports_clear_error_when_child_sampler_unsupported(tmp_path: Path): + """Windows + cpp + report_mem_usage should raise a clear error at startup.""" + options = small_options(report_mem_usage=True) + output_path = tmp_path / "benchmark.json" + + def unsupported_sampler_factory(): + raise NotImplementedError("ChildPeakMemorySampler is not supported on Windows (resource.RUSAGE_CHILDREN is POSIX-only).") + + with pytest.raises(ValueError, match="not supported on Windows"): + run( + options, + backend="cpp", + max_cases=1, + output_path=output_path, + mem_sampler_factory=unsupported_sampler_factory, + ) + # Failed-fast, before writing the output file. + assert not output_path.exists() + + +def test_child_peak_memory_sampler_refuses_on_windows(): + with patch("benchmarks.helpers.platform.system", return_value="Windows"): + with pytest.raises(NotImplementedError, match="not supported on Windows"): + ChildPeakMemorySampler() + + +def test_run_saves_partial_results_after_solver_error(tmp_path: Path): + options = small_options() + output_path = tmp_path / "benchmark.json" + + def failing_solver(*args, **kwargs): + raise RuntimeError("solver failed") + + result = run(options, max_cases=1, output_path=output_path, solver=failing_solver) + + assert result["error_reached"] is True + assert result["error_message"] == "solver failed" + assert output_path.exists() + saved = json.loads(output_path.read_text()) + assert saved["error_reached"] is True + assert saved["error_message"] == "solver failed"