diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 29f83c843561cd..7e6cb724c407e3 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -148,6 +148,11 @@ def tearDown(self): def create_binary_file(self, samples, interval=1000, compression="none"): """Create a test binary file and track it for cleanup.""" + filename, _ = self.write_binary_file(samples, interval, compression) + return filename + + def write_binary_file(self, samples, interval=1000, compression="none"): + """Like create_binary_file but also returns the writer collector.""" with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: filename = f.name self.temp_files.append(filename) @@ -158,7 +163,7 @@ def create_binary_file(self, samples, interval=1000, compression="none"): for sample in samples: collector.collect(sample) collector.export(None) - return filename + return filename, collector def roundtrip(self, samples, interval=1000, compression="none"): """Write samples to binary and read back.""" @@ -805,6 +810,133 @@ def test_invalid_file_path(self): with BinaryReader("/nonexistent/path/file.bin") as reader: reader.replay_samples(RawCollector()) + def test_writer_handles_empty_stack_first_sample(self): + """BinaryWriter.write_sample tolerates an empty stack on a fresh thread. + + Regression test for the C-level RLE bug in process_thread_sample: a + freshly-created ThreadEntry has prev_stack_depth == 0, so an empty + curr_stack compares as STACK_REPEAT against the zero-initialized + previous stack. Before the fix, this fell through the + `&& !is_new_thread` guard into write_sample_with_encoding, which had + no handler for STACK_REPEAT and raised + RuntimeError("Invalid stack encoding type"). Goes through + BinaryWriter.write_sample directly so the test cannot be masked by + any Python-level filtering. + """ + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + filename = f.name + self.temp_files.append(filename) + + writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) + empty_sample = [ + make_interpreter( + 0, [make_thread(99, [], status=THREAD_STATUS_UNKNOWN)] + ) + ] + # First sample for a fresh thread has empty frame_info — the exact + # scenario that exposes the bug. + writer.write_sample(empty_sample, 1000) + writer.write_sample(empty_sample, 2000) + # Mix in a real sample to exercise the transition out of the + # empty-stack RLE buffer. + real_sample = [ + make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])]) + ] + writer.write_sample(real_sample, 3000) + writer.finalize() + + reader_collector = RawCollector() + with BinaryReader(filename) as reader: + count = reader.replay_samples(reader_collector) + # Empty-stack samples are recorded as STACK_REPEAT records with + # depth-0 stacks; the file must replay all three samples. + self.assertEqual(count, 3) + + def test_writer_handles_mixed_empty_and_real_first_sample(self): + """First sample with one empty + one real thread roundtrips through C.""" + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + filename = f.name + self.temp_files.append(filename) + + writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) + sample = [ + make_interpreter( + 0, + [ + make_thread(1, [make_frame("a.py", 1, "f")]), + make_thread(99, [], status=THREAD_STATUS_UNKNOWN), + ], + ) + ] + # Two samples so RLE state is exercised. + writer.write_sample(sample, 1000) + writer.write_sample(sample, 2000) + writer.finalize() + + # Replay must succeed without raising RuntimeError, and the real + # thread's frames must round-trip. + reader_collector = RawCollector() + with BinaryReader(filename) as reader: + reader.replay_samples(reader_collector) + self.assertIn((0, 1), reader_collector.by_thread) + self.assertEqual(len(reader_collector.by_thread[(0, 1)]), 2) + + def test_writer_total_samples_after_finalize_matches_reader(self): + """BinaryWriter.total_samples after finalize() matches the reader's count.""" + # Five IDENTICAL samples force every sample beyond the first into the + # per-thread RLE buffer. Regression for the cached_total_samples + # ordering bug: capturing the cache BEFORE binary_writer_finalize() + # missed the buffered samples that flush_pending_rle() counts. Keep + # the samples identical to preserve coverage. See gh-149342. + samples = [ + [make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])] + ] * 5 + filename, writer_collector = self.write_binary_file(samples) + reader_collector = RawCollector() + with BinaryReader(filename) as reader: + replayed = reader.replay_samples(reader_collector) + self.assertEqual(writer_collector.total_samples, len(samples)) + self.assertEqual(writer_collector.total_samples, replayed) + + def test_writer_total_samples_after_context_manager_matches_reader(self): + """total_samples after `with BinaryWriter(...)` matches the reader's count. + + Regression for the asymmetry between finalize() and __exit__ in + module.c: __exit__ also calls binary_writer_finalize and must + preserve cached_total_samples like finalize() does, otherwise the + getter returns 0 once self->writer is NULL. + """ + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + filename = f.name + self.temp_files.append(filename) + + sample = [ + make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])]) + ] + with _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) as w: + for i in range(5): + w.write_sample(sample, i * 1000) + self.assertEqual(w.total_samples, 5) + + reader_collector = RawCollector() + with BinaryReader(filename) as reader: + self.assertEqual(reader.replay_samples(reader_collector), 5) + + def test_writer_total_samples_after_close_returns_zero(self): + """close() discards data; total_samples reflects no cached count.""" + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + filename = f.name + self.temp_files.append(filename) + + w = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) + sample = [ + make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])]) + ] + for i in range(5): + w.write_sample(sample, i * 1000) + w.close() + self.assertEqual(w.total_samples, 0) + class TestBinaryEncodings(BinaryFormatTestBase): """Tests specifically targeting different stack encodings.""" diff --git a/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst b/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst new file mode 100644 index 00000000000000..660a28ba52e679 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst @@ -0,0 +1,6 @@ +Fix :mod:`!_remote_debugging` binary writing so that sampling a thread +whose Python frame stack is empty (for example while it is in a C call or +mid-syscall) no longer raises ``RuntimeError("Invalid stack encoding +type")``, and so that ``BinaryWriter.total_samples`` after :meth:`!finalize` +or context-manager exit includes samples flushed from the RLE buffer. +Patch by Maurycy Pawłowski-Wieroński. diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c index 0ac6c88d0373a7..bf1ba83be8cea2 100644 --- a/Modules/_remote_debugging/binary_io_writer.c +++ b/Modules/_remote_debugging/binary_io_writer.c @@ -487,7 +487,7 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id, entry->prev_stack_capacity = MAX_STACK_DEPTH; entry->pending_rle_capacity = INITIAL_RLE_CAPACITY; - entry->prev_stack = PyMem_Malloc(entry->prev_stack_capacity * sizeof(uint32_t)); + entry->prev_stack = PyMem_Calloc(entry->prev_stack_capacity, sizeof(uint32_t)); if (!entry->prev_stack) { PyErr_NoMemory(); return NULL; @@ -941,9 +941,8 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info, } uint8_t status = (uint8_t)status_long; - int is_new_thread = 0; ThreadEntry *entry = writer_get_or_create_thread_entry( - writer, thread_id, interpreter_id, &is_new_thread); + writer, thread_id, interpreter_id, NULL); if (!entry) { return -1; } @@ -966,8 +965,15 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info, curr_stack, curr_depth, &shared_count, &pop_count, &push_count); - if (encoding == STACK_REPEAT && !is_new_thread) { - /* Buffer this sample for RLE */ + if (encoding == STACK_REPEAT) { + /* Buffer this sample for RLE. + * + * STACK_REPEAT also covers the "first sample for a fresh thread, + * empty stack" case: a new ThreadEntry has prev_stack_depth == 0 + * and a zero-initialized prev_stack, so compare_stacks() returns + * STACK_REPEAT against an empty curr_stack (depth 0). Buffering + * it here is correct; the RLE flush path emits it as a normal + * STACK_REPEAT record. */ if (GROW_ARRAY(entry->pending_rle, entry->pending_rle_count, entry->pending_rle_capacity, PendingRLESample) < 0) { return -1; diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index c694e587e7cccb..172f8711a2a2a0 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -1544,6 +1544,24 @@ _remote_debugging_BinaryWriter_write_sample_impl(BinaryWriterObject *self, Py_RETURN_NONE; } +/* Finalize the writer, cache total_samples, and destroy it. + * + * The cache assignment must happen AFTER binary_writer_finalize(): finalize + * flushes pending RLE samples via flush_pending_rle(), which increments + * writer->total_samples for each one. Caching before finalize would lose + * those trailing samples. */ +static int +binary_writer_finalize_and_cache(BinaryWriterObject *self) +{ + if (binary_writer_finalize(self->writer) < 0) { + return -1; + } + self->cached_total_samples = self->writer->total_samples; + binary_writer_destroy(self->writer); + self->writer = NULL; + return 0; +} + /*[clinic input] _remote_debugging.BinaryWriter.finalize @@ -1561,16 +1579,10 @@ _remote_debugging_BinaryWriter_finalize_impl(BinaryWriterObject *self) return NULL; } - /* Save total_samples before finalizing */ - self->cached_total_samples = self->writer->total_samples; - - if (binary_writer_finalize(self->writer) < 0) { + if (binary_writer_finalize_and_cache(self) < 0) { return NULL; } - binary_writer_destroy(self->writer); - self->writer = NULL; - Py_RETURN_NONE; } @@ -1624,14 +1636,18 @@ _remote_debugging_BinaryWriter___exit___impl(BinaryWriterObject *self, if (self->writer) { /* Only finalize on normal exit (no exception) */ if (exc_type == Py_None) { - if (binary_writer_finalize(self->writer) < 0) { - binary_writer_destroy(self->writer); - self->writer = NULL; + if (binary_writer_finalize_and_cache(self) < 0) { + if (self->writer) { + binary_writer_destroy(self->writer); + self->writer = NULL; + } return NULL; } } - binary_writer_destroy(self->writer); - self->writer = NULL; + else { + binary_writer_destroy(self->writer); + self->writer = NULL; + } } Py_RETURN_FALSE; } @@ -1658,8 +1674,9 @@ _remote_debugging_BinaryWriter_get_stats_impl(BinaryWriterObject *self) } static PyObject * -BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure) +BinaryWriter_get_total_samples(PyObject *op, void *closure) { + BinaryWriterObject *self = BinaryWriter_CAST(op); if (!self->writer) { /* Use cached value after finalize/close */ return PyLong_FromUnsignedLong(self->cached_total_samples); @@ -1668,7 +1685,7 @@ BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure) } static PyGetSetDef BinaryWriter_getset[] = { - {"total_samples", (getter)BinaryWriter_get_total_samples, NULL, "Total samples written", NULL}, + {"total_samples", BinaryWriter_get_total_samples, NULL, "Total samples written", NULL}, {NULL} };