diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 29f83c843561cd..7e6cb724c407e3 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -148,6 +148,11 @@ def tearDown(self):
 
     def create_binary_file(self, samples, interval=1000, compression="none"):
         """Create a test binary file and track it for cleanup."""
+        filename, _ = self.write_binary_file(samples, interval, compression)
+        return filename
+
+    def write_binary_file(self, samples, interval=1000, compression="none"):
+        """Like create_binary_file but also returns the writer collector."""
         with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
             filename = f.name
         self.temp_files.append(filename)
@@ -158,7 +163,7 @@ def create_binary_file(self, samples, interval=1000, compression="none"):
         for sample in samples:
             collector.collect(sample)
         collector.export(None)
-        return filename
+        return filename, collector
 
     def roundtrip(self, samples, interval=1000, compression="none"):
         """Write samples to binary and read back."""
@@ -805,6 +810,133 @@ def test_invalid_file_path(self):
             with BinaryReader("/nonexistent/path/file.bin") as reader:
                 reader.replay_samples(RawCollector())
 
+    def test_writer_handles_empty_stack_first_sample(self):
+        """BinaryWriter.write_sample tolerates an empty stack on a fresh thread.
+
+        Regression test for the C-level RLE bug in process_thread_sample: a
+        freshly-created ThreadEntry has prev_stack_depth == 0, so an empty
+        curr_stack compares as STACK_REPEAT against the zero-initialized
+        previous stack. Before the fix, this fell through the
+        `&& !is_new_thread` guard into write_sample_with_encoding, which had
+        no handler for STACK_REPEAT and raised
+        RuntimeError("Invalid stack encoding type"). Goes through
+        BinaryWriter.write_sample directly so the test cannot be masked by
+        any Python-level filtering.
+        """
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+        self.temp_files.append(filename)
+
+        writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+        empty_sample = [
+            make_interpreter(
+                0, [make_thread(99, [], status=THREAD_STATUS_UNKNOWN)]
+            )
+        ]
+        # First sample for a fresh thread has empty frame_info — the exact
+        # scenario that exposes the bug.
+        writer.write_sample(empty_sample, 1000)
+        writer.write_sample(empty_sample, 2000)
+        # Mix in a real sample to exercise the transition out of the
+        # empty-stack RLE buffer.
+        real_sample = [
+            make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+        ]
+        writer.write_sample(real_sample, 3000)
+        writer.finalize()
+
+        reader_collector = RawCollector()
+        with BinaryReader(filename) as reader:
+            count = reader.replay_samples(reader_collector)
+        # Empty-stack samples are recorded as STACK_REPEAT records with
+        # depth-0 stacks; the file must replay all three samples.
+        self.assertEqual(count, 3)
+
+    def test_writer_handles_mixed_empty_and_real_first_sample(self):
+        """First sample with one empty + one real thread roundtrips through C."""
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+        self.temp_files.append(filename)
+
+        writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+        sample = [
+            make_interpreter(
+                0,
+                [
+                    make_thread(1, [make_frame("a.py", 1, "f")]),
+                    make_thread(99, [], status=THREAD_STATUS_UNKNOWN),
+                ],
+            )
+        ]
+        # Two samples so RLE state is exercised.
+        writer.write_sample(sample, 1000)
+        writer.write_sample(sample, 2000)
+        writer.finalize()
+
+        # Replay must succeed without raising RuntimeError, and the real
+        # thread's frames must round-trip.
+        reader_collector = RawCollector()
+        with BinaryReader(filename) as reader:
+            reader.replay_samples(reader_collector)
+        self.assertIn((0, 1), reader_collector.by_thread)
+        self.assertEqual(len(reader_collector.by_thread[(0, 1)]), 2)
+
+    def test_writer_total_samples_after_finalize_matches_reader(self):
+        """BinaryWriter.total_samples after finalize() matches the reader's count."""
+        # Five IDENTICAL samples force every sample beyond the first into the
+        # per-thread RLE buffer. Regression for the cached_total_samples
+        # ordering bug: capturing the cache BEFORE binary_writer_finalize()
+        # missed the buffered samples that flush_pending_rle() counts. Keep
+        # the samples identical to preserve coverage. See gh-149342.
+        samples = [
+            [make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])]
+        ] * 5
+        filename, writer_collector = self.write_binary_file(samples)
+        reader_collector = RawCollector()
+        with BinaryReader(filename) as reader:
+            replayed = reader.replay_samples(reader_collector)
+        self.assertEqual(writer_collector.total_samples, len(samples))
+        self.assertEqual(writer_collector.total_samples, replayed)
+
+    def test_writer_total_samples_after_context_manager_matches_reader(self):
+        """total_samples after `with BinaryWriter(...)` matches the reader's count.
+
+        Regression for the asymmetry between finalize() and __exit__ in
+        module.c: __exit__ also calls binary_writer_finalize and must
+        preserve cached_total_samples like finalize() does, otherwise the
+        getter returns 0 once self->writer is NULL.
+        """
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+        self.temp_files.append(filename)
+
+        sample = [
+            make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+        ]
+        with _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) as w:
+            for i in range(5):
+                w.write_sample(sample, i * 1000)
+        self.assertEqual(w.total_samples, 5)
+
+        reader_collector = RawCollector()
+        with BinaryReader(filename) as reader:
+            self.assertEqual(reader.replay_samples(reader_collector), 5)
+
+    def test_writer_total_samples_after_close_returns_zero(self):
+        """close() discards data; total_samples reflects no cached count."""
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            filename = f.name
+        self.temp_files.append(filename)
+
+        w = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
+        sample = [
+            make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
+        ]
+        for i in range(5):
+            w.write_sample(sample, i * 1000)
+        w.close()
+        self.assertEqual(w.total_samples, 0)
+
 
 class TestBinaryEncodings(BinaryFormatTestBase):
     """Tests specifically targeting different stack encodings."""
diff --git a/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst b/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst
new file mode 100644
index 00000000000000..660a28ba52e679
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-04-04-06-36.gh-issue-149342.d3CK-y.rst
@@ -0,0 +1,6 @@
+Fix :mod:`!_remote_debugging` binary writing so that sampling a thread
+whose Python frame stack is empty (for example while it is in a C call or
+mid-syscall) no longer raises ``RuntimeError("Invalid stack encoding
+type")``, and so that ``BinaryWriter.total_samples`` after :meth:`!finalize`
+or context-manager exit includes samples flushed from the RLE buffer.
+Patch by Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c
index 0ac6c88d0373a7..bf1ba83be8cea2 100644
--- a/Modules/_remote_debugging/binary_io_writer.c
+++ b/Modules/_remote_debugging/binary_io_writer.c
@@ -487,7 +487,7 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
     entry->prev_stack_capacity = MAX_STACK_DEPTH;
     entry->pending_rle_capacity = INITIAL_RLE_CAPACITY;
 
-    entry->prev_stack = PyMem_Malloc(entry->prev_stack_capacity * sizeof(uint32_t));
+    entry->prev_stack = PyMem_Calloc(entry->prev_stack_capacity, sizeof(uint32_t));
     if (!entry->prev_stack) {
         PyErr_NoMemory();
         return NULL;
@@ -941,9 +941,8 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
     }
     uint8_t status = (uint8_t)status_long;
 
-    int is_new_thread = 0;
     ThreadEntry *entry = writer_get_or_create_thread_entry(
-        writer, thread_id, interpreter_id, &is_new_thread);
+        writer, thread_id, interpreter_id, NULL);
     if (!entry) {
         return -1;
     }
@@ -966,8 +965,15 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
         curr_stack, curr_depth,
         &shared_count, &pop_count, &push_count);
 
-    if (encoding == STACK_REPEAT && !is_new_thread) {
-        /* Buffer this sample for RLE */
+    if (encoding == STACK_REPEAT) {
+        /* Buffer this sample for RLE.
+         *
+         * STACK_REPEAT also covers the "first sample for a fresh thread,
+         * empty stack" case: a new ThreadEntry has prev_stack_depth == 0
+         * and a zero-initialized prev_stack, so compare_stacks() returns
+         * STACK_REPEAT against an empty curr_stack (depth 0). Buffering
+         * it here is correct; the RLE flush path emits it as a normal
+         * STACK_REPEAT record. */
         if (GROW_ARRAY(entry->pending_rle, entry->pending_rle_count,
                        entry->pending_rle_capacity, PendingRLESample) < 0) {
             return -1;
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
index c694e587e7cccb..172f8711a2a2a0 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -1544,6 +1544,24 @@ _remote_debugging_BinaryWriter_write_sample_impl(BinaryWriterObject *self,
     Py_RETURN_NONE;
 }
 
+/* Finalize the writer, cache total_samples, and destroy it.
+ *
+ * The cache assignment must happen AFTER binary_writer_finalize(): finalize
+ * flushes pending RLE samples via flush_pending_rle(), which increments
+ * writer->total_samples for each one. Caching before finalize would lose
+ * those trailing samples. */
+static int
+binary_writer_finalize_and_cache(BinaryWriterObject *self)
+{
+    if (binary_writer_finalize(self->writer) < 0) {
+        return -1;
+    }
+    self->cached_total_samples = self->writer->total_samples;
+    binary_writer_destroy(self->writer);
+    self->writer = NULL;
+    return 0;
+}
+
 /*[clinic input]
 _remote_debugging.BinaryWriter.finalize
 
@@ -1561,16 +1579,10 @@ _remote_debugging_BinaryWriter_finalize_impl(BinaryWriterObject *self)
         return NULL;
     }
 
-    /* Save total_samples before finalizing */
-    self->cached_total_samples = self->writer->total_samples;
-
-    if (binary_writer_finalize(self->writer) < 0) {
+    if (binary_writer_finalize_and_cache(self) < 0) {
         return NULL;
     }
 
-    binary_writer_destroy(self->writer);
-    self->writer = NULL;
-
     Py_RETURN_NONE;
 }
 
@@ -1624,14 +1636,18 @@ _remote_debugging_BinaryWriter___exit___impl(BinaryWriterObject *self,
     if (self->writer) {
         /* Only finalize on normal exit (no exception) */
         if (exc_type == Py_None) {
-            if (binary_writer_finalize(self->writer) < 0) {
-                binary_writer_destroy(self->writer);
-                self->writer = NULL;
+            if (binary_writer_finalize_and_cache(self) < 0) {
+                if (self->writer) {
+                    binary_writer_destroy(self->writer);
+                    self->writer = NULL;
+                }
                 return NULL;
             }
         }
-        binary_writer_destroy(self->writer);
-        self->writer = NULL;
+        else {
+            binary_writer_destroy(self->writer);
+            self->writer = NULL;
+        }
     }
     Py_RETURN_FALSE;
 }
@@ -1658,8 +1674,9 @@ _remote_debugging_BinaryWriter_get_stats_impl(BinaryWriterObject *self)
 }
 
 static PyObject *
-BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
+BinaryWriter_get_total_samples(PyObject *op, void *closure)
 {
+    BinaryWriterObject *self = BinaryWriter_CAST(op);
     if (!self->writer) {
         /* Use cached value after finalize/close */
         return PyLong_FromUnsignedLong(self->cached_total_samples);
@@ -1668,7 +1685,7 @@ BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
 }
 
 static PyGetSetDef BinaryWriter_getset[] = {
-    {"total_samples", (getter)BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
+    {"total_samples", BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
     {NULL}
 };