diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 653f28ddbabfa4..85051cdfc75f98 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -219,7 +219,9 @@ How to obtain the best results For best results, keep frame pointers enabled. On supported GCC-compatible toolchains, CPython builds itself with ``-fno-omit-frame-pointer`` and, when -available, ``-mno-omit-leaf-frame-pointer`` by default. These flags allow +available, ``-mno-omit-leaf-frame-pointer`` by default. On 32-bit ARM, +CPython also adds ``-marm`` when supported. On s390 platforms, CPython also +adds ``-mbackchain`` when supported. These flags allow profilers to unwind using only the frame pointer and not on DWARF debug information. This is because as the code that is interposed to allow ``perf`` support is dynamically generated it doesn't have any DWARF debugging information diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 086f6bfa22ad4a..eb9a44da7bdcf4 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -784,9 +784,11 @@ also be used to improve performance. Disable frame pointers, which are enabled by default (see :pep:`831`). - By default, the build appends ``-fno-omit-frame-pointer`` (and - ``-mno-omit-leaf-frame-pointer`` when the compiler supports it) to - ``BASECFLAGS`` so profilers, debuggers, and system tracing tools + By default, the build appends ``-fno-omit-frame-pointer``, + ``-mno-omit-leaf-frame-pointer`` when the compiler supports it, + ``-marm`` on 32-bit ARM when supported, and ``-mbackchain`` on s390 + platforms when supported, to ``BASECFLAGS`` so + profilers, debuggers, and system tracing tools (``perf``, ``eBPF``, ``dtrace``, ``gdb``) can walk the C call stack without DWARF metadata. The flags propagate to third-party C extensions through :mod:`sysconfig`. On compilers that do not diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 586a1306d83c4c..5b846779847f2b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -2305,8 +2305,9 @@ Build changes (:pep:`831`). Pass :option:`--without-frame-pointers` to opt out. Authors of C extensions and native libraries built with custom build systems should add ``-fno-omit-frame-pointer`` and - ``-mno-omit-leaf-frame-pointer`` to their own ``CFLAGS`` to keep the - unwind chain intact. + ``-mno-omit-leaf-frame-pointer`` to their own ``CFLAGS``, + ``-marm`` on 32-bit ARM, and ``-mbackchain`` on s390 platforms, + to keep the unwind chain intact. (Contributed by Pablo Galindo Salgado and Savannah Ostrowski in :gh:`149201`.) .. _whatsnew315-windows-tail-calling-interpreter: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-16-10-00.gh-issue-149202.W8sQeR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-16-10-00.gh-issue-149202.W8sQeR.rst index f82ca91f5ba000..9c8bd5deee832c 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-16-10-00.gh-issue-149202.W8sQeR.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-16-10-00.gh-issue-149202.W8sQeR.rst @@ -1,4 +1,5 @@ Enable frame pointers by default for GCC-compatible CPython builds, including -``-mno-omit-leaf-frame-pointer`` when the compiler supports it, so profilers -and debuggers can unwind native interpreter frames more reliably. Users can pass +``-mno-omit-leaf-frame-pointer``, ``-marm`` on 32-bit ARM, and ``-mbackchain`` +on s390 platforms when the compiler supports them, so profilers and debuggers +can unwind native interpreter frames more reliably. Users can pass ``--without-frame-pointers`` to opt out. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a07675bb66d8cc..1048248f8dd82a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -59,6 +59,40 @@ static const uintptr_t min_frame_pointer_addr = 0x1000; +#ifdef __s390x__ +// Linux's s390 "Stack Frame Layout" table documents that z/Architecture +// backchain frames start with the backchain at offset 0 and store "saved r14 +// of caller function" at offset 112. The same document's register table +// identifies r14 as the return-address register, so this backchain unwinder +// reads the return address from fp + 112. +// https://www.kernel.org/doc/html/v5.3/s390/debugging390.html#stack-frame-layout +// +// This is only for Linux s390x backchain frames. The s390x ELF ABI does not +// generally mandate where RA and FP are saved, or whether they are saved at all. +// https://sourceware.org/binutils/docs/sframe-spec.html#s390x +# define S390X_FRAME_RETURN_ADDRESS_OFFSET 112 +#endif + +// The generic manual unwinder treats the frame pointer as a two-word record: +// fp[0] is the previous frame pointer and fp[1] is the return address. That is +// not true for every architecture, even with frame pointers enabled, so these +// offsets describe the actual slots used by each supported frame layout. +#if defined(__arm__) && !defined(__thumb__) && !defined(__clang__) +// GCC ARM mode keeps the caller's fp one word below fp and the saved LR at +// fp[0], so the return address is not in the generic fp[1] slot. +# define FRAME_POINTER_NEXT_OFFSET (-1) +# define FRAME_POINTER_RETURN_OFFSET 0 +#elif defined(__s390x__) +// s390x backchain frames keep the previous frame pointer at fp[0], but save the +// return-address register in the ABI register save area rather than fp[1]. +# define FRAME_POINTER_NEXT_OFFSET 0 +# define FRAME_POINTER_RETURN_OFFSET \ + (S390X_FRAME_RETURN_ADDRESS_OFFSET / (Py_ssize_t)sizeof(uintptr_t)) +#else +# define FRAME_POINTER_NEXT_OFFSET 0 +# define FRAME_POINTER_RETURN_OFFSET 1 +#endif + static PyObject * _get_current_module(void) @@ -325,16 +359,97 @@ get_jit_backend(PyObject *self, PyObject *Py_UNUSED(args)) #endif } +static int +stack_address_is_valid(uintptr_t addr, uintptr_t stack_min, uintptr_t stack_max) +{ + if (addr < min_frame_pointer_addr) { + return 0; + } + if (stack_min != 0 && (addr < stack_min || addr >= stack_max)) { + return 0; + } + return 1; +} + +static int +frame_pointer_slot_is_valid(uintptr_t *frame_pointer, Py_ssize_t offset, + uintptr_t stack_min, uintptr_t stack_max) +{ + uintptr_t fp_addr = (uintptr_t)frame_pointer; + uintptr_t slot_addr; + uintptr_t delta = (uintptr_t)Py_ABS(offset) * sizeof(uintptr_t); + if (offset < 0) { + if (fp_addr < delta) { + return 0; + } + slot_addr = fp_addr - delta; + } + else { + if (fp_addr > UINTPTR_MAX - delta) { + return 0; + } + slot_addr = fp_addr + delta; + } + if (!stack_address_is_valid(slot_addr, stack_min, stack_max)) { + return 0; + } + if (stack_max != 0) { + if (slot_addr > UINTPTR_MAX - sizeof(uintptr_t)) { + return 0; + } + if (slot_addr + sizeof(uintptr_t) > stack_max) { + return 0; + } + } + return 1; +} + +static int +next_frame_pointer_is_valid(uintptr_t *frame_pointer, uintptr_t *next_fp, + uintptr_t stack_min, uintptr_t stack_max) +{ + uintptr_t fp_addr = (uintptr_t)frame_pointer; + uintptr_t next_addr = (uintptr_t)next_fp; + if (!stack_address_is_valid(next_addr, stack_min, stack_max)) { + return 0; + } + if ((next_addr % sizeof(uintptr_t)) != 0) { + return 0; + } +#if _Py_STACK_GROWS_DOWN + return next_addr > fp_addr; +#else + return next_addr < fp_addr; +#endif +} + static PyObject * manual_unwind_from_fp(uintptr_t *frame_pointer) { Py_ssize_t max_depth = 200; - int stack_grows_down = _Py_STACK_GROWS_DOWN; + uintptr_t stack_min = 0; + uintptr_t stack_max = 0; + +#ifdef __s390x__ + Py_BUILD_ASSERT(S390X_FRAME_RETURN_ADDRESS_OFFSET % sizeof(uintptr_t) == 0); +#endif if (frame_pointer == NULL) { return PyList_New(0); } + PyThreadState *tstate = _PyThreadState_GET(); + if (tstate != NULL) { + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; +#if _Py_STACK_GROWS_DOWN + stack_min = tstate_impl->c_stack_hard_limit; + stack_max = tstate_impl->c_stack_top; +#else + stack_min = tstate_impl->c_stack_top; + stack_max = tstate_impl->c_stack_hard_limit; +#endif + } + PyObject *result = PyList_New(0); if (result == NULL) { return NULL; @@ -348,7 +463,21 @@ manual_unwind_from_fp(uintptr_t *frame_pointer) if ((fp_addr % sizeof(uintptr_t)) != 0) { break; } - uintptr_t return_addr = frame_pointer[1]; + if (!stack_address_is_valid(fp_addr, stack_min, stack_max)) { + break; + } + if (!frame_pointer_slot_is_valid(frame_pointer, + FRAME_POINTER_NEXT_OFFSET, + stack_min, stack_max)) { + break; + } + if (!frame_pointer_slot_is_valid(frame_pointer, + FRAME_POINTER_RETURN_OFFSET, + stack_min, stack_max)) { + break; + } + uintptr_t *next_fp = (uintptr_t *)frame_pointer[FRAME_POINTER_NEXT_OFFSET]; + uintptr_t return_addr = frame_pointer[FRAME_POINTER_RETURN_OFFSET]; PyObject *addr_obj = PyLong_FromUnsignedLongLong(return_addr); if (addr_obj == NULL) { @@ -362,22 +491,10 @@ manual_unwind_from_fp(uintptr_t *frame_pointer) } Py_DECREF(addr_obj); - uintptr_t *next_fp = (uintptr_t *)frame_pointer[0]; - // Stop if the frame pointer is extremely low. - if ((uintptr_t)next_fp < min_frame_pointer_addr) { + if (!next_frame_pointer_is_valid(frame_pointer, next_fp, + stack_min, stack_max)) { break; } - uintptr_t next_addr = (uintptr_t)next_fp; - if (stack_grows_down) { - if (next_addr <= fp_addr) { - break; - } - } - else { - if (next_addr >= fp_addr) { - break; - } - } frame_pointer = next_fp; } diff --git a/configure b/configure index 734aa3a6a721d1..561d820a782f4c 100755 --- a/configure +++ b/configure @@ -10343,6 +10343,106 @@ else case e in #( esac fi + case $host_cpu in #( + arm|armv*) : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -marm" >&5 +printf %s "checking whether C compiler accepts -marm... " >&6; } +if test ${ax_cv_check_cflags__Werror__marm+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -marm" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__marm=yes +else case e in #( + e) ax_cv_check_cflags__Werror__marm=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__marm" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__marm" >&6; } +if test "x$ax_cv_check_cflags__Werror__marm" = xyes +then : + + frame_pointer_cflags="$frame_pointer_cflags -marm" + +else case e in #( + e) : ;; +esac +fi + + ;; #( + *) : + ;; +esac + case $host_cpu in #( + s390*) : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mbackchain" >&5 +printf %s "checking whether C compiler accepts -mbackchain... " >&6; } +if test ${ax_cv_check_cflags__Werror__mbackchain+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mbackchain" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mbackchain=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mbackchain=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mbackchain" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mbackchain" >&6; } +if test "x$ax_cv_check_cflags__Werror__mbackchain" = xyes +then : + + frame_pointer_cflags="$frame_pointer_cflags -mbackchain" + +else case e in #( + e) : ;; +esac +fi + + ;; #( + *) : + ;; +esac else case e in #( e) : ;; diff --git a/configure.ac b/configure.ac index c8cb1686d55c07..3fccdd2a24b326 100644 --- a/configure.ac +++ b/configure.ac @@ -2548,6 +2548,16 @@ AS_VAR_IF([ac_cv_gcc_compat], [yes], [ AX_CHECK_COMPILE_FLAG([-mno-omit-leaf-frame-pointer], [ frame_pointer_cflags="$frame_pointer_cflags -mno-omit-leaf-frame-pointer" ], [], [-Werror]) + AS_CASE([$host_cpu], [arm|armv*], [ + AX_CHECK_COMPILE_FLAG([-marm], [ + frame_pointer_cflags="$frame_pointer_cflags -marm" + ], [], [-Werror]) + ]) + AS_CASE([$host_cpu], [s390*], [ + AX_CHECK_COMPILE_FLAG([-mbackchain], [ + frame_pointer_cflags="$frame_pointer_cflags -mbackchain" + ], [], [-Werror]) + ]) ], [], [-Werror]) if test -n "$frame_pointer_cflags" && test "x$with_frame_pointers" != xno; then BASECFLAGS="$frame_pointer_cflags $BASECFLAGS"