Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Doc/howto/perf_profiling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ How to obtain the best results

For best results, keep frame pointers enabled. On supported GCC-compatible
toolchains, CPython builds itself with ``-fno-omit-frame-pointer`` and, when
available, ``-mno-omit-leaf-frame-pointer`` by default. These flags allow
available, ``-mno-omit-leaf-frame-pointer`` by default. On 32-bit ARM,
CPython also adds ``-marm`` when supported. On s390 platforms, CPython also
adds ``-mbackchain`` when supported. These flags allow
profilers to unwind using only the frame pointer, without relying on DWARF debug
information. This matters because the code that is interposed to allow ``perf``
support is dynamically generated, so it doesn't have any DWARF debugging information
Expand Down
8 changes: 5 additions & 3 deletions Doc/using/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,11 @@ also be used to improve performance.

Disable frame pointers, which are enabled by default (see :pep:`831`).

By default, the build appends ``-fno-omit-frame-pointer`` (and
``-mno-omit-leaf-frame-pointer`` when the compiler supports it) to
``BASECFLAGS`` so profilers, debuggers, and system tracing tools
By default, the build appends ``-fno-omit-frame-pointer``,
``-mno-omit-leaf-frame-pointer`` when the compiler supports it,
``-marm`` on 32-bit ARM when supported, and ``-mbackchain`` on s390
platforms when supported, to ``BASECFLAGS`` so
profilers, debuggers, and system tracing tools
(``perf``, ``eBPF``, ``dtrace``, ``gdb``) can walk the C call stack
without DWARF metadata. The flags propagate to third-party C
extensions through :mod:`sysconfig`. On compilers that do not
Expand Down
5 changes: 3 additions & 2 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2305,8 +2305,9 @@ Build changes
(:pep:`831`). Pass :option:`--without-frame-pointers` to opt out.
Authors of C extensions and native libraries built with custom build
systems should add ``-fno-omit-frame-pointer`` and
``-mno-omit-leaf-frame-pointer`` to their own ``CFLAGS`` to keep the
unwind chain intact.
``-mno-omit-leaf-frame-pointer`` to their own ``CFLAGS``
(plus ``-marm`` on 32-bit ARM and ``-mbackchain`` on s390 platforms)
to keep the unwind chain intact.
(Contributed by Pablo Galindo Salgado and Savannah Ostrowski in :gh:`149201`.)

.. _whatsnew315-windows-tail-calling-interpreter:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Enable frame pointers by default for GCC-compatible CPython builds, including
``-mno-omit-leaf-frame-pointer`` when the compiler supports it, so profilers
and debuggers can unwind native interpreter frames more reliably. Users can pass
``-mno-omit-leaf-frame-pointer``, ``-marm`` on 32-bit ARM, and ``-mbackchain``
on s390 platforms when the compiler supports them, so profilers and debuggers
can unwind native interpreter frames more reliably. Users can pass
``--without-frame-pointers`` to opt out.
149 changes: 133 additions & 16 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,40 @@

// Lowest address the manual frame-pointer unwinder accepts as a frame pointer
// or stack slot; any address below this value is treated as invalid.
static const uintptr_t min_frame_pointer_addr = 0x1000;

#ifdef __s390x__
// Linux's s390 "Stack Frame Layout" table documents that z/Architecture
// backchain frames start with the backchain at offset 0 and store "saved r14
// of caller function" at offset 112. The same document's register table
// identifies r14 as the return-address register, so this backchain unwinder
// reads the return address from fp + 112.
// https://www.kernel.org/doc/html/v5.3/s390/debugging390.html#stack-frame-layout
//
// This is only for Linux s390x backchain frames. The s390x ELF ABI does not
// generally mandate where RA and FP are saved, or whether they are saved at all.
// https://sourceware.org/binutils/docs/sframe-spec.html#s390x
//
// The value is a byte offset from the frame pointer; it is converted to a
// word index where FRAME_POINTER_RETURN_OFFSET is defined.
# define S390X_FRAME_RETURN_ADDRESS_OFFSET 112
#endif

// The generic manual unwinder treats the frame pointer as a two-word record:
// fp[0] is the previous frame pointer and fp[1] is the return address. That is
// not true for every architecture, even with frame pointers enabled, so these
// offsets describe the actual slots used by each supported frame layout.
//
// Both offsets are indices in uintptr_t-sized words relative to the frame
// pointer (they are used to index a uintptr_t *), and may be negative.
#if defined(__arm__) && !defined(__thumb__) && !defined(__clang__)
// GCC ARM mode keeps the caller's fp one word below fp and the saved LR at
// fp[0], so the return address is not in the generic fp[1] slot.
# define FRAME_POINTER_NEXT_OFFSET (-1)
# define FRAME_POINTER_RETURN_OFFSET 0
#elif defined(__s390x__)
// s390x backchain frames keep the previous frame pointer at fp[0], but save the
// return-address register in the ABI register save area rather than fp[1].
// The byte offset is divided by the word size to obtain a word index.
# define FRAME_POINTER_NEXT_OFFSET 0
# define FRAME_POINTER_RETURN_OFFSET \
(S390X_FRAME_RETURN_ADDRESS_OFFSET / (Py_ssize_t)sizeof(uintptr_t))
#else
// Generic two-word layout: previous frame pointer at fp[0], return address at
// fp[1].
# define FRAME_POINTER_NEXT_OFFSET 0
# define FRAME_POINTER_RETURN_OFFSET 1
#endif


static PyObject *
_get_current_module(void)
Expand Down Expand Up @@ -325,16 +359,97 @@ get_jit_backend(PyObject *self, PyObject *Py_UNUSED(args))
#endif
}

// Return 1 if addr is acceptable as a stack address for unwinding, else 0.
//
// Addresses below min_frame_pointer_addr are always rejected. When stack_min
// is nonzero, addr must additionally lie in the half-open range
// [stack_min, stack_max); a stack_min of 0 disables the range check.
static int
stack_address_is_valid(uintptr_t addr, uintptr_t stack_min, uintptr_t stack_max)
{
    int above_floor = (addr >= min_frame_pointer_addr);
    int bounds_known = (stack_min != 0);
    int inside_bounds = (addr >= stack_min && addr < stack_max);

    return above_floor && (!bounds_known || inside_bounds);
}

// Return 1 if the word at frame_pointer[offset] may be read, else 0.
//
// offset is a (possibly negative) index in uintptr_t-sized words. The slot
// address is computed with explicit wraparound checks, then validated with
// stack_address_is_valid(). When stack_max is nonzero, the whole word
// (slot address plus sizeof(uintptr_t)) must also end at or before stack_max.
static int
frame_pointer_slot_is_valid(uintptr_t *frame_pointer, Py_ssize_t offset,
                            uintptr_t stack_min, uintptr_t stack_max)
{
    const uintptr_t base = (uintptr_t)frame_pointer;
    const uintptr_t distance = (uintptr_t)Py_ABS(offset) * sizeof(uintptr_t);
    uintptr_t slot;

    if (offset >= 0) {
        // base + distance must not wrap past UINTPTR_MAX.
        if (distance > UINTPTR_MAX - base) {
            return 0;
        }
        slot = base + distance;
    }
    else {
        // base - distance must not wrap below zero.
        if (distance > base) {
            return 0;
        }
        slot = base - distance;
    }

    if (!stack_address_is_valid(slot, stack_min, stack_max)) {
        return 0;
    }
    if (stack_max == 0) {
        // No upper bound available: nothing more to check.
        return 1;
    }
    if (slot > UINTPTR_MAX - sizeof(uintptr_t)) {
        return 0;
    }
    // The full word must fit below the upper stack bound.
    return slot + sizeof(uintptr_t) <= stack_max;
}

// Return 1 if next_fp is an acceptable successor of frame_pointer, else 0.
//
// next_fp must pass stack_address_is_valid(), be aligned to
// sizeof(uintptr_t), and move strictly in the unwinding direction: to a
// higher address when _Py_STACK_GROWS_DOWN is true, to a lower one otherwise.
// The strict comparison also rejects a frame that points at itself.
static int
next_frame_pointer_is_valid(uintptr_t *frame_pointer, uintptr_t *next_fp,
                            uintptr_t stack_min, uintptr_t stack_max)
{
    const uintptr_t current = (uintptr_t)frame_pointer;
    const uintptr_t candidate = (uintptr_t)next_fp;
    int plausible = stack_address_is_valid(candidate, stack_min, stack_max)
                    && (candidate % sizeof(uintptr_t)) == 0;

    if (!plausible) {
        return 0;
    }
#if _Py_STACK_GROWS_DOWN
    return current < candidate;
#else
    return current > candidate;
#endif
}

static PyObject *
manual_unwind_from_fp(uintptr_t *frame_pointer)
{
Py_ssize_t max_depth = 200;
int stack_grows_down = _Py_STACK_GROWS_DOWN;
uintptr_t stack_min = 0;
uintptr_t stack_max = 0;

#ifdef __s390x__
Py_BUILD_ASSERT(S390X_FRAME_RETURN_ADDRESS_OFFSET % sizeof(uintptr_t) == 0);
#endif

if (frame_pointer == NULL) {
return PyList_New(0);
}

PyThreadState *tstate = _PyThreadState_GET();
if (tstate != NULL) {
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
#if _Py_STACK_GROWS_DOWN
stack_min = tstate_impl->c_stack_hard_limit;
stack_max = tstate_impl->c_stack_top;
#else
stack_min = tstate_impl->c_stack_top;
stack_max = tstate_impl->c_stack_hard_limit;
#endif
}

PyObject *result = PyList_New(0);
if (result == NULL) {
return NULL;
Expand All @@ -348,7 +463,21 @@ manual_unwind_from_fp(uintptr_t *frame_pointer)
if ((fp_addr % sizeof(uintptr_t)) != 0) {
break;
}
uintptr_t return_addr = frame_pointer[1];
if (!stack_address_is_valid(fp_addr, stack_min, stack_max)) {
break;
}
if (!frame_pointer_slot_is_valid(frame_pointer,
FRAME_POINTER_NEXT_OFFSET,
stack_min, stack_max)) {
break;
}
if (!frame_pointer_slot_is_valid(frame_pointer,
FRAME_POINTER_RETURN_OFFSET,
stack_min, stack_max)) {
break;
}
uintptr_t *next_fp = (uintptr_t *)frame_pointer[FRAME_POINTER_NEXT_OFFSET];
uintptr_t return_addr = frame_pointer[FRAME_POINTER_RETURN_OFFSET];

PyObject *addr_obj = PyLong_FromUnsignedLongLong(return_addr);
if (addr_obj == NULL) {
Expand All @@ -362,22 +491,10 @@ manual_unwind_from_fp(uintptr_t *frame_pointer)
}
Py_DECREF(addr_obj);

uintptr_t *next_fp = (uintptr_t *)frame_pointer[0];
// Stop if the frame pointer is extremely low.
if ((uintptr_t)next_fp < min_frame_pointer_addr) {
if (!next_frame_pointer_is_valid(frame_pointer, next_fp,
stack_min, stack_max)) {
break;
}
uintptr_t next_addr = (uintptr_t)next_fp;
if (stack_grows_down) {
if (next_addr <= fp_addr) {
break;
}
}
else {
if (next_addr >= fp_addr) {
break;
}
}
frame_pointer = next_fp;
}

Expand Down
100 changes: 100 additions & 0 deletions configure

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2548,6 +2548,16 @@ AS_VAR_IF([ac_cv_gcc_compat], [yes], [
AX_CHECK_COMPILE_FLAG([-mno-omit-leaf-frame-pointer], [
frame_pointer_cflags="$frame_pointer_cflags -mno-omit-leaf-frame-pointer"
], [], [-Werror])
AS_CASE([$host_cpu], [arm|armv*], [
AX_CHECK_COMPILE_FLAG([-marm], [
frame_pointer_cflags="$frame_pointer_cflags -marm"
], [], [-Werror])
])
AS_CASE([$host_cpu], [s390*], [
AX_CHECK_COMPILE_FLAG([-mbackchain], [
frame_pointer_cflags="$frame_pointer_cflags -mbackchain"
], [], [-Werror])
])
], [], [-Werror])
if test -n "$frame_pointer_cflags" && test "x$with_frame_pointers" != xno; then
BASECFLAGS="$frame_pointer_cflags $BASECFLAGS"
Expand Down
Loading