From 0aaf2098652ffd3d41997f0e4729621501975c83 Mon Sep 17 00:00:00 2001 From: Saul Cooperman Date: Wed, 20 May 2026 21:59:02 +0100 Subject: [PATCH 1/7] Re-implement in nanobind Signed-off-by: Saul Cooperman --- Dockerfile | 1 + src/pystack/__init__.py | 4 +- src/pystack/__main__.py | 29 +- src/pystack/_pystack.pyi | 10 + src/pystack/_pystack/CMakeLists.txt | 2 + src/pystack/_pystack/bindings.cpp | 180 +++++- src/pystack/_pystack/interpreter.cpp | 36 ++ src/pystack/_pystack/interpreter.h | 24 + src/pystack/_pystack/native_frame.cpp | 88 +++ src/pystack/_pystack/native_frame.h | 15 + src/pystack/_pystack/process.cpp | 7 + src/pystack/_pystack/process.h | 1 + src/pystack/_pystack/pythread.cpp | 46 +- src/pystack/_pystack/pythread.h | 4 + src/pystack/_pystack/thread_builder.cpp | 19 +- src/pystack/_pystack/thread_builder.h | 6 +- src/pystack/_pystack/version.cpp | 45 +- src/pystack/_pystack/version.h | 1 + src/pystack/traceback_formatter.py | 36 +- src/pystack/types.py | 34 +- tests/integration/test_subinterpreters.py | 632 ++++++++++++++++++++++ tests/unit/test_main.py | 122 +++-- tests/unit/test_traceback_formatter.py | 4 +- tests/utils.py | 7 + 24 files changed, 1225 insertions(+), 128 deletions(-) create mode 100644 src/pystack/_pystack/interpreter.cpp create mode 100644 src/pystack/_pystack/interpreter.h create mode 100644 src/pystack/_pystack/native_frame.cpp create mode 100644 tests/integration/test_subinterpreters.py diff --git a/Dockerfile b/Dockerfile index c56e376c..3b938cd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,6 +53,7 @@ RUN apt-get update \ file \ less \ libcrypt-dev \ + bear \ libzstd-dev \ liblzma-dev \ libbz2-dev \ diff --git a/src/pystack/__init__.py b/src/pystack/__init__.py index e973464d..96909732 100644 --- a/src/pystack/__init__.py +++ b/src/pystack/__init__.py @@ -1,7 +1,7 @@ from ._version import __version__ -from .traceback_formatter import print_thread +from .traceback_formatter import TracebackPrinter __all__ = [ "__version__", - 
"print_thread", + "TracebackPrinter", ] diff --git a/src/pystack/__main__.py b/src/pystack/__main__.py index a8f6a10e..365ddfac 100644 --- a/src/pystack/__main__.py +++ b/src/pystack/__main__.py @@ -8,6 +8,7 @@ from textwrap import dedent from typing import Any from typing import Dict +from typing import List from typing import NoReturn from typing import Optional from typing import Set @@ -17,9 +18,10 @@ from pystack.process import decompress_gzip from pystack.process import is_elf from pystack.process import is_gzip +from pystack.types import PyThread +from . import TracebackPrinter from . import errors -from . import print_thread from .colors import colored from .engine import CoreFileAnalyzer from .engine import NativeReportingMode @@ -283,18 +285,27 @@ def main() -> None: _exit_with_code(the_error) +def _include_subinterpreters(threads: List[PyThread]) -> bool: + return len(set(thread.interpreter_id for thread in threads)) > 1 + + def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None: if not args.block and args.native_mode != NativeReportingMode.OFF: parser.error("Native traces are only available in blocking mode") - for thread in get_process_threads( + threads = get_process_threads( args.pid, stop_process=args.block, native_mode=args.native_mode, locals=args.locals, method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, - ): - print_thread(thread, args.native_mode) + ) + + printer = TracebackPrinter( + args.native_mode, include_subinterpreters=_include_subinterpreters(threads) + ) + for thread in threads: + printer.print_thread(thread) def format_psinfo_information(psinfo: Dict[str, Any]) -> str: @@ -414,15 +425,19 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N elf_id if elf_id else "", ) - for thread in get_process_threads_for_core( + threads = get_process_threads_for_core( corefile, executable, library_search_path=lib_search_path, native_mode=args.native_mode, locals=args.locals, 
method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, - ): - print_thread(thread, args.native_mode) + ) + printer = TracebackPrinter( + args.native_mode, include_subinterpreters=_include_subinterpreters(threads) + ) + for thread in threads: + printer.print_thread(thread) if __name__ == "__main__": # pragma: no cover diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 6a3ea6ff..1ab8867c 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -85,6 +85,16 @@ def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... +class NativeFrameType(enum.Enum): + IGNORE = 0 + EVAL = 1 + OTHER = 3 + +def is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: ... +def frame_type( + symbol: str, python_version: Optional[Tuple[int, int]] = None +) -> NativeFrameType: ... + F = TypeVar("F", bound=Callable[..., Any]) def intercept_runtime_errors() -> Callable[[F], F]: ... 
diff --git a/src/pystack/_pystack/CMakeLists.txt b/src/pystack/_pystack/CMakeLists.txt index 3621e1fa..9a3042b5 100644 --- a/src/pystack/_pystack/CMakeLists.txt +++ b/src/pystack/_pystack/CMakeLists.txt @@ -11,6 +11,7 @@ set(PYSTACK_SOURCES logging.cpp maps_parser.cpp mem.cpp + native_frame.cpp process.cpp pycode.cpp pyframe.cpp @@ -21,6 +22,7 @@ set(PYSTACK_SOURCES version.cpp version_detector.cpp bindings.cpp + interpreter.cpp ) # Create the nanobind module diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp index 9dfa8a4a..a74c432d 100644 --- a/src/pystack/_pystack/bindings.cpp +++ b/src/pystack/_pystack/bindings.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -11,14 +12,19 @@ #include #include +#include #include +#include #include +#include #include "corefile.h" #include "elf_common.h" +#include "interpreter.h" #include "logging.h" #include "maps_parser.h" #include "mem.h" +#include "native_frame.h" #include "process.h" #include "thread_builder.h" @@ -491,7 +497,8 @@ buildPyThreadObject( thread.gil_status, thread.gc_status, nb::make_tuple(python_version.first, python_version.second), - "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); + "name"_a = thread.name ? 
nb::cast(*thread.name) : nb::none(), + "interpreter_id"_a = thread.interpreter_id); } // Build a native-only thread object (no Python frames) @@ -534,6 +541,107 @@ logMemoryMaps(const std::vector& maps, const char* source) } } +std::vector +_slice_native_stack(std::vector data) +{ + // Capture a canonical + auto canonical_thread = + std::find_if(data.begin(), data.end(), [](const pystack::PyThreadData& py_thread_data) { + return !py_thread_data.native_frames.empty(); + }); + if (canonical_thread == data.end()) { + return data; + } + + // Capture canonical frames and python version + const std::vector canonical_frames = canonical_thread->native_frames; + const auto python_version = data[0].python_version; + + std::vector eval_index; + for (std::size_t i = 0; i < canonical_frames.size(); ++i) { + if (pystack::is_eval_frame(canonical_frames[i].symbol, python_version)) { + eval_index.push_back(i); + } + } + + const auto total_entry_frames = static_cast( + std::accumulate(data.begin(), data.end(), 0, [](int acc, const pystack::PyThreadData& d) { + return acc + + static_cast(std::count_if( + d.frames.begin(), + d.frames.end(), + [](const pystack::PyFrameData& frame) { return frame.is_entry; })); + })); + + if (eval_index.size() != total_entry_frames) { + return data; + } + + std::vector ordered_threads = std::move(data); + // Sort by: + // 1. With stack anchor (!=0) before without + // 2. Stack anchor in descending order + // 3. Index in PyThreadData vec (handled by stable_sort) + std::stable_sort( + ordered_threads.begin(), + ordered_threads.end(), + [](const pystack::PyThreadData& a, const pystack::PyThreadData& b) { + return std::make_tuple(a.stack_anchor == 0 ? 1 : 0, -a.stack_anchor) + < std::make_tuple(b.stack_anchor == 0 ? 
1 : 0, -b.stack_anchor); + }); + + // Slice frames according to eval frames per python thread + std::size_t cursor = 0; + for (auto& thread_data : ordered_threads) { + const auto required_eval_frames = static_cast(std::count_if( + thread_data.frames.begin(), + thread_data.frames.end(), + [](const pystack::PyFrameData& py_frame) { return py_frame.is_entry; })); + + if (required_eval_frames == 0) { + continue; + } + + const std::size_t end = cursor + required_eval_frames; + const std::size_t from = eval_index[cursor]; + const std::size_t to = end < eval_index.size() ? eval_index[end] : canonical_frames.size(); + thread_data.native_frames.assign(canonical_frames.begin() + from, canonical_frames.begin() + to); + cursor = end; + } + return ordered_threads; +} + +std::vector +_normalize_threads(std::vector threads, NativeReportingMode native_mode) +{ + if (native_mode == NativeReportingMode::OFF) { + return threads; + } + + // First pass: bucket threads by TID (capture index only) + std::unordered_map> indices_by_tid; + for (std::size_t i = 0; i < threads.size(); ++i) { + indices_by_tid[threads[i].tid].push_back(i); + } + + // Second pass: for groups that share a TID, slice native stacks. 
+ for (auto& [_, indices] : indices_by_tid) { + if (indices.size() <= 1) { + continue; + } + std::vector group; + for (const std::size_t idx : indices) { + group.push_back(std::move(threads[idx])); + } + auto sliced = _slice_native_stack(std::move(group)); + for (std::size_t i = 0; i < indices.size(); ++i) { + threads[indices[i]] = std::move(sliced[i]); + } + } + + return threads; +} + nb::object get_process_threads( pid_t pid, @@ -571,21 +679,28 @@ get_process_threads( } else { python_version = manager->python_version(); std::vector all_tids = pystack::getThreadIds(manager->get_manager()); - - if (head != 0) { - bool add_native = native_mode != NativeReportingMode::OFF; - python_threads = pystack::buildThreadsFromInterpreter( - manager->get_manager(), - head, - pid, - add_native, - locals); - - for (const auto& thread : python_threads) { + bool add_native = native_mode != NativeReportingMode::OFF; + + while (head) { + std::vector new_threads = + pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + pid, + add_native, + locals); + + for (const auto& thread : new_threads) { all_tids.erase( std::remove(all_tids.begin(), all_tids.end(), thread.tid), all_tids.end()); } + python_threads.insert( + python_threads.end(), + std::make_move_iterator(new_threads.begin()), + std::make_move_iterator(new_threads.end())); + + head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); } if (native_mode == NativeReportingMode::ALL) { @@ -606,7 +721,7 @@ get_process_threads( } nb::list result; - for (const auto& thread : python_threads) { + for (const auto& thread : _normalize_threads(python_threads, native_mode)) { result.append(buildPyThreadObject(thread, types, python_version)); } for (const auto& thread : native_only_threads) { @@ -651,11 +766,11 @@ get_process_threads_for_core( } nb::list result; + std::vector ret_cpp; std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + bool add_native = native_mode != 
NativeReportingMode::OFF; - if (head != 0) { - bool add_native = native_mode == NativeReportingMode::PYTHON - || native_mode == NativeReportingMode::ALL; + while (head) { auto threads = pystack::buildThreadsFromInterpreter( manager->get_manager(), head, @@ -664,11 +779,20 @@ get_process_threads_for_core( locals); for (const auto& thread : threads) { - result.append(buildPyThreadObject(thread, types, manager->python_version())); all_tids.erase( std::remove(all_tids.begin(), all_tids.end(), thread.tid), all_tids.end()); } + ret_cpp.insert( + ret_cpp.end(), + std::make_move_iterator(threads.begin()), + std::make_move_iterator(threads.end())); + + head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); + } + + for (const auto& thread : _normalize_threads(ret_cpp, native_mode)) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); } if (native_mode == NativeReportingMode::ALL) { @@ -863,4 +987,26 @@ NB_MODULE(_pystack, m) // intercept_runtime_errors decorator - re-export from pystack.errors nb::module_ pystack_errors = nb::module_::import_("pystack.errors"); m.attr("intercept_runtime_errors") = pystack_errors.attr("intercept_runtime_errors"); + + nb::enum_(m, "NativeFrameType") + .value("IGNORE", pystack::NativeFrame::FrameType::IGNORE) + .value("EVAL", pystack::NativeFrame::FrameType::EVAL) + .value("OTHER", pystack::NativeFrame::FrameType::OTHER); + + m.def("is_eval_frame", + &pystack::is_eval_frame, + "symbol"_a, + "python_version"_a, + "Return True if the symbol is a CPython eval frame function"); + + m.def( + "frame_type", + [](const std::string& symbol, std::optional> python_version) { + pystack::NativeFrame frame{}; + frame.symbol = symbol; + return pystack::frame_type(frame, python_version); + }, + "symbol"_a, + "python_version"_a = nb::none(), + "Return the FrameType for a native frame symbol"); } diff --git a/src/pystack/_pystack/interpreter.cpp b/src/pystack/_pystack/interpreter.cpp new file mode 100644 
index 00000000..4f52e043 --- /dev/null +++ b/src/pystack/_pystack/interpreter.cpp @@ -0,0 +1,36 @@ +#include + +#include "interpreter.h" +#include "logging.h" +#include "process.h" +#include "structure.h" +#include "version.h" + +namespace pystack { + +remote_addr_t +InterpreterUtils::getNextInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr) +{ + Structure is(manager, interpreter_addr); + return is.getField(&py_is_v::o_next); +} + +int64_t +InterpreterUtils::getInterpreterId( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr) +{ + if (!manager->versionIsAtLeast(3, 7)) { + // No support for subinterpreters so the only interpreter is ID 0. + return 0; + } + + Structure is(manager, interpreter_addr); + int64_t id_value = is.getField(&py_is_v::o_id); + + return id_value; +} + +} // namespace pystack diff --git a/src/pystack/_pystack/interpreter.h b/src/pystack/_pystack/interpreter.h new file mode 100644 index 00000000..0138ff84 --- /dev/null +++ b/src/pystack/_pystack/interpreter.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +#include "mem.h" +#include "process.h" + +namespace pystack { + +class InterpreterUtils +{ + public: + // Static Methods + static remote_addr_t getNextInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr); + + static int64_t getInterpreterId( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr); +}; + +} // namespace pystack diff --git a/src/pystack/_pystack/native_frame.cpp b/src/pystack/_pystack/native_frame.cpp new file mode 100644 index 00000000..0a793cfb --- /dev/null +++ b/src/pystack/_pystack/native_frame.cpp @@ -0,0 +1,88 @@ +#include "native_frame.h" + +#include +#include +#include + +namespace pystack { + +static const std::set SYMBOL_IGNORELIST = { + "PyObject_Call", + "call_function", + "classmethoddescr_call", + "cmpwrapper_call", + "fast_function", + "function_call", + "instance_call", + "instancemethod_call", + "methoddescr_call", + 
"proxy_call", + "slot_tp_call", + "type_call", + "weakref_call", + "wrap_call", + "wrapper_call", + "wrapperdescr_call", + "do_call_core", +}; + +static bool +starts_with(const std::string& str, const std::string& prefix) +{ + return str.rfind(prefix, 0) == 0; +} + +bool +is_eval_frame(const std::string& symbol, std::pair python_version) +{ + if (python_version < std::make_pair(3, 6)) { + return symbol.find("PyEval_EvalFrameEx") != std::string::npos; + } + if (symbol.find("_PyEval_EvalFrameDefault") != std::string::npos) { + return true; + } + // Python 3.14 tail call interpreter uses LLVM-generated functions + if (starts_with(symbol, "_TAIL_CALL_") && symbol.find(".llvm.") != std::string::npos) { + return true; + } + // Python 3.15+ tail call interpreter drops the .llvm. suffix + if (python_version >= std::make_pair(3, 15) and starts_with(symbol, "_TAIL_CALL_")) { + return true; + } + return false; +} + +NativeFrame::FrameType +frame_type(const NativeFrame& frame, std::optional> python_version) +{ + const std::string& symbol = frame.symbol; + + if (python_version && is_eval_frame(symbol, *python_version)) { + return NativeFrame::FrameType::EVAL; + } + if (starts_with(symbol, "PyEval") || starts_with(symbol, "_PyEval")) { + return NativeFrame::FrameType::IGNORE; + } + if (starts_with(symbol, "_Py")) { + return NativeFrame::FrameType::IGNORE; + } + if (starts_with(symbol, "_TAIL_CALL_")) { + return NativeFrame::FrameType::IGNORE; + } + if (python_version && *python_version >= std::make_pair(3, 8)) { + std::string lower = symbol; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + if (lower.find("vectorcall") != std::string::npos) { + return NativeFrame::FrameType::IGNORE; + } + } + for (const auto& ignored : SYMBOL_IGNORELIST) { + if (starts_with(symbol, ignored)) { + return NativeFrame::FrameType::IGNORE; + } + } + + return NativeFrame::FrameType::OTHER; +} + +} // namespace pystack diff --git a/src/pystack/_pystack/native_frame.h 
b/src/pystack/_pystack/native_frame.h index f98118a8..66496d5f 100644 --- a/src/pystack/_pystack/native_frame.h +++ b/src/pystack/_pystack/native_frame.h @@ -1,12 +1,20 @@ #pragma once +#include #include +#include namespace pystack { // The reason this is a struct is so Cython can easily generate // automatic conversions without explicit code. struct NativeFrame { + enum class FrameType { + IGNORE = 0, + EVAL = 1, + OTHER = 3, + }; + unsigned long address; std::string symbol; std::string path; @@ -14,4 +22,11 @@ struct NativeFrame int colnumber; std::string library; }; + +bool +is_eval_frame(const std::string& symbol, std::pair python_version); + +NativeFrame::FrameType +frame_type(const NativeFrame& frame, std::optional> python_version); + } // namespace pystack diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 9aac1b2c..af24a76f 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -999,6 +999,7 @@ AbstractProcessManager::copyDebugOffsets(Structure& py_runtime, py set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict); set_offset(py_is.o_builtins, &py_runtime_v::o_dbg_off_interpreter_state_builtins); set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil); + set_offset(py_is.o_id, &py_runtime_v::o_dbg_off_interpreter_state_id); set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev); @@ -1474,6 +1475,12 @@ ProcessManager::Tids() const return d_tids; } +std::pair +AbstractProcessManager::pythonVersion() const +{ + return {d_major, d_minor}; +} + std::shared_ptr CoreFileProcessManager::create( const std::string& core_file, diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index 0f52ff3c..965a0d4b 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -98,6 +98,7 @@ class AbstractProcessManager 
: public std::enable_shared_from_this& version); bool versionIsAtLeast(int required_major, int required_minor) const; + std::pair pythonVersion() const; bool isFreeThreaded() const; const python_v& offsets() const; diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index 3607a791..54120453 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -129,6 +129,48 @@ findPthreadTidOffset( return 0; } +remote_addr_t +getStackAnchor(const std::shared_ptr& manager, remote_addr_t frame_addr) +{ + if (!frame_addr) { + return 0; + } + if (!manager->versionIsAtLeast(3, 12)) { + return frame_addr; + } + + remote_addr_t current_addr = frame_addr; + for (int i = 0; i < 4096 && current_addr; ++i) { + Structure current_frame(manager, current_addr); + auto owner = current_frame.getField(&py_frame_v::o_owner); + + if (manager->versionIsAtLeast(3, 14)) { + if (owner == Python3_14::FRAME_OWNED_BY_INTERPRETER + || owner == Python3_14::FRAME_OWNED_BY_CSTACK) + { + return current_addr; + } + } else { + if (owner == Python3_12::FRAME_OWNED_BY_CSTACK) { + return current_addr; + } + } + + remote_addr_t next_addr = current_frame.getField(&py_frame_v::o_back); + if (next_addr == current_addr) { + break; + } + current_addr = next_addr; + } + return frame_addr; +} + +remote_addr_t +PyThread::stackAnchor() const +{ + return d_stack_anchor; +} + PyThread::PyThread(const std::shared_ptr& manager, remote_addr_t addr) : Thread(0, 0) { @@ -142,6 +184,8 @@ PyThread::PyThread(const std::shared_ptr& manager, LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct frame from address " << frame_addr; d_first_frame = std::make_unique(manager, frame_addr, 0); + + d_stack_anchor = getStackAnchor(manager, frame_addr); } d_addr = addr; @@ -366,7 +410,7 @@ getThreadFromInterpreterState( const std::shared_ptr& manager, remote_addr_t addr) { - if (tid_offset_in_pthread_struct == 0) { + if (tid_offset_in_pthread_struct == 0 && 
!manager->versionIsAtLeast(3, 11)) { tid_offset_in_pthread_struct = findPthreadTidOffset(manager, addr); } diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index ab02c672..3938dcf8 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -17,6 +17,7 @@ class Thread Thread(pid_t pid, pid_t tid); pid_t Tid() const; const std::vector& NativeFrames() const; + remote_addr_t stack_anchor; // Methods void populateNativeStackTrace(const std::shared_ptr& manager); @@ -45,6 +46,7 @@ class PyThread : public Thread // Methods GilStatus isGilHolder() const; GCStatus isGCCollecting() const; + remote_addr_t stackAnchor() const; // Static Methods static remote_addr_t getFrameAddr( @@ -60,6 +62,8 @@ class PyThread : public Thread remote_addr_t d_next_addr; std::shared_ptr d_next; std::shared_ptr d_first_frame; + remote_addr_t d_stack_anchor; + int interpreter_id; // Methods GilStatus calculateGilStatus( diff --git a/src/pystack/_pystack/thread_builder.cpp b/src/pystack/_pystack/thread_builder.cpp index 629e502a..406f4a43 100644 --- a/src/pystack/_pystack/thread_builder.cpp +++ b/src/pystack/_pystack/thread_builder.cpp @@ -1,7 +1,9 @@ #include "thread_builder.h" +#include "interpreter.h" #include "logging.h" #include "maps_parser.h" +#include namespace pystack { @@ -58,7 +60,8 @@ buildPythonThread( PyThread* thread, pid_t pid, bool add_native_traces, - bool resolve_locals) + bool resolve_locals, + int64_t interpreter_id) { PyThreadData data; data.tid = thread->Tid(); @@ -80,6 +83,9 @@ buildPythonThread( data.gil_status = static_cast(thread->isGilHolder()); data.gc_status = static_cast(thread->isGCCollecting()); + data.interpreter_id = interpreter_id; + data.python_version = manager->pythonVersion(); + data.stack_anchor = thread->stackAnchor(); return data; } @@ -100,6 +106,7 @@ buildNativeThread(const std::shared_ptr& manager, pid_t const auto& native_frames = native_thread.NativeFrames(); 
data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + data.python_version = manager->pythonVersion(); return data; } @@ -116,11 +123,17 @@ buildThreadsFromInterpreter( std::vector threads; auto thread = getThreadFromInterpreterState(manager, interpreter_head); + int64_t interpreter_id = InterpreterUtils::getInterpreterId(manager, interpreter_head); PyThread* current_thread = thread.get(); while (current_thread != nullptr) { - threads.push_back( - buildPythonThread(manager, current_thread, pid, add_native_traces, resolve_locals)); + threads.push_back(buildPythonThread( + manager, + current_thread, + pid, + add_native_traces, + resolve_locals, + interpreter_id)); auto next = current_thread->NextThread(); current_thread = next.get(); diff --git a/src/pystack/_pystack/thread_builder.h b/src/pystack/_pystack/thread_builder.h index ac431387..d1f7424f 100644 --- a/src/pystack/_pystack/thread_builder.h +++ b/src/pystack/_pystack/thread_builder.h @@ -39,6 +39,9 @@ struct PyThreadData std::vector native_frames; int gil_status; // -1 = unknown, 0 = not held, 1 = held int gc_status; // -1 = unknown, 0 = not collecting, 1 = collecting + int64_t interpreter_id; + std::pair python_version; + remote_addr_t stack_anchor; }; std::vector @@ -55,7 +58,8 @@ buildPythonThread( PyThread* thread, pid_t pid, bool add_native_traces, - bool resolve_locals); + bool resolve_locals, + int64_t interpreter_id); PyThreadData buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid); diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 9f0733d6..ab7fe48d 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -179,6 +179,23 @@ py_is() }; } +template +constexpr py_is_v +py_isv37() +{ + return { + sizeof(T), + {offsetof(T, next)}, + {offsetof(T, tstate_head)}, + {offsetof(T, gc)}, + {offsetof(T, modules)}, + {offsetof(T, sysdict)}, + {offsetof(T, builtins)}, + {0}, + {offsetof(T, id)}, + }; +} + 
template constexpr py_is_v py_isv311() @@ -191,6 +208,8 @@ py_isv311() {offsetof(T, modules)}, {offsetof(T, sysdict)}, {offsetof(T, builtins)}, + {0}, + {offsetof(T, id)}, }; } @@ -207,6 +226,24 @@ py_isv312() {offsetof(T, sysdict)}, {offsetof(T, builtins)}, {offsetof(T, ceval.gil)}, + {offsetof(T, id)}, + }; +} + +template +constexpr py_is_v +py_isv314() +{ + return { + sizeof(T), + {offsetof(T, next)}, + {offsetof(T, threads.head)}, + {offsetof(T, gc)}, + {offsetof(T, imports.modules)}, + {offsetof(T, sysdict)}, + {offsetof(T, builtins)}, + {offsetof(T, _gil)}, + {offsetof(T, id)}, }; } @@ -664,7 +701,7 @@ python_v python_v3_7 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -686,7 +723,7 @@ python_v python_v3_8 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -708,7 +745,7 @@ python_v python_v3_9 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -730,7 +767,7 @@ python_v python_v3_10 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index c56851ac..d9b2b2de 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -241,6 +241,7 @@ struct py_is_v FieldOffset o_sysdict; FieldOffset o_builtins; FieldOffset o_gil_runtime_state; + FieldOffset o_id; }; struct py_gc_v diff --git a/src/pystack/traceback_formatter.py b/src/pystack/traceback_formatter.py index d6ca8442..346dc474 100644 --- a/src/pystack/traceback_formatter.py +++ b/src/pystack/traceback_formatter.py @@ -12,9 +12,39 @@ from .types import frame_type -def print_thread(thread: PyThread, native_mode: NativeReportingMode) -> None: - for line in format_thread(thread, native_mode): - print(line, file=sys.stdout, flush=True) +class TracebackPrinter: + def __init__( + self, native_mode: NativeReportingMode, include_subinterpreters: bool = 
False + ): + self.native_mode = native_mode + self.include_subinterpreters = include_subinterpreters + self._current_interpreter_id: Optional[int] = None + self._first_print_sentinel = True + + def print_thread(self, thread: PyThread) -> None: + # Print interpreter header if we've switched interpreters + if self.include_subinterpreters: + if ( + thread.interpreter_id != self._current_interpreter_id + or self._first_print_sentinel + ): + self._print_interpreter_header(thread.interpreter_id) + self._current_interpreter_id = thread.interpreter_id + self._first_print_sentinel = False + + # Print the thread with indentation + for line in format_thread(thread, self.native_mode): + if self.include_subinterpreters: + print(" " * 2, end="") + print(line, file=sys.stdout, flush=True) + + def _print_interpreter_header(self, interpreter_id: Optional[int]) -> None: + header = ( + f"Interpreter-{interpreter_id if interpreter_id is not None else 'Unknown'}" + ) + if interpreter_id == 0: + header += " (main)" + print(header, file=sys.stdout, flush=True) def format_frame(frame: PyFrame) -> Iterable[str]: diff --git a/src/pystack/types.py b/src/pystack/types.py index c0f72011..0a33fe0d 100644 --- a/src/pystack/types.py +++ b/src/pystack/types.py @@ -7,6 +7,8 @@ from typing import Optional from typing import Tuple +from ._pystack import frame_type as _frame_type_cpp + SYMBOL_IGNORELIST = { "PyObject_Call", "call_function", @@ -44,38 +46,11 @@ class FrameType(enum.Enum): OTHER = 3 -def _is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: - if python_version < (3, 6): - return "PyEval_EvalFrameEx" in symbol - if "_PyEval_EvalFrameDefault" in symbol: - return True - # Python 3.14 tail call interpreter uses LLVM-generated functions - if symbol.startswith("_TAIL_CALL_") and ".llvm." in symbol: - return True - # Python 3.15+ tail call interpreter drops the .llvm. 
suffix - if python_version >= (3, 15) and symbol.startswith("_TAIL_CALL_"): - return True - return False - - def frame_type( frame: NativeFrame, python_version: Optional[Tuple[int, int]] ) -> NativeFrame.FrameType: - symbol = frame.symbol - if python_version and _is_eval_frame(symbol, python_version): - return frame.FrameType.EVAL - if symbol.startswith("PyEval") or symbol.startswith("_PyEval"): - return frame.FrameType.IGNORE - if symbol.startswith("_Py"): - return frame.FrameType.IGNORE - if symbol.startswith("_TAIL_CALL_"): - return frame.FrameType.IGNORE - if python_version and python_version >= (3, 8) and "vectorcall" in symbol.lower(): - return frame.FrameType.IGNORE - if any(symbol.startswith(ignored_symbol) for ignored_symbol in SYMBOL_IGNORELIST): - return frame.FrameType.IGNORE - - return frame.FrameType.OTHER + result = _frame_type_cpp(frame.symbol, python_version) + return NativeFrame.FrameType(result.value) class LocationInfo(NamedTuple): @@ -119,6 +94,7 @@ class PyThread: is_gc_collecting: int python_version: Optional[Tuple[int, int]] name: Optional[str] = None + interpreter_id: Optional[int] = None @property def frames(self) -> Iterable[PyFrame]: diff --git a/tests/integration/test_subinterpreters.py b/tests/integration/test_subinterpreters.py new file mode 100644 index 00000000..6554ff7d --- /dev/null +++ b/tests/integration/test_subinterpreters.py @@ -0,0 +1,632 @@ +import io +import subprocess +import time +from collections import Counter +from contextlib import redirect_stdout +from pathlib import Path +from typing import Dict +from typing import List +from typing import Set + +import pytest + +from pystack.engine import NativeReportingMode +from pystack.engine import StackMethod +from pystack.engine import get_process_threads +from pystack.engine import get_process_threads_for_core +from pystack.traceback_formatter import TracebackPrinter +from pystack.types import NativeFrame +from pystack.types import frame_type +from tests.utils import 
ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +from tests.utils import generate_core_file +from tests.utils import spawn_child_process + +NUM_INTERPRETERS = 3 +NUM_INTERPRETERS_WITH_THREADS = 2 +NUM_THREADS_PER_SUBINTERPRETER = 2 + +# Compatibility shim so test programs work on both 3.13 (_interpreters) +# and 3.14+ (concurrent.interpreters). +_INTERPRETERS_SHIM = """\ +import sys as _sys +try: + from concurrent import interpreters +except ImportError: + import _interpreters as _raw + class _W: + def __init__(self, id): + self.id = id + def exec(self, code): + _raw.exec(self.id, code) + class interpreters: + @staticmethod + def create(): + return _W(_raw.create()) + Interpreter = _W +""" + +PROGRAM = f"""\ +import sys +import threading +import time + +{_INTERPRETERS_SHIM} + +NUM_INTERPRETERS = {NUM_INTERPRETERS} + + +def start_interpreter_async(interp, code): + t = threading.Thread(target=interp.exec, args=(code,)) + t.daemon = True + t.start() + return t + + +CODE = '''\\ +import time +while True: + time.sleep(1) +''' + +threads = [] +for _ in range(NUM_INTERPRETERS): + interp = interpreters.create() + t = start_interpreter_async(interp, CODE) + threads.append(t) + +# Give sub-interpreters time to start executing +time.sleep(1) + +fifo = sys.argv[1] +with open(fifo, "w") as f: + f.write("ready") + +while True: + time.sleep(1) +""" + + +PROGRAM_WITH_THREADS = f"""\ +import sys +import threading +import time + +{_INTERPRETERS_SHIM} + +NUM_INTERPRETERS = {NUM_INTERPRETERS_WITH_THREADS} + + +def start_interpreter_async(interp, code): + t = threading.Thread(target=interp.exec, args=(code,)) + t.daemon = True + t.start() + return t + + +CODE = '''\\ +import threading +import time + +NUM_THREADS = {NUM_THREADS_PER_SUBINTERPRETER} + +def worker(): + while True: + time.sleep(1) + +threads = [] +for _ in range(NUM_THREADS): + t = threading.Thread(target=worker) + # daemon threads are disabled in isolated subinterpreters + t.start() + threads.append(t) + +while True: + 
time.sleep(1) +''' + +threads = [] +for _ in range(NUM_INTERPRETERS): + interp = interpreters.create() + t = start_interpreter_async(interp, CODE) + threads.append(t) + +# Give sub-interpreters and their internal workers time to start. +time.sleep(2) + +fifo = sys.argv[1] +with open(fifo, "w") as f: + f.write("ready") + +while True: + time.sleep(1) +""" + +PROGRAM_NESTED_SAME_THREAD = ( + """\ +import sys +import threading +import time + +""" + + _INTERPRETERS_SHIM + + """ +_SHIM = '''""" + + _INTERPRETERS_SHIM + + """''' + +fifo = sys.argv[1] + +interp_outer = interpreters.create() +interp_inner = interpreters.create() + +inner_code = f'''\\ +import time +with open({fifo!r}, "w") as f: + f.write("ready") +while True: + time.sleep(1) +''' +outer_code = _SHIM + f''' +interpreters.Interpreter({{inner_id}}).exec({{inner_code!r}}) +'''.format(inner_id=interp_inner.id, inner_code=inner_code) + +t = threading.Thread(target=interp_outer.exec, args=(outer_code,)) +t.daemon = True +t.start() + +while True: + time.sleep(1) +""" +) + +PROGRAM_TWO_THREADS_THREE_SUBINTERPRETERS_EACH = ( + """\ +import sys +import threading +import time +from pathlib import Path + +""" + + _INTERPRETERS_SHIM + + """ +_SHIM = '''""" + + _INTERPRETERS_SHIM + + """''' + +signal_file = Path(sys.argv[1]) + + +def make_level3_code(token): + return f'''\\ +import time +from pathlib import Path +Path({str(signal_file)!r}).open("a").write("{token}\\\\n") +while True: + time.sleep(1) +''' + + +def make_level2_code(interp3_id, level3_code): + return _SHIM + f''' +interpreters.Interpreter({interp3_id}).exec({level3_code!r}) +''' + + +def make_level1_code(interp2_id, level2_code): + return _SHIM + f''' +interpreters.Interpreter({interp2_id}).exec({level2_code!r}) +''' + + +def launch_chain(token): + interp1 = interpreters.create() + interp2 = interpreters.create() + interp3 = interpreters.create() + + level3_code = make_level3_code(token) + level2_code = make_level2_code(interp3.id, level3_code) + 
level1_code = make_level1_code(interp2.id, level2_code) + interp1.exec(level1_code) + + +t1 = threading.Thread(target=launch_chain, args=("chain1",), daemon=True) +t2 = threading.Thread(target=launch_chain, args=("chain2",), daemon=True) +t1.start() +t2.start() + +while True: + time.sleep(1) +""" +) + + +def _collect_threads( + python_executable: Path, + tmpdir: Path, + native_mode: NativeReportingMode = NativeReportingMode.OFF, +): + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with spawn_child_process( + str(python_executable), str(test_file), tmpdir + ) as child_process: + return list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=native_mode, + ) + ) + + +def _assert_interpreter_headers( + threads, + native_mode: NativeReportingMode, + interpreter_ids, +) -> str: + printer = TracebackPrinter( + native_mode=native_mode, + include_subinterpreters=True, + ) + output = io.StringIO() + with redirect_stdout(output): + for thread in threads: + printer.print_thread(thread) + + result = output.getvalue() + assert "Interpreter-0 (main)" in result + for interpreter_id in interpreter_ids: + if interpreter_id == 0: + continue + assert f"Interpreter-{interpreter_id}" in result + return result + + +def _count_threads_by_interpreter(threads): + return dict( + Counter( + thread.interpreter_id + for thread in threads + if thread.interpreter_id is not None + ) + ) + + +def _interpreter_ids(threads) -> Set[int]: + return { + thread.interpreter_id for thread in threads if thread.interpreter_id is not None + } + + +def _assert_subinterpreter_coverage(threads) -> Set[int]: + interpreter_ids = _interpreter_ids(threads) + assert 0 in interpreter_ids + assert len(interpreter_ids) == NUM_INTERPRETERS + 1 + return interpreter_ids + + +def _assert_native_eval_symbols(threads) -> None: + eval_frames = [ + frame + for thread in threads + for frame in thread.native_frames + if frame_type(frame, 
thread.python_version) == NativeFrame.FrameType.EVAL + ] + assert eval_frames + assert all("?" not in frame.symbol for frame in eval_frames) + if any(frame.linenumber == 0 for frame in eval_frames): # pragma: no cover + assert all(frame.linenumber == 0 for frame in eval_frames) + assert all(frame.path == "???" for frame in eval_frames) + else: # pragma: no cover + assert all(frame.linenumber != 0 for frame in eval_frames) + assert any(frame.path and "?" not in frame.path for frame in eval_frames) + + +def _assert_mergeable_same_tid_groups(threads) -> bool: + groups: Dict[int, List] = {} + for thread in threads: + groups.setdefault(thread.tid, []).append(thread) + + found_shared_tid = False + for group in groups.values(): + interpreter_ids = { + thread.interpreter_id + for thread in group + if thread.interpreter_id is not None + } + if len(group) < 2 or len(interpreter_ids) < 2: + continue + found_shared_tid = True + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) + == NativeFrame.FrameType.EVAL + ] + entry_count = sum(frame.is_entry for frame in thread.all_frames) + assert len(eval_frames) == entry_count + return found_shared_tid + + +def _shared_tid_groups_with_min_interpreters(threads, min_interpreters): + groups = {} + for thread in threads: + groups.setdefault(thread.tid, []).append(thread) + + matching = [] + for tid, group in groups.items(): + interpreter_ids = { + thread.interpreter_id + for thread in group + if thread.interpreter_id is not None + } + if len(interpreter_ids) >= min_interpreters: + matching.append((tid, group)) + return matching + + +def _assert_strict_native_eval_symbols_for_group(group) -> None: + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) == NativeFrame.FrameType.EVAL + ] + assert eval_frames + assert all("?" 
not in frame.symbol for frame in eval_frames) + if any(frame.linenumber == 0 for frame in eval_frames): + assert all(frame.linenumber == 0 for frame in eval_frames) + assert all(frame.path == "???" for frame in eval_frames) + else: + assert all(frame.linenumber != 0 for frame in eval_frames) + assert any(frame.path and "?" not in frame.path for frame in eval_frames) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters(python, tmpdir): + _, python_executable = python + + threads = _collect_threads( + python_executable=python_executable, + tmpdir=tmpdir, + native_mode=NativeReportingMode.OFF, + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert all(not thread.native_frames for thread in threads) + _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.OFF, + interpreter_ids=interpreter_ids, + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +@pytest.mark.parametrize( + "native_mode", + [ + NativeReportingMode.PYTHON, + NativeReportingMode.LAST, + NativeReportingMode.ALL, + ], + ids=["python", "last", "all"], +) +def test_subinterpreters_with_native(python, tmpdir, native_mode): + _, python_executable = python + + threads = _collect_threads( + python_executable=python_executable, + tmpdir=tmpdir, + native_mode=native_mode, + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + output = _assert_interpreter_headers( + threads=threads, + native_mode=native_mode, + interpreter_ids=interpreter_ids, + ) + assert "(C)" in output or "Unable to merge native stack" in output + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_many_threads_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_with_threads_program.py" + test_file.write_text(PROGRAM_WITH_THREADS) + + with spawn_child_process(python_executable, test_file, 
tmpdir) as child_process: + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + interpreter_ids = _interpreter_ids(threads) + assert 0 in interpreter_ids + assert len(interpreter_ids) == NUM_INTERPRETERS_WITH_THREADS + 1 + + counts_by_interpreter = _count_threads_by_interpreter(threads) + assert all( + counts_by_interpreter.get(interpreter_id, 0) >= 1 + for interpreter_id in interpreter_ids + ) + # At least one sub-interpreter should expose multiple Python threads. + assert any( + count > 1 + for interpreter_id, count in counts_by_interpreter.items() + if interpreter_id != 0 + ) + + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_nested_same_thread_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_nested_same_thread.py" + test_file.write_text(PROGRAM_NESTED_SAME_THREAD) + + with spawn_child_process(python_executable, test_file, tmpdir) as child_process: + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + has_shared_tid = _assert_mergeable_same_tid_groups(threads) + assert has_shared_tid + + output = _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.PYTHON, + interpreter_ids=_interpreter_ids(threads), + ) + assert ( + "Unable to merge native stack due to insufficient native information" + not in output + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_two_threads_three_per_thread_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / 
"subinterpreters_two_threads_three_each.py" + signal_file = Path(str(tmpdir)) / "subinterpreters_ready.txt" + signal_file.write_text("") + test_file.write_text(PROGRAM_TWO_THREADS_THREE_SUBINTERPRETERS_EACH) + + with subprocess.Popen( + [str(python_executable), str(test_file), str(signal_file)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) as child_process: + deadline = time.time() + 10 + while time.time() < deadline: + lines = [line for line in signal_file.read_text().splitlines() if line] + if len(lines) >= 2: + break + time.sleep(0.1) + else: + child_process.terminate() + child_process.kill() + raise AssertionError("Timed out waiting for nested subinterpreter chains") + + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + child_process.terminate() + child_process.kill() + child_process.wait(timeout=5) + + groups = _shared_tid_groups_with_min_interpreters(threads, min_interpreters=3) + assert len(groups) >= 2 + + for _, group in groups: + _assert_strict_native_eval_symbols_for_group(group) + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) + == NativeFrame.FrameType.EVAL + ] + entry_count = sum(frame.is_entry for frame in thread.all_frames) + assert len(eval_frames) == entry_count + assert len(eval_frames) > 0 + + output = _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.PYTHON, + interpreter_ids=_interpreter_ids(threads), + ) + assert ( + "Unable to merge native stack due to insufficient native information" + not in output + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_for_core(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with generate_core_file(python_executable, test_file, 
tmpdir) as core_file: + threads = list( + get_process_threads_for_core( + core_file, + python_executable, + native_mode=NativeReportingMode.OFF, + ) + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert all(not thread.native_frames for thread in threads) + _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.OFF, + interpreter_ids=interpreter_ids, + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +@pytest.mark.parametrize( + "native_mode", + [ + NativeReportingMode.PYTHON, + NativeReportingMode.LAST, + NativeReportingMode.ALL, + ], + ids=["python", "last", "all"], +) +def test_subinterpreters_for_core_with_native(python, tmpdir, native_mode): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with generate_core_file(python_executable, test_file, tmpdir) as core_file: + threads = list( + get_process_threads_for_core( + core_file, + python_executable, + native_mode=native_mode, + ) + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + output = _assert_interpreter_headers( + threads=threads, + native_mode=native_mode, + interpreter_ids=interpreter_ids, + ) + assert "(C)" in output or "Unable to merge native stack" in output diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index cbab172b..37039e93 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -190,7 +190,7 @@ def test_process_remote_default(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -205,8 +205,8 @@ def test_process_remote_default(): locals=False, method=StackMethod.AUTO, ) - assert 
print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -221,7 +221,7 @@ def test_process_remote_no_block(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -236,8 +236,8 @@ def test_process_remote_no_block(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -260,7 +260,7 @@ def test_process_remote_native(argument, mode): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -275,7 +275,9 @@ def test_process_remote_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [call(thread, mode) for thread in threads] + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads + ] def test_process_remote_locals(): @@ -289,7 +291,7 @@ def test_process_remote_locals(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -304,8 +306,8 @@ def test_process_remote_locals(): locals=True, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - 
call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -320,7 +322,7 @@ def test_process_remote_native_no_block(capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -330,7 +332,7 @@ def test_process_remote_native_no_block(capsys): main() get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + MockPrinter.assert_not_called() def test_process_remote_exhaustive(): @@ -344,7 +346,7 @@ def test_process_remote_exhaustive(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -359,8 +361,8 @@ def test_process_remote_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -376,7 +378,7 @@ def test_process_remote_error(exception, exval, capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), ): @@ -388,7 +390,7 @@ def test_process_remote_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - print_thread_mock.assert_not_called() + MockPrinter.assert_not_called() capture = capsys.readouterr() assert "Oh 
no!" in capture.err @@ -406,7 +408,7 @@ def test_process_core_default_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -429,8 +431,8 @@ def test_process_core_default_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -453,7 +455,7 @@ def test_process_core_default_gzip_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -479,8 +481,8 @@ def test_process_core_default_gzip_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] gzip_open_mock.assert_called_with(Path("corefile.gz"), "rb") @@ -551,7 +553,7 @@ def test_process_core_default_with_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -571,8 +573,8 @@ def 
test_process_core_default_with_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -597,7 +599,7 @@ def test_process_core_native(argument, mode): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -617,7 +619,9 @@ def test_process_core_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [call(thread, mode) for thread in threads] + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads + ] def test_process_core_locals(): @@ -633,7 +637,7 @@ def test_process_core_locals(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -653,8 +657,8 @@ def test_process_core_locals(): locals=True, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -678,7 +682,7 @@ def test_process_core_with_search_path(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), 
patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -698,8 +702,8 @@ def test_process_core_with_search_path(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -716,7 +720,7 @@ def test_process_core_with_search_root(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -747,8 +751,8 @@ def test_process_core_with_search_root(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -761,7 +765,7 @@ def test_process_core_with_not_readable_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -786,7 +790,7 @@ def test_process_core_with_invalid_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -814,7 +818,7 @@ def path_exists(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + 
patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch.object(Path, "exists", path_exists), ): @@ -826,7 +830,7 @@ def path_exists(what): # THEN get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + MockPrinter.assert_not_called() def test_process_core_executable_does_not_exit(): @@ -845,7 +849,7 @@ def does_exit(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), patch.object(Path, "exists", does_exit), @@ -857,7 +861,7 @@ def does_exit(what): # THEN get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + MockPrinter.assert_not_called() @pytest.mark.parametrize( @@ -874,7 +878,7 @@ def test_process_core_error(exception, exval, capsys): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -890,7 +894,7 @@ def test_process_core_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - print_thread_mock.assert_not_called() + MockPrinter.assert_not_called() capture = capsys.readouterr() assert "Oh no!" 
in capture.err @@ -907,7 +911,7 @@ def test_process_core_exhaustive(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -927,8 +931,8 @@ def test_process_core_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads + assert MockPrinter.return_value.print_thread.mock_calls == [ + call(thread) for thread in threads ] @@ -942,7 +946,7 @@ def test_default_colored_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -963,7 +967,7 @@ def test_nocolor_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -984,7 +988,7 @@ def test_nocolor_output_at_the_front_for_process(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -1004,7 +1008,7 @@ def test_nocolor_output_at_the_front_for_core(): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1029,7 +1033,7 @@ def test_global_options_can_be_placed_at_any_point(option): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - 
patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1051,7 +1055,7 @@ def test_verbose_as_global_options_sets_correctly_the_logger(): # WHEN with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1198,7 +1202,7 @@ def test_process_core_does_not_crash_if_core_analyzer_fails(method): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1223,7 +1227,7 @@ def test_core_file_missing_modules_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1254,7 +1258,7 @@ def test_core_file_missing_build_ids_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1294,7 +1298,7 @@ def test_executable_is_not_elf_uses_the_first_map(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.TracebackPrinter"), patch("pystack.__main__.is_elf", lambda x: x == real_executable), patch("pystack.__main__.is_gzip", 
return_value=False), patch("sys.argv", argv), diff --git a/tests/unit/test_traceback_formatter.py b/tests/unit/test_traceback_formatter.py index 7ff3b276..73c93c34 100644 --- a/tests/unit/test_traceback_formatter.py +++ b/tests/unit/test_traceback_formatter.py @@ -4,8 +4,8 @@ import pytest from pystack.engine import NativeReportingMode +from pystack.traceback_formatter import TracebackPrinter from pystack.traceback_formatter import format_thread -from pystack.traceback_formatter import print_thread from pystack.types import SYMBOL_IGNORELIST from pystack.types import LocationInfo from pystack.types import NativeFrame @@ -1268,7 +1268,7 @@ def test_print_thread(capsys): "pystack.traceback_formatter.format_thread", return_value=("1", "2", "3"), ): - print_thread(thread, NativeReportingMode.OFF) + TracebackPrinter(NativeReportingMode.OFF).print_thread(thread) # THEN diff --git a/tests/utils.py b/tests/utils.py index cdaa2f04..e779989a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -314,6 +314,13 @@ def all_pystack_combinations(corefile=False, native=False): ) +ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS = pytest.mark.parametrize( + "python", + [python[:2] for python in AVAILABLE_PYTHONS if python.version >= (3, 13)], + ids=[python[1].name for python in AVAILABLE_PYTHONS if python.version >= (3, 13)], +) + + def all_pythons_since(major: int, minor: int): return pytest.mark.parametrize( "python", From 8e2e16105f85f8563f2d07f6c8e957f3998d3763 Mon Sep 17 00:00:00 2001 From: Saul Cooperman Date: Fri, 29 May 2026 20:06:24 +0200 Subject: [PATCH 2/7] Move d_stack_anchor to PyThread, no longer part of Thread base class. 
Signed-off-by: Saul Cooperman --- src/pystack/_pystack/pythread.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index 3938dcf8..6004a570 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -17,7 +17,6 @@ class Thread Thread(pid_t pid, pid_t tid); pid_t Tid() const; const std::vector& NativeFrames() const; - remote_addr_t stack_anchor; // Methods void populateNativeStackTrace(const std::shared_ptr& manager); From 599bd1f66331b4b0ff0d559a4e707c0b8ff37bed Mon Sep 17 00:00:00 2001 From: Saul Cooperman Date: Fri, 29 May 2026 20:18:02 +0200 Subject: [PATCH 3/7] Add basic tests for the include_subinterpreters flag in traceback printing. Signed-off-by: Saul Cooperman --- tests/unit/test_main.py | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 37039e93..2b2a1cc4 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -1350,3 +1350,51 @@ def test_executable_is_not_elf_uses_the_first_map(): locals=False, method=StackMethod.AUTO, ) + + +def test_include_subinterpreters_false_for_single_interpreter(): + # GIVEN + + argv = ["pystack", "remote", "31"] + + threads = [Mock(interpreter_id=0), Mock(interpreter_id=0), Mock(interpreter_id=0)] + + # WHEN + + with ( + patch("pystack.__main__.get_process_threads") as get_process_threads_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("sys.argv", argv), + ): + get_process_threads_mock.return_value = threads + main() + + # THEN: TracebackPrinter must NOT show interpreter headers for a single interpreter + + MockPrinter.assert_called_once_with( + NativeReportingMode.OFF, include_subinterpreters=False + ) + + +def test_include_subinterpreters_true_for_multiple_interpreters(): + # GIVEN + + argv = ["pystack", "remote", "31"] + + threads = [Mock(interpreter_id=0), Mock(interpreter_id=1), Mock(interpreter_id=2)] + + # 
WHEN + + with ( + patch("pystack.__main__.get_process_threads") as get_process_threads_mock, + patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("sys.argv", argv), + ): + get_process_threads_mock.return_value = threads + main() + + # THEN: TracebackPrinter must show interpreter headers when sub-interpreters exist + + MockPrinter.assert_called_once_with( + NativeReportingMode.OFF, include_subinterpreters=True + ) From 61d529c0d5ad32189c4e1dca9022cd8ab888b7be Mon Sep 17 00:00:00 2001 From: Saul Cooperman Date: Sun, 31 May 2026 01:01:41 +0200 Subject: [PATCH 4/7] Parametrise test Signed-off-by: Saul Cooperman --- tests/unit/test_main.py | 42 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 2b2a1cc4..dc7ec72c 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -1352,36 +1352,20 @@ def test_executable_is_not_elf_uses_the_first_map(): ) -def test_include_subinterpreters_false_for_single_interpreter(): - # GIVEN - - argv = ["pystack", "remote", "31"] - - threads = [Mock(interpreter_id=0), Mock(interpreter_id=0), Mock(interpreter_id=0)] - - # WHEN - - with ( - patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, - patch("sys.argv", argv), - ): - get_process_threads_mock.return_value = threads - main() - - # THEN: TracebackPrinter must NOT show interpreter headers for a single interpreter - - MockPrinter.assert_called_once_with( - NativeReportingMode.OFF, include_subinterpreters=False - ) - - -def test_include_subinterpreters_true_for_multiple_interpreters(): +@pytest.mark.parametrize( + "interpreter_ids, expected", + [ + ((None, None, None), False), + ((0, 0, 0), False), + ((0, 1, 2), True), + ((None, 0, 1), True), + ], +) +def test_include_subinterpreters_passed_to_traceback_printer(interpreter_ids, expected): # GIVEN argv = ["pystack", "remote", 
"31"] - - threads = [Mock(interpreter_id=0), Mock(interpreter_id=1), Mock(interpreter_id=2)] + threads = [Mock(interpreter_id=iid) for iid in interpreter_ids] # WHEN @@ -1393,8 +1377,8 @@ def test_include_subinterpreters_true_for_multiple_interpreters(): get_process_threads_mock.return_value = threads main() - # THEN: TracebackPrinter must show interpreter headers when sub-interpreters exist + # THEN MockPrinter.assert_called_once_with( - NativeReportingMode.OFF, include_subinterpreters=True + NativeReportingMode.OFF, include_subinterpreters=expected ) From 738c40265a2015b42d9316a579a809d71cfbdf4f Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Thu, 2 Jul 2026 22:35:32 -0400 Subject: [PATCH 5/7] Rework traceback formatting for subinterps Replace the `TracebackPrinter` class with a `print_threads()` function that detects shared-TID thread groups positionally. When threads from multiple subinterpreters share an OS thread, `format_thread()` now accepts `continuing_from_previous`/`continues_to_next` flags to emit a single "Traceback for thread" header with per-interpreter annotations like "In the main interpreter" or "In interpreter N". This relies on all of the interpreters' stacks for a given TID being printed sequentially. Previously that wasn't guaranteed, now it is. I've reworked `_normalize_threads()` to group threads by TID and sort each group by stack anchor even when native traces aren't enabled. This is what users would expect, anyway. Move the `frame_type()` classification back to Python, keeping only `is_eval_frame()` in C++ since that's what all native stack slicing needs. Remove the `py_isv314` version template. It's not necessary now that we only support using the `_Py_DebugOffsets` for 3.14+ and have dropped the hardcoded 3.14 offsets. Extract `num_entry_frames()` and `sort_threads_by_stack_anchor()` as standalone helpers. Fix the `std::accumulate` call to use a `size_t` rather than `int` for the accumulator. 
For Python < 3.7, `getInterpreterId()` now returns the interpreter address as a unique interpreter identifier instead of hardcoding 0. Signed-off-by: Matt Wozniski --- news/279.feature.rst | 1 + src/pystack/__init__.py | 4 +- src/pystack/__main__.py | 21 +-- src/pystack/_pystack.pyi | 9 -- src/pystack/_pystack/bindings.cpp | 166 +++++++++++----------- src/pystack/_pystack/interpreter.cpp | 10 +- src/pystack/_pystack/native_frame.cpp | 55 ------- src/pystack/_pystack/native_frame.h | 10 -- src/pystack/_pystack/pythread.h | 3 +- src/pystack/_pystack/thread_builder.cpp | 2 + src/pystack/_pystack/version.cpp | 17 --- src/pystack/traceback_formatter.py | 79 +++++----- src/pystack/types.py | 19 ++- tests/integration/test_subinterpreters.py | 13 +- tests/unit/test_main.py | 155 +++++++------------- tests/unit/test_traceback_formatter.py | 4 +- 16 files changed, 207 insertions(+), 361 deletions(-) create mode 100644 news/279.feature.rst diff --git a/news/279.feature.rst b/news/279.feature.rst new file mode 100644 index 00000000..3a39b2f7 --- /dev/null +++ b/news/279.feature.rst @@ -0,0 +1 @@ +Add support for Python subinterpreters. When a process uses multiple interpreters (e.g. via Python 3.14's ``concurrent.interpreters`` module), stacks for all interpreters are now reported instead of just the main one. 
diff --git a/src/pystack/__init__.py b/src/pystack/__init__.py index 96909732..156b8f4f 100644 --- a/src/pystack/__init__.py +++ b/src/pystack/__init__.py @@ -1,7 +1,7 @@ from ._version import __version__ -from .traceback_formatter import TracebackPrinter +from .traceback_formatter import print_threads __all__ = [ "__version__", - "TracebackPrinter", + "print_threads", ] diff --git a/src/pystack/__main__.py b/src/pystack/__main__.py index 365ddfac..4a2e844e 100644 --- a/src/pystack/__main__.py +++ b/src/pystack/__main__.py @@ -8,7 +8,6 @@ from textwrap import dedent from typing import Any from typing import Dict -from typing import List from typing import NoReturn from typing import Optional from typing import Set @@ -18,10 +17,9 @@ from pystack.process import decompress_gzip from pystack.process import is_elf from pystack.process import is_gzip -from pystack.types import PyThread -from . import TracebackPrinter from . import errors +from . import print_threads from .colors import colored from .engine import CoreFileAnalyzer from .engine import NativeReportingMode @@ -285,10 +283,6 @@ def main() -> None: _exit_with_code(the_error) -def _include_subinterpreters(threads: List[PyThread]) -> bool: - return len(set(thread.interpreter_id for thread in threads)) > 1 - - def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None: if not args.block and args.native_mode != NativeReportingMode.OFF: parser.error("Native traces are only available in blocking mode") @@ -300,12 +294,7 @@ def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) -> locals=args.locals, method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, ) - - printer = TracebackPrinter( - args.native_mode, include_subinterpreters=_include_subinterpreters(threads) - ) - for thread in threads: - printer.print_thread(thread) + print_threads(threads, args.native_mode) def format_psinfo_information(psinfo: Dict[str, Any]) -> str: @@ -433,11 +422,7 @@ def 
process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N locals=args.locals, method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, ) - printer = TracebackPrinter( - args.native_mode, include_subinterpreters=_include_subinterpreters(threads) - ) - for thread in threads: - printer.print_thread(thread) + print_threads(threads, args.native_mode) if __name__ == "__main__": # pragma: no cover diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 1ab8867c..0ab1d69b 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -84,16 +84,7 @@ def get_process_threads_for_core( def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: ... def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... - -class NativeFrameType(enum.Enum): - IGNORE = 0 - EVAL = 1 - OTHER = 3 - def is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: ... -def frame_type( - symbol: str, python_version: Optional[Tuple[int, int]] = None -) -> NativeFrameType: ... F = TypeVar("F", bound=Callable[..., Any]) diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp index a74c432d..710cd06b 100644 --- a/src/pystack/_pystack/bindings.cpp +++ b/src/pystack/_pystack/bindings.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -541,6 +540,32 @@ logMemoryMaps(const std::vector& maps, const char* source) } } +size_t +num_entry_frames(const pystack::PyThreadData& thread) +{ + return std::count_if( + thread.frames.begin(), + thread.frames.end(), + [](const pystack::PyFrameData& frame) { return frame.is_entry; }); +} + +std::vector +sort_threads_by_stack_anchor(std::vector data) +{ + // Sort by: + // 1. With stack anchor (!=0) before without + // 2. Stack anchor in descending order + // 3. 
Index in the vector as a tie breaker (handled by stable_sort) + std::stable_sort( + data.begin(), + data.end(), + [](const pystack::PyThreadData& a, const pystack::PyThreadData& b) { + return std::make_tuple(a.stack_anchor == 0 ? 1 : 0, -a.stack_anchor) + < std::make_tuple(b.stack_anchor == 0 ? 1 : 0, -b.stack_anchor); + }); + return data; +} + std::vector _slice_native_stack(std::vector data) { @@ -553,93 +578,86 @@ _slice_native_stack(std::vector data) return data; } - // Capture canonical frames and python version - const std::vector canonical_frames = canonical_thread->native_frames; + // Capture native frames and python version + const std::vector native_frames = canonical_thread->native_frames; const auto python_version = data[0].python_version; std::vector eval_index; - for (std::size_t i = 0; i < canonical_frames.size(); ++i) { - if (pystack::is_eval_frame(canonical_frames[i].symbol, python_version)) { + for (std::size_t i = 0; i < native_frames.size(); ++i) { + if (pystack::is_eval_frame(native_frames[i].symbol, python_version)) { eval_index.push_back(i); } } - const auto total_entry_frames = static_cast( - std::accumulate(data.begin(), data.end(), 0, [](int acc, const pystack::PyThreadData& d) { - return acc - + static_cast(std::count_if( - d.frames.begin(), - d.frames.end(), - [](const pystack::PyFrameData& frame) { return frame.is_entry; })); - })); + const auto total_entry_frames = std::accumulate( + data.begin(), + data.end(), + size_t{0}, + [](size_t acc, const pystack::PyThreadData& d) { return acc + num_entry_frames(d); }); if (eval_index.size() != total_entry_frames) { + pystack::LOG(pystack::DEBUG) << "Skipping same-tid native slicing for tid " << data[0].tid + << " due to mismatched counts: entry=" << total_entry_frames + << ", eval=" << eval_index.size(); return data; } - std::vector ordered_threads = std::move(data); - // Sort by: - // 1. With stack anchor (!=0) before without - // 2. Stack anchor in descending order - // 3. 
Index in PyThreadData vec (handled by stable_sort) - std::stable_sort( - ordered_threads.begin(), - ordered_threads.end(), - [](const pystack::PyThreadData& a, const pystack::PyThreadData& b) { - return std::make_tuple(a.stack_anchor == 0 ? 1 : 0, -a.stack_anchor) - < std::make_tuple(b.stack_anchor == 0 ? 1 : 0, -b.stack_anchor); - }); - // Slice frames according to eval frames per python thread std::size_t cursor = 0; - for (auto& thread_data : ordered_threads) { - const auto required_eval_frames = static_cast(std::count_if( - thread_data.frames.begin(), - thread_data.frames.end(), - [](const pystack::PyFrameData& py_frame) { return py_frame.is_entry; })); + auto native_frames_cursor = native_frames.begin(); + for (auto& thread_data : data) { + const auto num_entry_frames_for_thread = num_entry_frames(thread_data); - if (required_eval_frames == 0) { + if (num_entry_frames_for_thread == 0) { + thread_data.native_frames.clear(); continue; } - const std::size_t end = cursor + required_eval_frames; - const std::size_t from = eval_index[cursor]; - const std::size_t to = end < eval_index.size() ? eval_index[end] : canonical_frames.size(); - thread_data.native_frames.assign(canonical_frames.begin() + from, canonical_frames.begin() + to); - cursor = end; + const std::size_t next_thread_cursor = cursor + num_entry_frames_for_thread; + const std::size_t next_thread_first_eval = next_thread_cursor < eval_index.size() + ? 
eval_index[next_thread_cursor] + : native_frames.size(); + const auto next_thread_native_frames_cursor = native_frames.begin() + next_thread_first_eval; + + thread_data.native_frames.assign(native_frames_cursor, next_thread_native_frames_cursor); + native_frames_cursor = next_thread_native_frames_cursor; + cursor = next_thread_cursor; } - return ordered_threads; + return data; } std::vector _normalize_threads(std::vector threads, NativeReportingMode native_mode) { - if (native_mode == NativeReportingMode::OFF) { - return threads; - } - - // First pass: bucket threads by TID (capture index only) - std::unordered_map> indices_by_tid; - for (std::size_t i = 0; i < threads.size(); ++i) { - indices_by_tid[threads[i].tid].push_back(i); + // Group threads by TID, preserving first-seen order. + // One TID can have multiple PyThreadData due to subinterpreters. + std::unordered_map tid_to_group; + std::vector> groups; + + for (auto& thread : threads) { + auto [it, inserted] = tid_to_group.try_emplace(thread.tid, groups.size()); + if (inserted) { + groups.emplace_back(); + } + groups[it->second].push_back(std::move(thread)); } - // Second pass: for groups that share a TID, slice native stacks. 
- for (auto& [_, indices] : indices_by_tid) { - if (indices.size() <= 1) { - continue; - } - std::vector group; - for (const std::size_t idx : indices) { - group.push_back(std::move(threads[idx])); + std::vector ret; + for (auto& group : groups) { + if (group.size() > 1) { + // Order interpreters for this TID from outermost to innermost + group = sort_threads_by_stack_anchor(std::move(group)); + // Associate each Python stack with its chunk of the native stack + if (native_mode != NativeReportingMode::OFF) { + group = _slice_native_stack(std::move(group)); + } } - auto sliced = _slice_native_stack(std::move(group)); - for (std::size_t i = 0; i < indices.size(); ++i) { - threads[indices[i]] = std::move(sliced[i]); + for (auto& thread : group) { + ret.push_back(std::move(thread)); } } - return threads; + return ret; } nb::object @@ -766,32 +784,32 @@ get_process_threads_for_core( } nb::list result; - std::vector ret_cpp; + std::vector python_threads; std::vector all_tids = pystack::getThreadIds(manager->get_manager()); bool add_native = native_mode != NativeReportingMode::OFF; while (head) { - auto threads = pystack::buildThreadsFromInterpreter( + auto new_threads = pystack::buildThreadsFromInterpreter( manager->get_manager(), head, manager->pid(), add_native, locals); - for (const auto& thread : threads) { + for (const auto& thread : new_threads) { all_tids.erase( std::remove(all_tids.begin(), all_tids.end(), thread.tid), all_tids.end()); } - ret_cpp.insert( - ret_cpp.end(), - std::make_move_iterator(threads.begin()), - std::make_move_iterator(threads.end())); + python_threads.insert( + python_threads.end(), + std::make_move_iterator(new_threads.begin()), + std::make_move_iterator(new_threads.end())); head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); } - for (const auto& thread : _normalize_threads(ret_cpp, native_mode)) { + for (const auto& thread : _normalize_threads(python_threads, native_mode)) { 
result.append(buildPyThreadObject(thread, types, manager->python_version())); } @@ -988,25 +1006,9 @@ NB_MODULE(_pystack, m) nb::module_ pystack_errors = nb::module_::import_("pystack.errors"); m.attr("intercept_runtime_errors") = pystack_errors.attr("intercept_runtime_errors"); - nb::enum_(m, "NativeFrameType") - .value("IGNORE", pystack::NativeFrame::FrameType::IGNORE) - .value("EVAL", pystack::NativeFrame::FrameType::EVAL) - .value("OTHER", pystack::NativeFrame::FrameType::OTHER); - m.def("is_eval_frame", &pystack::is_eval_frame, "symbol"_a, "python_version"_a, "Return True if the symbol is a CPython eval frame function"); - - m.def( - "frame_type", - [](const std::string& symbol, std::optional> python_version) { - pystack::NativeFrame frame{}; - frame.symbol = symbol; - return pystack::frame_type(frame, python_version); - }, - "symbol"_a, - "python_version"_a = nb::none(), - "Return the FrameType for a native frame symbol"); } diff --git a/src/pystack/_pystack/interpreter.cpp b/src/pystack/_pystack/interpreter.cpp index 4f52e043..78eef9ab 100644 --- a/src/pystack/_pystack/interpreter.cpp +++ b/src/pystack/_pystack/interpreter.cpp @@ -1,7 +1,6 @@ #include #include "interpreter.h" -#include "logging.h" #include "process.h" #include "structure.h" #include "version.h" @@ -23,14 +22,13 @@ InterpreterUtils::getInterpreterId( remote_addr_t interpreter_addr) { if (!manager->versionIsAtLeast(3, 7)) { - // No support for subinterpreters so the only interpreter is ID 0. - return 0; + // Interpreter ID was added in Python 3.7, so for earlier versions + // we just return the address as a unique identifier. 
+ return static_cast(interpreter_addr); } Structure is(manager, interpreter_addr); - int64_t id_value = is.getField(&py_is_v::o_id); - - return id_value; + return is.getField(&py_is_v::o_id); } } // namespace pystack diff --git a/src/pystack/_pystack/native_frame.cpp b/src/pystack/_pystack/native_frame.cpp index 0a793cfb..12672dab 100644 --- a/src/pystack/_pystack/native_frame.cpp +++ b/src/pystack/_pystack/native_frame.cpp @@ -1,31 +1,9 @@ #include "native_frame.h" -#include -#include #include namespace pystack { -static const std::set SYMBOL_IGNORELIST = { - "PyObject_Call", - "call_function", - "classmethoddescr_call", - "cmpwrapper_call", - "fast_function", - "function_call", - "instance_call", - "instancemethod_call", - "methoddescr_call", - "proxy_call", - "slot_tp_call", - "type_call", - "weakref_call", - "wrap_call", - "wrapper_call", - "wrapperdescr_call", - "do_call_core", -}; - static bool starts_with(const std::string& str, const std::string& prefix) { @@ -52,37 +30,4 @@ is_eval_frame(const std::string& symbol, std::pair python_version) return false; } -NativeFrame::FrameType -frame_type(const NativeFrame& frame, std::optional> python_version) -{ - const std::string& symbol = frame.symbol; - - if (python_version && is_eval_frame(symbol, *python_version)) { - return NativeFrame::FrameType::EVAL; - } - if (starts_with(symbol, "PyEval") || starts_with(symbol, "_PyEval")) { - return NativeFrame::FrameType::IGNORE; - } - if (starts_with(symbol, "_Py")) { - return NativeFrame::FrameType::IGNORE; - } - if (starts_with(symbol, "_TAIL_CALL_")) { - return NativeFrame::FrameType::IGNORE; - } - if (python_version && *python_version >= std::make_pair(3, 8)) { - std::string lower = symbol; - std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); - if (lower.find("vectorcall") != std::string::npos) { - return NativeFrame::FrameType::IGNORE; - } - } - for (const auto& ignored : SYMBOL_IGNORELIST) { - if (starts_with(symbol, ignored)) { - return 
NativeFrame::FrameType::IGNORE; - } - } - - return NativeFrame::FrameType::OTHER; -} - } // namespace pystack diff --git a/src/pystack/_pystack/native_frame.h b/src/pystack/_pystack/native_frame.h index 66496d5f..a462cc5a 100644 --- a/src/pystack/_pystack/native_frame.h +++ b/src/pystack/_pystack/native_frame.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -9,12 +8,6 @@ namespace pystack { // automatic conversions without explicit code. struct NativeFrame { - enum class FrameType { - IGNORE = 0, - EVAL = 1, - OTHER = 3, - }; - unsigned long address; std::string symbol; std::string path; @@ -26,7 +19,4 @@ struct NativeFrame bool is_eval_frame(const std::string& symbol, std::pair python_version); -NativeFrame::FrameType -frame_type(const NativeFrame& frame, std::optional> python_version); - } // namespace pystack diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index 6004a570..bb490ffb 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -61,8 +61,7 @@ class PyThread : public Thread remote_addr_t d_next_addr; std::shared_ptr d_next; std::shared_ptr d_first_frame; - remote_addr_t d_stack_anchor; - int interpreter_id; + remote_addr_t d_stack_anchor{}; // Methods GilStatus calculateGilStatus( diff --git a/src/pystack/_pystack/thread_builder.cpp b/src/pystack/_pystack/thread_builder.cpp index 406f4a43..8b7d28ab 100644 --- a/src/pystack/_pystack/thread_builder.cpp +++ b/src/pystack/_pystack/thread_builder.cpp @@ -98,6 +98,8 @@ buildNativeThread(const std::shared_ptr& manager, pid_t data.name = getThreadName(pid, tid); data.gil_status = 0; // NOT_HELD data.gc_status = 0; // NOT_COLLECTING + data.interpreter_id = 0; // No Python stack for this thread means no interpreter + data.stack_anchor = 0; // and no stack anchor. 
LOG(INFO) << "Constructing new native thread with tid " << tid; diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index ab7fe48d..91ca5579 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -230,23 +230,6 @@ py_isv312() }; } -template -constexpr py_is_v -py_isv314() -{ - return { - sizeof(T), - {offsetof(T, next)}, - {offsetof(T, threads.head)}, - {offsetof(T, gc)}, - {offsetof(T, imports.modules)}, - {offsetof(T, sysdict)}, - {offsetof(T, builtins)}, - {offsetof(T, _gil)}, - {offsetof(T, id)}, - }; -} - template constexpr py_gc_v py_gc() diff --git a/src/pystack/traceback_formatter.py b/src/pystack/traceback_formatter.py index 346dc474..6797f1e8 100644 --- a/src/pystack/traceback_formatter.py +++ b/src/pystack/traceback_formatter.py @@ -1,6 +1,7 @@ import os import sys from typing import Iterable +from typing import List from typing import Optional from .colors import colored @@ -12,40 +13,18 @@ from .types import frame_type -class TracebackPrinter: - def __init__( - self, native_mode: NativeReportingMode, include_subinterpreters: bool = False - ): - self.native_mode = native_mode - self.include_subinterpreters = include_subinterpreters - self._current_interpreter_id: Optional[int] = None - self._first_print_sentinel = True - - def print_thread(self, thread: PyThread) -> None: - # Print interpreter header if we've switched interpreters - if self.include_subinterpreters: - if ( - thread.interpreter_id != self._current_interpreter_id - or self._first_print_sentinel - ): - self._print_interpreter_header(thread.interpreter_id) - self._current_interpreter_id = thread.interpreter_id - self._first_print_sentinel = False - - # Print the thread with indentation - for line in format_thread(thread, self.native_mode): - if self.include_subinterpreters: - print(" " * 2, end="") +def print_threads(threads: List[PyThread], native_mode: NativeReportingMode) -> None: + for i, thread in enumerate(threads): + 
same_tid_as_prev = i > 0 and thread.tid == threads[i - 1].tid + same_tid_as_next = i < len(threads) - 1 and thread.tid == threads[i + 1].tid + for line in format_thread( + thread, + native_mode, + continuing_from_previous=same_tid_as_prev, + continues_to_next=same_tid_as_next, + ): print(line, file=sys.stdout, flush=True) - def _print_interpreter_header(self, interpreter_id: Optional[int]) -> None: - header = ( - f"Interpreter-{interpreter_id if interpreter_id is not None else 'Unknown'}" - ) - if interpreter_id == 0: - header += " (main)" - print(header, file=sys.stdout, flush=True) - def format_frame(frame: PyFrame) -> Iterable[str]: code: PyCodeObject = frame.code @@ -94,18 +73,39 @@ def _are_the_stacks_mergeable(thread: PyThread) -> bool: return n_eval_frames == n_entry_frames -def format_thread(thread: PyThread, native_mode: NativeReportingMode) -> Iterable[str]: +def format_thread( + thread: PyThread, + native_mode: NativeReportingMode, + continuing_from_previous: bool = False, + continues_to_next: bool = False, +) -> Iterable[str]: native = native_mode != NativeReportingMode.OFF current_frame: Optional[PyFrame] = thread.first_frame - if current_frame is None and not native: + if ( + current_frame is None + and not native + and not continuing_from_previous + and not continues_to_next + ): yield f"The frame stack for thread {thread.tid} is empty" return thread_name = f" ({thread.name}) " if thread.name else " " - yield ( - f"Traceback for thread {thread.tid}{thread_name}{thread.status} " - "(most recent call last):" - ) + if not continuing_from_previous: + yield ( + f"Traceback for thread {thread.tid}{thread_name}" + f"{thread.status + ' ' if not continues_to_next else ''}" + f"(most recent call last):" + ) + + if continues_to_next or continuing_from_previous: + if thread.interpreter_id is None: + interp_name = "Not attached to any interpreter" + elif thread.interpreter_id == 0: + interp_name = "In the main interpreter" + else: + interp_name = f"In interpreter 
{thread.interpreter_id}" + yield f" {interp_name} {thread.status}" if not (native and _are_the_stacks_mergeable(thread)): if native: @@ -118,7 +118,8 @@ def format_thread(thread: PyThread, native_mode: NativeReportingMode) -> Iterabl yield from _format_merged_stacks( thread, current_frame, native_mode == NativeReportingMode.LAST ) - yield "" + if not continues_to_next: + yield "" def _format_merged_stacks( diff --git a/src/pystack/types.py b/src/pystack/types.py index 0a33fe0d..2b0c27f1 100644 --- a/src/pystack/types.py +++ b/src/pystack/types.py @@ -7,7 +7,7 @@ from typing import Optional from typing import Tuple -from ._pystack import frame_type as _frame_type_cpp +from ._pystack import is_eval_frame as _is_eval_frame SYMBOL_IGNORELIST = { "PyObject_Call", @@ -49,8 +49,21 @@ class FrameType(enum.Enum): def frame_type( frame: NativeFrame, python_version: Optional[Tuple[int, int]] ) -> NativeFrame.FrameType: - result = _frame_type_cpp(frame.symbol, python_version) - return NativeFrame.FrameType(result.value) + symbol = frame.symbol + if python_version and _is_eval_frame(symbol, python_version): + return frame.FrameType.EVAL + if symbol.startswith("PyEval") or symbol.startswith("_PyEval"): + return frame.FrameType.IGNORE + if symbol.startswith("_Py"): + return frame.FrameType.IGNORE + if symbol.startswith("_TAIL_CALL_"): + return frame.FrameType.IGNORE + if python_version and python_version >= (3, 8) and "vectorcall" in symbol.lower(): + return frame.FrameType.IGNORE + if any(symbol.startswith(ignored_symbol) for ignored_symbol in SYMBOL_IGNORELIST): + return frame.FrameType.IGNORE + + return frame.FrameType.OTHER class LocationInfo(NamedTuple): diff --git a/tests/integration/test_subinterpreters.py b/tests/integration/test_subinterpreters.py index 6554ff7d..8cdc18e7 100644 --- a/tests/integration/test_subinterpreters.py +++ b/tests/integration/test_subinterpreters.py @@ -14,7 +14,7 @@ from pystack.engine import StackMethod from pystack.engine import 
get_process_threads from pystack.engine import get_process_threads_for_core -from pystack.traceback_formatter import TracebackPrinter +from pystack.traceback_formatter import print_threads from pystack.types import NativeFrame from pystack.types import frame_type from tests.utils import ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS @@ -265,21 +265,16 @@ def _assert_interpreter_headers( native_mode: NativeReportingMode, interpreter_ids, ) -> str: - printer = TracebackPrinter( - native_mode=native_mode, - include_subinterpreters=True, - ) output = io.StringIO() with redirect_stdout(output): - for thread in threads: - printer.print_thread(thread) + print_threads(threads, native_mode=native_mode) result = output.getvalue() - assert "Interpreter-0 (main)" in result + assert "In the main interpreter" in result for interpreter_id in interpreter_ids: if interpreter_id == 0: continue - assert f"Interpreter-{interpreter_id}" in result + assert f"In interpreter {interpreter_id}" in result return result diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index dc7ec72c..d8e9d01c 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -2,7 +2,6 @@ from pathlib import Path from textwrap import dedent from unittest.mock import Mock -from unittest.mock import call from unittest.mock import mock_open from unittest.mock import patch @@ -190,7 +189,7 @@ def test_process_remote_default(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -205,9 +204,7 @@ def test_process_remote_default(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def 
test_process_remote_no_block(): @@ -221,7 +218,7 @@ def test_process_remote_no_block(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -236,9 +233,7 @@ def test_process_remote_no_block(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -260,7 +255,7 @@ def test_process_remote_native(argument, mode): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -275,9 +270,7 @@ def test_process_remote_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, mode) def test_process_remote_locals(): @@ -291,7 +284,7 @@ def test_process_remote_locals(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -306,9 +299,7 @@ def test_process_remote_locals(): locals=True, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def 
test_process_remote_native_no_block(capsys): @@ -322,7 +313,7 @@ def test_process_remote_native_no_block(capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -332,7 +323,7 @@ def test_process_remote_native_no_block(capsys): main() get_process_threads_mock.assert_not_called() - MockPrinter.assert_not_called() + print_threads_mock.assert_not_called() def test_process_remote_exhaustive(): @@ -346,7 +337,7 @@ def test_process_remote_exhaustive(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -361,9 +352,7 @@ def test_process_remote_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -378,7 +367,7 @@ def test_process_remote_error(exception, exval, capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), ): @@ -390,7 +379,7 @@ def test_process_remote_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - MockPrinter.assert_not_called() + print_threads_mock.assert_not_called() capture = capsys.readouterr() assert "Oh no!" 
in capture.err @@ -408,7 +397,7 @@ def test_process_core_default_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -431,9 +420,7 @@ def test_process_core_default_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_default_gzip_without_executable(): @@ -455,7 +442,7 @@ def test_process_core_default_gzip_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -481,9 +468,7 @@ def test_process_core_default_gzip_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) gzip_open_mock.assert_called_with(Path("corefile.gz"), "rb") @@ -553,7 +538,7 @@ def test_process_core_default_with_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -573,9 +558,7 @@ def 
test_process_core_default_with_executable(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -599,7 +582,7 @@ def test_process_core_native(argument, mode): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -619,9 +602,7 @@ def test_process_core_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, mode) def test_process_core_locals(): @@ -637,7 +618,7 @@ def test_process_core_locals(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -657,9 +638,7 @@ def test_process_core_locals(): locals=True, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_search_path(): @@ -682,7 +661,7 @@ def test_process_core_with_search_path(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), 
patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -702,9 +681,7 @@ def test_process_core_with_search_path(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_search_root(): @@ -720,7 +697,7 @@ def test_process_core_with_search_root(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -751,9 +728,7 @@ def test_process_core_with_search_root(): locals=False, method=StackMethod.AUTO, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_not_readable_search_root(): @@ -765,7 +740,7 @@ def test_process_core_with_not_readable_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -790,7 +765,7 @@ def test_process_core_with_invalid_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -818,7 +793,7 @@ def path_exists(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - 
patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch.object(Path, "exists", path_exists), ): @@ -830,7 +805,7 @@ def path_exists(what): # THEN get_process_threads_mock.assert_not_called() - MockPrinter.assert_not_called() + print_threads_mock.assert_not_called() def test_process_core_executable_does_not_exit(): @@ -849,7 +824,7 @@ def does_exit(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), patch.object(Path, "exists", does_exit), @@ -861,7 +836,7 @@ def does_exit(what): # THEN get_process_threads_mock.assert_not_called() - MockPrinter.assert_not_called() + print_threads_mock.assert_not_called() @pytest.mark.parametrize( @@ -878,7 +853,7 @@ def test_process_core_error(exception, exval, capsys): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -894,7 +869,7 @@ def test_process_core_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - MockPrinter.assert_not_called() + print_threads_mock.assert_not_called() capture = capsys.readouterr() assert "Oh no!" 
in capture.err @@ -911,7 +886,7 @@ def test_process_core_exhaustive(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -931,9 +906,7 @@ def test_process_core_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert MockPrinter.return_value.print_thread.mock_calls == [ - call(thread) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_default_colored_output(): @@ -946,7 +919,7 @@ def test_default_colored_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -967,7 +940,7 @@ def test_nocolor_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -988,7 +961,7 @@ def test_nocolor_output_at_the_front_for_process(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -1008,7 +981,7 @@ def test_nocolor_output_at_the_front_for_core(): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1033,7 +1006,7 @@ def test_global_options_can_be_placed_at_any_point(option): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - 
patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1055,7 +1028,7 @@ def test_verbose_as_global_options_sets_correctly_the_logger(): # WHEN with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1202,7 +1175,7 @@ def test_process_core_does_not_crash_if_core_analyzer_fails(method): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1227,7 +1200,7 @@ def test_core_file_missing_modules_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1258,7 +1231,7 @@ def test_core_file_missing_build_ids_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1298,7 +1271,7 @@ def test_executable_is_not_elf_uses_the_first_map(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", lambda x: x == real_executable), patch("pystack.__main__.is_gzip", 
return_value=False), patch("sys.argv", argv), @@ -1350,35 +1323,3 @@ def test_executable_is_not_elf_uses_the_first_map(): locals=False, method=StackMethod.AUTO, ) - - -@pytest.mark.parametrize( - "interpreter_ids, expected", - [ - ((None, None, None), False), - ((0, 0, 0), False), - ((0, 1, 2), True), - ((None, 0, 1), True), - ], -) -def test_include_subinterpreters_passed_to_traceback_printer(interpreter_ids, expected): - # GIVEN - - argv = ["pystack", "remote", "31"] - threads = [Mock(interpreter_id=iid) for iid in interpreter_ids] - - # WHEN - - with ( - patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.TracebackPrinter") as MockPrinter, - patch("sys.argv", argv), - ): - get_process_threads_mock.return_value = threads - main() - - # THEN - - MockPrinter.assert_called_once_with( - NativeReportingMode.OFF, include_subinterpreters=expected - ) diff --git a/tests/unit/test_traceback_formatter.py b/tests/unit/test_traceback_formatter.py index 73c93c34..5bad8082 100644 --- a/tests/unit/test_traceback_formatter.py +++ b/tests/unit/test_traceback_formatter.py @@ -4,8 +4,8 @@ import pytest from pystack.engine import NativeReportingMode -from pystack.traceback_formatter import TracebackPrinter from pystack.traceback_formatter import format_thread +from pystack.traceback_formatter import print_threads from pystack.types import SYMBOL_IGNORELIST from pystack.types import LocationInfo from pystack.types import NativeFrame @@ -1268,7 +1268,7 @@ def test_print_thread(capsys): "pystack.traceback_formatter.format_thread", return_value=("1", "2", "3"), ): - TracebackPrinter(NativeReportingMode.OFF).print_thread(thread) + print_threads([thread], NativeReportingMode.OFF) # THEN From f1af851c0cde03e06b93848bdd612f04e4d5a853 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 3 Jul 2026 01:08:31 -0400 Subject: [PATCH 6/7] Make C++ function names more idiomatic Signed-off-by: Matt Wozniski --- src/pystack/_pystack/bindings.cpp | 
20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp index 710cd06b..b3bbe912 100644 --- a/src/pystack/_pystack/bindings.cpp +++ b/src/pystack/_pystack/bindings.cpp @@ -541,7 +541,7 @@ logMemoryMaps(const std::vector& maps, const char* source) } size_t -num_entry_frames(const pystack::PyThreadData& thread) +numEntryFrames(const pystack::PyThreadData& thread) { return std::count_if( thread.frames.begin(), @@ -550,7 +550,7 @@ num_entry_frames(const pystack::PyThreadData& thread) } std::vector -sort_threads_by_stack_anchor(std::vector data) +sortThreadsByStackAnchor(std::vector data) { // Sort by: // 1. With stack anchor (!=0) before without @@ -567,7 +567,7 @@ sort_threads_by_stack_anchor(std::vector data) } std::vector -_slice_native_stack(std::vector data) +sliceNativeStack(std::vector data) { // Capture a canonical auto canonical_thread = @@ -593,7 +593,7 @@ _slice_native_stack(std::vector data) data.begin(), data.end(), size_t{0}, - [](size_t acc, const pystack::PyThreadData& d) { return acc + num_entry_frames(d); }); + [](size_t acc, const pystack::PyThreadData& d) { return acc + numEntryFrames(d); }); if (eval_index.size() != total_entry_frames) { pystack::LOG(pystack::DEBUG) << "Skipping same-tid native slicing for tid " << data[0].tid @@ -606,7 +606,7 @@ _slice_native_stack(std::vector data) std::size_t cursor = 0; auto native_frames_cursor = native_frames.begin(); for (auto& thread_data : data) { - const auto num_entry_frames_for_thread = num_entry_frames(thread_data); + const auto num_entry_frames_for_thread = numEntryFrames(thread_data); if (num_entry_frames_for_thread == 0) { thread_data.native_frames.clear(); @@ -627,7 +627,7 @@ _slice_native_stack(std::vector data) } std::vector -_normalize_threads(std::vector threads, NativeReportingMode native_mode) +normalizeThreads(std::vector threads, NativeReportingMode native_mode) { // Group threads by TID, 
preserving first-seen order. // One TID can have multiple PyThreadData due to subinterpreters. @@ -646,10 +646,10 @@ _normalize_threads(std::vector threads, NativeReportingMo for (auto& group : groups) { if (group.size() > 1) { // Order interpreters for this TID from outermost to innermost - group = sort_threads_by_stack_anchor(std::move(group)); + group = sortThreadsByStackAnchor(std::move(group)); // Associate each Python stack with its chunk of the native stack if (native_mode != NativeReportingMode::OFF) { - group = _slice_native_stack(std::move(group)); + group = sliceNativeStack(std::move(group)); } } for (auto& thread : group) { @@ -739,7 +739,7 @@ get_process_threads( } nb::list result; - for (const auto& thread : _normalize_threads(python_threads, native_mode)) { + for (const auto& thread : normalizeThreads(python_threads, native_mode)) { result.append(buildPyThreadObject(thread, types, python_version)); } for (const auto& thread : native_only_threads) { @@ -809,7 +809,7 @@ get_process_threads_for_core( head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); } - for (const auto& thread : _normalize_threads(python_threads, native_mode)) { + for (const auto& thread : normalizeThreads(python_threads, native_mode)) { result.append(buildPyThreadObject(thread, types, manager->python_version())); } From f9e500d910d11c2ba467afa04a1069712b8c8170 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 3 Jul 2026 02:25:11 -0400 Subject: [PATCH 7/7] Expose normalizeThreads for direct testing This function is tricky enough to justify unit testing it, so expose it directly through nanobind so we can exercise it with pytest. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack.pyi | 4 + src/pystack/_pystack/bindings.cpp | 48 +++++++ tests/unit/test_normalize_threads.py | 197 +++++++++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 tests/unit/test_normalize_threads.py diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 0ab1d69b..af781a92 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -85,6 +85,10 @@ def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... def is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: ... +def _normalize_threads_for_testing( + thread_descs: List[Dict[str, Any]], + native_mode: NativeReportingMode, +) -> List[PyThread]: ... F = TypeVar("F", bound=Callable[..., Any]) diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp index b3bbe912..99551071 100644 --- a/src/pystack/_pystack/bindings.cpp +++ b/src/pystack/_pystack/bindings.cpp @@ -1011,4 +1011,52 @@ NB_MODULE(_pystack, m) "symbol"_a, "python_version"_a, "Return True if the symbol is a CPython eval frame function"); + + m.def( + "_normalize_threads_for_testing", + [](nb::list thread_descs, NativeReportingMode native_mode) -> nb::list { + auto types = PyTypes::load(); + std::vector threads; + + for (auto desc_handle : thread_descs) { + nb::dict desc = nb::cast(desc_handle); + pystack::PyThreadData td{}; + td.tid = nb::cast(desc["tid"]); + td.stack_anchor = nb::cast(desc["stack_anchor"]); + td.interpreter_id = nb::cast(desc["interpreter_id"]); + td.python_version = nb::cast>(desc["python_version"]); + + nb::list symbols = nb::cast(desc["native_symbols"]); + for (auto sym_handle : symbols) { + pystack::NativeFrame nf{}; + nf.symbol = nb::cast(sym_handle); + nf.path = "test.c"; + nf.linenumber = 1; + td.native_frames.push_back(nf); + 
} + + nb::list frames = nb::cast(desc["frames"]); + for (auto frame_handle : frames) { + auto frame_tuple = nb::cast(frame_handle); + pystack::PyFrameData fd{}; + fd.code.filename = "test.py"; + fd.code.scope = nb::cast(frame_tuple[0]); + fd.code.location = {1, 1, 0, 0}; + fd.is_entry = nb::cast(frame_tuple[1]); + td.frames.push_back(fd); + } + + threads.push_back(std::move(td)); + } + + auto normalized = normalizeThreads(std::move(threads), native_mode); + + nb::list result; + for (const auto& t : normalized) { + result.append(buildPyThreadObject(t, types, t.python_version)); + } + return result; + }, + "thread_descs"_a, + "native_mode"_a); } diff --git a/tests/unit/test_normalize_threads.py b/tests/unit/test_normalize_threads.py new file mode 100644 index 00000000..457e6a83 --- /dev/null +++ b/tests/unit/test_normalize_threads.py @@ -0,0 +1,197 @@ +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple + +from pystack._pystack import NativeReportingMode +from pystack._pystack import _normalize_threads_for_testing + +EVAL = "_PyEval_EvalFrameDefault" +PY_VERSION = (3, 13) + + +def _make_thread( + tid: int, + *, + stack_anchor: int = 0, + interpreter_id: int = 0, + native_symbols: Optional[List[str]] = None, + frames: Optional[List[Tuple[str, bool]]] = None, +) -> Dict[str, Any]: + return dict( + tid=tid, + stack_anchor=stack_anchor, + interpreter_id=interpreter_id, + native_symbols=(native_symbols or []), + frames=(frames or []), + python_version=PY_VERSION, + ) + + +def test_unique_tids_pass_through_in_order(): + threads = [ + _make_thread( + 3, + native_symbols=["alpha", EVAL, "beta"], + frames=[("main", True)], + ), + _make_thread( + 1, + native_symbols=["gamma", EVAL, "delta"], + frames=[("run", True)], + ), + _make_thread( + 2, + native_symbols=["epsilon"], + frames=[("work", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert 
len(result) == 3 + assert [t.tid for t in result] == [3, 1, 2] + assert [f.symbol for f in result[0].native_frames] == ["alpha", EVAL, "beta"] + assert [f.symbol for f in result[1].native_frames] == ["gamma", EVAL, "delta"] + assert [f.symbol for f in result[2].native_frames] == ["epsilon"] + assert [f.code.scope for f in result[0].frames] == ["main"] + assert [f.code.scope for f in result[1].frames] == ["run"] + assert [f.code.scope for f in result[2].frames] == ["work"] + + +def test_first_seen_tid_order_preserved(): + threads = [ + _make_thread(10, interpreter_id=0, stack_anchor=1000, frames=[("a", True)]), + _make_thread(20, interpreter_id=0, stack_anchor=2000, frames=[("b", True)]), + _make_thread(20, interpreter_id=1, stack_anchor=1500, frames=[("d", True)]), + _make_thread(10, interpreter_id=1, stack_anchor=500, frames=[("c", True)]), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.OFF) + + assert [t.tid for t in result] == [10, 10, 20, 20] + assert [t.interpreter_id for t in result] == [0, 1, 0, 1] + assert [f.code.scope for f in result[0].frames] == ["a"] + assert [f.code.scope for f in result[1].frames] == ["c"] + assert [f.code.scope for f in result[2].frames] == ["b"] + assert [f.code.scope for f in result[3].frames] == ["d"] + + +def test_stack_anchor_sort_within_group(): + threads = [ + _make_thread(1, interpreter_id=2, stack_anchor=0, frames=[("inner", True)]), + _make_thread(1, interpreter_id=0, stack_anchor=9000, frames=[("outer", True)]), + _make_thread(1, interpreter_id=1, stack_anchor=5000, frames=[("middle", True)]), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.OFF) + + assert len(result) == 3 + assert [t.interpreter_id for t in result] == [0, 1, 2] + assert [f.code.scope for f in result[0].frames] == ["outer"] + assert [f.code.scope for f in result[1].frames] == ["middle"] + assert [f.code.scope for f in result[2].frames] == ["inner"] + + +def test_native_slice_correctness(): + 
native_symbols = [ + "outer_c_func", + EVAL, + "middle_c_func_a", + "middle_c_func_b", + EVAL, + "inner_c_func", + EVAL, + ] + threads = [ + _make_thread( + 1, + interpreter_id=0, + stack_anchor=9000, + native_symbols=native_symbols, + frames=[("helper", False), ("main", True)], + ), + _make_thread( + 1, + interpreter_id=1, + stack_anchor=5000, + native_symbols=native_symbols, + frames=[("run", True)], + ), + _make_thread( + 1, + interpreter_id=2, + stack_anchor=1000, + native_symbols=native_symbols, + frames=[("work", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert len(result) == 3 + assert result[0].interpreter_id == 0 + assert result[1].interpreter_id == 1 + assert result[2].interpreter_id == 2 + + syms0 = [f.symbol for f in result[0].native_frames] + syms1 = [f.symbol for f in result[1].native_frames] + syms2 = [f.symbol for f in result[2].native_frames] + + assert syms0 == ["outer_c_func", EVAL, "middle_c_func_a", "middle_c_func_b"] + assert syms1 == [EVAL, "inner_c_func"] + assert syms2 == [EVAL] + + assert syms0 + syms1 + syms2 == native_symbols + + assert [f.code.scope for f in result[0].frames] == ["main", "helper"] + assert [f.code.scope for f in result[1].frames] == ["run"] + assert [f.code.scope for f in result[2].frames] == ["work"] + + +def test_middle_interpreter_no_frames_gets_native_cleared(): + native_symbols = [ + "setup", + EVAL, + "bridge", + EVAL, + ] + threads = [ + _make_thread( + 1, + interpreter_id=0, + stack_anchor=9000, + native_symbols=native_symbols, + frames=[("outer", True)], + ), + _make_thread( + 1, + interpreter_id=1, + stack_anchor=5000, + native_symbols=native_symbols, + frames=[], + ), + _make_thread( + 1, + interpreter_id=2, + stack_anchor=1000, + native_symbols=native_symbols, + frames=[("inner", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert len(result) == 3 + assert result[0].interpreter_id == 0 + 
assert result[1].interpreter_id == 1 + assert result[2].interpreter_id == 2 + + assert [f.symbol for f in result[0].native_frames] == ["setup", EVAL, "bridge"] + assert [f.symbol for f in result[1].native_frames] == [] + assert [f.symbol for f in result[2].native_frames] == [EVAL] + + assert [f.code.scope for f in result[0].frames] == ["outer"] + assert [f.code.scope for f in result[1].frames] == [] + assert [f.code.scope for f in result[2].frames] == ["inner"]