diff --git a/Dockerfile b/Dockerfile index c56e376c..3b938cd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,6 +53,7 @@ RUN apt-get update \ file \ less \ libcrypt-dev \ + bear \ libzstd-dev \ liblzma-dev \ libbz2-dev \ diff --git a/news/279.feature.rst b/news/279.feature.rst new file mode 100644 index 00000000..3a39b2f7 --- /dev/null +++ b/news/279.feature.rst @@ -0,0 +1 @@ +Add support for Python subinterpreters. When a process uses multiple interpreters (e.g. via Python 3.14's ``concurrent.interpreters`` module), stacks for all interpreters are now reported instead of just the main one. diff --git a/src/pystack/__init__.py b/src/pystack/__init__.py index e973464d..156b8f4f 100644 --- a/src/pystack/__init__.py +++ b/src/pystack/__init__.py @@ -1,7 +1,7 @@ from ._version import __version__ -from .traceback_formatter import print_thread +from .traceback_formatter import print_threads __all__ = [ "__version__", - "print_thread", + "print_threads", ] diff --git a/src/pystack/__main__.py b/src/pystack/__main__.py index a8f6a10e..4a2e844e 100644 --- a/src/pystack/__main__.py +++ b/src/pystack/__main__.py @@ -19,7 +19,7 @@ from pystack.process import is_gzip from . import errors -from . import print_thread +from . import print_threads from .colors import colored from .engine import CoreFileAnalyzer from .engine import NativeReportingMode @@ -287,14 +287,14 @@ def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) -> if not args.block and args.native_mode != NativeReportingMode.OFF: parser.error("Native traces are only available in blocking mode") - for thread in get_process_threads( + threads = get_process_threads( args.pid, stop_process=args.block, native_mode=args.native_mode, locals=args.locals, method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, - ): - print_thread(thread, args.native_mode) + ) + print_threads(threads, args.native_mode) def format_psinfo_information(psinfo: Dict[str, Any]) -> str: @@ -414,15 +414,15 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N elf_id if elf_id else "", ) - for thread in get_process_threads_for_core( + threads = get_process_threads_for_core( corefile, executable, library_search_path=lib_search_path, native_mode=args.native_mode, locals=args.locals, method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO, - ): - print_thread(thread, args.native_mode) + ) + print_threads(threads, args.native_mode) if __name__ == "__main__": # pragma: no cover diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index 6a3ea6ff..af781a92 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -84,6 +84,11 @@ def get_process_threads_for_core( def get_bss_info(binary: Union[str, pathlib.Path]) -> Optional[Dict[str, Any]]: ... def copy_memory_from_address(pid: int, address: int, size: int) -> bytes: ... def _check_interpreter_shutdown(manager: ProcessManager) -> None: ... +def is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: ... +def _normalize_threads_for_testing( + thread_descs: List[Dict[str, Any]], + native_mode: NativeReportingMode, +) -> List[PyThread]: ... F = TypeVar("F", bound=Callable[..., Any]) diff --git a/src/pystack/_pystack/CMakeLists.txt b/src/pystack/_pystack/CMakeLists.txt index 3621e1fa..9a3042b5 100644 --- a/src/pystack/_pystack/CMakeLists.txt +++ b/src/pystack/_pystack/CMakeLists.txt @@ -11,6 +11,7 @@ set(PYSTACK_SOURCES logging.cpp maps_parser.cpp mem.cpp + native_frame.cpp process.cpp pycode.cpp pyframe.cpp @@ -21,6 +22,7 @@ set(PYSTACK_SOURCES version.cpp version_detector.cpp bindings.cpp + interpreter.cpp ) # Create the nanobind module diff --git a/src/pystack/_pystack/bindings.cpp b/src/pystack/_pystack/bindings.cpp index 9dfa8a4a..99551071 100644 --- a/src/pystack/_pystack/bindings.cpp +++ b/src/pystack/_pystack/bindings.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -11,14 +12,18 @@ #include #include +#include #include #include +#include #include "corefile.h" #include "elf_common.h" +#include "interpreter.h" #include "logging.h" #include "maps_parser.h" #include "mem.h" +#include "native_frame.h" #include "process.h" #include "thread_builder.h" @@ -491,7 +496,8 @@ buildPyThreadObject( thread.gil_status, thread.gc_status, nb::make_tuple(python_version.first, python_version.second), - "name"_a = thread.name ? nb::cast(*thread.name) : nb::none()); + "name"_a = thread.name ? nb::cast(*thread.name) : nb::none(), + "interpreter_id"_a = thread.interpreter_id); } // Build a native-only thread object (no Python frames) @@ -534,6 +540,126 @@ logMemoryMaps(const std::vector& maps, const char* source) } } +size_t +numEntryFrames(const pystack::PyThreadData& thread) +{ + return std::count_if( + thread.frames.begin(), + thread.frames.end(), + [](const pystack::PyFrameData& frame) { return frame.is_entry; }); +} + +std::vector +sortThreadsByStackAnchor(std::vector data) +{ + // Sort by: + // 1. With stack anchor (!=0) before without + // 2. Stack anchor in descending order + // 3. Index in the vector as a tie breaker (handled by stable_sort) + std::stable_sort( + data.begin(), + data.end(), + [](const pystack::PyThreadData& a, const pystack::PyThreadData& b) { + return std::make_tuple(a.stack_anchor == 0 ? 1 : 0, -a.stack_anchor) + < std::make_tuple(b.stack_anchor == 0 ? 1 : 0, -b.stack_anchor); + }); + return data; +} + +std::vector +sliceNativeStack(std::vector data) +{ + // Capture a canonical + auto canonical_thread = + std::find_if(data.begin(), data.end(), [](const pystack::PyThreadData& py_thread_data) { + return !py_thread_data.native_frames.empty(); + }); + if (canonical_thread == data.end()) { + return data; + } + + // Capture native frames and python version + const std::vector native_frames = canonical_thread->native_frames; + const auto python_version = data[0].python_version; + + std::vector eval_index; + for (std::size_t i = 0; i < native_frames.size(); ++i) { + if (pystack::is_eval_frame(native_frames[i].symbol, python_version)) { + eval_index.push_back(i); + } + } + + const auto total_entry_frames = std::accumulate( + data.begin(), + data.end(), + size_t{0}, + [](size_t acc, const pystack::PyThreadData& d) { return acc + numEntryFrames(d); }); + + if (eval_index.size() != total_entry_frames) { + pystack::LOG(pystack::DEBUG) << "Skipping same-tid native slicing for tid " << data[0].tid + << " due to mismatched counts: entry=" << total_entry_frames + << ", eval=" << eval_index.size(); + return data; + } + + // Slice frames according to eval frames per python thread + std::size_t cursor = 0; + auto native_frames_cursor = native_frames.begin(); + for (auto& thread_data : data) { + const auto num_entry_frames_for_thread = numEntryFrames(thread_data); + + if (num_entry_frames_for_thread == 0) { + thread_data.native_frames.clear(); + continue; + } + + const std::size_t next_thread_cursor = cursor + num_entry_frames_for_thread; + const std::size_t next_thread_first_eval = next_thread_cursor < eval_index.size() + ? eval_index[next_thread_cursor] + : native_frames.size(); + const auto next_thread_native_frames_cursor = native_frames.begin() + next_thread_first_eval; + + thread_data.native_frames.assign(native_frames_cursor, next_thread_native_frames_cursor); + native_frames_cursor = next_thread_native_frames_cursor; + cursor = next_thread_cursor; + } + return data; +} + +std::vector +normalizeThreads(std::vector threads, NativeReportingMode native_mode) +{ + // Group threads by TID, preserving first-seen order. + // One TID can have multiple PyThreadData due to subinterpreters. + std::unordered_map tid_to_group; + std::vector> groups; + + for (auto& thread : threads) { + auto [it, inserted] = tid_to_group.try_emplace(thread.tid, groups.size()); + if (inserted) { + groups.emplace_back(); + } + groups[it->second].push_back(std::move(thread)); + } + + std::vector ret; + for (auto& group : groups) { + if (group.size() > 1) { + // Order interpreters for this TID from outermost to innermost + group = sortThreadsByStackAnchor(std::move(group)); + // Associate each Python stack with its chunk of the native stack + if (native_mode != NativeReportingMode::OFF) { + group = sliceNativeStack(std::move(group)); + } + } + for (auto& thread : group) { + ret.push_back(std::move(thread)); + } + } + + return ret; +} + nb::object get_process_threads( pid_t pid, @@ -571,21 +697,28 @@ get_process_threads( } else { python_version = manager->python_version(); std::vector all_tids = pystack::getThreadIds(manager->get_manager()); - - if (head != 0) { - bool add_native = native_mode != NativeReportingMode::OFF; - python_threads = pystack::buildThreadsFromInterpreter( - manager->get_manager(), - head, - pid, - add_native, - locals); - - for (const auto& thread : python_threads) { + bool add_native = native_mode != NativeReportingMode::OFF; + + while (head) { + std::vector new_threads = + pystack::buildThreadsFromInterpreter( + manager->get_manager(), + head, + pid, + add_native, + locals); + + for (const auto& thread : new_threads) { all_tids.erase( std::remove(all_tids.begin(), all_tids.end(), thread.tid), all_tids.end()); } + python_threads.insert( + python_threads.end(), + std::make_move_iterator(new_threads.begin()), + std::make_move_iterator(new_threads.end())); + + head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); } if (native_mode == NativeReportingMode::ALL) { @@ -606,7 +739,7 @@ get_process_threads( } nb::list result; - for (const auto& thread : python_threads) { + for (const auto& thread : normalizeThreads(python_threads, native_mode)) { result.append(buildPyThreadObject(thread, types, python_version)); } for (const auto& thread : native_only_threads) { @@ -651,24 +784,33 @@ get_process_threads_for_core( } nb::list result; + std::vector python_threads; std::vector all_tids = pystack::getThreadIds(manager->get_manager()); + bool add_native = native_mode != NativeReportingMode::OFF; - if (head != 0) { - bool add_native = native_mode == NativeReportingMode::PYTHON - || native_mode == NativeReportingMode::ALL; - auto threads = pystack::buildThreadsFromInterpreter( + while (head) { + auto new_threads = pystack::buildThreadsFromInterpreter( manager->get_manager(), head, manager->pid(), add_native, locals); - for (const auto& thread : threads) { - result.append(buildPyThreadObject(thread, types, manager->python_version())); + for (const auto& thread : new_threads) { all_tids.erase( std::remove(all_tids.begin(), all_tids.end(), thread.tid), all_tids.end()); } + python_threads.insert( + python_threads.end(), + std::make_move_iterator(new_threads.begin()), + std::make_move_iterator(new_threads.end())); + + head = pystack::InterpreterUtils::getNextInterpreter(manager->get_manager(), head); + } + + for (const auto& thread : normalizeThreads(python_threads, native_mode)) { + result.append(buildPyThreadObject(thread, types, manager->python_version())); } if (native_mode == NativeReportingMode::ALL) { @@ -863,4 +1005,58 @@ NB_MODULE(_pystack, m) // intercept_runtime_errors decorator - re-export from pystack.errors nb::module_ pystack_errors = nb::module_::import_("pystack.errors"); m.attr("intercept_runtime_errors") = pystack_errors.attr("intercept_runtime_errors"); + + m.def("is_eval_frame", + &pystack::is_eval_frame, + "symbol"_a, + "python_version"_a, + "Return True if the symbol is a CPython eval frame function"); + + m.def( + "_normalize_threads_for_testing", + [](nb::list thread_descs, NativeReportingMode native_mode) -> nb::list { + auto types = PyTypes::load(); + std::vector threads; + + for (auto desc_handle : thread_descs) { + nb::dict desc = nb::cast(desc_handle); + pystack::PyThreadData td{}; + td.tid = nb::cast(desc["tid"]); + td.stack_anchor = nb::cast(desc["stack_anchor"]); + td.interpreter_id = nb::cast(desc["interpreter_id"]); + td.python_version = nb::cast>(desc["python_version"]); + + nb::list symbols = nb::cast(desc["native_symbols"]); + for (auto sym_handle : symbols) { + pystack::NativeFrame nf{}; + nf.symbol = nb::cast(sym_handle); + nf.path = "test.c"; + nf.linenumber = 1; + td.native_frames.push_back(nf); + } + + nb::list frames = nb::cast(desc["frames"]); + for (auto frame_handle : frames) { + auto frame_tuple = nb::cast(frame_handle); + pystack::PyFrameData fd{}; + fd.code.filename = "test.py"; + fd.code.scope = nb::cast(frame_tuple[0]); + fd.code.location = {1, 1, 0, 0}; + fd.is_entry = nb::cast(frame_tuple[1]); + td.frames.push_back(fd); + } + + threads.push_back(std::move(td)); + } + + auto normalized = normalizeThreads(std::move(threads), native_mode); + + nb::list result; + for (const auto& t : normalized) { + result.append(buildPyThreadObject(t, types, t.python_version)); + } + return result; + }, + "thread_descs"_a, + "native_mode"_a); } diff --git a/src/pystack/_pystack/interpreter.cpp b/src/pystack/_pystack/interpreter.cpp new file mode 100644 index 00000000..78eef9ab --- /dev/null +++ b/src/pystack/_pystack/interpreter.cpp @@ -0,0 +1,34 @@ +#include + +#include "interpreter.h" +#include "process.h" +#include "structure.h" +#include "version.h" + +namespace pystack { + +remote_addr_t +InterpreterUtils::getNextInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr) +{ + Structure is(manager, interpreter_addr); + return is.getField(&py_is_v::o_next); +} + +int64_t +InterpreterUtils::getInterpreterId( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr) +{ + if (!manager->versionIsAtLeast(3, 7)) { + // Interpreter ID was added in Python 3.7, so for earlier versions + // we just return the address as a unique identifier. + return static_cast(interpreter_addr); + } + + Structure is(manager, interpreter_addr); + return is.getField(&py_is_v::o_id); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/interpreter.h b/src/pystack/_pystack/interpreter.h new file mode 100644 index 00000000..0138ff84 --- /dev/null +++ b/src/pystack/_pystack/interpreter.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +#include "mem.h" +#include "process.h" + +namespace pystack { + +class InterpreterUtils +{ + public: + // Static Methods + static remote_addr_t getNextInterpreter( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr); + + static int64_t getInterpreterId( + const std::shared_ptr& manager, + remote_addr_t interpreter_addr); +}; + +} // namespace pystack diff --git a/src/pystack/_pystack/native_frame.cpp b/src/pystack/_pystack/native_frame.cpp new file mode 100644 index 00000000..12672dab --- /dev/null +++ b/src/pystack/_pystack/native_frame.cpp @@ -0,0 +1,33 @@ +#include "native_frame.h" + +#include + +namespace pystack { + +static bool +starts_with(const std::string& str, const std::string& prefix) +{ + return str.rfind(prefix, 0) == 0; +} + +bool +is_eval_frame(const std::string& symbol, std::pair python_version) +{ + if (python_version < std::make_pair(3, 6)) { + return symbol.find("PyEval_EvalFrameEx") != std::string::npos; + } + if (symbol.find("_PyEval_EvalFrameDefault") != std::string::npos) { + return true; + } + // Python 3.14 tail call interpreter uses LLVM-generated functions + if (starts_with(symbol, "_TAIL_CALL_") && symbol.find(".llvm.") != std::string::npos) { + return true; + } + // Python 3.15+ tail call interpreter drops the .llvm. suffix + if (python_version >= std::make_pair(3, 15) and starts_with(symbol, "_TAIL_CALL_")) { + return true; + } + return false; +} + +} // namespace pystack diff --git a/src/pystack/_pystack/native_frame.h b/src/pystack/_pystack/native_frame.h index f98118a8..a462cc5a 100644 --- a/src/pystack/_pystack/native_frame.h +++ b/src/pystack/_pystack/native_frame.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace pystack { // The reason this is a struct is so Cython can easily generate @@ -14,4 +15,8 @@ struct NativeFrame int colnumber; std::string library; }; + +bool +is_eval_frame(const std::string& symbol, std::pair python_version); + } // namespace pystack diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 9aac1b2c..af24a76f 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -999,6 +999,7 @@ AbstractProcessManager::copyDebugOffsets(Structure& py_runtime, py set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict); set_offset(py_is.o_builtins, &py_runtime_v::o_dbg_off_interpreter_state_builtins); set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil); + set_offset(py_is.o_id, &py_runtime_v::o_dbg_off_interpreter_state_id); set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev); @@ -1474,6 +1475,12 @@ ProcessManager::Tids() const return d_tids; } +std::pair +AbstractProcessManager::pythonVersion() const +{ + return {d_major, d_minor}; +} + std::shared_ptr CoreFileProcessManager::create( const std::string& core_file, diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index 0f52ff3c..965a0d4b 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -98,6 +98,7 @@ class AbstractProcessManager : public std::enable_shared_from_this& version); bool versionIsAtLeast(int required_major, int required_minor) const; + std::pair pythonVersion() const; bool isFreeThreaded() const; const python_v& offsets() const; diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index 3607a791..54120453 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -129,6 +129,48 @@ findPthreadTidOffset( return 0; } +remote_addr_t +getStackAnchor(const std::shared_ptr& manager, remote_addr_t frame_addr) +{ + if (!frame_addr) { + return 0; + } + if (!manager->versionIsAtLeast(3, 12)) { + return frame_addr; + } + + remote_addr_t current_addr = frame_addr; + for (int i = 0; i < 4096 && current_addr; ++i) { + Structure current_frame(manager, current_addr); + auto owner = current_frame.getField(&py_frame_v::o_owner); + + if (manager->versionIsAtLeast(3, 14)) { + if (owner == Python3_14::FRAME_OWNED_BY_INTERPRETER + || owner == Python3_14::FRAME_OWNED_BY_CSTACK) + { + return current_addr; + } + } else { + if (owner == Python3_12::FRAME_OWNED_BY_CSTACK) { + return current_addr; + } + } + + remote_addr_t next_addr = current_frame.getField(&py_frame_v::o_back); + if (next_addr == current_addr) { + break; + } + current_addr = next_addr; + } + return frame_addr; +} + +remote_addr_t +PyThread::stackAnchor() const +{ + return d_stack_anchor; +} + PyThread::PyThread(const std::shared_ptr& manager, remote_addr_t addr) : Thread(0, 0) { @@ -142,6 +184,8 @@ PyThread::PyThread(const std::shared_ptr& manager, LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct frame from address " << frame_addr; d_first_frame = std::make_unique(manager, frame_addr, 0); + + d_stack_anchor = getStackAnchor(manager, frame_addr); } d_addr = addr; @@ -366,7 +410,7 @@ getThreadFromInterpreterState( const std::shared_ptr& manager, remote_addr_t addr) { - if (tid_offset_in_pthread_struct == 0) { + if (tid_offset_in_pthread_struct == 0 && !manager->versionIsAtLeast(3, 11)) { tid_offset_in_pthread_struct = findPthreadTidOffset(manager, addr); } diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index ab02c672..bb490ffb 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -45,6 +45,7 @@ class PyThread : public Thread // Methods GilStatus isGilHolder() const; GCStatus isGCCollecting() const; + remote_addr_t stackAnchor() const; // Static Methods static remote_addr_t getFrameAddr( @@ -60,6 +61,7 @@ class PyThread : public Thread remote_addr_t d_next_addr; std::shared_ptr d_next; std::shared_ptr d_first_frame; + remote_addr_t d_stack_anchor{}; // Methods GilStatus calculateGilStatus( diff --git a/src/pystack/_pystack/thread_builder.cpp b/src/pystack/_pystack/thread_builder.cpp index 629e502a..8b7d28ab 100644 --- a/src/pystack/_pystack/thread_builder.cpp +++ b/src/pystack/_pystack/thread_builder.cpp @@ -1,7 +1,9 @@ #include "thread_builder.h" +#include "interpreter.h" #include "logging.h" #include "maps_parser.h" +#include namespace pystack { @@ -58,7 +60,8 @@ buildPythonThread( PyThread* thread, pid_t pid, bool add_native_traces, - bool resolve_locals) + bool resolve_locals, + int64_t interpreter_id) { PyThreadData data; data.tid = thread->Tid(); @@ -80,6 +83,9 @@ buildPythonThread( data.gil_status = static_cast(thread->isGilHolder()); data.gc_status = static_cast(thread->isGCCollecting()); + data.interpreter_id = interpreter_id; + data.python_version = manager->pythonVersion(); + data.stack_anchor = thread->stackAnchor(); return data; } @@ -92,6 +98,8 @@ buildNativeThread(const std::shared_ptr& manager, pid_t data.name = getThreadName(pid, tid); data.gil_status = 0; // NOT_HELD data.gc_status = 0; // NOT_COLLECTING + data.interpreter_id = 0; // No Python stack for this thread means no interpreter + data.stack_anchor = 0; // and no stack anchor. LOG(INFO) << "Constructing new native thread with tid " << tid; @@ -100,6 +108,7 @@ buildNativeThread(const std::shared_ptr& manager, pid_t const auto& native_frames = native_thread.NativeFrames(); data.native_frames.assign(native_frames.rbegin(), native_frames.rend()); + data.python_version = manager->pythonVersion(); return data; } @@ -116,11 +125,17 @@ buildThreadsFromInterpreter( std::vector threads; auto thread = getThreadFromInterpreterState(manager, interpreter_head); + int64_t interpreter_id = InterpreterUtils::getInterpreterId(manager, interpreter_head); PyThread* current_thread = thread.get(); while (current_thread != nullptr) { - threads.push_back( - buildPythonThread(manager, current_thread, pid, add_native_traces, resolve_locals)); + threads.push_back(buildPythonThread( + manager, + current_thread, + pid, + add_native_traces, + resolve_locals, + interpreter_id)); auto next = current_thread->NextThread(); current_thread = next.get(); diff --git a/src/pystack/_pystack/thread_builder.h b/src/pystack/_pystack/thread_builder.h index ac431387..d1f7424f 100644 --- a/src/pystack/_pystack/thread_builder.h +++ b/src/pystack/_pystack/thread_builder.h @@ -39,6 +39,9 @@ struct PyThreadData std::vector native_frames; int gil_status; // -1 = unknown, 0 = not held, 1 = held int gc_status; // -1 = unknown, 0 = not collecting, 1 = collecting + int64_t interpreter_id; + std::pair python_version; + remote_addr_t stack_anchor; }; std::vector @@ -55,7 +58,8 @@ buildPythonThread( PyThread* thread, pid_t pid, bool add_native_traces, - bool resolve_locals); + bool resolve_locals, + int64_t interpreter_id); PyThreadData buildNativeThread(const std::shared_ptr& manager, pid_t pid, pid_t tid); diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 9f0733d6..91ca5579 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -179,6 +179,23 @@ py_is() }; } +template +constexpr py_is_v +py_isv37() +{ + return { + sizeof(T), + {offsetof(T, next)}, + {offsetof(T, tstate_head)}, + {offsetof(T, gc)}, + {offsetof(T, modules)}, + {offsetof(T, sysdict)}, + {offsetof(T, builtins)}, + {0}, + {offsetof(T, id)}, + }; +} + template constexpr py_is_v py_isv311() @@ -191,6 +208,8 @@ py_isv311() {offsetof(T, modules)}, {offsetof(T, sysdict)}, {offsetof(T, builtins)}, + {0}, + {offsetof(T, id)}, }; } @@ -207,6 +226,7 @@ py_isv312() {offsetof(T, sysdict)}, {offsetof(T, builtins)}, {offsetof(T, ceval.gil)}, + {offsetof(T, id)}, }; } @@ -664,7 +684,7 @@ python_v python_v3_7 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -686,7 +706,7 @@ python_v python_v3_8 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -708,7 +728,7 @@ python_v python_v3_9 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; @@ -730,7 +750,7 @@ python_v python_v3_10 = { py_code(), py_frame(), py_thread(), - py_is(), + py_isv37(), py_runtime(), py_gc(), }; diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index c56851ac..d9b2b2de 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -241,6 +241,7 @@ struct py_is_v FieldOffset o_sysdict; FieldOffset o_builtins; FieldOffset o_gil_runtime_state; + FieldOffset o_id; }; struct py_gc_v diff --git a/src/pystack/traceback_formatter.py b/src/pystack/traceback_formatter.py index d6ca8442..6797f1e8 100644 --- a/src/pystack/traceback_formatter.py +++ b/src/pystack/traceback_formatter.py @@ -1,6 +1,7 @@ import os import sys from typing import Iterable +from typing import List from typing import Optional from .colors import colored @@ -12,9 +13,17 @@ from .types import frame_type -def print_thread(thread: PyThread, native_mode: NativeReportingMode) -> None: - for line in format_thread(thread, native_mode): - print(line, file=sys.stdout, flush=True) +def print_threads(threads: List[PyThread], native_mode: NativeReportingMode) -> None: + for i, thread in enumerate(threads): + same_tid_as_prev = i > 0 and thread.tid == threads[i - 1].tid + same_tid_as_next = i < len(threads) - 1 and thread.tid == threads[i + 1].tid + for line in format_thread( + thread, + native_mode, + continuing_from_previous=same_tid_as_prev, + continues_to_next=same_tid_as_next, + ): + print(line, file=sys.stdout, flush=True) def format_frame(frame: PyFrame) -> Iterable[str]: @@ -64,18 +73,39 @@ def _are_the_stacks_mergeable(thread: PyThread) -> bool: return n_eval_frames == n_entry_frames -def format_thread(thread: PyThread, native_mode: NativeReportingMode) -> Iterable[str]: +def format_thread( + thread: PyThread, + native_mode: NativeReportingMode, + continuing_from_previous: bool = False, + continues_to_next: bool = False, +) -> Iterable[str]: native = native_mode != NativeReportingMode.OFF current_frame: Optional[PyFrame] = thread.first_frame - if current_frame is None and not native: + if ( + current_frame is None + and not native + and not continuing_from_previous + and not continues_to_next + ): yield f"The frame stack for thread {thread.tid} is empty" return thread_name = f" ({thread.name}) " if thread.name else " " - yield ( - f"Traceback for thread {thread.tid}{thread_name}{thread.status} " - "(most recent call last):" - ) + if not continuing_from_previous: + yield ( + f"Traceback for thread {thread.tid}{thread_name}" + f"{thread.status + ' ' if not continues_to_next else ''}" + f"(most recent call last):" + ) + + if continues_to_next or continuing_from_previous: + if thread.interpreter_id is None: + interp_name = "Not attached to any interpreter" + elif thread.interpreter_id == 0: + interp_name = "In the main interpreter" + else: + interp_name = f"In interpreter {thread.interpreter_id}" + yield f" {interp_name} {thread.status}" if not (native and _are_the_stacks_mergeable(thread)): if native: @@ -88,7 +118,8 @@ def format_thread(thread: PyThread, native_mode: NativeReportingMode) -> Iterabl yield from _format_merged_stacks( thread, current_frame, native_mode == NativeReportingMode.LAST ) - yield "" + if not continues_to_next: + yield "" def _format_merged_stacks( diff --git a/src/pystack/types.py b/src/pystack/types.py index c0f72011..2b0c27f1 100644 --- a/src/pystack/types.py +++ b/src/pystack/types.py @@ -7,6 +7,8 @@ from typing import Optional from typing import Tuple +from ._pystack import is_eval_frame as _is_eval_frame + SYMBOL_IGNORELIST = { "PyObject_Call", "call_function", @@ -44,20 +46,6 @@ class FrameType(enum.Enum): OTHER = 3 -def _is_eval_frame(symbol: str, python_version: Tuple[int, int]) -> bool: - if python_version < (3, 6): - return "PyEval_EvalFrameEx" in symbol - if "_PyEval_EvalFrameDefault" in symbol: - return True - # Python 3.14 tail call interpreter uses LLVM-generated functions - if symbol.startswith("_TAIL_CALL_") and ".llvm." in symbol: - return True - # Python 3.15+ tail call interpreter drops the .llvm. suffix - if python_version >= (3, 15) and symbol.startswith("_TAIL_CALL_"): - return True - return False - - def frame_type( frame: NativeFrame, python_version: Optional[Tuple[int, int]] ) -> NativeFrame.FrameType: @@ -119,6 +107,7 @@ class PyThread: is_gc_collecting: int python_version: Optional[Tuple[int, int]] name: Optional[str] = None + interpreter_id: Optional[int] = None @property def frames(self) -> Iterable[PyFrame]: diff --git a/tests/integration/test_subinterpreters.py b/tests/integration/test_subinterpreters.py new file mode 100644 index 00000000..8cdc18e7 --- /dev/null +++ b/tests/integration/test_subinterpreters.py @@ -0,0 +1,627 @@ +import io +import subprocess +import time +from collections import Counter +from contextlib import redirect_stdout +from pathlib import Path +from typing import Dict +from typing import List +from typing import Set + +import pytest + +from pystack.engine import NativeReportingMode +from pystack.engine import StackMethod +from pystack.engine import get_process_threads +from pystack.engine import get_process_threads_for_core +from pystack.traceback_formatter import print_threads +from pystack.types import NativeFrame +from pystack.types import frame_type +from tests.utils import ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +from tests.utils import generate_core_file +from tests.utils import spawn_child_process + +NUM_INTERPRETERS = 3 +NUM_INTERPRETERS_WITH_THREADS = 2 +NUM_THREADS_PER_SUBINTERPRETER = 2 + +# Compatibility shim so test programs work on both 3.13 (_interpreters) +# and 3.14+ (concurrent.interpreters). +_INTERPRETERS_SHIM = """\ +import sys as _sys +try: + from concurrent import interpreters +except ImportError: + import _interpreters as _raw + class _W: + def __init__(self, id): + self.id = id + def exec(self, code): + _raw.exec(self.id, code) + class interpreters: + @staticmethod + def create(): + return _W(_raw.create()) + Interpreter = _W +""" + +PROGRAM = f"""\ +import sys +import threading +import time + +{_INTERPRETERS_SHIM} + +NUM_INTERPRETERS = {NUM_INTERPRETERS} + + +def start_interpreter_async(interp, code): + t = threading.Thread(target=interp.exec, args=(code,)) + t.daemon = True + t.start() + return t + + +CODE = '''\\ +import time +while True: + time.sleep(1) +''' + +threads = [] +for _ in range(NUM_INTERPRETERS): + interp = interpreters.create() + t = start_interpreter_async(interp, CODE) + threads.append(t) + +# Give sub-interpreters time to start executing +time.sleep(1) + +fifo = sys.argv[1] +with open(fifo, "w") as f: + f.write("ready") + +while True: + time.sleep(1) +""" + + +PROGRAM_WITH_THREADS = f"""\ +import sys +import threading +import time + +{_INTERPRETERS_SHIM} + +NUM_INTERPRETERS = {NUM_INTERPRETERS_WITH_THREADS} + + +def start_interpreter_async(interp, code): + t = threading.Thread(target=interp.exec, args=(code,)) + t.daemon = True + t.start() + return t + + +CODE = '''\\ +import threading +import time + +NUM_THREADS = {NUM_THREADS_PER_SUBINTERPRETER} + +def worker(): + while True: + time.sleep(1) + +threads = [] +for _ in range(NUM_THREADS): + t = threading.Thread(target=worker) + # daemon threads are disabled in isolated subinterpreters + t.start() + threads.append(t) + +while True: + time.sleep(1) +''' + +threads = [] +for _ in range(NUM_INTERPRETERS): + interp = interpreters.create() + t = start_interpreter_async(interp, CODE) + threads.append(t) + +# Give sub-interpreters and their internal workers time to start. +time.sleep(2) + +fifo = sys.argv[1] +with open(fifo, "w") as f: + f.write("ready") + +while True: + time.sleep(1) +""" + +PROGRAM_NESTED_SAME_THREAD = ( + """\ +import sys +import threading +import time + +""" + + _INTERPRETERS_SHIM + + """ +_SHIM = '''""" + + _INTERPRETERS_SHIM + + """''' + +fifo = sys.argv[1] + +interp_outer = interpreters.create() +interp_inner = interpreters.create() + +inner_code = f'''\\ +import time +with open({fifo!r}, "w") as f: + f.write("ready") +while True: + time.sleep(1) +''' +outer_code = _SHIM + f''' +interpreters.Interpreter({{inner_id}}).exec({{inner_code!r}}) +'''.format(inner_id=interp_inner.id, inner_code=inner_code) + +t = threading.Thread(target=interp_outer.exec, args=(outer_code,)) +t.daemon = True +t.start() + +while True: + time.sleep(1) +""" +) + +PROGRAM_TWO_THREADS_THREE_SUBINTERPRETERS_EACH = ( + """\ +import sys +import threading +import time +from pathlib import Path + +""" + + _INTERPRETERS_SHIM + + """ +_SHIM = '''""" + + _INTERPRETERS_SHIM + + """''' + +signal_file = Path(sys.argv[1]) + + +def make_level3_code(token): + return f'''\\ +import time +from pathlib import Path +Path({str(signal_file)!r}).open("a").write("{token}\\\\n") +while True: + time.sleep(1) +''' + + +def make_level2_code(interp3_id, level3_code): + return _SHIM + f''' +interpreters.Interpreter({interp3_id}).exec({level3_code!r}) +''' + + +def make_level1_code(interp2_id, level2_code): + return _SHIM + f''' +interpreters.Interpreter({interp2_id}).exec({level2_code!r}) +''' + + +def launch_chain(token): + interp1 = interpreters.create() + interp2 = interpreters.create() + interp3 = interpreters.create() + + level3_code = make_level3_code(token) + level2_code = make_level2_code(interp3.id, level3_code) + level1_code = make_level1_code(interp2.id, level2_code) + interp1.exec(level1_code) + + +t1 = threading.Thread(target=launch_chain, args=("chain1",), daemon=True) +t2 = threading.Thread(target=launch_chain, args=("chain2",), daemon=True) +t1.start() +t2.start() + +while True: + time.sleep(1) +""" +) + + +def _collect_threads( + python_executable: Path, + tmpdir: Path, + native_mode: NativeReportingMode = NativeReportingMode.OFF, +): + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with spawn_child_process( + str(python_executable), str(test_file), tmpdir + ) as child_process: + return list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=native_mode, + ) + ) + + +def _assert_interpreter_headers( + threads, + native_mode: NativeReportingMode, + interpreter_ids, +) -> str: + output = io.StringIO() + with redirect_stdout(output): + print_threads(threads, native_mode=native_mode) + + result = output.getvalue() + assert "In the main interpreter" in result + for interpreter_id in interpreter_ids: + if interpreter_id == 0: + continue + assert f"In interpreter {interpreter_id}" in result + return result + + +def _count_threads_by_interpreter(threads): + return dict( + Counter( + thread.interpreter_id + for thread in threads + if thread.interpreter_id is not None + ) + ) + + +def _interpreter_ids(threads) -> Set[int]: + return { + thread.interpreter_id for thread in threads if thread.interpreter_id is not None + } + + +def _assert_subinterpreter_coverage(threads) -> Set[int]: + interpreter_ids = _interpreter_ids(threads) + assert 0 in interpreter_ids + assert len(interpreter_ids) == NUM_INTERPRETERS + 1 + return interpreter_ids + + +def _assert_native_eval_symbols(threads) -> None: + eval_frames = [ + frame + for thread in threads + for frame in thread.native_frames + if frame_type(frame, thread.python_version) == NativeFrame.FrameType.EVAL + ] + assert eval_frames + assert all("?" not in frame.symbol for frame in eval_frames) + if any(frame.linenumber == 0 for frame in eval_frames): # pragma: no cover + assert all(frame.linenumber == 0 for frame in eval_frames) + assert all(frame.path == "???" for frame in eval_frames) + else: # pragma: no cover + assert all(frame.linenumber != 0 for frame in eval_frames) + assert any(frame.path and "?" not in frame.path for frame in eval_frames) + + +def _assert_mergeable_same_tid_groups(threads) -> bool: + groups: Dict[int, List] = {} + for thread in threads: + groups.setdefault(thread.tid, []).append(thread) + + found_shared_tid = False + for group in groups.values(): + interpreter_ids = { + thread.interpreter_id + for thread in group + if thread.interpreter_id is not None + } + if len(group) < 2 or len(interpreter_ids) < 2: + continue + found_shared_tid = True + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) + == NativeFrame.FrameType.EVAL + ] + entry_count = sum(frame.is_entry for frame in thread.all_frames) + assert len(eval_frames) == entry_count + return found_shared_tid + + +def _shared_tid_groups_with_min_interpreters(threads, min_interpreters): + groups = {} + for thread in threads: + groups.setdefault(thread.tid, []).append(thread) + + matching = [] + for tid, group in groups.items(): + interpreter_ids = { + thread.interpreter_id + for thread in group + if thread.interpreter_id is not None + } + if len(interpreter_ids) >= min_interpreters: + matching.append((tid, group)) + return matching + + +def _assert_strict_native_eval_symbols_for_group(group) -> None: + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) == NativeFrame.FrameType.EVAL + ] + assert eval_frames + assert all("?" not in frame.symbol for frame in eval_frames) + if any(frame.linenumber == 0 for frame in eval_frames): + assert all(frame.linenumber == 0 for frame in eval_frames) + assert all(frame.path == "???" for frame in eval_frames) + else: + assert all(frame.linenumber != 0 for frame in eval_frames) + assert any(frame.path and "?" not in frame.path for frame in eval_frames) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters(python, tmpdir): + _, python_executable = python + + threads = _collect_threads( + python_executable=python_executable, + tmpdir=tmpdir, + native_mode=NativeReportingMode.OFF, + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert all(not thread.native_frames for thread in threads) + _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.OFF, + interpreter_ids=interpreter_ids, + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +@pytest.mark.parametrize( + "native_mode", + [ + NativeReportingMode.PYTHON, + NativeReportingMode.LAST, + NativeReportingMode.ALL, + ], + ids=["python", "last", "all"], +) +def test_subinterpreters_with_native(python, tmpdir, native_mode): + _, python_executable = python + + threads = _collect_threads( + python_executable=python_executable, + tmpdir=tmpdir, + native_mode=native_mode, + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + output = _assert_interpreter_headers( + threads=threads, + native_mode=native_mode, + interpreter_ids=interpreter_ids, + ) + assert "(C)" in output or "Unable to merge native stack" in output + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_many_threads_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_with_threads_program.py" + test_file.write_text(PROGRAM_WITH_THREADS) + + with spawn_child_process(python_executable, test_file, tmpdir) as child_process: + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + interpreter_ids = _interpreter_ids(threads) + assert 0 in interpreter_ids + assert len(interpreter_ids) == NUM_INTERPRETERS_WITH_THREADS + 1 + + counts_by_interpreter = _count_threads_by_interpreter(threads) + assert all( + counts_by_interpreter.get(interpreter_id, 0) >= 1 + for interpreter_id in interpreter_ids + ) + # At least one sub-interpreter should expose multiple Python threads. + assert any( + count > 1 + for interpreter_id, count in counts_by_interpreter.items() + if interpreter_id != 0 + ) + + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_nested_same_thread_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_nested_same_thread.py" + test_file.write_text(PROGRAM_NESTED_SAME_THREAD) + + with spawn_child_process(python_executable, test_file, tmpdir) as child_process: + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + + has_shared_tid = _assert_mergeable_same_tid_groups(threads) + assert has_shared_tid + + output = _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.PYTHON, + interpreter_ids=_interpreter_ids(threads), + ) + assert ( + "Unable to merge native stack due to insufficient native information" + not in output + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_two_threads_three_per_thread_with_native(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_two_threads_three_each.py" + signal_file = Path(str(tmpdir)) / "subinterpreters_ready.txt" + signal_file.write_text("") + test_file.write_text(PROGRAM_TWO_THREADS_THREE_SUBINTERPRETERS_EACH) + + with subprocess.Popen( + [str(python_executable), str(test_file), str(signal_file)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) as child_process: + deadline = time.time() + 10 + while time.time() < deadline: + lines = [line for line in signal_file.read_text().splitlines() if line] + if len(lines) >= 2: + break + time.sleep(0.1) + else: + child_process.terminate() + child_process.kill() + raise AssertionError("Timed out waiting for nested subinterpreter chains") + + threads = list( + get_process_threads( + child_process.pid, + stop_process=True, + native_mode=NativeReportingMode.PYTHON, + method=StackMethod.DEBUG_OFFSETS, + ) + ) + + child_process.terminate() + child_process.kill() + child_process.wait(timeout=5) + + groups = _shared_tid_groups_with_min_interpreters(threads, min_interpreters=3) + assert len(groups) >= 2 + + for _, group in groups: + _assert_strict_native_eval_symbols_for_group(group) + for thread in group: + eval_frames = [ + frame + for frame in thread.native_frames + if frame_type(frame, thread.python_version) + == NativeFrame.FrameType.EVAL + ] + entry_count = sum(frame.is_entry for frame in thread.all_frames) + assert len(eval_frames) == entry_count + assert len(eval_frames) > 0 + + output = _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.PYTHON, + interpreter_ids=_interpreter_ids(threads), + ) + assert ( + "Unable to merge native stack due to insufficient native information" + not in output + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +def test_subinterpreters_for_core(python, tmpdir): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with generate_core_file(python_executable, test_file, tmpdir) as core_file: + threads = list( + get_process_threads_for_core( + core_file, + python_executable, + native_mode=NativeReportingMode.OFF, + ) + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert all(not thread.native_frames for thread in threads) + _assert_interpreter_headers( + threads=threads, + native_mode=NativeReportingMode.OFF, + interpreter_ids=interpreter_ids, + ) + + +@ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS +@pytest.mark.parametrize( + "native_mode", + [ + NativeReportingMode.PYTHON, + NativeReportingMode.LAST, + NativeReportingMode.ALL, + ], + ids=["python", "last", "all"], +) +def test_subinterpreters_for_core_with_native(python, tmpdir, native_mode): + _, python_executable = python + + test_file = Path(str(tmpdir)) / "subinterpreters_program.py" + test_file.write_text(PROGRAM) + + with generate_core_file(python_executable, test_file, tmpdir) as core_file: + threads = list( + get_process_threads_for_core( + core_file, + python_executable, + native_mode=native_mode, + ) + ) + + interpreter_ids = _assert_subinterpreter_coverage(threads) + assert any(thread.native_frames for thread in threads) + _assert_native_eval_symbols(threads) + output = _assert_interpreter_headers( + threads=threads, + native_mode=native_mode, + interpreter_ids=interpreter_ids, + ) + assert "(C)" in output or "Unable to merge native stack" in output diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index cbab172b..d8e9d01c 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -2,7 +2,6 @@ from pathlib import Path from textwrap import dedent from unittest.mock import Mock -from unittest.mock import call from unittest.mock import mock_open from unittest.mock import patch @@ -190,7 +189,7 @@ def test_process_remote_default(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -205,9 +204,7 @@ def test_process_remote_default(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_remote_no_block(): @@ -221,7 +218,7 @@ def test_process_remote_no_block(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -236,9 +233,7 @@ def test_process_remote_no_block(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -260,7 +255,7 @@ def test_process_remote_native(argument, mode): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -275,7 +270,7 @@ def test_process_remote_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [call(thread, mode) for thread in threads] + print_threads_mock.assert_called_once_with(threads, mode) def test_process_remote_locals(): @@ -289,7 +284,7 @@ def test_process_remote_locals(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -304,9 +299,7 @@ def test_process_remote_locals(): locals=True, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_remote_native_no_block(capsys): @@ -320,7 +313,7 @@ def test_process_remote_native_no_block(capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -330,7 +323,7 @@ def test_process_remote_native_no_block(capsys): main() get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + print_threads_mock.assert_not_called() def test_process_remote_exhaustive(): @@ -344,7 +337,7 @@ def test_process_remote_exhaustive(): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), ): get_process_threads_mock.return_value = threads @@ -359,9 +352,7 @@ def test_process_remote_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -376,7 +367,7 @@ def test_process_remote_error(exception, exval, capsys): with ( patch("pystack.__main__.get_process_threads") as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), ): @@ -388,7 +379,7 @@ def test_process_remote_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - print_thread_mock.assert_not_called() + print_threads_mock.assert_not_called() capture = capsys.readouterr() assert "Oh no!" in capture.err @@ -406,7 +397,7 @@ def test_process_core_default_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -429,9 +420,7 @@ def test_process_core_default_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_default_gzip_without_executable(): @@ -453,7 +442,7 @@ def test_process_core_default_gzip_without_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.is_elf", return_value=True), @@ -479,9 +468,7 @@ def test_process_core_default_gzip_without_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) gzip_open_mock.assert_called_with(Path("corefile.gz"), "rb") @@ -551,7 +538,7 @@ def test_process_core_default_with_executable(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -571,9 +558,7 @@ def test_process_core_default_with_executable(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) @pytest.mark.parametrize( @@ -597,7 +582,7 @@ def test_process_core_native(argument, mode): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -617,7 +602,7 @@ def test_process_core_native(argument, mode): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [call(thread, mode) for thread in threads] + print_threads_mock.assert_called_once_with(threads, mode) def test_process_core_locals(): @@ -633,7 +618,7 @@ def test_process_core_locals(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -653,9 +638,7 @@ def test_process_core_locals(): locals=True, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_search_path(): @@ -678,7 +661,7 @@ def test_process_core_with_search_path(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -698,9 +681,7 @@ def test_process_core_with_search_path(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_search_root(): @@ -716,7 +697,7 @@ def test_process_core_with_search_root(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -747,9 +728,7 @@ def test_process_core_with_search_root(): locals=False, method=StackMethod.AUTO, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_process_core_with_not_readable_search_root(): @@ -761,7 +740,7 @@ def test_process_core_with_not_readable_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -786,7 +765,7 @@ def test_process_core_with_invalid_search_root(): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -814,7 +793,7 @@ def path_exists(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch.object(Path, "exists", path_exists), ): @@ -826,7 +805,7 @@ def path_exists(what): # THEN get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + print_threads_mock.assert_not_called() def test_process_core_executable_does_not_exit(): @@ -845,7 +824,7 @@ def does_exit(what): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), patch.object(Path, "exists", does_exit), @@ -857,7 +836,7 @@ def does_exit(what): # THEN get_process_threads_mock.assert_not_called() - print_thread_mock.assert_not_called() + print_threads_mock.assert_not_called() @pytest.mark.parametrize( @@ -874,7 +853,7 @@ def test_process_core_error(exception, exval, capsys): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -890,7 +869,7 @@ def test_process_core_error(exception, exval, capsys): # THEN get_process_threads_mock.assert_called_once() - print_thread_mock.assert_not_called() + print_threads_mock.assert_not_called() capture = capsys.readouterr() assert "Oh no!" in capture.err @@ -907,7 +886,7 @@ def test_process_core_exhaustive(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread") as print_thread_mock, + patch("pystack.__main__.print_threads") as print_threads_mock, patch("sys.argv", argv), patch("pathlib.Path.exists", return_value=True), patch("pystack.__main__.CoreFileAnalyzer"), @@ -927,9 +906,7 @@ def test_process_core_exhaustive(): locals=False, method=StackMethod.ALL, ) - assert print_thread_mock.mock_calls == [ - call(thread, NativeReportingMode.OFF) for thread in threads - ] + print_threads_mock.assert_called_once_with(threads, NativeReportingMode.OFF) def test_default_colored_output(): @@ -942,7 +919,7 @@ def test_default_colored_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -963,7 +940,7 @@ def test_nocolor_output(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -984,7 +961,7 @@ def test_nocolor_output_at_the_front_for_process(): with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), ): @@ -1004,7 +981,7 @@ def test_nocolor_output_at_the_front_for_core(): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1029,7 +1006,7 @@ def test_global_options_can_be_placed_at_any_point(option): # WHEN with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1051,7 +1028,7 @@ def test_verbose_as_global_options_sets_correctly_the_logger(): # WHEN with ( patch("pystack.__main__.get_process_threads"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("sys.argv", argv), patch("os.environ", environ), patch("pathlib.Path.exists", return_value=True), @@ -1198,7 +1175,7 @@ def test_process_core_does_not_crash_if_core_analyzer_fails(method): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1223,7 +1200,7 @@ def test_core_file_missing_modules_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1254,7 +1231,7 @@ def test_core_file_missing_build_ids_are_logged(caplog, native): with ( patch("pystack.__main__.get_process_threads_for_core"), - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", return_value=True), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), @@ -1294,7 +1271,7 @@ def test_executable_is_not_elf_uses_the_first_map(): patch( "pystack.__main__.get_process_threads_for_core" ) as get_process_threads_mock, - patch("pystack.__main__.print_thread"), + patch("pystack.__main__.print_threads"), patch("pystack.__main__.is_elf", lambda x: x == real_executable), patch("pystack.__main__.is_gzip", return_value=False), patch("sys.argv", argv), diff --git a/tests/unit/test_normalize_threads.py b/tests/unit/test_normalize_threads.py new file mode 100644 index 00000000..457e6a83 --- /dev/null +++ b/tests/unit/test_normalize_threads.py @@ -0,0 +1,197 @@ +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple + +from pystack._pystack import NativeReportingMode +from pystack._pystack import _normalize_threads_for_testing + +EVAL = "_PyEval_EvalFrameDefault" +PY_VERSION = (3, 13) + + +def _make_thread( + tid: int, + *, + stack_anchor: int = 0, + interpreter_id: int = 0, + native_symbols: Optional[List[str]] = None, + frames: Optional[List[Tuple[str, bool]]] = None, +) -> Dict[str, Any]: + return dict( + tid=tid, + stack_anchor=stack_anchor, + interpreter_id=interpreter_id, + native_symbols=(native_symbols or []), + frames=(frames or []), + python_version=PY_VERSION, + ) + + +def test_unique_tids_pass_through_in_order(): + threads = [ + _make_thread( + 3, + native_symbols=["alpha", EVAL, "beta"], + frames=[("main", True)], + ), + _make_thread( + 1, + native_symbols=["gamma", EVAL, "delta"], + frames=[("run", True)], + ), + _make_thread( + 2, + native_symbols=["epsilon"], + frames=[("work", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert len(result) == 3 + assert [t.tid for t in result] == [3, 1, 2] + assert [f.symbol for f in result[0].native_frames] == ["alpha", EVAL, "beta"] + assert [f.symbol for f in result[1].native_frames] == ["gamma", EVAL, "delta"] + assert [f.symbol for f in result[2].native_frames] == ["epsilon"] + assert [f.code.scope for f in result[0].frames] == ["main"] + assert [f.code.scope for f in result[1].frames] == ["run"] + assert [f.code.scope for f in result[2].frames] == ["work"] + + +def test_first_seen_tid_order_preserved(): + threads = [ + _make_thread(10, interpreter_id=0, stack_anchor=1000, frames=[("a", True)]), + _make_thread(20, interpreter_id=0, stack_anchor=2000, frames=[("b", True)]), + _make_thread(20, interpreter_id=1, stack_anchor=1500, frames=[("d", True)]), + _make_thread(10, interpreter_id=1, stack_anchor=500, frames=[("c", True)]), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.OFF) + + assert [t.tid for t in result] == [10, 10, 20, 20] + assert [t.interpreter_id for t in result] == [0, 1, 0, 1] + assert [f.code.scope for f in result[0].frames] == ["a"] + assert [f.code.scope for f in result[1].frames] == ["c"] + assert [f.code.scope for f in result[2].frames] == ["b"] + assert [f.code.scope for f in result[3].frames] == ["d"] + + +def test_stack_anchor_sort_within_group(): + threads = [ + _make_thread(1, interpreter_id=2, stack_anchor=0, frames=[("inner", True)]), + _make_thread(1, interpreter_id=0, stack_anchor=9000, frames=[("outer", True)]), + _make_thread(1, interpreter_id=1, stack_anchor=5000, frames=[("middle", True)]), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.OFF) + + assert len(result) == 3 + assert [t.interpreter_id for t in result] == [0, 1, 2] + assert [f.code.scope for f in result[0].frames] == ["outer"] + assert [f.code.scope for f in result[1].frames] == ["middle"] + assert [f.code.scope for f in result[2].frames] == ["inner"] + + +def test_native_slice_correctness(): + native_symbols = [ + "outer_c_func", + EVAL, + "middle_c_func_a", + "middle_c_func_b", + EVAL, + "inner_c_func", + EVAL, + ] + threads = [ + _make_thread( + 1, + interpreter_id=0, + stack_anchor=9000, + native_symbols=native_symbols, + frames=[("helper", False), ("main", True)], + ), + _make_thread( + 1, + interpreter_id=1, + stack_anchor=5000, + native_symbols=native_symbols, + frames=[("run", True)], + ), + _make_thread( + 1, + interpreter_id=2, + stack_anchor=1000, + native_symbols=native_symbols, + frames=[("work", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert len(result) == 3 + assert result[0].interpreter_id == 0 + assert result[1].interpreter_id == 1 + assert result[2].interpreter_id == 2 + + syms0 = [f.symbol for f in result[0].native_frames] + syms1 = [f.symbol for f in result[1].native_frames] + syms2 = [f.symbol for f in result[2].native_frames] + + assert syms0 == ["outer_c_func", EVAL, "middle_c_func_a", "middle_c_func_b"] + assert syms1 == [EVAL, "inner_c_func"] + assert syms2 == [EVAL] + + assert syms0 + syms1 + syms2 == native_symbols + + assert [f.code.scope for f in result[0].frames] == ["main", "helper"] + assert [f.code.scope for f in result[1].frames] == ["run"] + assert [f.code.scope for f in result[2].frames] == ["work"] + + +def test_middle_interpreter_no_frames_gets_native_cleared(): + native_symbols = [ + "setup", + EVAL, + "bridge", + EVAL, + ] + threads = [ + _make_thread( + 1, + interpreter_id=0, + stack_anchor=9000, + native_symbols=native_symbols, + frames=[("outer", True)], + ), + _make_thread( + 1, + interpreter_id=1, + stack_anchor=5000, + native_symbols=native_symbols, + frames=[], + ), + _make_thread( + 1, + interpreter_id=2, + stack_anchor=1000, + native_symbols=native_symbols, + frames=[("inner", True)], + ), + ] + + result = _normalize_threads_for_testing(threads, NativeReportingMode.PYTHON) + + assert len(result) == 3 + assert result[0].interpreter_id == 0 + assert result[1].interpreter_id == 1 + assert result[2].interpreter_id == 2 + + assert [f.symbol for f in result[0].native_frames] == ["setup", EVAL, "bridge"] + assert [f.symbol for f in result[1].native_frames] == [] + assert [f.symbol for f in result[2].native_frames] == [EVAL] + + assert [f.code.scope for f in result[0].frames] == ["outer"] + assert [f.code.scope for f in result[1].frames] == [] + assert [f.code.scope for f in result[2].frames] == ["inner"] diff --git a/tests/unit/test_traceback_formatter.py b/tests/unit/test_traceback_formatter.py index 7ff3b276..5bad8082 100644 --- a/tests/unit/test_traceback_formatter.py +++ b/tests/unit/test_traceback_formatter.py @@ -5,7 +5,7 @@ from pystack.engine import NativeReportingMode from pystack.traceback_formatter import format_thread -from pystack.traceback_formatter import print_thread +from pystack.traceback_formatter import print_threads from pystack.types import SYMBOL_IGNORELIST from pystack.types import LocationInfo from pystack.types import NativeFrame @@ -1268,7 +1268,7 @@ def test_print_thread(capsys): "pystack.traceback_formatter.format_thread", return_value=("1", "2", "3"), ): - print_thread(thread, NativeReportingMode.OFF) + print_threads([thread], NativeReportingMode.OFF) # THEN diff --git a/tests/utils.py b/tests/utils.py index cdaa2f04..e779989a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -314,6 +314,13 @@ def all_pystack_combinations(corefile=False, native=False): ) +ALL_PYTHONS_THAT_SUPPORT_SUBINTERPRETERS = pytest.mark.parametrize( + "python", + [python[:2] for python in AVAILABLE_PYTHONS if python.version >= (3, 13)], + ids=[python[1].name for python in AVAILABLE_PYTHONS if python.version >= (3, 13)], +) + + def all_pythons_since(major: int, minor: int): return pytest.mark.parametrize( "python",