From 10651993bc98ecafe6c6de8eb97695696fa0021b Mon Sep 17 00:00:00 2001 From: Dongdong Tao Date: Wed, 27 May 2026 16:33:39 +0900 Subject: [PATCH] osdtrace: add --id option to trace by OSD ID, drop -o Replaces -o (userspace post-filter) with --id that resolves OSD IDs to PIDs via discovery and attaches uprobes only to those PIDs. Mutually exclusive with -p; errors on unknown / ambiguous / negative IDs. Tests: - tests/test-id-option.sh: standalone negative-path checks (no cluster). - tests/functional-test-microceph.sh: parallel --id 2 run alongside -p, asserted via new verify_osdtrace_targets_only helper. --- src/osdtrace.cc | 78 ++++++++++++++++------ tests/functional-test-microceph.sh | 42 +++++++++++- tests/lib/verify-trace-output.sh | 42 ++++++++++++ tests/test-id-option.sh | 100 +++++++++++++++++++++++++++++ 4 files changed, 241 insertions(+), 21 deletions(-) create mode 100755 tests/test-id-option.sh diff --git a/src/osdtrace.cc b/src/osdtrace.cc index 64ff8bb..c2a573d 100644 --- a/src/osdtrace.cc +++ b/src/osdtrace.cc @@ -213,7 +213,6 @@ static __u64 bootstamp = 0; __u64 threshold = 0; //in millisecond int timeout = -1; //in seconds -int probe_osdid = -1; volatile sig_atomic_t timeout_occurred = 0; @@ -983,24 +982,21 @@ static int handle_event(void *ctx, void *data, size_t size) { pid = val->pid; osd_id = osd_pid_to_id(pid); - if (probe_osdid == -1 || probe_osdid == osd_id) { - if (probe_mode == OP_SINGLE_PROBE) { - handle_single(val, osd_id); - } else if (probe_mode & OP_FULL_PROBE) { - if (mode == MODE_AVG) { - clog << "avg mode needs to be refined" << endl; - //handle_avg(val, osd_id); - } else if (mode == MODE_ALL){ - handle_full(val, osd_id); - } + if (probe_mode == OP_SINGLE_PROBE) { + handle_single(val, osd_id); + } else if (probe_mode & OP_FULL_PROBE) { + if (mode == MODE_AVG) { + clog << "avg mode needs to be refined" << endl; + //handle_avg(val, osd_id); + } else if (mode == MODE_ALL){ + handle_full(val, osd_id); } } } else if (is_bluestore_event && (probe_mode & BLUESTORE_PROBE)) { struct bluestore_lat_v *val = (struct bluestore_lat_v *) data; pid = val->pid; osd_id = osd_pid_to_id(pid); - if (probe_osdid == -1 || probe_osdid == osd_id) - handle_bluestore(val, osd_id); + handle_bluestore(val, osd_id); } if (!exists(osd_id)) { @@ -1112,11 +1108,13 @@ void print_discovered_osds(const std::vector& processes) { } std::set process_ids; // Support multiple PIDs (set ensures deduplication) +std::set requested_osd_ids; // Populated by --id; resolved to PIDs in main() int parse_args(int argc, char **argv) { static struct option long_options[] = { {"skip-version-check", no_argument, 0, 0}, {"version", no_argument, 0, 'V'}, {"list", no_argument, 0, 0}, + {"id", required_argument, 0, 0}, {0, 0, 0, 0} }; @@ -1125,7 +1123,7 @@ int parse_args(int argc, char **argv) { // it in a plain char is unsafe on platforms where char is unsigned by // default (the `!= -1` test can then loop forever). Match kfstrace. int opt; - while ((opt = getopt_long(argc, argv, ":d:m:t:o:xbj:i:l:p:V", long_options, &option_index)) != -1) { + while ((opt = getopt_long(argc, argv, ":d:m:t:xbj:i:l:p:V", long_options, &option_index)) != -1) { switch (opt) { case 0: // Handle long options @@ -1133,6 +1131,22 @@ int parse_args(int argc, char **argv) { skip_version_check = true; } else if (strcmp(long_options[option_index].name, "list") == 0) { list_only = true; + } else if (strcmp(long_options[option_index].name, "id") == 0) { + std::stringstream ss(optarg); + std::string token; + while (std::getline(ss, token, ',')) { + try { + int id = stoi(token); + if (id < 0) { + std::cerr << "Invalid --id value (must be non-negative): " << token << std::endl; + return -1; + } + requested_osd_ids.insert(id); + } catch (...) { + std::cerr << "Invalid --id value: " << token << std::endl; + return -1; + } + } } break; case 'V': @@ -1160,9 +1174,6 @@ int parse_args(int argc, char **argv) { case 'b': probe_mode |= BLUESTORE_PROBE; break; - case 'o': - probe_osdid = stoi(optarg); - break; case 'j': export_json = true; json_output_file = optarg; @@ -1198,17 +1209,17 @@ int parse_args(int argc, char **argv) { break; case '?': case 'h': - std::cout << "Usage: " << argv[0] << " [-d ] [-m ] [-l ] [-o ] [-x] [-b] [-j] [-i ] [-t ] [-p ] [--skip-version-check] [--list]\n"; + std::cout << "Usage: " << argv[0] << " [-d ] [-m ] [-l ] [-x] [-b] [-j] [-i ] [-t ] [-p ] [--id ] [--skip-version-check] [--list]\n"; std::cout << " -d Set probe duration in seconds to calculate average latency\n"; std::cout << " -m Set operation latency collection mode\n"; std::cout << " -l Set operation latency threshold to capture\n"; - std::cout << " -o Only probe a specific OSD\n"; std::cout << " -x Set probe mode to Full OPs. See below for details\n"; std::cout << " -b Set probe mode to Bluestore. See below for details\n"; std::cout << " -j Export DWARF info to JSON file\n"; std::cout << " -i Import DWARF info from JSON file\n"; std::cout << " -t Set execution timeout in seconds\n"; std::cout << " -p Probe using Process IDs (comma-separated, mandatory for tracing containerized processes)\n"; + std::cout << " --id Probe by OSD ID (comma-separated; resolves to PIDs via discovery)\n"; std::cout << " --skip-version-check Skip version check when importing DWARF JSON (currently needed for containers)\n"; std::cout << " --list List active ceph-osd processes on the host, their PIDs and OSD IDs, and exit\n"; std::cout << " -V, --version Print version information and exit\n"; @@ -1380,6 +1391,35 @@ int main(int argc, char **argv) { return 0; } + // Resolve --id to PIDs via discovery and feed into process_ids. + if (!requested_osd_ids.empty()) { + if (!process_ids.empty()) { + std::cerr << "Error: --id and -p are mutually exclusive" << std::endl; + return 1; + } + auto processes = discover_ceph_osd_processes(); + for (int want : requested_osd_ids) { + std::vector matches; + for (const auto& p : processes) { + if (p.osd_id == want) matches.push_back(p.pid); + } + if (matches.empty()) { + std::cerr << "Error: no running ceph-osd process found with OSD ID " + << want << " (try --list)" << std::endl; + return 1; + } + if (matches.size() > 1) { + std::cerr << "Error: OSD ID " << want << " matched multiple PIDs ("; + for (size_t i = 0; i < matches.size(); ++i) + std::cerr << (i ? "," : "") << matches[i]; + std::cerr << "); use -p explicitly" << std::endl; + return 1; + } + process_ids.insert(matches[0]); + clog << "--id " << want << " resolved to PID " << matches[0] << endl; + } + } + // Validate all process_ids if specified for (int pid : process_ids) { std::string proc_path = "/proc/" + std::to_string(pid); diff --git a/tests/functional-test-microceph.sh b/tests/functional-test-microceph.sh index ff7b5da..d20297d 100755 --- a/tests/functional-test-microceph.sh +++ b/tests/functional-test-microceph.sh @@ -23,8 +23,14 @@ echo "=== MicroCeph Functional Test for osdtrace and radostrace ===" echo "Project root: $PROJECT_ROOT" OSDTRACE_LOG="/tmp/osdtrace.log" +OSDTRACE_ID_LOG="/tmp/osdtrace-id.log" RADOSTRACE_LOG="/tmp/radostrace.log" +# OSD id targeted by the --id-based osdtrace run. Must differ from the OSD +# the -p-based run attaches to (osd.1) so the two runs exercise distinct +# ceph-osd PIDs; microceph creates osd.0/osd.1/osd.2 in this test's setup. +TARGET_OSD_ID=2 + # Cleanup function cleanup() { info "=== Cleanup ===" @@ -39,6 +45,12 @@ cleanup() { info " === END of OSD trace === " fi + if [[ -e $OSDTRACE_ID_LOG ]]; then + info "OSD trace (--id $TARGET_OSD_ID) output:" + cat $OSDTRACE_ID_LOG + info " === END of OSD trace (--id) === " + fi + if [[ -e $RADOSTRACE_LOG ]]; then info "RADOS trace output:" cat $RADOSTRACE_LOG @@ -46,7 +58,7 @@ cleanup() { fi # Remove test files - rm -f $OSDTRACE_LOG $RADOSTRACE_LOG + rm -f $OSDTRACE_LOG $OSDTRACE_ID_LOG $RADOSTRACE_LOG # Remove test RBD resources microceph.rbd rm test_pool/testimage 2>/dev/null || true @@ -165,6 +177,17 @@ OSDTRACE_PID=$(pidof osdtrace) info "Started osdtrace with PID $OSDTRACE_PID" sleep 3 +info "=== Step 6b: Start a second osdtrace targeting OSD $TARGET_OSD_ID via --id ===" +# Two osdtrace processes can attach uprobes to disjoint PIDs without +# interference (each owns its own BPF maps/ringbuf). This run validates +# the --id resolver: it must enumerate ceph-osd processes, map OSD ID +# $TARGET_OSD_ID to its PID, and attach to *only* that PID — which the +# verifier then proves by checking that every captured row carries +# osd_id=$TARGET_OSD_ID (using verify_osdtrace_targets_only). +timeout 30 $PROJECT_ROOT/osdtrace -i $OSD_DWARF --id $TARGET_OSD_ID --skip-version-check -x >$OSDTRACE_ID_LOG 2>&1 & +OSDTRACE_ID_BG_PID=$! +sleep 3 + info "=== Step 7: Generate I/O traffic via rbd bench ===" # Random 2 MiB read-write mix via the snap-confined rbd bench — runs # inside the microceph snap so it picks up the bundled librbd/librados @@ -226,12 +249,27 @@ info "=== Step 11: Verify osdtrace output ===" # row total is several hundred. verify_osdtrace_output "$OSDTRACE_LOG" "$TEST_POOL_ID" "$MAX_OSD_ID" "$TOT_PG" 50 +info "=== Step 11b: Verify --id-based osdtrace output ===" +# Anchor the resolver: stdout/stderr of the --id run must report the +# OSD id → PID mapping. Then run the standard per-row invariant check +# (lower min_rows: a single OSD sees a fraction of the bench traffic, +# but still hundreds of subop_w + op_r rows over 20 s). Finally pin +# the targets-only invariant: every captured row's osd_id must equal +# $TARGET_OSD_ID, which is the only thing that proves --id attached +# to *that* PID and no others. +if ! grep -q "^--id $TARGET_OSD_ID resolved to PID " "$OSDTRACE_ID_LOG"; then + err "osdtrace --id $TARGET_OSD_ID log missing 'resolved to PID' line" + exit 1 +fi +verify_osdtrace_output "$OSDTRACE_ID_LOG" "$TEST_POOL_ID" "$MAX_OSD_ID" "$TOT_PG" 10 +verify_osdtrace_targets_only "$OSDTRACE_ID_LOG" "$TARGET_OSD_ID" + info "=== Step 12: Verify radostrace output ===" verify_radostrace_output "$RADOSTRACE_LOG" "$TEST_POOL_ID" "$MAX_OSD_ID" 50 info "=== Test Summary ===" info "✓ MicroCeph cluster deployed successfully" -info "✓ osdtrace and radostrace output validated" +info "✓ osdtrace (-p and --id) and radostrace output validated" info "✓ All functional tests passed!" exit 0 diff --git a/tests/lib/verify-trace-output.sh b/tests/lib/verify-trace-output.sh index 4296277..3f6419c 100644 --- a/tests/lib/verify-trace-output.sh +++ b/tests/lib/verify-trace-output.sh @@ -468,6 +468,48 @@ _verify_radostrace_output_impl() { } +# verify_osdtrace_targets_only [target_osd_id ...] +# +# Anchors --id semantics: assert the set of distinct osd_id values appearing +# in 's data rows is a subset of the given targets. Reuses +# _osdtrace_rows so it shares the same NF/landmark-based row parser as the +# main verifier (truncated tail rows are filtered upstream). +# +# Fails if: +# - any data row carries an osd_id outside the target set (proves uprobes +# attached to a process the user didn't ask for), or +# - no data rows were captured at all (proves the --id resolution → attach +# path produced *something*; the looser min_rows check is left to +# verify_osdtrace_output). +verify_osdtrace_targets_only() { + local log=$1 + shift + local -A want=() + local id + for id in "$@"; do + want[$id]=1 + done + + local total=0 + local op_type osd_id _rest + while IFS='|' read -r op_type osd_id _rest; do + [ -z "$op_type" ] && continue + total=$((total + 1)) + if [ -z "${want[$osd_id]:-}" ]; then + err "osdtrace row carries osd_id=$osd_id, expected one of: $* (log=$log)" + return 1 + fi + done < <(_osdtrace_rows "$log") + + if (( total == 0 )); then + err "osdtrace log $log has zero data rows; --id resolution may have failed silently" + return 1 + fi + + info "✓ osdtrace targets-only check passed ($total rows, all in {$*})" +} + + # verify_osdtrace_rgw_output # # Variant of verify_osdtrace_output for RGW-driven workloads (S3 PUT/GET via diff --git a/tests/test-id-option.sh b/tests/test-id-option.sh new file mode 100755 index 0000000..c332058 --- /dev/null +++ b/tests/test-id-option.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# +# Negative-path / argument-parsing test for osdtrace's --id option. +# Validates the error branches that don't require a running ceph cluster: +# +# - --id with a non-existent OSD ID fails with "no running ceph-osd +# process found" +# - --id combined with -p fails with "mutually exclusive" +# - --id with a non-numeric value fails with "Invalid --id value" +# - --id with a negative value fails with "must be non-negative" +# - the removed -o option is rejected by getopt +# +# Exit codes from `parse_args() < 0` get converted to `return 0` inside +# osdtrace's `main()`, so we anchor on stderr substrings rather than the +# numeric exit code for parse-time errors. For main()-time errors (--id +# resolution, --id/-p conflict) the binary exits 1 — those we check both. + +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +OSDTRACE="$PROJECT_ROOT/osdtrace" + +# shellcheck source=lib/log.sh +source "$SCRIPT_DIR/lib/log.sh" + +if [ ! -x "$OSDTRACE" ]; then + err "osdtrace binary not found at $OSDTRACE (build with 'make osdtrace')" + exit 1 +fi + +pass_count=0 +fail_count=0 + +# expect_stderr -- +expect_stderr() { + local name=$1 needle=$2 want_exit=$3 + shift 3 + [ "$1" = "--" ] && shift + + local out + local rc=0 + out=$("$OSDTRACE" "$@" 2>&1) || rc=$? + + local ok=1 + if [ "$want_exit" != "-1" ] && [ "$rc" -ne "$want_exit" ]; then + err "[$name] expected exit $want_exit, got $rc" + ok=0 + fi + if ! grep -qF -- "$needle" <<< "$out"; then + err "[$name] expected stderr/stdout substring not found: $needle" + err "----- actual output -----" + echo "$out" >&2 + err "-------------------------" + ok=0 + fi + + if (( ok )); then + info "[$name] PASS" + pass_count=$((pass_count + 1)) + else + fail_count=$((fail_count + 1)) + fi +} + +# Picking an OSD ID that is essentially never going to exist on a CI host, +# so the "not found" branch fires deterministically regardless of whether +# the test host happens to have a Ceph OSD running. +NONEXISTENT_OSD_ID=2147483600 + +info "=== osdtrace --id option negative-path tests ===" + +expect_stderr "not_found" \ + "no running ceph-osd process found with OSD ID $NONEXISTENT_OSD_ID" \ + 1 -- --id "$NONEXISTENT_OSD_ID" + +expect_stderr "conflict_with_-p" \ + "--id and -p are mutually exclusive" \ + 1 -- --id "$NONEXISTENT_OSD_ID" -p 1 + +expect_stderr "non_numeric" \ + "Invalid --id value: abc" \ + -1 -- --id abc + +expect_stderr "negative" \ + "Invalid --id value (must be non-negative): -1" \ + -1 -- --id -1 + +# -o was removed when --id replaced it; getopt should print the standard +# `invalid option` diagnostic. Anchor on `?` being treated as the help +# fall-through (which prints the usage banner that starts with "Usage:"). +expect_stderr "o_option_removed" \ + "Usage:" \ + -1 -- -o 0 + +info "=== Summary: $pass_count passed, $fail_count failed ===" +if (( fail_count > 0 )); then + exit 1 +fi +exit 0