diff --git a/CLAUDE.md b/CLAUDE.md index 0bf92db..7dafff4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,6 +15,7 @@ cargo fmt --all -- --check # Check formatting ``` Each change should check tests and formatting, and if necessary fix. +Also each change should write app tests using new functionality. ## Commiting changes diff --git a/README.md b/README.md index c54569d..1375531 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,19 @@ The built binary will be in target/release/pgweasel. Here is a list of currently implemented commands +### Global options + +These flags work with any subcommand: + +| Flag | Short | Description | +|------|-------|-------------| +| `--begin` | `-b` | Show entries from a given time or relative interval (e.g. `10m`, `2025-05-21 12:00:00`) | +| `--end` | `-e` | Show entries up to a given time | +| `--mask` | `-m` | Timestamp prefix mask — show all events matching the prefix | +| `--context` | `-C` | Show NUM records before **and** after each matching record | +| `--before-context` | `-B` | Show NUM records before each matching record | +| `--after-context` | `-A` | Show NUM records after each matching record | + ### errors [ err | errs ] - [x] `pgweasel errors $LOG(s)` - Show WARN+ (by default) log entries "as is" @@ -95,11 +108,17 @@ Here is a list of currently implemented commands ### connections -- [x] `pgweasel connections ./tests/files/azure_connections.log` Show connections counts by total, db, user, application name. Assumes log_connections enabled +- [x] `pgweasel connections ./tests/files/azure_connections.log` Show connections counts by total, db, user, application name. Assumes log_connections enabled. Output is sorted by count descending. ### grep -For grep I would recommend using grep cli - ripgrep +- [x] `pgweasel grep "deadlock" $LOG(s)` Show log records containing the search term (case-insensitive) + +- [x] `pgweasel -A 2 grep "deadlock" $LOG(s)` Show matching records plus 2 records after each match + +- [x] `pgweasel -B 1 grep "FATAL" $LOG(s)` Show matching records plus 1 record before each match + +- [x] `pgweasel -C 2 grep "checkpoint" $LOG(s)` Show matching records plus 2 records on both sides ## Contributing diff --git a/src/cli.rs b/src/cli.rs index a2d7f90..80e65b9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -12,6 +12,24 @@ pub fn cli() -> Command { .arg(arg!(--mask ).short('m').help("Postgres log timestamp mask (e.g. \"2025-05-21 12:57\" - will show all events at 12:57)")) .arg(arg!(--begin ).short('b')) .arg(arg!(--end ).short('e')) + .arg( + arg!(--context ) + .short('C') + .help("Show NUM records before and after each match") + .value_parser(value_parser!(usize)), + ) + .arg( + arg!(--"before-context" ) + .short('B') + .help("Show NUM records before each match") + .value_parser(value_parser!(usize)), + ) + .arg( + arg!(--"after-context" ) + .short('A') + .help("Show NUM records after each match") + .value_parser(value_parser!(usize)), + ) .subcommand_required(true) .subcommand( Command::new("errors") @@ -94,6 +112,12 @@ pub fn cli() -> Command { .args_conflicts_with_subcommands(true) .args(filelist_args()) ) + .subcommand( + Command::new("grep") + .about("Show log lines containing the given search term") + .arg(arg!().help("Search term to look for in log lines")) + .args(filelist_args()) + ) } fn level_args() -> Vec { diff --git a/src/convert_args.rs b/src/convert_args.rs index 92f9fc0..d311aea 100644 --- a/src/convert_args.rs +++ b/src/convert_args.rs @@ -29,6 +29,8 @@ pub struct ConvertedArgs { pub mask: Option, pub verbose: bool, pub print_details: bool, + pub before_context: usize, + pub after_context: usize, } impl ConvertedArgs { @@ -50,6 +52,16 @@ impl ConvertedArgs { .get_one::("mask") .map(std::borrow::ToOwned::to_owned); + let context = val.get_one::("context").copied().unwrap_or(0); + let before_context = val + .get_one::("before-context") + .copied() + .unwrap_or(context); + let after_context = val + .get_one::("after-context") + .copied() + .unwrap_or(context); + // Initialize logger based on verbose flag let mut verbose = false; env_logger::Builder::from_default_env() @@ -71,6 +83,8 @@ impl ConvertedArgs { matches: val, verbose, print_details: true, + before_context, + after_context, }) } diff --git a/src/filters/filter_contains_ci.rs b/src/filters/filter_contains_ci.rs new file mode 100644 index 0000000..8374859 --- /dev/null +++ b/src/filters/filter_contains_ci.rs @@ -0,0 +1,21 @@ +use crate::{filters::Filter, format::Format}; + +#[derive(Clone)] +pub struct FilterContainsCi { + substring_lower: String, +} + +impl FilterContainsCi { + pub fn new(substring: String) -> Self { + FilterContainsCi { + substring_lower: substring.to_lowercase(), + } + } +} + +impl Filter for FilterContainsCi { + fn matches(&self, record: &[u8], _fmt: &Format) -> bool { + let record_lower = record.to_ascii_lowercase(); + memchr::memmem::find(&record_lower, self.substring_lower.as_bytes()).is_some() + } +} diff --git a/src/filters/mod.rs b/src/filters/mod.rs index 7e04751..2e14e35 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -1,9 +1,11 @@ mod filter_contains; +mod filter_contains_ci; mod filter_slow; mod locking_filter; mod system_filter; pub use filter_contains::FilterContains; +pub use filter_contains_ci::FilterContainsCi; pub use filter_slow::FilterSlow; pub use locking_filter::LockingFilter; pub use system_filter::SystemFilter; diff --git a/src/main.rs b/src/main.rs index 4d54ad1..1cbb9f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -144,6 +144,15 @@ fn main() -> Result<()> { converted_args.print_details = false; output_results(converted_args, Severity::Log, &mut aggregators, &filters)?; } + Some(("grep", sub_matches)) => { + let term = sub_matches + .get_one::("TERM") + .expect("TERM is required"); + filters.push(Box::new(crate::filters::FilterContainsCi::new( + term.clone(), + ))); + output_results(converted_args, Severity::Log, &mut aggregators, &filters)?; + } Some(("peaks" | "stats", _)) => { error!("Not implemented"); } diff --git a/src/output_results/mod.rs b/src/output_results/mod.rs index 94d96a4..dd72fe1 100644 --- a/src/output_results/mod.rs +++ b/src/output_results/mod.rs @@ -81,6 +81,45 @@ pub fn output_results( debug!("File did read in: {:?}", timing.elapsed()); + if converted_args.before_context > 0 || converted_args.after_context > 0 { + let before_n = converted_args.before_context; + let after_n = converted_args.after_context; + let records = collect_records(bytes); + let mut before_buf: std::collections::VecDeque<&[u8]> = + std::collections::VecDeque::new(); + let mut after_remaining: usize = 0; + + for record in records { + if let Some((severity, log_time)) = record_passes(record, &filter_container) { + for prev in &before_buf { + print!("{}", String::from_utf8_lossy(prev)); + } + before_buf.clear(); + print!("{}", String::from_utf8_lossy(record)); + aggragate_record( + aggregators, + record, + &filter_container.format, + severity, + log_time, + )?; + after_remaining = after_n; + } else if after_remaining > 0 { + print!("{}", String::from_utf8_lossy(record)); + after_remaining -= 1; + } else { + before_buf.push_back(record); + while before_buf.len() > before_n { + before_buf.pop_front(); + } + } + } + for agg in &mut *aggregators { + agg.print(); + } + continue; + } + let partials: Result>>> = ranges .par_iter() .map(|range| -> Result>> { @@ -218,6 +257,56 @@ fn aggragate_record( Ok(()) } +fn collect_records(bytes: &[u8]) -> Vec<&[u8]> { + let mut records = Vec::new(); + let mut record_start = 0usize; + let mut offset = 0usize; + + for line in bytes.split(|&b| b == b'\n') { + let line_len = line.len() + 1; + if is_record_start(line) && offset != 0 { + records.push(&bytes[record_start..offset]); + record_start = offset; + } + offset += line_len; + } + if record_start < bytes.len() { + records.push(&bytes[record_start..]); + } + records +} + +fn record_passes<'a>( + record: &[u8], + filter_container: &FilterContainer<'a>, +) -> Option<(Severity, DateTime)> { + for filter in &filter_container.filters { + if !filter.matches(record, &filter_container.format) { + return None; + } + } + let text = unsafe { std::str::from_utf8_unchecked(record) }; + let severity = filter_container.format.severity_from_string(text); + if i32::from(severity) < filter_container.min_severity { + return None; + } + let mut parts = text.split_whitespace(); + let ts_str = format!("{} {} {}", parts.next()?, parts.next()?, parts.next()?); + let log_time = parse_timestamp_from_string(ts_str.as_str()).ok()?; + if filter_container.begin.is_some_and(|b| log_time < b) { + return None; + } + if filter_container.end.is_some_and(|e| log_time > e) { + return None; + } + for custom_filter in filter_container.custom_filters { + if !custom_filter.matches(record, &filter_container.format) { + return None; + } + } + Some((severity, log_time)) +} + #[inline] fn is_record_start(record: &[u8]) -> bool { record.len() >= 23 diff --git a/tests/files/debian_default.log b/tests/files/debian_default.log new file mode 100644 index 0000000..f64bfc0 --- /dev/null +++ b/tests/files/debian_default.log @@ -0,0 +1,36 @@ +2025-05-02 18:18:26.523 EEST [2240722] LOG: starting PostgreSQL 16.9 (Ubuntu 16.9-1.pgdg24.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0, 64-bit +2025-05-02 18:18:26.523 EEST [2240722] LOG: listening on IPv4 address "0.0.0.0", port 5432 +2025-05-02 18:18:26.523 EEST [2240722] LOG: listening on IPv6 address "::", port 5432 +2025-05-02 18:18:26.525 EEST [2240722] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" +2025-05-02 18:18:26.533 EEST [2240726] LOG: database system was shut down at 2025-05-01 18:18:26 EEST +2025-05-02 18:18:26.544 EEST [2240722] LOG: database system is ready to accept connections +2025-05-02 18:18:26.545 EEST [2240733] LOG: TimescaleDB background worker launcher connected to shared catalogs +2025-05-02 18:18:26.555 EEST [2698052] krl@postgres ERROR: column "xxxx" does not exist at character 8 +2025-05-02 18:18:26.555 EEST [2698052] krl@postgres STATEMENT: select xxxx ; +2025-05-02 18:25:03.959 EEST [2702612] krl@postgres LOG: statement: vacuum pgbench_branches +2025-05-02 18:25:03.960 EEST [2702612] krl@postgres LOG: statement: vacuum pgbench_tellers +2025-05-02 18:25:03.961 EEST [2702612] krl@postgres LOG: statement: truncate pgbench_history +2025-05-02 18:25:03.976 EEST [2702613] krl@postgres LOG: statement: BEGIN; +2025-05-02 18:25:03.976 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_accounts SET abalance = abalance + -3458 WHERE aid = 38089884; +2025-05-02 18:25:05.255 EEST [2702613] krl@postgres LOG: duration: 1278.620 ms +2025-05-02 18:25:05.255 EEST [2702613] krl@postgres LOG: statement: SELECT abalance FROM pgbench_accounts WHERE aid = 38089884; +2025-05-02 18:25:05.616 EEST [2702613] krl@postgres LOG: duration: 361.550 ms +2025-05-02 18:25:05.616 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_tellers SET tbalance = tbalance + -3458 WHERE tid = 1002; +2025-05-02 18:25:05.617 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_branches SET bbalance = bbalance + -3458 WHERE bid = 449; +2025-05-02 18:25:05.617 EEST [2702613] krl@postgres LOG: statement: INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (1002, 449, 38089884, -3458, CURRENT_TIMESTAMP); +2025-05-02 18:25:05.617 EEST [2702613] krl@postgres LOG: statement: END; +2025-05-02 18:25:05.617 EEST [2702613] krl@postgres LOG: statement: BEGIN; +2025-05-02 18:25:05.617 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_accounts SET abalance = abalance + -500 WHERE aid = 7792362; +2025-05-02 18:25:06.841 EEST [2702613] krl@postgres LOG: duration: 1223.986 ms +2025-05-02 18:25:06.841 EEST [2702613] krl@postgres LOG: statement: SELECT abalance FROM pgbench_accounts WHERE aid = 7792362; +2025-05-02 18:25:07.205 EEST [2702613] krl@postgres LOG: duration: 363.545 ms +2025-05-02 18:25:07.205 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_tellers SET tbalance = tbalance + -500 WHERE tid = 3852; +2025-05-02 18:25:07.206 EEST [2702613] krl@postgres LOG: statement: UPDATE pgbench_branches SET bbalance = bbalance + -500 WHERE bid = 383; +2025-05-02 18:25:07.206 EEST [2702613] krl@postgres LOG: statement: INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (3852, 383, 7792362, -500, CURRENT_TIMESTAMP); +2025-05-02 18:25:07.206 EEST [2702613] krl@postgres LOG: statement: END; +2025-05-02 18:25:51.151 EEST [2698052] krl@postgres ERROR: syntax error at or near "adsdas" at character 23 +2025-05-02 18:25:51.151 EEST [2698052] krl@postgres STATEMENT: select dadasdas + dasda + adsdas; +2025-05-02 18:26:27.648 EEST [2380175] FATAL: terminating background worker "TimescaleDB Background Worker Scheduler" due to administrator command +2025-05-02 18:26:27.649 EEST [2308351] LOG: background worker "TimescaleDB Background Worker Scheduler" (PID 2380175) exited with exit code 1 diff --git a/tests/grep.rs b/tests/grep.rs new file mode 100644 index 0000000..470332f --- /dev/null +++ b/tests/grep.rs @@ -0,0 +1,63 @@ +use assert_cmd::cargo; +use assert_cmd::prelude::*; +use std::process::Command; + +#[test] +fn grep_finds_matching_lines() -> Result<(), Box> { + let mut cmd = Command::new(cargo::cargo_bin!("pgweasel")); + + cmd.args(["grep", "dadasd", "./tests/files/debian_default.log"]) + .assert() + .success() + .stdout(predicates::str::contains("dasda")); + + Ok(()) +} + +#[test] +fn grep_is_case_insensitive() -> Result<(), Box> { + let mut cmd = Command::new(cargo::cargo_bin!("pgweasel")); + + cmd.args(["grep", "DADASD", "./tests/files/debian_default.log"]) + .assert() + .success() + .stdout(predicates::str::contains("dasda")); + + Ok(()) +} + +#[test] +fn grep_after_context() -> Result<(), Box> { + let mut cmd = Command::new(cargo::cargo_bin!("pgweasel")); + + cmd.args([ + "-A", + "1", + "grep", + "dadasd", + "./tests/files/debian_default.log", + ]) + .assert() + .success() + .stdout(predicates::str::contains("terminating background worker")); + + Ok(()) +} + +#[test] +fn grep_before_context() -> Result<(), Box> { + let mut cmd = Command::new(cargo::cargo_bin!("pgweasel")); + + cmd.args([ + "-B", + "1", + "grep", + "dadasd", + "./tests/files/debian_default.log", + ]) + .assert() + .success() + .stdout(predicates::str::contains("syntax error")); + + Ok(()) +}