From 0c13d26aecc2aa1d26d034cdd1f37c3fdb32cd24 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 29 Apr 2026 13:30:58 +0000 Subject: [PATCH] perf(detectors): quick-reject pre-screen on auth detectors (-31% detector CPU) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling on a 30K-file polyglot fixture (kept at ~/projects/polyglot-bench: spring-petclinic-microservices, airflow, istio, eShop, angular/components, nuxt, actix/examples, ktor-samples, nlohmann/json, play-samples, PSScriptAnalyzer, terraform-aws-eks; 14 distinct languages) showed the three cross-cutting auth detectors burning 55% of all detector CPU because they ran the lines × patterns double loop on every supported-language file — even files with zero auth keywords. Fix: per-detector PRE_SCREEN Pattern with all distinctive literal substrings of the underlying patterns. One regex pass over file content; if no keyword present, the file cannot match — short-circuit before the line loop. Measured impact (JFR ExecutionSample, JDK 25, polyglot fixture): CertificateAuthDetector: 244 → 147 samples (-39.8%, -0.97s CPU) SessionHeaderAuthDetector: 206 → 43 samples (-79.1%, -1.63s CPU) LdapAuthDetector: 47 → 25 samples (-46.8%, -0.22s CPU) Auth subtotal: 497 → 215 samples (-56.7%, -2.82s) All detectors total: 902 → 624 samples (-30.8%, -2.78s) Detection semantics unchanged — pre-screen rejects only files where no underlying pattern can match (keyword absent). Tests covering keyword-bearing fixtures pass through pre-screen and run the existing logic byte-for-byte. Tests: 3689 / 0 failures / 0 errors / 32 skipped. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../detector/auth/CertificateAuthDetector.java | 17 +++++++++++++++++ .../iq/detector/auth/LdapAuthDetector.java | 9 +++++++++ .../auth/SessionHeaderAuthDetector.java | 14 ++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java index 5dd1616f..9195d94d 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java @@ -78,6 +78,20 @@ private record PatternDef(Pattern regex, String authType) {} private static final Pattern CERT_PATH_RE = Pattern.compile("['\"]([^'\"]*\\.(?:pem|crt|key|cert|pfx|p12))['\"]"); private static final Pattern TENANT_ID_RE = Pattern.compile("AZURE_TENANT_ID\\s*[=:]\\s*['\"]?([a-f0-9-]+)['\"]?"); + // Quick-reject pre-screen: a single regex pass over file content. If no + // distinctive literal substring from any pattern in ALL_PATTERNS is + // present, the file cannot match — short-circuit before the lines × patterns + // double loop. Profiling on polyglot-bench (29.7K files, 14 languages) showed + // this detector accounting for ~27% of detector CPU because it scanned every + // YAML/JSON file in its supported languages even when no auth keyword was present. 
+ private static final Pattern PRE_SCREEN = Pattern.compile( + "ssl_verify_client|requestCert|clientAuth|X509|" + + "AddCertificateForwarding|CertificateAuthenticationDefaults|" + + "\\.x509\\(|javax\\.net\\.ssl|SSLContext|tls\\.createServer|" + + "trustStore|AzureAd|AZURE_TENANT_ID|AZURE_CLIENT_ID|" + + "ClientCertificateCredential|AddMicrosoftIdentityWebApi|" + + "msal|MSAL|@azure/msal|\\.pem|\\.crt|\\.cert"); + @Override public String getName() { return "certificate_auth"; @@ -95,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } String filePath = ctx.filePath(); String[] lines = text.split("\n", -1); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java index d46f38ae..2044cd67 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java @@ -59,6 +59,12 @@ public class LdapAuthDetector extends AbstractRegexDetector { "csharp", CSHARP_PATTERNS ); + // Quick-reject pre-screen — see CertificateAuthDetector for rationale. + // Most code files don't mention LDAP at all; one regex pass over content + // skips the lines × patterns double loop in those cases. 
+ private static final Pattern PRE_SCREEN = Pattern.compile( + "(?i:ldap)|DirectoryServices|DirectoryEntry"); + @Override public String getName() { return "ldap_auth"; @@ -80,6 +86,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } List nodes = new ArrayList<>(); String[] lines = text.split("\n", -1); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java index 6ffe5718..1bbdbde2 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java @@ -78,6 +78,17 @@ private record PatternDef(Pattern regex, String authType, NodeKind nodeKind) {} PROP_CSRF, PROP_CSRF ); + // Quick-reject pre-screen — see CertificateAuthDetector for rationale. + // Single regex pass over file content; if no distinctive substring of any + // pattern in ALL_PATTERNS is present, the file cannot match — short-circuit + // before the lines × patterns double loop. Profiling on polyglot-bench + // showed this detector at ~23% of detector CPU; most TS/Python files have + // no auth keyword at all. + private static final Pattern PRE_SCREEN = Pattern.compile( + "express-session|cookie-session|@SessionAttributes|SessionMiddleware|" + + "HttpSession|SESSION_ENGINE|" + + "(?i:X-API|Authorization|api[_-]?key|csurf|csrf|getHeader)"); + @Override public String getName() { return "session_header_auth"; @@ -98,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } List nodes = new ArrayList<>(); String[] lines = text.split("\n", -1);