diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java index 5dd1616f..9195d94d 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java @@ -78,6 +78,20 @@ private record PatternDef(Pattern regex, String authType) {} private static final Pattern CERT_PATH_RE = Pattern.compile("['\"]([^'\"]*\\.(?:pem|crt|key|cert|pfx|p12))['\"]"); private static final Pattern TENANT_ID_RE = Pattern.compile("AZURE_TENANT_ID\\s*[=:]\\s*['\"]?([a-f0-9-]+)['\"]?"); + // Quick-reject pre-screen: a single regex pass over file content. If no + // distinctive literal substring from any pattern in ALL_PATTERNS is + // present, the file cannot match — short-circuit before the lines × patterns + // double loop. Profiling on polyglot-bench (29.7K files, 14 languages) showed + // this detector accounting for ~27% of detector CPU because it scanned every + // YAML/JSON in supported-languages even when no auth keyword was present. + private static final Pattern PRE_SCREEN = Pattern.compile( + "ssl_verify_client|requestCert|clientAuth|X509|" + + "AddCertificateForwarding|CertificateAuthenticationDefaults|" + + "\\.x509\\(|javax\\.net\\.ssl|SSLContext|tls\\.createServer|" + + "trustStore|AzureAd|AZURE_TENANT_ID|AZURE_CLIENT_ID|" + + "ClientCertificateCredential|AddMicrosoftIdentityWebApi|" + + "msal|MSAL|@azure/msal|\\.pem|\\.crt|\\.cert"); + @Override public String getName() { return "certificate_auth"; @@ -95,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } String filePath = ctx.filePath(); String[] lines = text.split("\n", -1); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java index d46f38ae..2044cd67 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java @@ -59,6 +59,12 @@ public class LdapAuthDetector extends AbstractRegexDetector { "csharp", CSHARP_PATTERNS ); + // Quick-reject pre-screen — see CertificateAuthDetector for rationale. + // Most code files don't mention LDAP at all; one regex pass over content + // skips the lines × patterns double loop in those cases. + private static final Pattern PRE_SCREEN = Pattern.compile( + "(?i:ldap)|DirectoryServices|DirectoryEntry"); + @Override public String getName() { return "ldap_auth"; @@ -80,6 +86,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } List nodes = new ArrayList<>(); String[] lines = text.split("\n", -1); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java index 6ffe5718..1bbdbde2 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java @@ -78,6 +78,17 @@ private record PatternDef(Pattern regex, String authType, NodeKind nodeKind) {} PROP_CSRF, PROP_CSRF ); + // Quick-reject pre-screen — see CertificateAuthDetector for rationale. + // Single regex pass over file content; if no distinctive substring of any + // pattern in ALL_PATTERNS is present, the file cannot match — short-circuit + // before the lines × patterns double loop. Profiling on polyglot-bench + // showed this detector at ~23% of detector CPU; most TS/Python files have + // no auth keyword at all. + private static final Pattern PRE_SCREEN = Pattern.compile( + "express-session|cookie-session|@SessionAttributes|SessionMiddleware|" + + "HttpSession|SESSION_ENGINE|" + + "(?i:X-API|Authorization|api[_-]?key|csurf|csrf|getHeader)"); + @Override public String getName() { return "session_header_auth"; @@ -98,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) { if (text == null || text.isEmpty()) { return DetectorResult.empty(); } + if (!PRE_SCREEN.matcher(text).find()) { + return DetectorResult.empty(); + } List nodes = new ArrayList<>(); String[] lines = text.split("\n", -1);