Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ private record PatternDef(Pattern regex, String authType) {}
private static final Pattern CERT_PATH_RE = Pattern.compile("['\"]([^'\"]*\\.(?:pem|crt|key|cert|pfx|p12))['\"]");
private static final Pattern TENANT_ID_RE = Pattern.compile("AZURE_TENANT_ID\\s*[=:]\\s*['\"]?([a-f0-9-]+)['\"]?");

// Quick-reject pre-screen: a single regex pass over the whole file content,
// run before the lines × patterns double loop. The alternation below lists
// distinctive literal substrings of the patterns in ALL_PATTERNS; if none of
// them occurs anywhere in the file, no pattern can match and detect() returns
// an empty result immediately.
//
// Why it exists: profiling on polyglot-bench (29.7K files, 14 languages)
// showed this detector accounting for ~27% of detector CPU because it scanned
// every YAML/JSON in supported-languages even when no auth keyword was present.
//
// Notes on the expression itself:
//  - Matching is case-sensitive, hence both "msal" and "MSAL" are listed.
//  - The "@azure/msal" branch is subsumed by "msal": any text containing the
//    former also contains the latter.
//  - NOTE(review): CERT_PATH_RE also accepts .key, .pfx and .p12, which are
//    not listed here — confirm that no entry in ALL_PATTERNS can match on
//    those extensions alone, otherwise such files are rejected too early.
//  - A pattern added to ALL_PATTERNS needs a corresponding literal here, or
//    files that only match the new pattern will be skipped by the pre-screen.
private static final Pattern PRE_SCREEN = Pattern.compile(
"ssl_verify_client|requestCert|clientAuth|X509|"
+ "AddCertificateForwarding|CertificateAuthenticationDefaults|"
+ "\\.x509\\(|javax\\.net\\.ssl|SSLContext|tls\\.createServer|"
+ "trustStore|AzureAd|AZURE_TENANT_ID|AZURE_CLIENT_ID|"
+ "ClientCertificateCredential|AddMicrosoftIdentityWebApi|"
+ "msal|MSAL|@azure/msal|\\.pem|\\.crt|\\.cert");

@Override
public String getName() {
return "certificate_auth";
Expand All @@ -95,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) {
if (text == null || text.isEmpty()) {
return DetectorResult.empty();
}
if (!PRE_SCREEN.matcher(text).find()) {
return DetectorResult.empty();
}

String filePath = ctx.filePath();
String[] lines = text.split("\n", -1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ public class LdapAuthDetector extends AbstractRegexDetector {
"csharp", CSHARP_PATTERNS
);

// Quick-reject pre-screen — see CertificateAuthDetector for rationale.
// Most code files don't mention LDAP at all; one regex pass over the file
// content lets detect() return an empty result before the lines × patterns
// double loop in those cases.
//
// "ldap" is matched case-insensitively (the inline (?i:...) flag applies to
// that group only); "DirectoryServices" and "DirectoryEntry" are matched
// case-sensitively.
private static final Pattern PRE_SCREEN = Pattern.compile(
"(?i:ldap)|DirectoryServices|DirectoryEntry");

@Override
public String getName() {
return "ldap_auth";
Expand All @@ -80,6 +86,9 @@ public DetectorResult detect(DetectorContext ctx) {
if (text == null || text.isEmpty()) {
return DetectorResult.empty();
}
if (!PRE_SCREEN.matcher(text).find()) {
return DetectorResult.empty();
}

List<CodeNode> nodes = new ArrayList<>();
String[] lines = text.split("\n", -1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,17 @@ private record PatternDef(Pattern regex, String authType, NodeKind nodeKind) {}
PROP_CSRF, PROP_CSRF
);

// Quick-reject pre-screen — see CertificateAuthDetector for rationale.
// A single regex pass over the file content; if no distinctive substring of
// any pattern in ALL_PATTERNS is present, the file cannot match, so detect()
// returns an empty result before the lines × patterns double loop.
// Profiling on polyglot-bench showed this detector at ~23% of detector CPU;
// most TS/Python files have no auth keyword at all.
//
// Notes on the expression itself:
//  - The session-related literals on the first two lines are case-sensitive.
//  - Everything inside the trailing (?i:...) group is case-insensitive;
//    "api[_-]?key" covers "apikey", "api_key" and "api-key".
//  - "csurf" is listed separately because "csrf" is not a substring of it.
//  - A pattern added to ALL_PATTERNS needs a corresponding literal here, or
//    files that only match the new pattern will be skipped by the pre-screen.
private static final Pattern PRE_SCREEN = Pattern.compile(
"express-session|cookie-session|@SessionAttributes|SessionMiddleware|"
+ "HttpSession|SESSION_ENGINE|"
+ "(?i:X-API|Authorization|api[_-]?key|csurf|csrf|getHeader)");

@Override
public String getName() {
return "session_header_auth";
Expand All @@ -98,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) {
if (text == null || text.isEmpty()) {
return DetectorResult.empty();
}
if (!PRE_SCREEN.matcher(text).find()) {
return DetectorResult.empty();
}

List<CodeNode> nodes = new ArrayList<>();
String[] lines = text.split("\n", -1);
Expand Down
Loading