Skip to content

Commit 0c13d26

Browse files
aksOps and claude committed
perf(detectors): quick-reject pre-screen on auth detectors (-31% detector CPU)
Profiling on a 30K-file polyglot fixture (kept at ~/projects/polyglot-bench: spring-petclinic-microservices, airflow, istio, eShop, angular/components, nuxt, actix/examples, ktor-samples, nlohmann/json, play-samples, PSScriptAnalyzer, terraform-aws-eks; 14 distinct languages) showed the three cross-cutting auth detectors burning 55% of all detector CPU because they ran the lines × patterns double loop on every supported-language file — even files with zero auth keywords.

Fix: per-detector PRE_SCREEN Pattern with all distinctive literal substrings of the underlying patterns. One regex pass over file content; if no keyword is present, the file cannot match — short-circuit before the line loop.

Measured impact (JFR ExecutionSample, JDK 25, polyglot fixture):

CertificateAuthDetector: 244 → 147 samples (-39.8%, -0.97s CPU)
SessionHeaderAuthDetector: 206 → 43 samples (-79.1%, -1.63s CPU)
LdapAuthDetector: 47 → 25 samples (-46.8%, -0.22s CPU)
Auth subtotal: 497 → 215 samples (-56.7%, -2.82s)
All detectors total: 902 → 624 samples (-30.8%, -2.78s)

Detection semantics unchanged — pre-screen rejects only files where no underlying pattern can match (keyword absent). Tests covering keyword-bearing fixtures pass through the pre-screen and run the existing logic byte-for-byte.

Tests: 3689 / 0 failures / 0 errors / 32 skipped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9f0d3d2 commit 0c13d26

3 files changed

Lines changed: 40 additions & 0 deletions

File tree

src/main/java/io/github/randomcodespace/iq/detector/auth/CertificateAuthDetector.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,20 @@ private record PatternDef(Pattern regex, String authType) {}
7878
private static final Pattern CERT_PATH_RE = Pattern.compile("['\"]([^'\"]*\\.(?:pem|crt|key|cert|pfx|p12))['\"]");
7979
private static final Pattern TENANT_ID_RE = Pattern.compile("AZURE_TENANT_ID\\s*[=:]\\s*['\"]?([a-f0-9-]+)['\"]?");
8080

81+
// Quick-reject pre-screen: a single regex pass over file content. If no
82+
// distinctive literal substring from any pattern in ALL_PATTERNS is
83+
// present, the file cannot match — short-circuit before the lines × patterns
84+
// double loop. Profiling on polyglot-bench (29.7K files, 14 languages) showed
85+
// this detector accounting for ~27% of detector CPU because it scanned every
86+
// YAML/JSON file in the supported languages even when no auth keyword was present.
87+
private static final Pattern PRE_SCREEN = Pattern.compile(
88+
"ssl_verify_client|requestCert|clientAuth|X509|"
89+
+ "AddCertificateForwarding|CertificateAuthenticationDefaults|"
90+
+ "\\.x509\\(|javax\\.net\\.ssl|SSLContext|tls\\.createServer|"
91+
+ "trustStore|AzureAd|AZURE_TENANT_ID|AZURE_CLIENT_ID|"
92+
+ "ClientCertificateCredential|AddMicrosoftIdentityWebApi|"
93+
+ "msal|MSAL|@azure/msal|\\.pem|\\.crt|\\.cert");
94+
8195
@Override
8296
public String getName() {
8397
return "certificate_auth";
@@ -95,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) {
95109
if (text == null || text.isEmpty()) {
96110
return DetectorResult.empty();
97111
}
112+
if (!PRE_SCREEN.matcher(text).find()) {
113+
return DetectorResult.empty();
114+
}
98115

99116
String filePath = ctx.filePath();
100117
String[] lines = text.split("\n", -1);

src/main/java/io/github/randomcodespace/iq/detector/auth/LdapAuthDetector.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ public class LdapAuthDetector extends AbstractRegexDetector {
5959
"csharp", CSHARP_PATTERNS
6060
);
6161

62+
// Quick-reject pre-screen — see CertificateAuthDetector for rationale.
63+
// Most code files don't mention LDAP at all; one regex pass over content
64+
// skips the lines × patterns double loop in those cases.
65+
private static final Pattern PRE_SCREEN = Pattern.compile(
66+
"(?i:ldap)|DirectoryServices|DirectoryEntry");
67+
6268
@Override
6369
public String getName() {
6470
return "ldap_auth";
@@ -80,6 +86,9 @@ public DetectorResult detect(DetectorContext ctx) {
8086
if (text == null || text.isEmpty()) {
8187
return DetectorResult.empty();
8288
}
89+
if (!PRE_SCREEN.matcher(text).find()) {
90+
return DetectorResult.empty();
91+
}
8392

8493
List<CodeNode> nodes = new ArrayList<>();
8594
String[] lines = text.split("\n", -1);

src/main/java/io/github/randomcodespace/iq/detector/auth/SessionHeaderAuthDetector.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@ private record PatternDef(Pattern regex, String authType, NodeKind nodeKind) {}
7878
PROP_CSRF, PROP_CSRF
7979
);
8080

81+
// Quick-reject pre-screen — see CertificateAuthDetector for rationale.
82+
// Single regex pass over file content; if no distinctive substring of any
83+
// pattern in ALL_PATTERNS is present, the file cannot match — short-circuit
84+
// before the lines × patterns double loop. Profiling on polyglot-bench
85+
// showed this detector at ~23% of detector CPU; most TS/Python files have
86+
// no auth keyword at all.
87+
private static final Pattern PRE_SCREEN = Pattern.compile(
88+
"express-session|cookie-session|@SessionAttributes|SessionMiddleware|"
89+
+ "HttpSession|SESSION_ENGINE|"
90+
+ "(?i:X-API|Authorization|api[_-]?key|csurf|csrf|getHeader)");
91+
8192
@Override
8293
public String getName() {
8394
return "session_header_auth";
@@ -98,6 +109,9 @@ public DetectorResult detect(DetectorContext ctx) {
98109
if (text == null || text.isEmpty()) {
99110
return DetectorResult.empty();
100111
}
112+
if (!PRE_SCREEN.matcher(text).find()) {
113+
return DetectorResult.empty();
114+
}
101115

102116
List<CodeNode> nodes = new ArrayList<>();
103117
String[] lines = text.split("\n", -1);

0 commit comments

Comments
 (0)