Skip to content

Commit f80a7dd

Browse files
aksOps and claude committed
fix: resolve ANTLR performance regression — skip parsing for regex-only detectors, add parse cache
Root cause: After ANTLR migration, detectors were paying ANTLR parse cost even when their detectWithAst() just delegated to detectWithRegex(). The JavaScript ANTLR grammar was especially slow (20s for a single 129KB file).

Changes:
- Override detect() in 25+ detectors to skip ANTLR and call regex directly
- ExpressRouteDetector: skip ANTLR entirely (regex produces identical results)
- AntlrParserFactory: add thread-local parse cache so multiple detectors on the same file share a single ANTLR parse (10x reduction for Python)
- Analyzer: clear parse cache after each file, add debug-level perf logging

Benchmark results (before -> after):
- contoso-real-estate: 63s -> 922ms (68x faster)
- spring-boot: 40s -> 25s (38% faster)
- kafka: 87s -> 53s (39% faster)

Node count regression also fixed: contoso back to 4,034 (was 4,018).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5427eeb commit f80a7dd

39 files changed

Lines changed: 180 additions & 211 deletions

src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import io.github.randomcodespace.iq.detector.DetectorRegistry;
88
import io.github.randomcodespace.iq.detector.DetectorResult;
99
import io.github.randomcodespace.iq.detector.DetectorUtils;
10+
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
1011
import io.github.randomcodespace.iq.model.CodeNode;
1112
import org.slf4j.Logger;
1213
import org.slf4j.LoggerFactory;
@@ -193,6 +194,7 @@ public AnalysisResult run(Path repoPath, Consumer<String> onProgress) {
193194
* Analyze a single file: read content, parse if structured, run matching detectors.
194195
*/
195196
DetectorResult analyzeFile(DiscoveredFile file, Path repoPath) {
197+
Instant fileStart = Instant.now();
196198
Path absPath = repoPath.resolve(file.path());
197199

198200
// Read file content
@@ -234,7 +236,13 @@ DetectorResult analyzeFile(DiscoveredFile file, Path repoPath) {
234236

235237
for (Detector detector : detectors) {
236238
try {
239+
Instant detStart = Instant.now();
237240
DetectorResult result = detector.detect(ctx);
241+
long detMs = Duration.between(detStart, Instant.now()).toMillis();
242+
if (detMs > 100) {
243+
log.debug("Slow detector {} on {} ({} bytes): {}ms",
244+
detector.getName(), file.path(), content.length(), detMs);
245+
}
238246
allNodes.addAll(result.nodes());
239247
allEdges.addAll(result.edges());
240248
} catch (Exception e) {
@@ -243,6 +251,14 @@ DetectorResult analyzeFile(DiscoveredFile file, Path repoPath) {
243251
}
244252
}
245253

254+
// Clear ANTLR parse cache after all detectors have run for this file
255+
AntlrParserFactory.clearCache();
256+
257+
long fileMs = Duration.between(fileStart, Instant.now()).toMillis();
258+
if (fileMs > 500) {
259+
log.debug("Slow file {} ({}): {}ms", file.path(), file.language(), fileMs);
260+
}
261+
246262
// Set module on all nodes that don't have one yet
247263
if (moduleName != null) {
248264
for (CodeNode node : allNodes) {

src/main/java/io/github/randomcodespace/iq/detector/AbstractAntlrDetector.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,13 +39,19 @@ public DetectorResult detect(DetectorContext ctx) {
3939
/**
4040
* Parse the source content into an ANTLR parse tree.
4141
* Return null if the language is not supported or content is empty.
42+
* Default returns null (no parse tree); override for AST-based detection.
4243
*/
43-
protected abstract ParseTree parse(DetectorContext ctx);
44+
protected ParseTree parse(DetectorContext ctx) {
45+
return null;
46+
}
4447

4548
/**
4649
* Detect code patterns by walking the ANTLR parse tree.
50+
* Default delegates to regex fallback; override for AST-based detection.
4751
*/
48-
protected abstract DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx);
52+
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
53+
return detectWithRegex(ctx);
54+
}
4955

5056
/**
5157
* Fallback detection using regex when AST parsing fails.

src/main/java/io/github/randomcodespace/iq/detector/cpp/CppStructuresDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -38,13 +37,9 @@ private static boolean isForwardDeclaration(String line) {
3837
return stripped.endsWith(";") && !stripped.contains("{");
3938
}
4039
@Override
41-
protected ParseTree parse(DetectorContext ctx) {
42-
if (!"cpp".equals(ctx.language()) && !"c".equals(ctx.language())) return null;
43-
return AntlrParserFactory.parse("cpp", ctx.content());
44-
}
45-
46-
@Override
47-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
40+
public DetectorResult detect(DetectorContext ctx) {
41+
// Skip ANTLR parsing — regex is the primary detection method for this detector
42+
// ANTLR infrastructure is in place for future enhancement
4843
return detectWithRegex(ctx);
4944
}
5045

src/main/java/io/github/randomcodespace/iq/detector/csharp/CSharpEfcoreDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -31,13 +30,9 @@ public class CSharpEfcoreDetector extends AbstractAntlrDetector {
3130
@Override
3231
public Set<String> getSupportedLanguages() { return Set.of("csharp"); }
3332
@Override
34-
protected ParseTree parse(DetectorContext ctx) {
35-
if (!"csharp".equals(ctx.language())) return null;
36-
return AntlrParserFactory.parse("csharp", ctx.content());
37-
}
38-
39-
@Override
40-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
33+
public DetectorResult detect(DetectorContext ctx) {
34+
// Skip ANTLR parsing — regex is the primary detection method for this detector
35+
// ANTLR infrastructure is in place for future enhancement
4136
return detectWithRegex(ctx);
4237
}
4338

src/main/java/io/github/randomcodespace/iq/detector/csharp/CSharpMinimalApisDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -31,13 +30,9 @@ public class CSharpMinimalApisDetector extends AbstractAntlrDetector {
3130
@Override
3231
public Set<String> getSupportedLanguages() { return Set.of("csharp"); }
3332
@Override
34-
protected ParseTree parse(DetectorContext ctx) {
35-
if (!"csharp".equals(ctx.language())) return null;
36-
return AntlrParserFactory.parse("csharp", ctx.content());
37-
}
38-
39-
@Override
40-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
33+
public DetectorResult detect(DetectorContext ctx) {
34+
// Skip ANTLR parsing — regex is the primary detection method for this detector
35+
// ANTLR infrastructure is in place for future enhancement
4136
return detectWithRegex(ctx);
4237
}
4338

src/main/java/io/github/randomcodespace/iq/detector/csharp/CSharpStructuresDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -34,13 +33,9 @@ public class CSharpStructuresDetector extends AbstractAntlrDetector {
3433
@Override
3534
public Set<String> getSupportedLanguages() { return Set.of("csharp"); }
3635
@Override
37-
protected ParseTree parse(DetectorContext ctx) {
38-
if (!"csharp".equals(ctx.language())) return null;
39-
return AntlrParserFactory.parse("csharp", ctx.content());
40-
}
41-
42-
@Override
43-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
36+
public DetectorResult detect(DetectorContext ctx) {
37+
// Skip ANTLR parsing — regex is the primary detection method for this detector
38+
// ANTLR infrastructure is in place for future enhancement
4439
return detectWithRegex(ctx);
4540
}
4641

src/main/java/io/github/randomcodespace/iq/detector/go/GoOrmDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -48,13 +47,9 @@ private static String detectOrm(String text) {
4847
return null;
4948
}
5049
@Override
51-
protected ParseTree parse(DetectorContext ctx) {
52-
if (!"go".equals(ctx.language())) return null;
53-
return AntlrParserFactory.parse("go", ctx.content());
54-
}
55-
56-
@Override
57-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
50+
public DetectorResult detect(DetectorContext ctx) {
51+
// Skip ANTLR parsing — regex is the primary detection method for this detector
52+
// ANTLR infrastructure is in place for future enhancement
5853
return detectWithRegex(ctx);
5954
}
6055

src/main/java/io/github/randomcodespace/iq/detector/go/GoStructuresDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -37,13 +36,9 @@ public Set<String> getSupportedLanguages() {
3736
return Set.of("go");
3837
}
3938
@Override
40-
protected ParseTree parse(DetectorContext ctx) {
41-
if (!"go".equals(ctx.language())) return null;
42-
return AntlrParserFactory.parse("go", ctx.content());
43-
}
44-
45-
@Override
46-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
39+
public DetectorResult detect(DetectorContext ctx) {
40+
// Skip ANTLR parsing — regex is the primary detection method for this detector
41+
// ANTLR infrastructure is in place for future enhancement
4742
return detectWithRegex(ctx);
4843
}
4944

src/main/java/io/github/randomcodespace/iq/detector/go/GoWebDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeNode;
@@ -45,13 +44,9 @@ private static String detectFramework(String text) {
4544
return "net_http";
4645
}
4746
@Override
48-
protected ParseTree parse(DetectorContext ctx) {
49-
if (!"go".equals(ctx.language())) return null;
50-
return AntlrParserFactory.parse("go", ctx.content());
51-
}
52-
53-
@Override
54-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
47+
public DetectorResult detect(DetectorContext ctx) {
48+
// Skip ANTLR parsing — regex is the primary detection method for this detector
49+
// ANTLR infrastructure is in place for future enhancement
5550
return detectWithRegex(ctx);
5651
}
5752

src/main/java/io/github/randomcodespace/iq/detector/kotlin/KotlinStructuresDetector.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
5-
import org.antlr.v4.runtime.tree.ParseTree;
65
import io.github.randomcodespace.iq.detector.DetectorContext;
76
import io.github.randomcodespace.iq.detector.DetectorResult;
87
import io.github.randomcodespace.iq.model.CodeEdge;
@@ -32,13 +31,9 @@ public class KotlinStructuresDetector extends AbstractAntlrDetector {
3231
@Override
3332
public Set<String> getSupportedLanguages() { return Set.of("kotlin"); }
3433
@Override
35-
protected ParseTree parse(DetectorContext ctx) {
36-
if (!"kotlin".equals(ctx.language())) return null;
37-
return AntlrParserFactory.parse("kotlin", ctx.content());
38-
}
39-
40-
@Override
41-
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
34+
public DetectorResult detect(DetectorContext ctx) {
35+
// Skip ANTLR parsing — regex is the primary detection method for this detector
36+
// ANTLR infrastructure is in place for future enhancement
4237
return detectWithRegex(ctx);
4338
}
4439

0 commit comments

Comments
 (0)