Skip to content

Commit d7a6813

Browse files
aksOps authored and claude committed
fix: parallelize LanguageEnricher with per-file timeout and minified skip
LanguageEnricher.enrich() was single-threaded, reading every file from disk and running extractors sequentially — causing the enrich command to hang or take very long on large codebases (44K+ files).

Fix:
- Parallelize with virtual threads (same pattern as Analyzer)
- Add 30-second per-file timeout with cancel(true)
- Add bounded executor shutdown (10s + shutdownNow + 5s)
- Add isLikelyMinified() check to skip JS/TS files >50KB with avg line >1000 chars (catches webpack output without .min suffix)
- Add Thread.interrupted() checks for cooperative cancellation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent f855c8a commit d7a6813

1 file changed

Lines changed: 104 additions & 22 deletions

File tree

src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricher.java

Lines changed: 104 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
import java.util.List;
1717
import java.util.Map;
1818
import java.util.TreeMap;
19+
import java.util.concurrent.Executors;
20+
import java.util.concurrent.Future;
21+
import java.util.concurrent.TimeUnit;
1922

2023
/**
2124
* Runs all {@link LanguageExtractor} beans after {@link io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher}
@@ -80,44 +83,106 @@ public void enrich(List<CodeNode> nodes, List<CodeEdge> edges, Path rootPath) {
8083
}
8184
}
8285

83-
int edgesAdded = 0;
84-
int typeHintsAdded = 0;
85-
86+
// Collect files that have a matching extractor
87+
record FileTask(String filePath, List<CodeNode> fileNodes, LanguageExtractor extractor, String language) {}
88+
List<FileTask> tasks = new ArrayList<>();
8689
for (Map.Entry<String, List<CodeNode>> entry : nodesByFile.entrySet()) {
8790
String filePath = entry.getKey();
88-
List<CodeNode> fileNodes = entry.getValue();
89-
9091
String language = detectLanguage(filePath);
9192
if (language == null) continue;
92-
9393
String resolvedLanguage = LANGUAGE_ALIASES.getOrDefault(language, language);
9494
LanguageExtractor extractor = extractorByLanguage.get(resolvedLanguage);
9595
if (extractor == null) continue;
96+
tasks.add(new FileTask(filePath, entry.getValue(), extractor, language));
97+
}
9698

97-
String content = readFile(rootPath, filePath);
98-
if (content == null) continue;
99+
if (tasks.isEmpty()) {
100+
log.info("Language enrichment: no files matched any extractor");
101+
return;
102+
}
99103

100-
DetectorContext ctx = new DetectorContext(filePath, language, content, nodeRegistry, null);
104+
// Process files in parallel with per-file timeout
105+
var newEdges = java.util.Collections.synchronizedList(new ArrayList<CodeEdge>());
106+
var edgesAdded = new java.util.concurrent.atomic.AtomicInteger(0);
107+
var typeHintsAdded = new java.util.concurrent.atomic.AtomicInteger(0);
108+
109+
var executor = Executors.newVirtualThreadPerTaskExecutor();
110+
try {
111+
List<Future<?>> futures = new ArrayList<>(tasks.size());
112+
for (FileTask task : tasks) {
113+
futures.add(executor.submit(() -> {
114+
if (Thread.interrupted()) {
115+
Thread.currentThread().interrupt();
116+
return null;
117+
}
118+
String content = readFile(rootPath, task.filePath());
119+
if (content == null) return null;
120+
121+
// Skip minified files — they hang parsers and contain no useful structure
122+
if (isLikelyMinified(task.filePath(), content)) {
123+
log.debug("Skipping minified file for enrichment: {}", task.filePath());
124+
return null;
125+
}
101126

102-
for (CodeNode node : fileNodes) {
127+
DetectorContext ctx = new DetectorContext(
128+
task.filePath(), task.language(), content, nodeRegistry, null);
129+
130+
for (CodeNode node : task.fileNodes()) {
131+
if (Thread.interrupted()) {
132+
Thread.currentThread().interrupt();
133+
break;
134+
}
135+
try {
136+
LanguageExtractionResult result = task.extractor().extract(ctx, node);
137+
newEdges.addAll(result.callEdges());
138+
newEdges.addAll(result.symbolReferences());
139+
edgesAdded.addAndGet(result.callEdges().size() + result.symbolReferences().size());
140+
for (Map.Entry<String, String> hint : result.typeHints().entrySet()) {
141+
node.getProperties().put(hint.getKey(), hint.getValue());
142+
typeHintsAdded.incrementAndGet();
143+
}
144+
} catch (Exception e) {
145+
log.warn("LanguageExtractor {} failed on node {} in {}: {}",
146+
task.extractor().getClass().getSimpleName(),
147+
node.getId(), task.filePath(), e.getMessage());
148+
}
149+
}
150+
return null;
151+
}));
152+
}
153+
154+
// Collect with per-file timeout
155+
for (int i = 0; i < futures.size(); i++) {
103156
try {
104-
LanguageExtractionResult result = extractor.extract(ctx, node);
105-
edges.addAll(result.callEdges());
106-
edges.addAll(result.symbolReferences());
107-
edgesAdded += result.callEdges().size() + result.symbolReferences().size();
108-
for (Map.Entry<String, String> hint : result.typeHints().entrySet()) {
109-
node.getProperties().put(hint.getKey(), hint.getValue());
110-
typeHintsAdded++;
157+
futures.get(i).get(30, TimeUnit.SECONDS);
158+
} catch (java.util.concurrent.TimeoutException e) {
159+
futures.get(i).cancel(true);
160+
log.warn("Language enrichment timed out for {} (30s), skipping", tasks.get(i).filePath());
161+
} catch (java.util.concurrent.ExecutionException e) {
162+
log.warn("Language enrichment failed for {}: {}", tasks.get(i).filePath(), e.getMessage());
163+
} catch (InterruptedException e) {
164+
Thread.currentThread().interrupt();
165+
break;
166+
}
167+
}
168+
} finally {
169+
executor.shutdown();
170+
try {
171+
if (!executor.awaitTermination(10, TimeUnit.SECONDS)) {
172+
executor.shutdownNow();
173+
if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
174+
log.warn("Language enrichment executor did not terminate cleanly");
111175
}
112-
} catch (Exception e) {
113-
log.warn("LanguageExtractor {} failed on node {} in {}: {}",
114-
extractor.getClass().getSimpleName(), node.getId(), filePath, e.getMessage());
115176
}
177+
} catch (InterruptedException e) {
178+
executor.shutdownNow();
179+
Thread.currentThread().interrupt();
116180
}
117181
}
118182

119-
log.info("Language enrichment: {} edges added, {} type hints added across {} extractors",
120-
edgesAdded, typeHintsAdded, extractorByLanguage.size());
183+
edges.addAll(newEdges);
184+
log.info("Language enrichment: {} edges added, {} type hints added across {} extractors ({} files)",
185+
edgesAdded.get(), typeHintsAdded.get(), extractorByLanguage.size(), tasks.size());
121186
}
122187

123188
private Map<String, CodeNode> buildRegistry(List<CodeNode> nodes) {
@@ -144,6 +209,23 @@ private String readFile(Path rootPath, String filePath) {
144209
}
145210
}
146211

212+
/**
213+
* Check if a file is likely minified (long lines, large size) to skip enrichment.
214+
*/
215+
private static boolean isLikelyMinified(String filePath, String content) {
216+
if (content.length() < 50_000) return false;
217+
String name = filePath.contains("/") ? filePath.substring(filePath.lastIndexOf('/') + 1) : filePath;
218+
boolean jsOrCss = name.endsWith(".js") || name.endsWith(".mjs") || name.endsWith(".cjs")
219+
|| name.endsWith(".css") || name.endsWith(".jsx") || name.endsWith(".ts");
220+
if (!jsOrCss && !name.endsWith(".min.js") && !name.endsWith(".bundle.js")) return false;
221+
int newlines = 0;
222+
for (int i = 0; i < content.length(); i++) {
223+
if (content.charAt(i) == '\n') newlines++;
224+
}
225+
if (newlines == 0) newlines = 1;
226+
return content.length() / newlines > 1000;
227+
}
228+
147229
/**
148230
* Map file extension to language string (mirrors FileDiscovery conventions).
149231
*/

0 commit comments

Comments
 (0)