Skip to content

Commit 6886769

Browse files
aksOps authored and claude committed
fix: skip ANTLR for TS/JS grammars and harden H2 concurrency
The TypeScript ANTLR grammar has exponential ATN prediction on certain inputs regardless of file size (e.g. .d.ts, .mjs, .cjs files taking 10-40+ seconds even at <1KB). Since all TS/JS detectors already have comprehensive regex fallback paths, ANTLR is now skipped entirely for TypeScript and JavaScript in AntlrParserFactory.parse().

The H2 AnalysisCache synchronized methods are replaced with a ReentrantReadWriteLock to prevent ClosedChannelException from concurrent virtual thread writes to H2's MVStore file channel. Read operations use readLock; write operations use writeLock. The H2 connection URL also adds DB_CLOSE_ON_EXIT=FALSE and WRITE_DELAY=0 to prevent background writes that can corrupt the file channel.

Benchmark results on 308K files: zero SLOW ANTLR warnings for TS/JS, zero ClosedChannelException/MVStoreException, 2-6ms/file average.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 442591d commit 6886769

3 files changed

Lines changed: 144 additions & 68 deletions

File tree

src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java

Lines changed: 113 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import java.util.List;
2525
import java.util.Map;
2626
import java.util.UUID;
27+
import java.util.concurrent.locks.ReadWriteLock;
28+
import java.util.concurrent.locks.ReentrantReadWriteLock;
2729

2830
/**
2931
* H2-backed cache for incremental analysis results.
@@ -85,6 +87,15 @@ CREATE TABLE IF NOT EXISTS analysis_runs (
8587

8688
private static final ObjectMapper MAPPER = new ObjectMapper();
8789

90+
/**
91+
* Read-write lock replacing synchronized methods.
92+
* Read operations (isCached, loadCachedResults, getHashForPath, etc.) use readLock.
93+
* Write operations (storeResults, replaceAll, clear, removeFile) use writeLock.
94+
* This prevents ClosedChannelException from concurrent virtual thread writes
95+
* to H2's MVStore file channel.
96+
*/
97+
private final ReadWriteLock rwLock = new ReentrantReadWriteLock();
98+
8899
private final Connection conn;
89100
private final Path dbPath;
90101

@@ -106,7 +117,7 @@ public AnalysisCache(Path dbPath) {
106117
dbFile = dbFile.substring(0, dbFile.length() - 3);
107118
}
108119
this.conn = DriverManager.getConnection(
109-
"jdbc:h2:file:" + dbFile + ";AUTO_SERVER=FALSE;MODE=MySQL");
120+
"jdbc:h2:file:" + dbFile + ";AUTO_SERVER=FALSE;MODE=MySQL;DB_CLOSE_ON_EXIT=FALSE;WRITE_DELAY=0");
110121
initDb();
111122
} catch (Exception e) {
112123
throw new RuntimeException("Failed to open analysis cache at " + dbPath, e);
@@ -158,7 +169,8 @@ private void checkCacheVersion() throws SQLException {
158169
/**
159170
* Return the commit SHA from the most recent analysis run, or null.
160171
*/
161-
public synchronized String getLastCommit() {
172+
public String getLastCommit() {
173+
rwLock.readLock().lock();
162174
try (var stmt = conn.prepareStatement(
163175
"SELECT commit_sha FROM analysis_runs ORDER BY timestamp DESC LIMIT 1")) {
164176
try (ResultSet rs = stmt.executeQuery()) {
@@ -168,6 +180,8 @@ public synchronized String getLastCommit() {
168180
}
169181
} catch (SQLException e) {
170182
log.debug("Failed to get last commit", e);
183+
} finally {
184+
rwLock.readLock().unlock();
171185
}
172186
return null;
173187
}
@@ -177,7 +191,8 @@ public synchronized String getLastCommit() {
177191
/**
178192
* Check whether results for the given content hash are cached.
179193
*/
180-
public synchronized boolean isCached(String contentHash) {
194+
public boolean isCached(String contentHash) {
195+
rwLock.readLock().lock();
181196
try (var stmt = conn.prepareStatement(
182197
"SELECT 1 FROM files WHERE content_hash = ?")) {
183198
stmt.setString(1, contentHash);
@@ -187,14 +202,17 @@ public synchronized boolean isCached(String contentHash) {
187202
} catch (SQLException e) {
188203
log.debug("Cache lookup failed", e);
189204
return false;
205+
} finally {
206+
rwLock.readLock().unlock();
190207
}
191208
}
192209

193210
/**
194211
* Look up the content hash stored for a given file path.
195212
* Returns null if the path has not been cached yet.
196213
*/
197-
public synchronized String getHashForPath(String filePath) {
214+
public String getHashForPath(String filePath) {
215+
rwLock.readLock().lock();
198216
try (var stmt = conn.prepareStatement(
199217
"SELECT content_hash FROM files WHERE path = ? LIMIT 1")) {
200218
stmt.setString(1, filePath);
@@ -204,6 +222,8 @@ public synchronized String getHashForPath(String filePath) {
204222
} catch (SQLException e) {
205223
log.debug("Hash lookup by path failed", e);
206224
return null;
225+
} finally {
226+
rwLock.readLock().unlock();
207227
}
208228
}
209229

@@ -212,8 +232,9 @@ public synchronized String getHashForPath(String filePath) {
212232
/**
213233
* Persist analysis results for a single file.
214234
*/
215-
public synchronized void storeResults(String contentHash, String filePath, String language,
235+
public void storeResults(String contentHash, String filePath, String language,
216236
List<CodeNode> nodes, List<CodeEdge> edges) {
237+
rwLock.writeLock().lock();
217238
try {
218239
conn.setAutoCommit(false);
219240
String now = Instant.now().toString();
@@ -277,6 +298,7 @@ public synchronized void storeResults(String contentHash, String filePath, Strin
277298
conn.setAutoCommit(true);
278299
} catch (SQLException ignored) {
279300
}
301+
rwLock.writeLock().unlock();
280302
}
281303
}
282304

@@ -287,7 +309,8 @@ public synchronized void storeResults(String contentHash, String filePath, Strin
287309
*
288310
* @return a CachedResult with the nodes and edges, or null if not cached
289311
*/
290-
public synchronized CachedResult loadCachedResults(String contentHash) {
312+
public CachedResult loadCachedResults(String contentHash) {
313+
rwLock.readLock().lock();
291314
try {
292315
List<CodeNode> nodes = new ArrayList<>();
293316
try (var stmt = conn.prepareStatement("SELECT data FROM nodes WHERE content_hash = ?")) {
@@ -318,6 +341,8 @@ public synchronized CachedResult loadCachedResults(String contentHash) {
318341
} catch (SQLException e) {
319342
log.debug("Failed to load cached results for hash {}", contentHash, e);
320343
return null;
344+
} finally {
345+
rwLock.readLock().unlock();
321346
}
322347
}
323348

@@ -326,7 +351,8 @@ public synchronized CachedResult loadCachedResults(String contentHash) {
326351
/**
327352
* Delete all cached results associated with a content hash.
328353
*/
329-
public synchronized void removeFile(String contentHash) {
354+
public void removeFile(String contentHash) {
355+
rwLock.writeLock().lock();
330356
try {
331357
conn.setAutoCommit(false);
332358
try (var stmt = conn.prepareStatement("DELETE FROM nodes WHERE content_hash = ?")) {
@@ -353,6 +379,7 @@ public synchronized void removeFile(String contentHash) {
353379
conn.setAutoCommit(true);
354380
} catch (SQLException ignored) {
355381
}
382+
rwLock.writeLock().unlock();
356383
}
357384
}
358385

@@ -361,7 +388,8 @@ public synchronized void removeFile(String contentHash) {
361388
/**
362389
* Record an analysis run with its commit SHA and file count.
363390
*/
364-
public synchronized void recordRun(String commitSha, int fileCount) {
391+
public void recordRun(String commitSha, int fileCount) {
392+
rwLock.writeLock().lock();
365393
try (var stmt = conn.prepareStatement(
366394
"INSERT INTO analysis_runs (run_id, commit_sha, timestamp, file_count) VALUES (?, ?, ?, ?)")) {
367395
stmt.setString(1, UUID.randomUUID().toString());
@@ -371,6 +399,8 @@ public synchronized void recordRun(String commitSha, int fileCount) {
371399
stmt.execute();
372400
} catch (SQLException e) {
373401
log.warn("Failed to record analysis run", e);
402+
} finally {
403+
rwLock.writeLock().unlock();
374404
}
375405
}
376406

@@ -379,31 +409,39 @@ public synchronized void recordRun(String commitSha, int fileCount) {
379409
/**
380410
* Return cache statistics.
381411
*/
382-
public synchronized Map<String, Object> getStats() {
383-
Map<String, Object> stats = new LinkedHashMap<>();
412+
public Map<String, Object> getStats() {
413+
rwLock.readLock().lock();
384414
try {
385-
stats.put("cached_files", countFiles());
386-
stats.put("cached_nodes", getNodeCount());
387-
stats.put("cached_edges", countEdges());
388-
stats.put("total_runs", countAnalysisRuns());
389-
stats.put("db_path", dbPath.toString());
390-
} catch (SQLException e) {
391-
stats.put("error", e.getMessage());
415+
Map<String, Object> stats = new LinkedHashMap<>();
416+
try {
417+
stats.put("cached_files", countFiles());
418+
stats.put("cached_nodes", countNodesInternal());
419+
stats.put("cached_edges", countEdges());
420+
stats.put("total_runs", countAnalysisRuns());
421+
stats.put("db_path", dbPath.toString());
422+
} catch (SQLException e) {
423+
stats.put("error", e.getMessage());
424+
}
425+
return stats;
426+
} finally {
427+
rwLock.readLock().unlock();
392428
}
393-
return stats;
394429
}
395430

396431
/**
397432
* Clear all cached data.
398433
*/
399-
public synchronized void clear() {
434+
public void clear() {
435+
rwLock.writeLock().lock();
400436
try (var stmt = conn.createStatement()) {
401437
stmt.execute("DELETE FROM edges");
402438
stmt.execute("DELETE FROM nodes");
403439
stmt.execute("DELETE FROM files");
404440
stmt.execute("DELETE FROM analysis_runs");
405441
} catch (SQLException e) {
406442
log.warn("Failed to clear cache", e);
443+
} finally {
444+
rwLock.writeLock().unlock();
407445
}
408446
}
409447

@@ -420,7 +458,8 @@ public synchronized void clear() {
420458
* @param nodes enriched nodes (including new SERVICE nodes, layer classifications, etc.)
421459
* @param edges enriched edges (including linker edges, CONTAINS edges, etc.)
422460
*/
423-
public synchronized void replaceAll(List<CodeNode> nodes, List<CodeEdge> edges) {
461+
public void replaceAll(List<CodeNode> nodes, List<CodeEdge> edges) {
462+
rwLock.writeLock().lock();
424463
try {
425464
conn.setAutoCommit(false);
426465

@@ -484,6 +523,7 @@ public synchronized void replaceAll(List<CodeNode> nodes, List<CodeEdge> edges)
484523
conn.setAutoCommit(true);
485524
} catch (SQLException ignored) {
486525
}
526+
rwLock.writeLock().unlock();
487527
}
488528
}
489529

@@ -633,26 +673,39 @@ private long countAnalysisRuns() throws SQLException {
633673
/**
634674
* Return the total number of cached nodes.
635675
*/
636-
public synchronized long getNodeCount() {
676+
public long getNodeCount() {
677+
rwLock.readLock().lock();
678+
try {
679+
return countNodesInternal();
680+
} catch (SQLException e) {
681+
log.debug("Failed to count nodes", e);
682+
return 0;
683+
} finally {
684+
rwLock.readLock().unlock();
685+
}
686+
}
687+
688+
/** Internal node count -- caller must hold the appropriate lock. */
689+
private long countNodesInternal() throws SQLException {
637690
try (var stmt = conn.createStatement();
638691
ResultSet rs = stmt.executeQuery("SELECT COUNT(DISTINCT id) FROM nodes")) {
639692
rs.next();
640693
return rs.getLong(1);
641-
} catch (SQLException e) {
642-
log.debug("Failed to count nodes", e);
643-
return 0;
644694
}
645695
}
646696

647697
/**
648698
* Return the total number of cached edges.
649699
*/
650-
public synchronized long getEdgeCount() {
700+
public long getEdgeCount() {
701+
rwLock.readLock().lock();
651702
try {
652703
return countEdges();
653704
} catch (SQLException e) {
654705
log.debug("Failed to count edges", e);
655706
return 0;
707+
} finally {
708+
rwLock.readLock().unlock();
656709
}
657710
}
658711

@@ -677,44 +730,54 @@ public void storeBatchResults(String batchId, String filePath, String language,
677730
*
678731
* @return list of all cached nodes
679732
*/
680-
public synchronized List<CodeNode> loadAllNodes() {
681-
List<CodeNode> nodes = new ArrayList<>();
682-
// Deduplicate by id, keeping the LAST inserted version (most complete data)
683-
try (var stmt = conn.prepareStatement("""
684-
SELECT n.data FROM nodes n
685-
INNER JOIN (SELECT id, MAX(row_id) AS max_id FROM nodes GROUP BY id) m
686-
ON n.id = m.id AND n.row_id = m.max_id
687-
""")) {
688-
try (ResultSet rs = stmt.executeQuery()) {
689-
while (rs.next()) {
690-
CodeNode node = deserializeNode(rs.getString(1));
691-
if (node != null) nodes.add(node);
733+
public List<CodeNode> loadAllNodes() {
734+
rwLock.readLock().lock();
735+
try {
736+
List<CodeNode> nodes = new ArrayList<>();
737+
// Deduplicate by id, keeping the LAST inserted version (most complete data)
738+
try (var stmt = conn.prepareStatement("""
739+
SELECT n.data FROM nodes n
740+
INNER JOIN (SELECT id, MAX(row_id) AS max_id FROM nodes GROUP BY id) m
741+
ON n.id = m.id AND n.row_id = m.max_id
742+
""")) {
743+
try (ResultSet rs = stmt.executeQuery()) {
744+
while (rs.next()) {
745+
CodeNode node = deserializeNode(rs.getString(1));
746+
if (node != null) nodes.add(node);
747+
}
692748
}
749+
} catch (SQLException e) {
750+
log.debug("Failed to load all nodes", e);
693751
}
694-
} catch (SQLException e) {
695-
log.debug("Failed to load all nodes", e);
752+
return nodes;
753+
} finally {
754+
rwLock.readLock().unlock();
696755
}
697-
return nodes;
698756
}
699757

700758
/**
701759
* Load all cached edges across all files.
702760
*
703761
* @return list of all cached edges
704762
*/
705-
public synchronized List<CodeEdge> loadAllEdges() {
706-
List<CodeEdge> edges = new ArrayList<>();
707-
try (var stmt = conn.prepareStatement("SELECT data FROM edges")) {
708-
try (ResultSet rs = stmt.executeQuery()) {
709-
while (rs.next()) {
710-
CodeEdge edge = deserializeEdge(rs.getString(1));
711-
if (edge != null) edges.add(edge);
763+
public List<CodeEdge> loadAllEdges() {
764+
rwLock.readLock().lock();
765+
try {
766+
List<CodeEdge> edges = new ArrayList<>();
767+
try (var stmt = conn.prepareStatement("SELECT data FROM edges")) {
768+
try (ResultSet rs = stmt.executeQuery()) {
769+
while (rs.next()) {
770+
CodeEdge edge = deserializeEdge(rs.getString(1));
771+
if (edge != null) edges.add(edge);
772+
}
712773
}
774+
} catch (SQLException e) {
775+
log.debug("Failed to load all edges", e);
713776
}
714-
} catch (SQLException e) {
715-
log.debug("Failed to load all edges", e);
777+
return edges;
778+
} finally {
779+
rwLock.readLock().unlock();
716780
}
717-
return edges;
718781
}
719782

720783
/**

src/main/java/io/github/randomcodespace/iq/grammar/AntlrParserFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,17 @@ public static ParseTree parse(String language, String content) {
102102
return null;
103103
}
104104

105+
// Skip ANTLR for TypeScript/JavaScript entirely — the TS grammar has exponential
106+
// ATN prediction on certain inputs regardless of file size (e.g. .d.ts, .mjs, .cjs
107+
// files taking 10-40+ seconds even at <1KB). All TS/JS detectors already have
108+
// comprehensive regex fallback paths, so ANTLR adds risk without value here.
109+
String lang = language.toLowerCase();
110+
if ("typescript".equals(lang) || "javascript".equals(lang)) {
111+
log.debug("Skipping ANTLR parse for {} — using regex fallback (TS/JS grammar bypass)",
112+
language);
113+
return null;
114+
}
115+
105116
// Check thread-local cache — same content object means same file
106117
var cached = PARSE_CACHE.get();
107118
if (cached != null && cached.getKey() == content) {

0 commit comments

Comments (0)