Skip to content

Commit ff517b7

Browse files
aksOps and claude committed
Migrate analysis cache from SQLite to H2 for virtual thread compatibility
SQLite JDBC uses JNI which pins virtual threads to platform threads, killing parallelism. H2 is pure Java with MVCC concurrency — no JNI, fully virtual-thread friendly. Also adds minified file detection (skip detectors for *.min.js etc.) and Neo4j shutdown hook hardening. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c00a47c commit ff517b7

7 files changed

Lines changed: 97 additions & 46 deletions

File tree

pom.xml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,10 @@
128128
<version>4.13.2</version>
129129
</dependency>
130130

131-
<!-- SQLite JDBC for incremental analysis cache -->
131+
<!-- H2 Database for incremental analysis cache (pure Java — no JNI, virtual-thread friendly) -->
132132
<dependency>
133-
<groupId>org.xerial</groupId>
134-
<artifactId>sqlite-jdbc</artifactId>
135-
<version>3.49.1.0</version>
133+
<groupId>com.h2database</groupId>
134+
<artifactId>h2</artifactId>
136135
</dependency>
137136

138137
<!-- Testing -->

src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import io.github.randomcodespace.iq.detector.DetectorUtils;
1212
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
1313
import io.github.randomcodespace.iq.model.CodeNode;
14+
import io.github.randomcodespace.iq.model.NodeKind;
1415
import org.slf4j.Logger;
1516
import org.slf4j.LoggerFactory;
1617
import org.springframework.stereotype.Service;
@@ -302,6 +303,32 @@ private AnalysisResult runWithCache(Path root, Integer parallelism, AnalysisCach
302303
/**
303304
* Analyze a single file: read content, parse if structured, run matching detectors.
304305
*/
306+
/**
307+
* Check whether a file is minified (e.g. *.min.js, *.bundle.js) and large
308+
* enough that running detectors would be wasteful.
309+
* <p>
310+
* Heuristic: filename ends with .min.js, .bundle.js, .min.css or .min.mjs, file is &gt; 10 KB,
311+
* and average line length exceeds 500 characters.
312+
*/
313+
private boolean isMinified(DiscoveredFile file, String content) {
314+
String name = file.path().getFileName().toString();
315+
if (!(name.endsWith(".min.js") || name.endsWith(".bundle.js")
316+
|| name.endsWith(".min.css") || name.endsWith(".min.mjs"))) {
317+
return false;
318+
}
319+
if (file.sizeBytes() <= 10_240) {
320+
return false;
321+
}
322+
// Average line length check
323+
String[] lines = content.split("\n", -1);
324+
if (lines.length == 0) return false;
325+
long totalChars = 0;
326+
for (String line : lines) {
327+
totalChars += line.length();
328+
}
329+
return (totalChars / lines.length) > 500;
330+
}
331+
305332
DetectorResult analyzeFile(DiscoveredFile file, Path repoPath) {
306333
Instant fileStart = Instant.now();
307334
Path absPath = repoPath.resolve(file.path());
@@ -316,6 +343,20 @@ DetectorResult analyzeFile(DiscoveredFile file, Path repoPath) {
316343
return DetectorResult.empty();
317344
}
318345

346+
// Minified file detection: create a node with minified=true but skip detectors
347+
if (isMinified(file, content)) {
348+
log.debug("Skipping detectors for minified file: {}", file.path());
349+
String moduleName = DetectorUtils.deriveModuleName(file.path().toString(), file.language());
350+
CodeNode node = new CodeNode(
351+
"file:" + file.path() + ":module:" + (moduleName != null ? moduleName : file.path().getFileName().toString()),
352+
NodeKind.MODULE,
353+
file.path().getFileName().toString());
354+
node.setFilePath(file.path().toString());
355+
node.setModule(moduleName);
356+
node.setProperties(new java.util.LinkedHashMap<>(Map.of("minified", true)));
357+
return DetectorResult.of(List.of(node), List.of());
358+
}
359+
319360
// Parse structured data if applicable
320361
Object parsedData = null;
321362
if (STRUCTURED_LANGUAGES.contains(file.language())) {

src/main/java/io/github/randomcodespace/iq/api/GraphController.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ public Map<String, Object> getDetailedStats(
5757
@RequestParam(defaultValue = "all") String category) {
5858
Path root = Path.of(config.getRootPath()).toAbsolutePath().normalize();
5959
Path cachePath = root.resolve(config.getCacheDir()).resolve("analysis-cache.db");
60+
// H2 stores data in analysis-cache.mv.db — check for that file on disk
61+
Path h2File = root.resolve(config.getCacheDir()).resolve("analysis-cache.mv.db");
6062

61-
if (!Files.exists(cachePath)) {
63+
if (!Files.exists(h2File)) {
6264
throw new ResponseStatusException(HttpStatus.NOT_FOUND,
6365
"No analysis cache found. Run analyze first.");
6466
}

src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,47 +26,46 @@
2626
import java.util.UUID;
2727

2828
/**
29-
* SQLite-backed cache for incremental analysis results.
29+
* H2-backed cache for incremental analysis results.
3030
* <p>
3131
* Stores per-file parse results (nodes and edges) keyed by content hash,
3232
* enabling fast incremental re-analysis when only a subset of files change.
3333
* <p>
34-
* Uses plain JDBC with SQLite -- not Neo4j -- as the cache is a flat
35-
* lookup table, not a graph.
34+
* Uses H2 in embedded mode — pure Java, no JNI, MVCC concurrency,
35+
* fully compatible with virtual threads.
3636
*/
3737
public class AnalysisCache implements Closeable {
3838

3939
private static final Logger log = LoggerFactory.getLogger(AnalysisCache.class);
4040

4141
private static final String SCHEMA_SQL = """
4242
CREATE TABLE IF NOT EXISTS files (
43-
content_hash TEXT PRIMARY KEY,
44-
path TEXT NOT NULL,
45-
language TEXT NOT NULL,
46-
parsed_at TEXT NOT NULL
43+
content_hash VARCHAR PRIMARY KEY,
44+
path VARCHAR NOT NULL,
45+
language VARCHAR NOT NULL,
46+
parsed_at VARCHAR NOT NULL
4747
);
4848
4949
CREATE TABLE IF NOT EXISTS nodes (
50-
id TEXT PRIMARY KEY,
51-
content_hash TEXT NOT NULL,
52-
kind TEXT NOT NULL,
53-
data TEXT NOT NULL,
50+
id VARCHAR PRIMARY KEY,
51+
content_hash VARCHAR NOT NULL,
52+
kind VARCHAR NOT NULL,
53+
data VARCHAR NOT NULL,
5454
FOREIGN KEY (content_hash) REFERENCES files(content_hash)
5555
);
5656
5757
CREATE TABLE IF NOT EXISTS edges (
58-
source TEXT NOT NULL,
59-
target TEXT NOT NULL,
60-
content_hash TEXT NOT NULL,
61-
kind TEXT NOT NULL,
62-
data TEXT NOT NULL,
63-
FOREIGN KEY (content_hash) REFERENCES files(content_hash)
58+
source VARCHAR NOT NULL,
59+
target VARCHAR NOT NULL,
60+
content_hash VARCHAR NOT NULL,
61+
kind VARCHAR NOT NULL,
62+
data VARCHAR NOT NULL
6463
);
6564
6665
CREATE TABLE IF NOT EXISTS analysis_runs (
67-
run_id TEXT PRIMARY KEY,
68-
commit_sha TEXT,
69-
timestamp TEXT NOT NULL,
66+
run_id VARCHAR PRIMARY KEY,
67+
commit_sha VARCHAR,
68+
timestamp VARCHAR NOT NULL,
7069
file_count INTEGER NOT NULL
7170
);
7271
@@ -82,30 +81,29 @@ CREATE TABLE IF NOT EXISTS analysis_runs (
8281

8382
/**
8483
* Open or create an analysis cache at the given path.
84+
* <p>
85+
* The path should point to the desired database file location.
86+
* H2 will append {@code .mv.db} to the actual file on disk.
8587
*
86-
* @param dbPath path to the SQLite database file
88+
* @param dbPath path to the H2 database file; a trailing {@code .db} extension is stripped because H2 appends its own {@code .mv.db}
8789
*/
8890
public AnalysisCache(Path dbPath) {
8991
this.dbPath = dbPath;
9092
try {
9193
Files.createDirectories(dbPath.getParent());
92-
this.conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath);
93-
// Configure pragmas using separate statements, each closed before the next
94-
executePragma("PRAGMA journal_mode=WAL");
95-
executePragma("PRAGMA busy_timeout=5000");
96-
executePragma("PRAGMA foreign_keys=ON");
94+
// Strip .db extension if present — H2 appends its own .mv.db
95+
String dbFile = dbPath.toString();
96+
if (dbFile.endsWith(".db")) {
97+
dbFile = dbFile.substring(0, dbFile.length() - 3);
98+
}
99+
this.conn = DriverManager.getConnection(
100+
"jdbc:h2:file:" + dbFile + ";AUTO_SERVER=FALSE;MODE=MySQL");
97101
initDb();
98102
} catch (Exception e) {
99103
throw new RuntimeException("Failed to open analysis cache at " + dbPath, e);
100104
}
101105
}
102106

103-
private void executePragma(String pragma) throws SQLException {
104-
try (var stmt = conn.createStatement()) {
105-
stmt.execute(pragma);
106-
}
107-
}
108-
109107
private void initDb() throws SQLException {
110108
for (String sql : SCHEMA_SQL.split(";")) {
111109
String trimmed = sql.trim();
@@ -158,14 +156,13 @@ public boolean isCached(String contentHash) {
158156
*/
159157
public void storeResults(String contentHash, String filePath, String language,
160158
List<CodeNode> nodes, List<CodeEdge> edges) {
161-
synchronized (conn) {
162159
try {
163160
conn.setAutoCommit(false);
164161
String now = Instant.now().toString();
165162

166-
// Upsert file record
163+
// Upsert file record (H2 MERGE INTO ... KEY updates the row matching content_hash, or inserts it if absent)
167164
try (var stmt = conn.prepareStatement(
168-
"INSERT OR REPLACE INTO files (content_hash, path, language, parsed_at) VALUES (?, ?, ?, ?)")) {
165+
"MERGE INTO files (content_hash, path, language, parsed_at) KEY (content_hash) VALUES (?, ?, ?, ?)")) {
169166
stmt.setString(1, contentHash);
170167
stmt.setString(2, filePath);
171168
stmt.setString(3, language);
@@ -185,7 +182,7 @@ public void storeResults(String contentHash, String filePath, String language,
185182

186183
// Insert nodes
187184
try (var stmt = conn.prepareStatement(
188-
"INSERT OR IGNORE INTO nodes (id, content_hash, kind, data) VALUES (?, ?, ?, ?)")) {
185+
"INSERT INTO nodes (id, content_hash, kind, data) VALUES (?, ?, ?, ?)")) {
189186
for (CodeNode node : nodes) {
190187
stmt.setString(1, node.getId());
191188
stmt.setString(2, contentHash);
@@ -223,7 +220,6 @@ public void storeResults(String contentHash, String filePath, String language,
223220
} catch (SQLException ignored) {
224221
}
225222
}
226-
} // synchronized
227223
}
228224

229225
// --- Load cached results ---

src/main/java/io/github/randomcodespace/iq/cli/StatsCommand.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
/**
2929
* Show rich categorized statistics from an already-analyzed graph.
30-
* Reads from the SQLite analysis cache -- no re-scan.
30+
* Reads from the H2 analysis cache -- no re-scan.
3131
*/
3232
@Component
3333
@Command(name = "stats", mixinStandardHelpOptions = true,
@@ -80,8 +80,10 @@ public Integer call() {
8080

8181
Path root = path.toAbsolutePath().normalize();
8282
Path cachePath = root.resolve(config.getCacheDir()).resolve("analysis-cache.db");
83+
// H2 stores data in analysis-cache.mv.db — check for that file on disk
84+
Path h2File = root.resolve(config.getCacheDir()).resolve("analysis-cache.mv.db");
8385

84-
if (!Files.exists(cachePath)) {
86+
if (!Files.exists(h2File)) {
8587
CliOutput.warn("No analysis cache found at " + cachePath);
8688
CliOutput.info("Run 'code-iq analyze' first to scan the codebase.");
8789
return 1;

src/main/java/io/github/randomcodespace/iq/config/Neo4jConfig.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,16 @@ public class Neo4jConfig {
2828
@Bean(destroyMethod = "shutdown")
2929
DatabaseManagementService databaseManagementService(
3030
@Value("${codeiq.graph.path:.osscodeiq/graph.db}") String dbPath) {
31-
return new DatabaseManagementServiceBuilder(Path.of(dbPath)).build();
31+
DatabaseManagementService dbms = new DatabaseManagementServiceBuilder(Path.of(dbPath)).build();
32+
// Ensure clean shutdown even if Spring context is not closed gracefully
33+
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
34+
try {
35+
dbms.shutdown();
36+
} catch (Exception ignored) {
37+
// Already shut down by Spring's destroyMethod, or JVM is exiting
38+
}
39+
}, "neo4j-shutdown-hook"));
40+
return dbms;
3241
}
3342

3443
@Bean

src/main/java/io/github/randomcodespace/iq/mcp/McpTools.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,10 @@ public String getDetailedStats(
6565
try {
6666
java.nio.file.Path root = java.nio.file.Path.of(config.getRootPath()).toAbsolutePath().normalize();
6767
java.nio.file.Path cachePath = root.resolve(config.getCacheDir()).resolve("analysis-cache.db");
68+
// H2 stores data in analysis-cache.mv.db — check for that file on disk
69+
java.nio.file.Path h2File = root.resolve(config.getCacheDir()).resolve("analysis-cache.mv.db");
6870

69-
if (!java.nio.file.Files.exists(cachePath)) {
71+
if (!java.nio.file.Files.exists(h2File)) {
7072
return toJson(Map.of("error", "No analysis cache found. Run analyze first."));
7173
}
7274

0 commit comments

Comments
 (0)