Skip to content

Commit 163d800

Browse files
aksOps and claude committed
fix: enrich bulk-load missing properties + progress logging
Two issues in the EnrichCommand bulk-load:

1. Node properties (framework, http_method, auth_type, etc.) were not persisted to Neo4j. Only core fields (id, kind, label, fqn, etc.) were saved. The bulk-load now also includes annotations and all prop_* detector properties.
2. There was no progress output during bulk-load, so for 140K nodes the command appeared stuck. It now logs progress every 10K items, with a percentage.

Also reduced the batch size from 5000 to 2000 for consistency with GraphStore.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7ed49ba commit 163d800

1 file changed

Lines changed: 29 additions & 9 deletions

File tree

src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,12 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins
174174
tx.commit();
175175
}
176176

177-
// Bulk-load nodes in batches of 5000 using UNWIND
178-
int nodeBatchSize = 5000;
177+
// Bulk-load nodes in batches using UNWIND
178+
int nodeBatchSize = 2000;
179179
int nodesLoaded = 0;
180-
for (int i = 0; i < enrichedNodes.size(); i += nodeBatchSize) {
181-
int end = Math.min(i + nodeBatchSize, enrichedNodes.size());
180+
int totalNodes = enrichedNodes.size();
181+
for (int i = 0; i < totalNodes; i += nodeBatchSize) {
182+
int end = Math.min(i + nodeBatchSize, totalNodes);
182183
var batch = new ArrayList<Map<String, Object>>(end - i);
183184
for (int j = i; j < end; j++) {
184185
CodeNode node = enrichedNodes.get(j);
@@ -192,6 +193,17 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins
192193
if (node.getLineStart() != null) props.put("lineStart", node.getLineStart());
193194
if (node.getLineEnd() != null) props.put("lineEnd", node.getLineEnd());
194195
if (node.getLayer() != null) props.put("layer", node.getLayer());
196+
if (node.getAnnotations() != null && !node.getAnnotations().isEmpty()) {
197+
props.put("annotations", String.join(",", node.getAnnotations()));
198+
}
199+
// Include detector properties (framework, http_method, auth_type, etc.)
200+
if (node.getProperties() != null) {
201+
for (var entry : node.getProperties().entrySet()) {
202+
if (entry.getValue() != null) {
203+
props.put("prop_" + entry.getKey(), entry.getValue().toString());
204+
}
205+
}
206+
}
195207
batch.add(props);
196208
}
197209
try (Transaction tx = db.beginTx()) {
@@ -200,8 +212,11 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins
200212
tx.commit();
201213
}
202214
nodesLoaded += batch.size();
215+
if (nodesLoaded % 10000 < nodeBatchSize || nodesLoaded >= totalNodes) {
216+
CliOutput.info(" nodes: " + nf.format(nodesLoaded) + "/" + nf.format(totalNodes)
217+
+ " (" + (100 * nodesLoaded / totalNodes) + "%)");
218+
}
203219
}
204-
CliOutput.info(" Loaded " + nf.format(nodesLoaded) + " nodes into Neo4j");
205220

206221
// Create index on id for edge resolution
207222
try (Transaction tx = db.beginTx()) {
@@ -248,10 +263,12 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins
248263
validEdgeMaps.add(props);
249264
}
250265

251-
int edgeBatchSize = 5000;
266+
int edgeBatchSize = 2000;
252267
int edgesLoaded = 0;
253-
for (int i = 0; i < validEdgeMaps.size(); i += edgeBatchSize) {
254-
int end = Math.min(i + edgeBatchSize, validEdgeMaps.size());
268+
int totalEdges = validEdgeMaps.size();
269+
CliOutput.info(" Loading " + nf.format(totalEdges) + " edges...");
270+
for (int i = 0; i < totalEdges; i += edgeBatchSize) {
271+
int end = Math.min(i + edgeBatchSize, totalEdges);
255272
var batch = validEdgeMaps.subList(i, end);
256273
try (Transaction tx = db.beginTx()) {
257274
tx.execute(
@@ -262,8 +279,11 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins
262279
tx.commit();
263280
}
264281
edgesLoaded += batch.size();
282+
if (edgesLoaded % 10000 < edgeBatchSize || edgesLoaded >= totalEdges) {
283+
CliOutput.info(" edges: " + nf.format(edgesLoaded) + "/" + nf.format(totalEdges)
284+
+ " (" + (100 * edgesLoaded / totalEdges) + "%)");
285+
}
265286
}
266-
CliOutput.info(" Loaded " + nf.format(edgesLoaded) + " edges into Neo4j");
267287

268288
// Create additional indexes for fast queries
269289
try (Transaction tx = db.beginTx()) {

0 commit comments

Comments
 (0)