Skip to content

Commit 2663ac5

Browse files
aksOps and claude committed
fix: create stub nodes for external edge references — zero edge loss
Previously, edges referencing nodes not in the graph (cross-repo references, external libraries, wildcard targets like *:ClassName) were silently dropped during Neo4j bulk load because MATCH found no matching source/target nodes.

Fix: pre-scan all edges for missing source/target IDs and create minimal stub nodes (kind=external) via MERGE before edge creation. This ensures that every edge in H2 that has both a source ID and a target ID is preserved in Neo4j; edges with a null source or target are still skipped and counted as skipped.

The stub nodes are tagged kind=external so they can be filtered out in queries if needed, but the edges connecting real nodes to external references are now visible in the graph.

A log line is emitted when stubs are created: "Neo4j: creating N stub nodes for external edge references"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0d5e9d8 commit 2663ac5

1 file changed

Lines changed: 46 additions & 3 deletions

File tree

src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.util.HashMap;
1818
import java.util.HashSet;
1919
import java.util.LinkedHashMap;
20+
import java.util.LinkedHashSet;
2021
import java.util.List;
2122
import java.util.Map;
2223
import java.util.Optional;
@@ -147,11 +148,49 @@ public void bulkSave(List<CodeNode> nodes) {
147148
savedNodeIds.add(node.getId());
148149
}
149150

150-
// 5. Save edges using UNWIND for batch inserts
151+
// 5. Collect all edges and identify missing target/source nodes
151152
List<CodeEdge> allEdges = nodes.stream()
152153
.flatMap(n -> n.getEdges().stream())
153154
.toList();
154155
int totalEdges = allEdges.size();
156+
157+
// Pre-scan for stub nodes needed (edges referencing nodes not in the graph)
158+
Set<String> stubNodeIds = new LinkedHashSet<>();
159+
for (CodeEdge edge : allEdges) {
160+
String sourceId = edge.getSourceId();
161+
String targetId = edge.getTarget() != null ? edge.getTarget().getId() : null;
162+
if (sourceId != null && !savedNodeIds.contains(sourceId)) stubNodeIds.add(sourceId);
163+
if (targetId != null && !savedNodeIds.contains(targetId)) stubNodeIds.add(targetId);
164+
}
165+
166+
// Create stub nodes for external references so no edges are lost
167+
if (!stubNodeIds.isEmpty()) {
168+
log.info("Neo4j: creating {} stub nodes for external edge references", stubNodeIds.size());
169+
List<Map<String, Object>> stubBatch = new ArrayList<>();
170+
for (String stubId : stubNodeIds) {
171+
stubBatch.add(Map.of(PROP_ID, stubId, PROP_KIND, "external", "label", stubId));
172+
savedNodeIds.add(stubId);
173+
if (stubBatch.size() >= batchSize) {
174+
try (Transaction tx = graphDb.beginTx()) {
175+
tx.execute("UNWIND $batch AS n MERGE (node:CodeNode {id: n.id}) "
176+
+ "ON CREATE SET node.kind = n.kind, node.label = n.label",
177+
Map.of("batch", stubBatch));
178+
tx.commit();
179+
}
180+
stubBatch.clear();
181+
}
182+
}
183+
if (!stubBatch.isEmpty()) {
184+
try (Transaction tx = graphDb.beginTx()) {
185+
tx.execute("UNWIND $batch AS n MERGE (node:CodeNode {id: n.id}) "
186+
+ "ON CREATE SET node.kind = n.kind, node.label = n.label",
187+
Map.of("batch", stubBatch));
188+
tx.commit();
189+
}
190+
}
191+
}
192+
193+
// 6. Save edges using UNWIND for batch inserts
155194
log.info("Neo4j: persisting {} edges...", totalEdges);
156195

157196
int created = 0;
@@ -162,8 +201,12 @@ public void bulkSave(List<CodeNode> nodes) {
162201
for (CodeEdge edge : batch) {
163202
String sourceId = edge.getSourceId();
164203
String targetId = edge.getTarget() != null ? edge.getTarget().getId() : null;
165-
if (targetId == null || sourceId == null
166-
|| !savedNodeIds.contains(sourceId) || !savedNodeIds.contains(targetId)) {
204+
if (targetId == null || sourceId == null) {
205+
skipped++;
206+
continue;
207+
}
208+
// Stubs were already created above — all IDs should exist now
209+
if (!savedNodeIds.contains(sourceId) || !savedNodeIds.contains(targetId)) {
167210
skipped++;
168211
continue;
169212
}

0 commit comments

Comments
 (0)