Skip to content

Commit beaf335

Browse files
aksOps and claude committed
fix: buffer edges in GraphBuilder for 100% cross-backend parity
Edges are now buffered during detector result merging and flushed only after all nodes are present. This ensures backends that validate node existence (NetworkX, SQLite, KuzuDB) all produce identical results regardless of file processing order.

Verified: 2,298 nodes / 2,890 edges on contoso-real-estate and 25,893 nodes / 30,636 edges on spring-boot — identical across all 3 backends.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 794e1f7 commit beaf335

1 file changed

Lines changed: 32 additions & 6 deletions

File tree

src/code_intelligence/graph/builder.py

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -194,10 +194,17 @@ def link(self, store: GraphStore) -> list[GraphEdge]:
194194

195195

196196
class GraphBuilder:
197-
"""Aggregates detector results and runs cross-file linkers to build a graph."""
197+
"""Aggregates detector results and runs cross-file linkers to build a graph.
198+
199+
Edges are buffered and flushed after all nodes are added to ensure
200+
consistent behavior across all storage backends. Some backends
201+
(NetworkX, SQLite, KuzuDB) reject edges referencing non-existent
202+
nodes, so all nodes must be present before edges are inserted.
203+
"""
198204

199205
def __init__(self, backend: GraphBackend | None = None) -> None:
200206
self._store = GraphStore(backend=backend)
207+
self._pending_edges: list[GraphEdge] = []
201208
self._linkers: list[Linker] = [
202209
TopicLinker(),
203210
EntityLinker(),
@@ -210,9 +217,18 @@ def add_nodes(self, nodes: list[GraphNode]) -> None:
210217
self._store.add_node(node)
211218

212219
def add_edges(self, edges: list[GraphEdge]) -> None:
213-
"""Add a batch of edges to the graph store."""
214-
for edge in edges:
220+
"""Buffer edges for deferred insertion."""
221+
self._pending_edges.extend(edges)
222+
223+
def flush_edges(self) -> None:
224+
"""Insert all buffered edges into the store.
225+
226+
Call this after all nodes have been added so that backends
227+
which validate node existence won't reject valid cross-file edges.
228+
"""
229+
for edge in self._pending_edges:
215230
self._store.add_edge(edge)
231+
self._pending_edges.clear()
216232

217233
def merge_detector_result(self, result: object) -> None:
218234
"""Merge a DetectorResult into the graph.
@@ -223,10 +239,13 @@ def merge_detector_result(self, result: object) -> None:
223239
nodes: list[GraphNode] = getattr(result, "nodes", [])
224240
edges: list[GraphEdge] = getattr(result, "edges", [])
225241
self.add_nodes(nodes)
226-
self.add_edges(edges)
242+
self.add_edges(edges) # buffered, not inserted yet
227243

228244
def run_linkers(self) -> None:
229-
"""Run all registered linkers and add the resulting edges."""
245+
"""Flush pending edges, then run all registered linkers."""
246+
# Flush detector edges first so linkers see the full graph
247+
self.flush_edges()
248+
230249
for linker in self._linkers:
231250
try:
232251
new_edges = linker.link(self._store)
@@ -239,14 +258,21 @@ def run_linkers(self) -> None:
239258
if new_nodes:
240259
self.add_nodes(new_nodes)
241260

242-
self.add_edges(new_edges)
261+
# Linker edges go to pending buffer too
262+
self._pending_edges.extend(new_edges)
243263
except Exception:
244264
logger.warning(
245265
"Linker %s failed",
246266
type(linker).__name__,
247267
exc_info=True,
248268
)
249269

270+
# Flush linker edges (linker-created nodes are already added above)
271+
self.flush_edges()
272+
250273
def build(self) -> GraphStore:
251274
"""Return the assembled graph store."""
275+
# Safety: flush any remaining edges
276+
if self._pending_edges:
277+
self.flush_edges()
252278
return self._store

0 commit comments

Comments
 (0)