Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions internal/search/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,10 @@ func LocalSearch(ctx context.Context, st *store.Store, emb *embedder.Embedder, i
seenEntities[es.e.ID] = true
result.Entities = append(result.Entities, es.e)

// Walk relationships
rels, err := st.RelationshipsForEntity(ctx, es.e.ID, graphDepth)
// Walk relationships scoped to the top-hit doc set so the
// graph expansion cannot leak edges from unrelated
// documents into a scoped local-search result.
rels, err := st.RelationshipsForEntityInDocs(ctx, es.e.ID, graphDepth, docIDList)
if err != nil {
continue
}
Expand Down
107 changes: 107 additions & 0 deletions internal/search/local_scope_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package search

import (
"context"
"testing"

"github.com/RandomCodeSpace/docsiq/internal/store"
)

// TestLocalSearch_GraphExpansionScopedToTopHitDocs is the RAN-35 regression
// guard. Before the fix, LocalSearch expanded through every relationship a
// seed entity touched regardless of doc_id, so a scoped query could pull
// edges from unrelated documents into the result. After the fix, the graph
// walk must stay inside the top-hit doc set.
//
// Fixture:
//
//	d-alpha: chunk "alpha" + entity "alpha" with one edge alpha -> beta
//	         (doc_id=d-alpha)
//	d-delta: chunk "delta" + entity "alpha" shares a second edge
//	         alpha -> gamma (doc_id=d-delta)
//
// A query for "almost alpha" tops out on chunk "c-alpha" (d-alpha). With
// graphDepth=1 the result must include the d-alpha edge and must NOT
// include the d-delta edge, even though the seed entity "alpha" has a
// relationship row in d-delta.
func TestLocalSearch_GraphExpansionScopedToTopHitDocs(t *testing.T) {
	st, emb, _ := seedCorpus(t)
	ctx := context.Background()

	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	// Seed entities whose vectors sit near the query so they land in the
	// top-K entity set. Entity "alpha" is shared, so the scoped and
	// unscoped edges collide on a single seed.
	for _, e := range []*store.Entity{
		{ID: "ent-alpha", Name: "alpha", Vector: []float32{1, 0, 0, 0}},
		{ID: "ent-beta", Name: "beta", Vector: []float32{0, 1, 0, 0}},
		{ID: "ent-gamma", Name: "gamma", Vector: []float32{0, 0, 1, 0}},
	} {
		must(st.UpsertEntity(ctx, e))
	}

	// One edge inside the top-hit doc (d-alpha), one in an unrelated doc
	// (d-delta); only the first may appear in a scoped result.
	for _, rel := range []*store.Relationship{
		{ID: "rel-in-scope", SourceID: "ent-alpha", TargetID: "ent-beta",
			Predicate: "knows", DocID: "d-alpha"},
		{ID: "rel-out-of-scope", SourceID: "ent-alpha", TargetID: "ent-gamma",
			Predicate: "knows", DocID: "d-delta"},
	} {
		must(st.InsertRelationship(ctx, rel))
	}

	res, err := LocalSearch(ctx, st, emb, nil, "almost alpha", 1, 1)
	if err != nil {
		t.Fatalf("LocalSearch: %v", err)
	}

	// The top chunk must come from d-alpha, or the fixture premise fails.
	if len(res.Chunks) == 0 || res.Chunks[0].Chunk.DocID != "d-alpha" {
		t.Fatalf("top chunk: want doc d-alpha; got %+v", res.Chunks)
	}

	// Collect the doc IDs of the returned chunks — the only docs the
	// graph expansion is allowed to traverse.
	topHitDocs := make(map[string]bool)
	for _, c := range res.Chunks {
		topHitDocs[c.Chunk.DocID] = true
	}

	// Single pass over the returned relationships: every edge must come
	// from a top-hit doc, the out-of-scope edge must be absent, and the
	// in-scope edge must be present (so the negative check isn't vacuous).
	sawInScope := false
	for _, r := range res.Rels {
		if !topHitDocs[r.DocID] {
			t.Errorf("relationship %s leaked from unrelated doc %q (top-hit docs: %v)",
				r.ID, r.DocID, topHitDocs)
		}
		switch r.ID {
		case "rel-out-of-scope":
			t.Errorf("scoped local search returned out-of-scope edge %s (doc=%s)", r.ID, r.DocID)
		case "rel-in-scope":
			sawInScope = true
		}
	}
	if !sawInScope {
		t.Errorf("scoped local search did not return the in-scope edge rel-in-scope; rels=%v", relIDs(res.Rels))
	}
}

// relIDs flattens a relationship slice to its ID strings so that test
// failure messages print readable identifiers instead of pointer values.
func relIDs(rs []*store.Relationship) []string {
	ids := make([]string, 0, len(rs))
	for _, r := range rs {
		ids = append(ids, r.ID)
	}
	return ids
}

121 changes: 121 additions & 0 deletions internal/store/relationships_for_entity_in_docs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package store

import (
"context"
"testing"
)

// Fixture layout:
//
//	docA: e1 -[rA1]-> e2 -[rA2]-> e3
//	docB: e1 -[rB1]-> e4 (entity e1 is shared across both docs)
//
// A depth-2 BFS from e1 scoped to docA must reach e2 and e3 via edges rA1
// and rA2, and must NOT return rB1 (from the unrelated document).
func TestRelationshipsForEntityInDocs_OnlyReturnsEdgesFromScopedDocs(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	st := newTestStore(t)

	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	// Two documents sharing entity e1, with one cross-doc edge (rB1)
	// that the scoped walk must never surface.
	for _, doc := range []*Document{
		{ID: "docA", Path: "/a", Title: "A", DocType: "txt", FileHash: "hashA"},
		{ID: "docB", Path: "/b", Title: "B", DocType: "txt", FileHash: "hashB"},
	} {
		must(st.UpsertDocument(ctx, doc))
	}
	for _, id := range []string{"e1", "e2", "e3", "e4"} {
		must(st.UpsertEntity(ctx, &Entity{ID: id, Name: id}))
	}
	for _, rel := range []*Relationship{
		{ID: "rA1", SourceID: "e1", TargetID: "e2", Predicate: "p", DocID: "docA"},
		{ID: "rA2", SourceID: "e2", TargetID: "e3", Predicate: "p", DocID: "docA"},
		{ID: "rB1", SourceID: "e1", TargetID: "e4", Predicate: "p", DocID: "docB"},
	} {
		must(st.InsertRelationship(ctx, rel))
	}

	// Sanity: the unscoped walk must surface the out-of-scope edge.
	// (That is precisely the leak RAN-35 is closing.)
	all, err := st.RelationshipsForEntity(ctx, "e1", 2)
	if err != nil {
		t.Fatal(err)
	}
	leakInUnscoped := false
	for _, r := range all {
		if r.ID == "rB1" {
			leakInUnscoped = true
			break
		}
	}
	if !leakInUnscoped {
		t.Fatalf("fixture sanity: unscoped walk did not include rB1 — test setup is wrong")
	}

	// Scoped walk must exclude rB1.
	got, err := st.RelationshipsForEntityInDocs(ctx, "e1", 2, []string{"docA"})
	if err != nil {
		t.Fatal(err)
	}
	byID := make(map[string]string, len(got))
	for _, r := range got {
		byID[r.ID] = r.DocID
		if r.DocID != "docA" {
			t.Errorf("scoped walk returned edge %s from unrelated doc %q", r.ID, r.DocID)
		}
	}
	for _, want := range []string{"rA1", "rA2"} {
		if _, ok := byID[want]; !ok {
			t.Errorf("scoped walk: missing expected in-scope edge %s", want)
		}
	}
	if _, leaked := byID["rB1"]; leaked {
		t.Errorf("scoped walk leaked out-of-scope edge rB1 from docB")
	}
	if len(got) != 2 {
		t.Errorf("scoped walk: want exactly 2 in-scope edges, got %d (%v)", len(got), byID)
	}
}

// An empty doc scope means there is nothing valid to expand into: the
// scoped walk must come back with no relationships and no error.
func TestRelationshipsForEntityInDocs_EmptyDocsReturnsNil(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	st := newTestStore(t)

	rels, err := st.RelationshipsForEntityInDocs(ctx, "anything", 2, nil)
	if err != nil {
		t.Fatalf("empty docIDs: want (nil, nil); got err=%v", err)
	}
	if len(rels) != 0 {
		t.Fatalf("empty docIDs: want 0 relationships, got %d", len(rels))
	}
}

// Depth must still bound traversal. With depth=1 we should see only the
// direct edge (rA1) out of e1, not rA2 which is one hop further out.
func TestRelationshipsForEntityInDocs_RespectsDepthLimit(t *testing.T) {
	t.Parallel()
	st := newTestStore(t)
	ctx := context.Background()

	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	// Minimal two-hop chain in a single document: e1 -> e2 -> e3.
	must(st.UpsertDocument(ctx, &Document{ID: "docA", Path: "/a", Title: "A", DocType: "txt", FileHash: "hashA"}))
	for _, id := range []string{"e1", "e2", "e3"} {
		must(st.UpsertEntity(ctx, &Entity{ID: id, Name: id}))
	}
	must(st.InsertRelationship(ctx, &Relationship{ID: "rA1", SourceID: "e1", TargetID: "e2", Predicate: "p", DocID: "docA"}))
	must(st.InsertRelationship(ctx, &Relationship{ID: "rA2", SourceID: "e2", TargetID: "e3", Predicate: "p", DocID: "docA"}))

	got, err := st.RelationshipsForEntityInDocs(ctx, "e1", 1, []string{"docA"})
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != 1 || got[0].ID != "rA1" {
		// Report relationship IDs, not %v of []*Relationship — the
		// latter prints pointer addresses and makes failures
		// undiagnosable (same reason local_scope_test.go has relIDs).
		ids := make([]string, 0, len(got))
		for _, r := range got {
			ids = append(ids, r.ID)
		}
		t.Fatalf("depth=1 from e1: want [rA1]; got %v", ids)
	}
}
98 changes: 98 additions & 0 deletions internal/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,104 @@ func (s *Store) RelationshipsForEntity(ctx context.Context, entityID string, dep
return all, nil
}

// RelationshipsForEntityInDocs is the doc-scoped variant of
// RelationshipsForEntity. Each BFS hop only traverses relationships whose
// doc_id is in the provided set, so callers doing a scoped local search
// cannot leak edges from unrelated documents into the result set.
//
// Passing an empty docIDs slice returns no relationships — scoped search
// with no anchor documents has no valid expansion.
//
// The doc_id IN-list is chunked at 900 (below SQLite's default 999
// variable limit); combined with the frontier list, each hop may split
// across multiple queries.
func (s *Store) RelationshipsForEntityInDocs(ctx context.Context, entityID string, depth int, docIDs []string) ([]*Relationship, error) {
if len(docIDs) == 0 {
return nil, nil
}

visited := map[string]bool{entityID: true}
seenRel := make(map[string]struct{})
frontier := []string{entityID}
var all []*Relationship

const docChunkSize = 900
const frontierChunkSize = 900
Comment thread
aksOps marked this conversation as resolved.

for d := 0; d < depth && len(frontier) > 0; d++ {
var nextFrontier []string
for fStart := 0; fStart < len(frontier); fStart += frontierChunkSize {
fEnd := fStart + frontierChunkSize
if fEnd > len(frontier) {
fEnd = len(frontier)
}
fChunk := frontier[fStart:fEnd]
fPlaceholders := strings.Repeat("?,", len(fChunk))
fPlaceholders = fPlaceholders[:len(fPlaceholders)-1]

for dStart := 0; dStart < len(docIDs); dStart += docChunkSize {
dEnd := dStart + docChunkSize
if dEnd > len(docIDs) {
dEnd = len(docIDs)
}
dChunk := docIDs[dStart:dEnd]
dPlaceholders := strings.Repeat("?,", len(dChunk))
dPlaceholders = dPlaceholders[:len(dPlaceholders)-1]

args := make([]any, 0, len(fChunk)*2+len(dChunk))
for _, id := range fChunk {
args = append(args, id)
}
for _, id := range fChunk {
args = append(args, id)
}
for _, id := range dChunk {
args = append(args, id)
}

q := fmt.Sprintf(`SELECT id,source_id,target_id,predicate,description,weight,doc_id
FROM relationships
WHERE (source_id IN (%s) OR target_id IN (%s))
AND doc_id IN (%s)`,
fPlaceholders, fPlaceholders, dPlaceholders)
rows, err := s.db.QueryContext(ctx, q, args...)
if err != nil {
return nil, err
}
for rows.Next() {
var r Relationship
var docID sql.NullString
if err := rows.Scan(&r.ID, &r.SourceID, &r.TargetID, &r.Predicate, &r.Description, &r.Weight, &docID); err != nil {
rows.Close()
return nil, err
}
if docID.Valid {
r.DocID = docID.String
}
if _, dup := seenRel[r.ID]; dup {
continue
}
seenRel[r.ID] = struct{}{}
all = append(all, &r)
for _, nid := range []string{r.SourceID, r.TargetID} {
if !visited[nid] {
visited[nid] = true
nextFrontier = append(nextFrontier, nid)
}
}
}
if err := rows.Err(); err != nil {
rows.Close()
return nil, err
}
rows.Close()
}
}
frontier = nextFrontier
}
return all, nil
}

func (s *Store) FindRelationships(ctx context.Context, fromID, toID, predicate string) ([]*Relationship, error) {
q := `SELECT id,source_id,target_id,predicate,description,weight,doc_id FROM relationships WHERE 1=1`
args := []any{}
Expand Down
Loading