-
Notifications
You must be signed in to change notification settings - Fork 1
fix(search): scope local-search graph expansion to top-hit docs (RAN-35) #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| package search | ||
|
|
||
| import ( | ||
| "context" | ||
| "testing" | ||
|
|
||
| "github.com/RandomCodeSpace/docsiq/internal/store" | ||
| ) | ||
|
|
||
| // TestLocalSearch_GraphExpansionScopedToTopHitDocs is the RAN-35 regression | ||
| // guard. LocalSearch used to re-expand through every relationship a seed | ||
| // entity touched, regardless of doc_id, so a scoped query could surface | ||
| // unrelated-doc edges into the result set. After the fix, the graph walk | ||
| // must stay inside the top-hit doc set. | ||
| // | ||
| // Fixture: | ||
| // | ||
| // d-alpha: chunk "alpha" + entity "alpha" with one edge alpha -> beta | ||
| // (doc_id=d-alpha) | ||
| // d-delta: chunk "delta" + entity "alpha" shares a second edge | ||
| // alpha -> gamma (doc_id=d-delta) | ||
| // | ||
| // A query for "almost alpha" tops out on chunk "c-alpha" (d-alpha). With | ||
| // graphDepth=1 the result must include the d-alpha edge and must NOT | ||
| // include the d-delta edge, even though the seed entity "alpha" has a | ||
| // relationship row in d-delta. | ||
| func TestLocalSearch_GraphExpansionScopedToTopHitDocs(t *testing.T) { | ||
| st, emb, _ := seedCorpus(t) | ||
| ctx := context.Background() | ||
|
|
||
| must := func(err error) { | ||
| t.Helper() | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| } | ||
|
|
||
| // Seed entities with vectors close to query so they rank in the | ||
| // top-K entity set. "alpha" shares an ID so scoped and unscoped | ||
| // edges collide on a single seed. | ||
| entAlpha := &store.Entity{ID: "ent-alpha", Name: "alpha", Vector: []float32{1, 0, 0, 0}} | ||
| entBeta := &store.Entity{ID: "ent-beta", Name: "beta", Vector: []float32{0, 1, 0, 0}} | ||
| entGamma := &store.Entity{ID: "ent-gamma", Name: "gamma", Vector: []float32{0, 0, 1, 0}} | ||
| must(st.UpsertEntity(ctx, entAlpha)) | ||
| must(st.UpsertEntity(ctx, entBeta)) | ||
| must(st.UpsertEntity(ctx, entGamma)) | ||
|
|
||
| // In-scope edge: alpha -> beta in d-alpha (the top-hit doc). | ||
| must(st.InsertRelationship(ctx, &store.Relationship{ | ||
| ID: "rel-in-scope", SourceID: "ent-alpha", TargetID: "ent-beta", | ||
| Predicate: "knows", DocID: "d-alpha", | ||
| })) | ||
| // Out-of-scope edge: alpha -> gamma in d-delta (unrelated doc). | ||
| must(st.InsertRelationship(ctx, &store.Relationship{ | ||
| ID: "rel-out-of-scope", SourceID: "ent-alpha", TargetID: "ent-gamma", | ||
| Predicate: "knows", DocID: "d-delta", | ||
| })) | ||
|
|
||
| res, err := LocalSearch(ctx, st, emb, nil, "almost alpha", 1, 1) | ||
| if err != nil { | ||
| t.Fatalf("LocalSearch: %v", err) | ||
| } | ||
|
|
||
| // Expect the top chunk to belong to d-alpha. | ||
| if len(res.Chunks) == 0 || res.Chunks[0].Chunk.DocID != "d-alpha" { | ||
| t.Fatalf("top chunk: want doc d-alpha; got %+v", res.Chunks) | ||
| } | ||
|
|
||
| // Every returned relationship must belong to a top-hit doc. | ||
| topHitDocs := map[string]bool{} | ||
| for _, c := range res.Chunks { | ||
| topHitDocs[c.Chunk.DocID] = true | ||
| } | ||
| for _, r := range res.Rels { | ||
| if !topHitDocs[r.DocID] { | ||
| t.Errorf("relationship %s leaked from unrelated doc %q (top-hit docs: %v)", | ||
| r.ID, r.DocID, topHitDocs) | ||
| } | ||
| if r.ID == "rel-out-of-scope" { | ||
| t.Errorf("scoped local search returned out-of-scope edge %s (doc=%s)", r.ID, r.DocID) | ||
| } | ||
| } | ||
|
|
||
| // Sanity: the in-scope edge should actually be there — otherwise | ||
| // the negative assertion above is vacuous. | ||
| var sawInScope bool | ||
| for _, r := range res.Rels { | ||
| if r.ID == "rel-in-scope" { | ||
| sawInScope = true | ||
| break | ||
| } | ||
| } | ||
| if !sawInScope { | ||
| t.Errorf("scoped local search did not return the in-scope edge rel-in-scope; rels=%v", relIDs(res.Rels)) | ||
| } | ||
| } | ||
|
|
||
| // relIDs is a tiny helper so assertion failures above print readable ids | ||
| // instead of a slice of pointers. | ||
| func relIDs(rs []*store.Relationship) []string { | ||
| out := make([]string, len(rs)) | ||
| for i, r := range rs { | ||
| out[i] = r.ID | ||
| } | ||
| return out | ||
| } | ||
|
|
121 changes: 121 additions & 0 deletions
121
internal/store/relationships_for_entity_in_docs_test.go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| package store | ||
|
|
||
| import ( | ||
| "context" | ||
| "testing" | ||
| ) | ||
|
|
||
| // Fixture layout: | ||
| // | ||
| // docA: e1 -[rA1]-> e2 -[rA2]-> e3 | ||
| // docB: e1 -[rB1]-> e4 (entity e1 is shared across both docs) | ||
| // | ||
| // A depth-2 BFS from e1 scoped to docA must reach e2 and e3 via edges rA1 | ||
| // and rA2, and must NOT return rB1 (from the unrelated document). | ||
| func TestRelationshipsForEntityInDocs_OnlyReturnsEdgesFromScopedDocs(t *testing.T) { | ||
| t.Parallel() | ||
| st := newTestStore(t) | ||
| ctx := context.Background() | ||
|
|
||
| must := func(err error) { | ||
| t.Helper() | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| } | ||
|
|
||
| must(st.UpsertDocument(ctx, &Document{ID: "docA", Path: "/a", Title: "A", DocType: "txt", FileHash: "hashA"})) | ||
| must(st.UpsertDocument(ctx, &Document{ID: "docB", Path: "/b", Title: "B", DocType: "txt", FileHash: "hashB"})) | ||
|
|
||
| for _, id := range []string{"e1", "e2", "e3", "e4"} { | ||
| must(st.UpsertEntity(ctx, &Entity{ID: id, Name: id})) | ||
| } | ||
| must(st.InsertRelationship(ctx, &Relationship{ID: "rA1", SourceID: "e1", TargetID: "e2", Predicate: "p", DocID: "docA"})) | ||
| must(st.InsertRelationship(ctx, &Relationship{ID: "rA2", SourceID: "e2", TargetID: "e3", Predicate: "p", DocID: "docA"})) | ||
| must(st.InsertRelationship(ctx, &Relationship{ID: "rB1", SourceID: "e1", TargetID: "e4", Predicate: "p", DocID: "docB"})) | ||
|
|
||
| // Sanity: the unscoped walk must surface the out-of-scope edge. | ||
| // (That is precisely the leak RAN-35 is closing.) | ||
| all, err := st.RelationshipsForEntity(ctx, "e1", 2) | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| var sawLeakUnscoped bool | ||
| for _, r := range all { | ||
| if r.ID == "rB1" { | ||
| sawLeakUnscoped = true | ||
| break | ||
| } | ||
| } | ||
| if !sawLeakUnscoped { | ||
| t.Fatalf("fixture sanity: unscoped walk did not include rB1 — test setup is wrong") | ||
| } | ||
|
|
||
| // Scoped walk must exclude rB1. | ||
| got, err := st.RelationshipsForEntityInDocs(ctx, "e1", 2, []string{"docA"}) | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| ids := map[string]string{} | ||
| for _, r := range got { | ||
| ids[r.ID] = r.DocID | ||
| if r.DocID != "docA" { | ||
| t.Errorf("scoped walk returned edge %s from unrelated doc %q", r.ID, r.DocID) | ||
| } | ||
| } | ||
| for _, want := range []string{"rA1", "rA2"} { | ||
| if _, ok := ids[want]; !ok { | ||
| t.Errorf("scoped walk: missing expected in-scope edge %s", want) | ||
| } | ||
| } | ||
| if _, leaked := ids["rB1"]; leaked { | ||
| t.Errorf("scoped walk leaked out-of-scope edge rB1 from docB") | ||
| } | ||
| if len(got) != 2 { | ||
| t.Errorf("scoped walk: want exactly 2 in-scope edges, got %d (%v)", len(got), ids) | ||
| } | ||
| } | ||
|
|
||
| func TestRelationshipsForEntityInDocs_EmptyDocsReturnsNil(t *testing.T) { | ||
| t.Parallel() | ||
| st := newTestStore(t) | ||
| ctx := context.Background() | ||
|
|
||
| got, err := st.RelationshipsForEntityInDocs(ctx, "anything", 2, nil) | ||
| if err != nil { | ||
| t.Fatalf("empty docIDs: want (nil, nil); got err=%v", err) | ||
| } | ||
| if len(got) != 0 { | ||
| t.Fatalf("empty docIDs: want 0 relationships, got %d", len(got)) | ||
| } | ||
| } | ||
|
|
||
| // Depth must still bound traversal. With depth=1 we should see only the | ||
| // direct edge (rA1) out of e1, not rA2 which is one hop further out. | ||
| func TestRelationshipsForEntityInDocs_RespectsDepthLimit(t *testing.T) { | ||
| t.Parallel() | ||
| st := newTestStore(t) | ||
| ctx := context.Background() | ||
|
|
||
| must := func(err error) { | ||
| t.Helper() | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| } | ||
|
|
||
| must(st.UpsertDocument(ctx, &Document{ID: "docA", Path: "/a", Title: "A", DocType: "txt", FileHash: "hashA"})) | ||
| for _, id := range []string{"e1", "e2", "e3"} { | ||
| must(st.UpsertEntity(ctx, &Entity{ID: id, Name: id})) | ||
| } | ||
| must(st.InsertRelationship(ctx, &Relationship{ID: "rA1", SourceID: "e1", TargetID: "e2", Predicate: "p", DocID: "docA"})) | ||
| must(st.InsertRelationship(ctx, &Relationship{ID: "rA2", SourceID: "e2", TargetID: "e3", Predicate: "p", DocID: "docA"})) | ||
|
|
||
| got, err := st.RelationshipsForEntityInDocs(ctx, "e1", 1, []string{"docA"}) | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| if len(got) != 1 || got[0].ID != "rA1" { | ||
| t.Fatalf("depth=1 from e1: want [rA1]; got %v", got) | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.