diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 0cb6046..a7c1bdc 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -191,6 +191,88 @@ func (h *handlers) getDocument(w http.ResponseWriter, r *http.Request) { writeJSON(w, 200, doc) } +func (h *handlers) getDocumentChunks(w http.ResponseWriter, r *http.Request) { + st, ok := h.resolveStore(w, r) + if !ok { + return + } + id := r.PathValue("id") + doc, err := st.GetDocument(r.Context(), id) + if err != nil { + writeError(w, r, 500, err.Error(), err) + return + } + if doc == nil { + writeError(w, r, 404, "document not found", nil) + return + } + chunks, err := st.ListChunksByDoc(r.Context(), id) + if err != nil { + writeError(w, r, 500, err.Error(), err) + return + } + out := make([]map[string]any, 0, len(chunks)) + for _, c := range chunks { + out = append(out, map[string]any{ + "id": c.ID, + "chunk_index": c.ChunkIndex, + "content": c.Content, + "token_count": c.TokenCount, + }) + } + writeJSON(w, 200, out) +} + +func (h *handlers) entityGraph(w http.ResponseWriter, r *http.Request) { + st, ok := h.resolveStore(w, r) + if !ok { + return + } + q := r.URL.Query() + limit := intQuery(q.Get("limit"), 500) + typ := q.Get("type") + + entities, err := st.ListEntities(r.Context(), typ, limit, 0) + if err != nil { + writeError(w, r, 500, err.Error(), err) + return + } + rels, err := st.AllRelationships(r.Context()) + if err != nil { + writeError(w, r, 500, err.Error(), err) + return + } + + nodes := make([]map[string]any, 0, len(entities)) + keep := make(map[string]bool, len(entities)) + for _, e := range entities { + keep[e.ID] = true + nodes = append(nodes, map[string]any{ + "id": e.ID, + "label": e.Name, + "kind": "entity", + "type": e.Type, + "description": e.Description, + "rank": e.Rank, + "community": e.CommunityID, + }) + } + edges := make([]map[string]any, 0) + for _, rel := range rels { + if !keep[rel.SourceID] || !keep[rel.TargetID] { + continue + } + edges = append(edges, 
map[string]any{ + "id": rel.ID, + "source": rel.SourceID, + "target": rel.TargetID, + "label": rel.Predicate, + "weight": rel.Weight, + }) + } + writeJSON(w, 200, map[string]any{"nodes": nodes, "edges": edges}) +} + type searchRequest struct { Query string `json:"query"` Mode string `json:"mode"` // local | global diff --git a/internal/api/router.go b/internal/api/router.go index d3efad8..c052afe 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -169,8 +169,10 @@ func NewRouter(prov llm.Provider, emb *embedder.Embedder, cfg *config.Config, re mux.HandleFunc("GET /api/stats", h.getStats) mux.HandleFunc("GET /api/documents", h.listDocuments) mux.HandleFunc("GET /api/documents/{id}", h.getDocument) + mux.HandleFunc("GET /api/documents/{id}/chunks", h.getDocumentChunks) mux.HandleFunc("GET /api/documents/{id}/versions", h.getDocumentVersions) mux.HandleFunc("POST /api/search", h.search) + mux.HandleFunc("GET /api/graph", h.entityGraph) mux.HandleFunc("GET /api/graph/neighborhood", h.graphNeighborhood) mux.HandleFunc("GET /api/entities", h.listEntities) mux.HandleFunc("GET /api/communities", h.listCommunities) diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 75243b0..65e6fd5 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -313,15 +313,21 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions return fmt.Errorf("batch insert chunks: %w", err) } - // Phase 1c: Embed chunks - vecs, err := p.embedder.EmbedTexts(ctx, texts) - if err != nil { - return fmt.Errorf("embed: %w", err) - } - slog.Debug("📊 chunks embedded", "path", path, "chunks", len(vecs)) + // Phase 1c: Embed chunks. Skip when the embedder is nil (provider=none / + // graph-only flow); chunks are still persisted, downstream extraction + // uses raw text rather than vectors. CLAUDE.md guarantees this no-op path. 
+ if p.embedder != nil { + vecs, err := p.embedder.EmbedTexts(ctx, texts) + if err != nil { + return fmt.Errorf("embed: %w", err) + } + slog.Debug("📊 chunks embedded", "path", path, "chunks", len(vecs)) - if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil { - return fmt.Errorf("batch store embeddings: %w", err) + if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil { + return fmt.Errorf("batch store embeddings: %w", err) + } + } else { + slog.Debug("⏭️ skipping embedding (provider=none)", "path", path, "chunks", len(texts)) } // Phase 2: Run graph extraction, claims, and structured doc in parallel diff --git a/ui/src/hooks/api/keys.ts b/ui/src/hooks/api/keys.ts index d3f7b50..16b27e4 100644 --- a/ui/src/hooks/api/keys.ts +++ b/ui/src/hooks/api/keys.ts @@ -8,6 +8,8 @@ export const qk = { notesSearch: (project: string, q: string) => ["notes-search", project, q] as const, docs: (project: string) => ["docs", project] as const, doc: (project: string, id: string) => ["doc", project, id] as const, + docChunks: (project: string, id: string) => ["doc-chunks", project, id] as const, + entityGraph: (project: string) => ["entity-graph", project] as const, search: (project: string, q: string, mode: string) => ["search", project, q, mode] as const, entities: (project: string) => ["entities", project] as const, communities: (project: string) => ["communities", project] as const, diff --git a/ui/src/hooks/api/useDocs.ts b/ui/src/hooks/api/useDocs.ts index 62927aa..f835966 100644 --- a/ui/src/hooks/api/useDocs.ts +++ b/ui/src/hooks/api/useDocs.ts @@ -3,6 +3,13 @@ import { apiFetch } from "@/lib/api-client"; import { qk } from "./keys"; import type { Document } from "@/types/api"; +export interface DocChunk { + id: string; + chunk_index: number; + content: string; + token_count: number; +} + export function useDocs(project: string) { return useQuery({ queryKey: qk.docs(project), @@ -22,3 +29,16 @@ 
export function useDoc(project: string, id: string | undefined) { queryFn: () => apiFetch(`/api/documents/${encodeURIComponent(id!)}?project=${encodeURIComponent(project)}`), }); } + +export function useDocChunks(project: string, id: string | undefined) { + return useQuery({ + queryKey: qk.docChunks(project, id ?? ""), + enabled: !!id, + queryFn: async () => { + const res = await apiFetch( + `/api/documents/${encodeURIComponent(id!)}/chunks?project=${encodeURIComponent(project)}`, + ); + return Array.isArray(res) ? res : []; + }, + }); +} diff --git a/ui/src/hooks/api/useGraph.ts b/ui/src/hooks/api/useGraph.ts index 7e70ac9..aa1cc52 100644 --- a/ui/src/hooks/api/useGraph.ts +++ b/ui/src/hooks/api/useGraph.ts @@ -32,3 +32,29 @@ export function useNotesGraph(project: string) { }, }); } + +// Entity graph from the indexing pipeline (entities + relationships extracted +// by the LLM). Distinct from useNotesGraph, which surfaces wikilinks between +// hand-authored notes. +export function useEntityGraph(project: string) { + return useQuery({ + queryKey: qk.entityGraph(project), + queryFn: async (): Promise<{ nodes: GraphNode[]; edges: GraphEdge[] }> => { + const res = await apiFetch( + `/api/graph?project=${encodeURIComponent(project)}`, + ); + const rawNodes = res?.nodes ?? []; + const rawEdges = res?.edges ?? []; + const nodes: GraphNode[] = rawNodes.map((n) => ({ + id: n.id ?? "", + label: n.label ?? n.title ?? n.id ?? "", + kind: (n.kind as GraphNode["kind"]) ?? 
"entity", + })); + const ids = new Set(nodes.map((n) => n.id)); + const edges: GraphEdge[] = rawEdges + .filter((e) => ids.has(e.source) && ids.has(e.target)) + .map((e) => ({ source: e.source, target: e.target })); + return { nodes, edges }; + }, + }); +} diff --git a/ui/src/routes/Graph.tsx b/ui/src/routes/Graph.tsx index b7b4ab8..da56748 100644 --- a/ui/src/routes/Graph.tsx +++ b/ui/src/routes/Graph.tsx @@ -1,43 +1,92 @@ +import { useState } from "react"; import { GraphCanvas } from "@/components/graph/GraphCanvas"; -import { useNotesGraph } from "@/hooks/api/useGraph"; +import { useEntityGraph, useNotesGraph } from "@/hooks/api/useGraph"; import { useProjectStore } from "@/stores/project"; import { EmptyState, ErrorState, LoadingSkeleton } from "@/components/empty"; +type View = "entity" | "notes"; + export default function Graph() { const project = useProjectStore((s) => s.slug); - const { data, isLoading, error, refetch } = useNotesGraph(project); - const err = error as Error | null | undefined; + const entity = useEntityGraph(project); + const notes = useNotesGraph(project); + + // Default view: entity graph if it has nodes, else notes graph. Honour + // an explicit user toggle once made. + const [override, setOverride] = useState<View | null>(null); + const entityHasNodes = (entity.data?.nodes.length ?? 0) > 0; + const view: View = override ?? (entityHasNodes ? "entity" : "notes"); + const active = view === "entity" ? entity : notes; + const data = active.data; + const err = active.error as Error | null | undefined; + + const Toggle = () => ( +
+ + +
+ ); - if (isLoading) { + if (active.isLoading) { return ( -
- +
+ +
+ +
); } if (err) { return ( -
- refetch()} - /> +
+ +
+ active.refetch()} + /> +
); } if (!data || data.nodes.length === 0) { return ( -
- +
+ +
+ ? "Run `docsiq index <path>` followed by `docsiq index --finalize` to extract entities and relationships." : "Add markdown notes with [[wikilinks]] under this project to build the notes graph." } /> +
); } return (
+
); diff --git a/ui/src/routes/documents/DocumentView.tsx b/ui/src/routes/documents/DocumentView.tsx index 5ef22d0..ed789e9 100644 --- a/ui/src/routes/documents/DocumentView.tsx +++ b/ui/src/routes/documents/DocumentView.tsx @@ -1,28 +1,45 @@ +import { useMemo } from "react"; import { useParams } from "react-router-dom"; -import { useDoc } from "@/hooks/api/useDocs"; +import MarkdownIt from "markdown-it"; +import { useDoc, useDocChunks } from "@/hooks/api/useDocs"; import { useProjectStore } from "@/stores/project"; import { EmptyState, ErrorState, LoadingSkeleton } from "@/components/empty"; import { useDocumentTitle } from "@/hooks/useDocumentTitle"; +const md = new MarkdownIt({ html: false, linkify: true, breaks: false }); + export default function DocumentView() { const { id } = useParams(); const project = useProjectStore((s) => s.slug); const { data, isLoading, error, refetch } = useDoc(project, id); + const { data: chunks, isLoading: chunksLoading } = useDocChunks(project, id); const err = error as Error | null | undefined; const docLabel = data?.title || data?.path; useDocumentTitle(docLabel ? [docLabel, "Documents"] : undefined); + const orderedChunks = useMemo( + () => (chunks ? [...chunks].sort((a, b) => a.chunk_index - b.chunk_index) : []), + [chunks], + ); + + const renderedHTML = useMemo(() => { + if (orderedChunks.length === 0) return ""; + const text = orderedChunks.map((c) => c.content).join("\n\n"); + const isMarkdown = data?.doc_type === "md" || data?.doc_type === "markdown"; + return isMarkdown ? md.render(text) : ""; + }, [orderedChunks, data?.doc_type]); + if (isLoading) { return ( -
+
); } if (err) { return ( -
+
+
); } + return ( -
-

{data.title || data.path}

-
- {data.doc_type} · v{data.version} -
+
+
+

{data.title || data.path}

+
+ {data.doc_type} · v{data.version} + {orderedChunks.length > 0 && ` · ${orderedChunks.length} chunk${orderedChunks.length === 1 ? "" : "s"}`} +
+
+ + {chunksLoading ? ( + + ) : orderedChunks.length === 0 ? ( + + ) : renderedHTML ? ( +
+ ) : ( +
+          {orderedChunks.map((c) => c.content).join("\n\n")}
+        
+ )}
); }