-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrouter.go
More file actions
430 lines (384 loc) · 15.4 KB
/
router.go
File metadata and controls
430 lines (384 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
package api
import (
"context"
"fmt"
"io/fs"
"log/slog"
"net/http"
"path"
"runtime/debug"
"strings"
"time"
"github.com/RandomCodeSpace/docsiq/internal/config"
"github.com/RandomCodeSpace/docsiq/internal/embedder"
"github.com/RandomCodeSpace/docsiq/internal/llm"
"github.com/RandomCodeSpace/docsiq/internal/mcp"
"github.com/RandomCodeSpace/docsiq/internal/obs"
"github.com/RandomCodeSpace/docsiq/internal/project"
"github.com/RandomCodeSpace/docsiq/internal/workq"
"github.com/RandomCodeSpace/docsiq/ui"
)
// RouterOption configures NewRouter. Zero-or-more options are appended to the
// existing positional arguments without breaking any existing call site.
type RouterOption func(*routerOptions)

// routerOptions collects the optional dependencies a RouterOption can
// inject. NewRouter starts from the zero value (all nil) and applies
// each option in order; nil fields select the documented fallbacks.
type routerOptions struct {
	vecIndexes *VectorIndexes // per-project HNSW index cache; nil → brute-force search per request
	stores     *projectStores // shared per-project store cache; nil → NewRouter allocates its own
	workq      *workq.Pool    // bounded background-indexing pool; nil → detached goroutine (dev/test)
}
// WithVectorIndexes wires a per-project HNSW index cache into the
// search handlers and MCP server. Passing nil (the default) leaves the
// cache unset, in which case LocalSearch falls back to brute-force
// scanning on every request.
func WithVectorIndexes(vi *VectorIndexes) RouterOption {
	return func(o *routerOptions) {
		o.vecIndexes = vi
	}
}
// WithWorkq injects a bounded worker pool for background indexing jobs.
// A nil pool (the default) makes upload() fall back to a detached
// goroutine — the dev/test path.
func WithWorkq(p *workq.Pool) RouterOption {
	return func(o *routerOptions) {
		o.workq = p
	}
}
// WithProjectStores lets callers inject a pre-built ProjectStores
// cache so they can close it at shutdown. A nil argument (the default)
// is ignored, causing NewRouter to allocate its own cache — fine for
// tests, but real servers should supply one for controlled teardown.
func WithProjectStores(p *ProjectStores) RouterOption {
	return func(o *routerOptions) {
		if p == nil {
			return
		}
		o.stores = p.inner()
	}
}
// NewRouter builds the single http.ServeMux with all routes.
//
// Wave-2 signature change: the long-lived *store.Store positional
// argument is gone. Handlers resolve per-project stores via a shared
// Storer (the projectStores cache). Callers that want lifecycle control
// over that cache can inject it with WithProjectStores; otherwise one
// is created internally (leaked for process lifetime — fine for tests).
//
// Parameters:
//   - prov: LLM provider; may be nil ("provider=none"), which disables
//     the /mcp endpoint (503) and omits the LLM pinger from /readyz.
//   - emb: embedder shared by the REST handlers and the MCP server.
//   - cfg: server configuration (DataDir, DefaultProject, Server.APIKey).
//   - registry: project registry backing /api/projects, the hook
//     routes, and the project middleware.
//   - opts: optional RouterOption values; see WithVectorIndexes,
//     WithWorkq, WithProjectStores.
//
// The returned handler is the mux wrapped in the middleware stack
// documented at the bottom of this function.
func NewRouter(prov llm.Provider, emb *embedder.Embedder, cfg *config.Config, registry *project.Registry, opts ...RouterOption) http.Handler {
	// Apply functional options over zero-value defaults.
	ro := &routerOptions{}
	for _, opt := range opts {
		opt(ro)
	}
	stores := ro.stores
	if stores == nil {
		stores = newProjectStores(cfg.DataDir)
	}
	h := &handlers{
		stores:     stores,
		provider:   prov,
		embedder:   emb,
		cfg:        cfg,
		vecIndexes: ro.vecIndexes,
		workq:      ro.workq,
	}
	nh := newNotesHandlersWithStores(stores, cfg, registry)
	ph := &projectsHandler{registry: registry}
	mux := http.NewServeMux()
	// Public liveness + readiness probes. /healthz is dependency-free
	// (process-is-running); /readyz aggregates a SQLite ping + LLM reach
	// check with a 10s in-memory cache. Both are registered on the mux
	// and also explicitly bypassed by bearerAuthMiddleware.
	mux.Handle("GET /healthz", healthzHandler())
	{
		// Default-project store is the representative SQLite shard — a
		// failure here means the whole server is hosed. Resolve lazily
		// at handler-build time so tests that pass nil stores still work.
		defaultSlug := cfg.DefaultProject
		if defaultSlug == "" {
			defaultSlug = "_default"
		}
		// Lazy SQLite pinger: resolve the default store at probe time, not
		// at router-build time. This lets /readyz flip green once the
		// default store becomes available without a restart, and — more
		// importantly — surfaces a genuine Open failure (permissions,
		// corrupt DB, disk full) as 503 instead of masking it with a
		// hard-coded success.
		sq := healthPingerFuncForRouter(func(ctx context.Context) error {
			if stores == nil {
				return fmt.Errorf("project stores not configured")
			}
			st, err := stores.Get(defaultSlug)
			if err != nil {
				return fmt.Errorf("open default store %q: %w", defaultSlug, err)
			}
			if st == nil {
				return fmt.Errorf("nil default store %q", defaultSlug)
			}
			return st.DB().PingContext(ctx)
		})
		// llmp stays nil when no provider is configured — NOTE(review):
		// presumably readyzHandler treats a nil pinger as "skip the LLM
		// check"; confirm against its implementation.
		var llmp llmPinger
		if prov != nil {
			llmp = providerPinger{prov: prov}
		}
		mux.Handle("GET /readyz", readyzHandler(sq, llmp))
	}
	// Back-compat alias: GET /health was the pre-Block-4 probe. Clients
	// that haven't migrated to /healthz still get a 200.
	mux.Handle("GET /health", healthzHandler())
	// Prometheus scrape endpoint — public, NOT gated by auth or project
	// middleware (auth/project explicitly bypass /metrics below).
	// TODO(docsiq): P2-2 consider optional scrape token via cfg.Server.MetricsKey
	mux.Handle("GET /metrics", metricsHandler(registry, stores, cfg))
	// Version metadata — public, no auth. Used for operator diagnostics
	// and CI tooling ("what's running in prod?"). No secrets exposed.
	mux.Handle("GET /api/version", versionHandler())
	// MCP Streamable HTTP transport (POST /mcp, GET /mcp for SSE stream).
	// When prov is nil (provider=none) we omit the MCP server entirely and
	// return 503 on /mcp — the notes/graph/tree tools inside the MCP server
	// do not need LLM, but the search and upload tools do; rather than
	// partial registration (which would silently return errors on those
	// tools), we gate the whole MCP endpoint on LLM availability. Clients
	// that discover tools via /mcp will receive a clear HTTP 503 instead of
	// a confusing empty tool list.
	if prov != nil {
		mcpServer := mcp.New(stores, prov, emb, cfg, registry, mcp.WithVectorIndexes(ro.vecIndexes))
		mux.Handle("/mcp", mcpServer.Handler())
	} else {
		mux.HandleFunc("/mcp", func(w http.ResponseWriter, r *http.Request) {
			writeJSON(w, http.StatusServiceUnavailable, map[string]string{
				"error": "LLM not configured; set llm.provider in config",
				"code":  "llm_disabled",
			})
		})
	}
	// Session exchange — public (is the auth boundary).
	// POST exchanges a bearer key for a docsiq_session httpOnly cookie.
	// DELETE clears the cookie (logout).
	mux.HandleFunc("POST /api/session", newSessionHandler(cfg.Server.APIKey))
	mux.HandleFunc("DELETE /api/session", newSessionDeleteHandler())
	// REST API — docs pipeline (Phase-0)
	mux.HandleFunc("GET /api/stats", h.getStats)
	mux.HandleFunc("GET /api/documents", h.listDocuments)
	mux.HandleFunc("GET /api/documents/{id}", h.getDocument)
	mux.HandleFunc("GET /api/documents/{id}/chunks", h.getDocumentChunks)
	mux.HandleFunc("GET /api/documents/{id}/versions", h.getDocumentVersions)
	mux.HandleFunc("POST /api/search", h.search)
	mux.HandleFunc("GET /api/graph", h.entityGraph)
	mux.HandleFunc("GET /api/graph/neighborhood", h.graphNeighborhood)
	mux.HandleFunc("GET /api/entities", h.listEntities)
	mux.HandleFunc("GET /api/communities", h.listCommunities)
	mux.HandleFunc("GET /api/communities/{id}", h.getCommunity)
	mux.HandleFunc("GET /api/entities/{id}/claims", h.claimsForEntity)
	mux.HandleFunc("GET /api/claims", h.listClaims)
	mux.HandleFunc("POST /api/upload", h.upload)
	mux.HandleFunc("GET /api/upload/progress", h.uploadProgress)
	// REST API — project registry (Phase-4). Thin shim for UI dropdown.
	mux.HandleFunc("GET /api/projects", ph.listProjects)
	// REST API — notes (Phase-2). Every endpoint takes a project slug
	// in the path. The project middleware still runs and resolves
	// ?project= / X-Project; these handlers prefer the path value but
	// fall back to ProjectFromContext when it is empty.
	mux.HandleFunc("GET /api/projects/{project}/notes", nh.listNotes)
	mux.HandleFunc("GET /api/projects/{project}/notes/{key...}", nh.readNote)
	mux.HandleFunc("PUT /api/projects/{project}/notes/{key...}", nh.writeNote)
	mux.HandleFunc("DELETE /api/projects/{project}/notes/{key...}", nh.deleteNote)
	mux.HandleFunc("GET /api/projects/{project}/tree", nh.tree)
	mux.HandleFunc("GET /api/projects/{project}/search", nh.searchNotes)
	mux.HandleFunc("GET /api/projects/{project}/graph", nh.graph)
	mux.HandleFunc("GET /api/projects/{project}/export", nh.export)
	mux.HandleFunc("POST /api/projects/{project}/import", nh.importTar)
	// REST API — hooks (Phase-3). SessionStart is the only handler; it
	// resolves a git remote to a registered project slug and returns an
	// "additionalContext" blob the AI client can inject into its prompt.
	registerHookRoutes(mux, registry)
	// Embedded UI — catch-all; spaHandler 404s anything under /api/ or
	// /mcp that fell through to it.
	mux.Handle("/", spaHandler(ui.Assets, cfg))
	// Middleware ordering (outermost → innermost):
	// securityHeaders → logging → recovery → auth → project → mux
	// securityHeaders sits outermost so CSP + baseline headers are
	// applied to every response (including 401s, 404s, and panic
	// recoveries). project scope sits BELOW auth (an unauthenticated
	// caller never reaches the registry) and ABOVE the mux (so handlers
	// and the MCP server see the resolved slug via ProjectFromContext).
	// Block 3.2: requestTimeoutMiddleware sits INSIDE securityHeaders
	// (so 503 timeouts still carry CSP) and OUTSIDE loggingMiddleware
	// (so operators still see the latency spike in logs).
	return securityHeadersMiddleware(cfg)(
		requestTimeoutMiddleware(cfg)(
			loggingMiddleware(
				recoveryMiddleware(
					bearerAuthMiddleware(cfg.Server.APIKey,
						projectMiddleware(cfg, registry, mux))))))
}
// spaHandler serves the embedded UI: real asset paths (anything with a
// file extension) go to the file server, every other path falls back to
// index.html so client-side routing works. /api/ and /mcp paths that
// reach this handler are unknown routes and get a plain 404.
func spaHandler(assets fs.FS, _ *config.Config) http.Handler {
	static := http.FileServer(http.FS(assets))
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		urlPath := r.URL.Path
		if strings.HasPrefix(urlPath, "/api/") || strings.HasPrefix(urlPath, "/mcp") {
			http.NotFound(w, r)
			return
		}
		// path.Clean here normalises a URL path for SPA-vs-asset
		// classification — it is not the security boundary. The handler
		// only reads from `assets fs.FS`, which is fs.Sub(embed.FS, "dist"):
		// io/fs rejects any path containing ".." via fs.ValidPath, and
		// embed.FS holds only compile-time files, so traversal cannot
		// reach the host filesystem whatever this value becomes.
		// nosemgrep: go.lang.security.filepath-clean-misuse.filepath-clean-misuse
		clean := strings.TrimPrefix(path.Clean(urlPath), "/")
		switch clean {
		case "", ".":
			clean = "index.html"
		}
		// A dot in the final path segment marks a static asset request.
		if clean != "index.html" && strings.Contains(path.Base(clean), ".") {
			static.ServeHTTP(w, r)
			return
		}
		index, err := fs.ReadFile(assets, "index.html")
		if err != nil {
			http.Error(w, "index.html not found", http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write(index)
	})
}
// recoveryMiddleware converts a handler panic into a 500 response plus
// one structured error log carrying route, method, panic value, full
// stack, and — when present on the context — request ID and user. The
// enriched log surface is Block 3.7's requirement: during a production
// panic you need enough context to reconstruct the request without
// tailing raw stderr.
func recoveryMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer func() {
			rec := recover()
			if rec == nil {
				return
			}
			// Optional context values surface as "" when absent and are
			// simply left out of the attribute list.
			reqID := RequestIDFromContext(r.Context())
			user, _ := r.Context().Value(ctxUserKey{}).(string)
			attrs := []any{
				"route", r.URL.Path,
				"method", r.Method,
				"panic", fmt.Sprint(rec),
				"stack", string(debug.Stack()),
			}
			if reqID != "" {
				attrs = append(attrs, "req_id", reqID)
			}
			if user != "" {
				attrs = append(attrs, "user", user)
			}
			slog.Error("❌ panic recovered", attrs...)
			http.Error(w, "internal server error", http.StatusInternalServerError)
		}()
		next.ServeHTTP(w, r)
	})
}
// loggingMiddleware assigns a per-request ID, records Prometheus
// metrics, and emits one structured "http" log line per request. The
// log emission is deferred so that a panic escaping recoveryMiddleware
// (e.g. a panic in securityHeadersMiddleware) is still observable.
func loggingMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Request ID: header pass-through, otherwise generate fresh
		// 16-hex (8 random bytes). Put on ctx + echo back as response
		// header.
		rid := strings.TrimSpace(r.Header.Get("X-Request-ID"))
		if rid == "" {
			rid = newRequestID()
		}
		ctx := context.WithValue(r.Context(), ctxRequestIDKey{}, rid)
		r = r.WithContext(ctx)
		w.Header().Set("X-Request-ID", rid)
		// Wrap the writer so status + bytes become observable; status
		// starts at 200 because handlers that never call WriteHeader
		// get an implicit 200 from net/http.
		rw := &responseWriter{ResponseWriter: w, status: http.StatusOK}
		start := time.Now()
		// Defer the access log + metric emission so a panic still
		// produces an observation. recoveryMiddleware catches the panic
		// and writes a 500; we then see status=500 in the log. If a
		// panic escapes recoveryMiddleware, Go still unwinds through
		// our deferred func before the goroutine dies, so the log is
		// emitted with whatever status had been set.
		defer func() {
			duration := time.Since(start)
			recErr := recover()
			// /metrics is self-referential — exclude to avoid scraping
			// feedback loops.
			if r.URL.Path != "/metrics" && obs.HTTP != nil {
				// r.Pattern is the matched ServeMux pattern (Go 1.22+);
				// empty when no registered route matched.
				route := r.Pattern
				if route == "" {
					route = "unknown"
				}
				obs.HTTP.Observe(route, r.Method, rw.status, duration)
			}
			// Severity: 5xx or an in-flight panic → error, 4xx → warn,
			// everything else → info.
			level := slog.LevelInfo
			if rw.status >= 500 || recErr != nil {
				level = slog.LevelError
			} else if rw.status >= 400 {
				level = slog.LevelWarn
			}
			attrs := []any{
				"req_id", rid,
				"method", r.Method,
				"path", r.URL.Path,
				"route", r.Pattern,
				"status", rw.status,
				"duration_ms", duration.Milliseconds(),
				"bytes_out", rw.bytes,
				"auth", classifyAuth(r),
			}
			if project := ProjectFromContext(r.Context()); project != "" {
				attrs = append(attrs, "project", project)
			}
			if recErr != nil {
				attrs = append(attrs, "panic", recErr)
			}
			slog.Log(r.Context(), level, "http", attrs...)
			if recErr != nil {
				// Re-raise: let upstream recovery middleware (or the
				// std library) handle the actual HTTP error response.
				panic(recErr)
			}
		}()
		next.ServeHTTP(rw, r)
	})
}
// classifyAuth reports a coarse auth-method label for the access log.
// docsiq uses a single shared API key (no per-user identity), so the
// label names the channel the client used rather than a user_id.
func classifyAuth(r *http.Request) string {
	authHeader := strings.TrimSpace(r.Header.Get("Authorization"))
	if strings.HasPrefix(authHeader, "Bearer ") {
		return "bearer"
	}
	c, err := r.Cookie(sessionCookieName)
	if err == nil && c.Value != "" {
		return "cookie"
	}
	return "anon"
}
// responseWriter wraps http.ResponseWriter to capture status code and
// bytes written. Both are read by loggingMiddleware for the access log
// and by Prometheus. bytes is tracked via Write; implicit 200-only
// writes go through WriteHeader, so we default status to 200.
type responseWriter struct {
http.ResponseWriter
status int
bytes int64
wroteHeader bool
}
func (rw *responseWriter) WriteHeader(code int) {
rw.status = code
rw.wroteHeader = true
rw.ResponseWriter.WriteHeader(code)
}
func (rw *responseWriter) Write(p []byte) (int, error) {
if !rw.wroteHeader {
// Implicit 200 per net/http contract.
rw.wroteHeader = true
}
n, err := rw.ResponseWriter.Write(p)
rw.bytes += int64(n)
return n, err
}
// Flush passes through to the underlying writer when it supports it —
// required for SSE and streaming responses. Standard http.ResponseWriter
// does NOT have Flush in its interface, so the type assertion is the
// correct idiom.
func (rw *responseWriter) Flush() {
if f, ok := rw.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
}