Skip to content
13 changes: 10 additions & 3 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,22 +188,29 @@ var serveCmd = &cobra.Command{
}()

<-ctx.Done()
slog.Info("🛑 shutting down...")
shutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
// Block 3.7: explicit per-phase log lines so operators can see
// exactly where a stuck shutdown is hung. Deadline raised from
// 10s to 30s to match the workq drain phase and the spec.
slog.Info("🛑 shutting down HTTP server...")
shutCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
if err := srv.Shutdown(shutCtx); err != nil {
slog.Error("❌ shutdown error", "err", err)
slog.Error("❌ HTTP server shutdown error", "err", err)
return err
}
slog.Info("✅ HTTP server shutdown complete")

// Drain workq within its own 30s deadline. Server.Shutdown has already
// stopped accepting new HTTP requests, so no new jobs can be submitted;
// all that remains is letting in-flight pipelines finish or honour the
// cancelled ctx.
slog.Info("🛑 draining workq...")
drainCtx, drainCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer drainCancel()
if err := pool.Close(drainCtx); err != nil {
slog.Warn("⚠️ workq drain timeout; some indexing jobs were cancelled mid-flight", "err", err)
} else {
slog.Info("✅ workq drained")
}

slog.Info("✅ shutdown complete")
Expand Down
4 changes: 4 additions & 0 deletions internal/api/itest/doubles.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ func (p *FakeProvider) Name() string { return "fake" }
// ModelID returns a stable model identifier that callers can log.
func (p *FakeProvider) ModelID() string { return "fake-model-v1" }

// BatchCeiling declares no ceiling — this fake accepts any batch size
// since it never makes a real upstream call. Block 3.4.
func (p *FakeProvider) BatchCeiling() int { return 0 }

// Complete returns a deterministic string derived from the prompt. The
// output embeds the sha256 prefix of the prompt so tests can assert the
// provider actually saw what they sent.
Expand Down
99 changes: 99 additions & 0 deletions internal/api/panic_enrichment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package api

import (
"bytes"
"context"
"log/slog"
"net/http"
"net/http/httptest"
"strings"
"testing"
)

// TestRecoveryMiddleware_EnrichedLog verifies that a panic is logged
// with req_id, route, method, user, and a stack trace. Block 3.7.
func TestRecoveryMiddleware_EnrichedLog(t *testing.T) {
// Capture slog output via a TextHandler into a buffer.
var buf bytes.Buffer
prev := slog.Default()
slog.SetDefault(slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{
Level: slog.LevelDebug,
})))
t.Cleanup(func() { slog.SetDefault(prev) })

panicky := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
panic("boom")
})
handler := recoveryMiddleware(panicky)

// Seed request with a request id (mimicking loggingMiddleware
// having already run) and a user ctx value.
req := httptest.NewRequest(http.MethodPost, "/api/documents/abc", nil)
ctx := context.WithValue(req.Context(), ctxRequestIDKey{}, "rid-test-123")
ctx = withUserForTest(ctx, "alice")
req = req.WithContext(ctx)

rec := httptest.NewRecorder()
handler.ServeHTTP(rec, req)

if rec.Code != http.StatusInternalServerError {
t.Fatalf("status = %d; want 500", rec.Code)
}

logOutput := buf.String()
for _, want := range []string{
"panic recovered",
"req_id=rid-test-123",
"method=POST",
"route=/api/documents/abc",
"user=alice",
"panic=boom",
} {
if !strings.Contains(logOutput, want) {
t.Errorf("log missing %q\nlog: %s", want, logOutput)
}
}
// A stack trace marker ("goroutine " or "runtime/panic.go") must
// appear somewhere in the log. slog serializes newlines as \n
// literals inside the stack attribute — either is acceptable.
if !strings.Contains(logOutput, "goroutine") && !strings.Contains(logOutput, "runtime/panic") {
t.Errorf("log missing stack trace marker\nlog: %s", logOutput)
}
}

// TestRecoveryMiddleware_NoUserNoReqID verifies the middleware still
// recovers cleanly when neither ctxUserKey nor request id are set.
func TestRecoveryMiddleware_NoUserNoReqID(t *testing.T) {
var buf bytes.Buffer
prev := slog.Default()
slog.SetDefault(slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{
Level: slog.LevelDebug,
})))
t.Cleanup(func() { slog.SetDefault(prev) })

panicky := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
panic("boom-bare")
})
handler := recoveryMiddleware(panicky)

req := httptest.NewRequest(http.MethodGet, "/", nil)
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, req)

if rec.Code != http.StatusInternalServerError {
t.Fatalf("status = %d; want 500", rec.Code)
}
logOutput := buf.String()
if strings.Contains(logOutput, "req_id=") {
t.Errorf("unexpected req_id attr on unset ctx\nlog: %s", logOutput)
}
if strings.Contains(logOutput, "user=") {
t.Errorf("unexpected user attr on unset ctx\nlog: %s", logOutput)
}
}

// withUserForTest is a test-only helper that injects a user id into
// ctx using the same key the real auth middleware would use.
func withUserForTest(ctx context.Context, user string) context.Context {
return context.WithValue(ctx, ctxUserKey{}, user)
}
6 changes: 6 additions & 0 deletions internal/api/request_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ import (
// struct keeps the key collision-free with other packages.
type ctxRequestIDKey struct{}

// ctxUserKey is the typed context key for the authenticated user ID.
// Reserved for future auth integration; today only the panic
// recoveryMiddleware reads it. Empty string means the request is
// unauthenticated (allowed on public routes). Block 3.7.
type ctxUserKey struct{}

// RequestIDFromContext returns the ID attached by loggingMiddleware. Empty
// string when called from a context that never hit the middleware (e.g.
// a direct handler call from a test that skips the router wrapper).
Expand Down
65 changes: 65 additions & 0 deletions internal/api/request_timeout.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package api

import (
"net/http"
"strings"

"github.com/RandomCodeSpace/docsiq/internal/config"
)

// isUploadRoute reports whether r should be granted UploadTimeout.
// The carve-out covers two long-running paths:
// - POST /api/upload — multipart document upload
// - POST /api/projects/{p}/import — tar / bulk notes import
//
// A /api/projects/* POST is only an upload if the trailing segment is
// /import — this avoids granting 10-minute timeouts to note-write POSTs
// on the same prefix. Block 3.2.
func isUploadRoute(r *http.Request) bool {
if r.Method != http.MethodPost {
return false
}
switch {
case r.URL.Path == "/api/upload":
return true
case strings.HasPrefix(r.URL.Path, "/api/projects/") && strings.HasSuffix(r.URL.Path, "/import"):
return true
}
return false
}

// requestTimeoutMiddleware wraps inner in http.TimeoutHandler with
// cfg.Server.RequestTimeout as the default bound, bumped to
// cfg.Server.UploadTimeout for upload routes.
//
// Zero timeout means "no cap" — useful for local dev. In that case
// inner is returned unchanged.
//
// Layering rationale (Block 3.2 comment): this middleware sits INSIDE
// securityHeadersMiddleware (so a 503 still carries CSP) and OUTSIDE
// loggingMiddleware (so the timeout is logged). See router.go.
func requestTimeoutMiddleware(cfg *config.Config) func(http.Handler) http.Handler {
return func(inner http.Handler) http.Handler {
reqTimeout := cfg.Server.RequestTimeout
upTimeout := cfg.Server.UploadTimeout

// Pre-build the two TimeoutHandler instances so each request
// just dispatches to one — no per-request allocation.
defaultTO := inner
if reqTimeout > 0 {
defaultTO = http.TimeoutHandler(inner, reqTimeout, "request timeout")
}
uploadTO := inner
if upTimeout > 0 {
uploadTO = http.TimeoutHandler(inner, upTimeout, "upload timeout")
}

return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if isUploadRoute(r) {
uploadTO.ServeHTTP(w, r)
return
}
defaultTO.ServeHTTP(w, r)
})
}
}
44 changes: 38 additions & 6 deletions internal/api/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package api

import (
"context"
"fmt"
"io/fs"
"log/slog"
"net/http"
"path"
"runtime/debug"
"strings"
"time"

Expand Down Expand Up @@ -166,11 +168,15 @@ func NewRouter(prov llm.Provider, emb *embedder.Embedder, cfg *config.Config, re
// recoveries). project scope sits BELOW auth (an unauthenticated
// caller never reaches the registry) and ABOVE the mux (so handlers
// and the MCP server see the resolved slug via ProjectFromContext).
// Block 3.2: requestTimeoutMiddleware sits INSIDE securityHeaders
// (so 503 timeouts still carry CSP) and OUTSIDE loggingMiddleware
// (so operators still see the latency spike in logs).
return securityHeadersMiddleware(cfg)(
loggingMiddleware(
recoveryMiddleware(
bearerAuthMiddleware(cfg.Server.APIKey,
projectMiddleware(cfg, registry, mux)))))
requestTimeoutMiddleware(cfg)(
loggingMiddleware(
recoveryMiddleware(
bearerAuthMiddleware(cfg.Server.APIKey,
projectMiddleware(cfg, registry, mux))))))
}

func spaHandler(assets fs.FS, _ *config.Config) http.Handler {
Expand Down Expand Up @@ -204,12 +210,38 @@ func spaHandler(assets fs.FS, _ *config.Config) http.Handler {
})
}

// recoveryMiddleware catches panics in handlers and returns a 500 response.
// recoveryMiddleware catches panics in handlers, logs them with
// request context (req_id, route, method, user if authed) plus the
// full stack, then returns a 500 response. The enriched log surface
// is Block 3.7's requirement: during a production panic you need
// enough context to reconstruct the request without tailing raw
// stderr.
func recoveryMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
defer func() {
if rec := recover(); rec != nil {
slog.Error("❌ panic recovered", "path", r.URL.Path, "panic", rec)
// Gather every piece of request context that exists on
// the ctx — any absent value surfaces as "" and gets
// filtered from the attr list.
rid := RequestIDFromContext(r.Context())
user, _ := r.Context().Value(ctxUserKey{}).(string)

stack := debug.Stack()

attrs := []any{
"route", r.URL.Path,
"method", r.Method,
"panic", fmt.Sprint(rec),
"stack", string(stack),
}
if rid != "" {
attrs = append(attrs, "req_id", rid)
}
if user != "" {
attrs = append(attrs, "user", user)
}

slog.Error("❌ panic recovered", attrs...)
http.Error(w, "internal server error", http.StatusInternalServerError)
}
}()
Expand Down
4 changes: 4 additions & 0 deletions internal/api/shutdown_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ import (
// goroutine blocks in a select on a channel that is not drained
// synchronously by *DB.Close(); go-sqlite3's docs acknowledge a
// brief post-Close window. Tolerated as a stdlib artifact.
// - database/sql.connectionCleaner — spawned by SetConnMaxLifetime
// (Block 3.6 sets this to 1h on every store). Exits when the
// *sql.DB is closed; same post-Close window as connectionOpener.
//
// goleak.IgnoreCurrent() baselines out any stdlib goroutines that
// predate test start (Go runtime housekeeping, test framework, etc.).
Expand All @@ -34,6 +37,7 @@ func TestShutdown_NoGoroutineLeaks(t *testing.T) {
goleak.IgnoreAnyFunction("net/http.(*persistConn).readLoop"),
goleak.IgnoreAnyFunction("net/http.(*persistConn).writeLoop"),
goleak.IgnoreAnyFunction("database/sql.(*DB).connectionOpener"),
goleak.IgnoreAnyFunction("database/sql.(*DB).connectionCleaner"),
goleak.IgnoreAnyFunction("internal/poll.runtime_pollWait"),
)

Expand Down
Loading
Loading