Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: nightly

# Runs scale-tagged tests (large payloads, 10k HNSW recall benchmark,
# 1000-note scale) that are excluded from every PR's CI to keep the
# feedback loop fast. Also usable via workflow_dispatch for one-off
# checks after a perf-sensitive change.
on:
  schedule:
    - cron: "0 6 * * *" # 06:00 UTC daily
  workflow_dispatch:

permissions: read-all

jobs:
  scale-tests:
    name: go scale tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    env:
      # CGO is needed for the sqlite_fts5 build tag. Set once at job
      # scope so every step agrees; steps must not re-set it inline.
      CGO_ENABLED: "1"
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
        with:
          go-version-file: go.mod

      - name: Go build cache
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
        with:
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod
          key: ${{ runner.os }}-go-scale-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-scale-

      # Scale tests need the embedded UI; build a minimal ui/dist so
      # //go:embed is satisfied. We don't care about the frontend bundle
      # contents here — just that something compiles.
      - name: Seed ui/dist placeholder
        run: |
          mkdir -p ui/dist
          printf '<!doctype html><title>nightly</title>' > ui/dist/index.html

      - name: go test scale (no -race; workloads are sequential)
        # CGO_ENABLED=1 comes from the job-level env above; it was
        # previously duplicated inline here.
        run: |
          go test \
            -tags "sqlite_fts5 scale" \
            -timeout 1200s \
            $(go list ./... | grep -v /ui/node_modules/)
48 changes: 0 additions & 48 deletions internal/api/notes_import_limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ import (
"bytes"
"compress/gzip"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)

Expand Down Expand Up @@ -73,49 +71,3 @@ func TestImportTar_EntryCountCap(t *testing.T) {
}
}

// TestImportTar_TotalBytesCap is a regression test for P0-3.
// A tar whose total uncompressed bytes across entries exceed
// MaxImportTotalBytes must be rejected with 413.
func TestImportTar_TotalBytesCap(t *testing.T) {
	if testing.Short() {
		// TODO(#62): large-tar import test skipped under -short; tracked in flake-register.
		t.Skip("skipping large-tar test in -short mode")
	}
	h, slug, _ := setupNotesRouter(t)

	// Each entry stays just under the per-entry cap MaxNoteBytes (10 MB),
	// so only the cross-entry total cap MaxImportTotalBytes (500 MB) can
	// trip: ~52 entries × ~10 MB ≈ 520 MB exceeds it.
	// Build each entry's body once and reuse.
	perEntry := 10 * 1024 * 1024 // 10 MB, equal to MaxNoteBytes
	// Use slightly less to satisfy per-entry cap but still accumulate
	// fast.
	body := make([]byte, perEntry-1)
	for i := range body {
		body[i] = 'x'
	}
	// Enough entries to push the running uncompressed total safely past
	// MaxImportTotalBytes (+3 gives margin over exact division).
	entriesNeeded := int(MaxImportTotalBytes/int64(perEntry-1)) + 3
	entries := make([]tarEntry, entriesNeeded)
	for i := range entriesNeeded {
		entries[i] = tarEntry{
			name: fmt.Sprintf("big-%03d.md", i),
			body: body,
		}
	}
	tarBytes := newTarGz(t, entries)
	req := httptest.NewRequest(http.MethodPost,
		"/api/projects/"+slug+"/import", bytes.NewReader(tarBytes))
	req.Header.Set("Content-Type", "application/gzip")
	rec := httptest.NewRecorder()
	h.ServeHTTP(rec, req)

	if rec.Code != http.StatusRequestEntityTooLarge {
		t.Fatalf("expected 413 for over-total-bytes tar, got %d body=%s",
			rec.Code, rec.Body.String())
	}
	// The response body's wording is not part of the contract; just log
	// it when it doesn't mention the total/bytes cap.
	if !strings.Contains(strings.ToLower(rec.Body.String()), "total") &&
		!strings.Contains(strings.ToLower(rec.Body.String()), "bytes") {
		t.Logf("body=%s", rec.Body.String())
	}
	// NOTE(review): this blank use exists only to keep the io import
	// alive; presumably leftover scaffolding — confirm and remove it
	// together with the import.
	_ = io.EOF
}
57 changes: 57 additions & 0 deletions internal/api/notes_import_scale_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//go:build scale

package api

import (
"bytes"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)

// TestImportTar_TotalBytesCap is a regression test for P0-3 kept behind
// the `scale` build tag. It allocates ~520 MB of tar payload; the
// dedicated nightly workflow runs it via `-tags "sqlite_fts5 scale"`.
// Default PR CI does not compile this file.
func TestImportTar_TotalBytesCap(t *testing.T) {
h, slug, _ := setupNotesRouter(t)

// Each entry is just under MaxNoteBytes (10 MB). Two 256 MB entries
// would still fit under MaxImportTotalBytes (500 MB); we need > 500
// MB total. Use 52 entries × 10 MB = 520 MB — exceeds the cap.
// Build each entry's body once and reuse.
perEntry := 10 * 1024 * 1024 // 10 MB, equal to MaxNoteBytes
// Use slightly less to satisfy per-entry cap but still accumulate
// fast.
body := make([]byte, perEntry-1)
for i := range body {
body[i] = 'x'
}
entriesNeeded := int(MaxImportTotalBytes/int64(perEntry-1)) + 3
entries := make([]tarEntry, entriesNeeded)
for i := range entriesNeeded {
entries[i] = tarEntry{
name: fmt.Sprintf("big-%03d.md", i),
body: body,
}
}
tarBytes := newTarGz(t, entries)
req := httptest.NewRequest(http.MethodPost,
"/api/projects/"+slug+"/import", bytes.NewReader(tarBytes))
req.Header.Set("Content-Type", "application/gzip")
rec := httptest.NewRecorder()
h.ServeHTTP(rec, req)

if rec.Code != http.StatusRequestEntityTooLarge {
t.Fatalf("expected 413 for over-total-bytes tar, got %d body=%s",
rec.Code, rec.Body.String())
}
if !strings.Contains(strings.ToLower(rec.Body.String()), "total") &&
!strings.Contains(strings.ToLower(rec.Body.String()), "bytes") {
t.Logf("body=%s", rec.Body.String())
}
_ = io.EOF
}
28 changes: 28 additions & 0 deletions internal/notes/notes_scale_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//go:build scale

package notes

import (
"fmt"
"testing"
)

// TestScale_1000Notes writes 1000 notes across 10 buckets and verifies
// the key listing. Gated behind the `scale` build tag so default PR CI
// stays fast; the nightly workflow runs it via `-tags "sqlite_fts5 scale"`.
func TestScale_1000Notes(t *testing.T) {
dir := t.TempDir()
for i := 0; i < 1000; i++ {
k := fmt.Sprintf("bucket%d/note%d", i%10, i)
if err := Write(dir, &Note{Key: k, Content: "x"}); err != nil {
t.Fatalf("write %d: %v", i, err)
}
}
keys, err := ListKeys(dir)
if err != nil {
t.Fatal(err)
}
if len(keys) != 1000 {
t.Errorf("expected 1000, got %d", len(keys))
}
}
21 changes: 0 additions & 21 deletions internal/notes/notes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,27 +225,6 @@ func TestUnicodeKey(t *testing.T) {
}
}

// TestScale_1000Notes writes 1000 notes spread over 10 bucket prefixes
// and checks that ListKeys reports every one of them.
func TestScale_1000Notes(t *testing.T) {
	if testing.Short() {
		// TODO(#63): 1000-note scale test skipped under -short; tracked in flake-register.
		t.Skip("skipping 1000-note scale test in -short mode")
	}
	dir := t.TempDir()
	for i := 0; i < 1000; i++ {
		// i%10 fans the notes out across bucket0..bucket9 prefixes.
		k := fmt.Sprintf("bucket%d/note%d", i%10, i)
		if err := Write(dir, &Note{Key: k, Content: "x"}); err != nil {
			t.Fatalf("write %d: %v", i, err)
		}
	}
	keys, err := ListKeys(dir)
	if err != nil {
		t.Fatal(err)
	}
	if len(keys) != 1000 {
		t.Errorf("expected 1000, got %d", len(keys))
	}
}

func TestFrontmatterPreserved(t *testing.T) {
dir := t.TempDir()
n := &Note{
Expand Down
83 changes: 83 additions & 0 deletions internal/vectorindex/hnsw_recall_scale_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//go:build scale

package vectorindex

import (
	"fmt"
	"math"
	"math/rand"
	"testing"
)

// normalizeVec returns a new slice containing v scaled to unit L2 norm.
// A zero vector cannot be normalized, so it is returned as an unscaled
// copy — the previous version aliased the input in that case, which
// contradicted the "(new slice)" contract. Lives in this scale-tagged
// file because TestHNSW_Recall10k is its only caller.
func normalizeVec(v []float32) []float32 {
	var sumSq float32
	for _, x := range v {
		sumSq += x * x
	}
	out := make([]float32, len(v))
	if sumSq == 0 {
		// Zero norm: copy through unchanged so callers always own the
		// returned slice.
		copy(out, v)
		return out
	}
	norm := float32(math.Sqrt(float64(sumSq)))
	for i, x := range v {
		out[i] = x / norm
	}
	return out
}

// TestHNSW_Recall10k is a recall benchmark: 10,000 indexed vectors,
// 20 query probes, and recall@10 must stay at or above 0.95. The whole
// workload runs on a single goroutine — there is nothing for the race
// detector to observe — so nightly invokes it WITHOUT -race.
// Concurrency correctness is covered by TestHNSW_ConcurrentAddSearch,
// which runs on every PR.
//
// Gated behind the `scale` build tag; the nightly workflow runs it via
// `-tags "sqlite_fts5 scale"`.
func TestHNSW_Recall10k(t *testing.T) {
	const (
		n   = 10_000
		dim = 384
		q   = 20 // number of query probes
		k   = 10
	)
	rng := rand.New(rand.NewSource(7))
	// Construction/search ef are raised above the defaults (16/200/50):
	// random vectors at the defaults land around ~0.85 recall, which is
	// too noisy to gate a nightly job on.
	idx := NewHNSW(32, 400, 400)
	corpus := make(map[string][]float32, n)
	for i := range n {
		id := fmt.Sprintf("v%d", i)
		vec := normalizeVec(randomVec(rng, dim))
		corpus[id] = vec
		if err := idx.Add(id, vec); err != nil {
			t.Fatal(err)
		}
	}

	var recallSum float64
	for probe := 0; probe < q; probe++ {
		query := normalizeVec(randomVec(rng, dim))
		want := make(map[string]bool, k)
		for _, id := range bruteForceTopK(query, corpus, k) {
			want[id] = true
		}
		got, err := idx.Search(query, k)
		if err != nil {
			t.Fatal(err)
		}
		matched := 0
		for _, hit := range got {
			if want[hit.ID] {
				matched++
			}
		}
		recallSum += float64(matched) / float64(k)
	}
	recall := recallSum / float64(q)
	t.Logf("HNSW recall@10 over %d queries (N=%d, dim=%d) = %.3f", q, n, dim, recall)
	if recall < 0.95 {
		t.Fatalf("recall@10 = %.3f, want >= 0.95", recall)
	}
}
Loading
Loading