Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ jobs:
- name: Install UI dependencies
run: npm --prefix ui ci

- name: npm audit
run: npm --prefix ui audit --audit-level=moderate

- name: Type check
run: npm --prefix ui run typecheck

Expand Down Expand Up @@ -92,12 +95,60 @@ jobs:
- name: go vet (cgo + fts5)
run: CGO_ENABLED=1 go vet -tags sqlite_fts5 $(go list ./... | grep -v /ui/node_modules/)

- name: govulncheck
run: |
set -eu
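# govulncheck is a first-party golang.org/x module, so installing
# @latest is acceptable here. NOTE(review): with @latest there is no
# pinned version for dependabot to bump; pin a tagged release if CI
# runs must be reproducible.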
go install golang.org/x/vuln/cmd/govulncheck@latest
CGO_ENABLED=1 govulncheck -tags sqlite_fts5 ./...

- name: go test (cgo + fts5)
run: CGO_ENABLED=1 go test -tags sqlite_fts5 -timeout 300s $(go list ./... | grep -v /ui/node_modules/)

- name: go build (cgo + fts5)
run: CGO_ENABLED=1 go build -tags sqlite_fts5 -o docsiq ./

- name: flake-register (every t.Skip / test.skip has a tracked TODO)
  run: |
    set -euo pipefail
    # Every skip must be either:
    #   (a) on a line with an inline `// TODO(#N):` comment, OR
    #   (b) immediately preceded by a `// TODO(#N):` comment line.
    # Fuzz-callback skips (input filtering) are excluded: they are
    # not flake-register entries and carry no issue.
    # NOTE(review): only the literal `t.Skip(` and `test.skip(` spellings
    # are scanned; t.Skipf, t.SkipNow, it.skip and describe.skip are not.
    todo_re='// TODO\(#[0-9]+\):'
    violations=0

    # has_todo FILE LINENO: succeeds when that line of FILE carries a
    # tracked `// TODO(#N):` comment.
    has_todo() {
      sed -n "${2}p" "$1" | grep -qE "$todo_re"
    }

    # check_skips LABEL: reads `file:lineno:text` grep hits on stdin and
    # reports every hit that has no TODO on its own line or the line
    # directly above it. Runs in the current shell so the violations
    # counter survives the loop.
    check_skips() {
      local label=$1 file lineno prev
      while IFS=: read -r file lineno _; do
        if has_todo "$file" "$lineno"; then
          continue
        fi
        prev=$((lineno - 1))
        if [ "$prev" -gt 0 ] && has_todo "$file" "$prev"; then
          continue
        fi
        echo "::error file=$file,line=$lineno::$label without TODO(#N): annotation"
        violations=$((violations + 1))
      done
    }

    echo "Scanning for t.Skip( without a tracked TODO..."
    # Go side
    check_skips 't.Skip' < <(grep -rn 't\.Skip(' --include='*.go' . | grep -v '_fuzz_test\.go' | grep -v node_modules || true)
    # TypeScript side
    check_skips 'test.skip' < <(grep -rn 'test\.skip(' --include='*.ts' --include='*.tsx' ui/ 2>/dev/null | grep -v node_modules || true)

    if [ "$violations" -gt 0 ]; then
      echo "::error::Found $violations skipped test(s) without a tracking issue. File a flake-register issue and add // TODO(#N): <why> adjacent to the skip."
      exit 1
    fi
    echo "All skips accounted for."

- name: Upload docsiq binary
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ jobs:
targets=(
"./internal/crawler::FuzzResolveURL"
"./internal/chunker::FuzzChunker"
"./internal/store::FuzzSearchTokenize"
"./internal/mcp::FuzzMCPToolArgs"
)
for entry in "${targets[@]}"; do
pkg="${entry%%::*}"
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/RandomCodeSpace/docsiq

go 1.25.5
go 1.25.9

require (
github.com/google/uuid v1.6.0
Expand Down
1 change: 1 addition & 0 deletions internal/api/notes_import_limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ func TestImportTar_EntryCountCap(t *testing.T) {
// MaxImportTotalBytes must be rejected with 413.
func TestImportTar_TotalBytesCap(t *testing.T) {
if testing.Short() {
// TODO(#62): large-tar import test skipped under -short; tracked in flake-register.
t.Skip("skipping large-tar test in -short mode")
}
h, slug, _ := setupNotesRouter(t)
Expand Down
1 change: 1 addition & 0 deletions internal/hookinstaller/installer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ func TestClaudeInstaller(t *testing.T) {

t.Run("symlinked_config_is_written_through", func(t *testing.T) {
if runtime.GOOS == "windows" {
// TODO(#65): environmental skip (windows symlink admin); tracked in flake-register.
t.Skip("symlink support requires admin on Windows")
}
home := fakeHome(t)
Expand Down
164 changes: 164 additions & 0 deletions internal/llm/mock/mock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Package mock provides a deterministic llm.Provider implementation for
// tests. It does NOT require any network, API key, or external process.
// Callers import it directly (no build tag) — the package lives under
// internal/ so it cannot leak into the public API surface.
package mock

import (
"context"
"crypto/sha256"
"encoding/binary"
"fmt"
"math"
"strings"

"github.com/RandomCodeSpace/docsiq/internal/llm"
)

// DefaultDims is the default embedding dimensionality.
const DefaultDims = 128

// Provider is a deterministic, in-memory llm.Provider useful for unit
// and integration tests. It inspects the prompt for known substrings
// and returns a canned reply (JSON for extraction prompts, a
// TITLE:/SUMMARY: text report for community prompts); embeddings are
// derived from a SHA-256 of the input so equal text yields equal vectors.
type Provider struct {
// Dims is the length of every vector produced by Embed and EmbedBatch.
// A non-positive value makes the embedding code fall back to DefaultDims.
Dims int
}

// Compile-time check that *Provider satisfies llm.Provider.
var _ llm.Provider = (*Provider)(nil)

// New builds a mock Provider whose embeddings have dims components.
// Any non-positive dims (0 included) selects DefaultDims.
func New(dims int) *Provider {
	p := &Provider{Dims: DefaultDims}
	if dims > 0 {
		p.Dims = dims
	}
	return p
}

// Name reports the provider identifier, which is always "mock".
func (p *Provider) Name() string { return "mock" }

// ModelID reports the model identifier, which is always "mock-llm".
func (p *Provider) ModelID() string { return "mock-llm" }

// Complete picks a canned reply by searching the lower-cased prompt for
// keywords, tried in this order:
//
//  1. "knowledge graph" together with "entities": entity/relationship JSON
//  2. "claim": claims JSON
//  3. "community" or "summar": a TITLE:/SUMMARY: text report
//  4. anything else: an empty JSON object
//
// The reply shapes have to stay in step with what internal/extractor and
// internal/community parse; see entityPrompt in
// internal/extractor/entities.go and communityPrompt in
// internal/community/summarizer.go. Options are accepted but ignored. If
// ctx is already done, its error is returned with an empty string.
func (p *Provider) Complete(ctx context.Context, prompt string, _ ...llm.Option) (string, error) {
	if err := ctx.Err(); err != nil {
		return "", err
	}

	haystack := strings.ToLower(prompt)
	mentions := func(needle string) bool {
		return strings.Contains(haystack, needle)
	}

	if mentions("knowledge graph") && mentions("entities") {
		// Entity + relationship extraction, parsed downstream by
		// internal/extractor, so the schema must match exactly. Names
		// embed a hash of the prompt: different chunks produce different
		// graphs, and dedup later collapses them across the corpus.
		id := hashTag(prompt, 2)
		return fmt.Sprintf(`{
"entities": [
{"name": "Entity_%s_A", "type": "Concept", "description": "deterministic mock entity A"},
{"name": "Entity_%s_B", "type": "Concept", "description": "deterministic mock entity B"}
],
"relationships": [
{"source": "Entity_%s_A", "target": "Entity_%s_B", "predicate": "relates_to", "description": "mock edge", "weight": 1.0}
]
}`, id, id, id, id), nil
	}

	if mentions("claim") {
		id := hashTag(prompt, 2)
		return fmt.Sprintf(`{
"claims": [
{"subject": "Entity_%s_A", "predicate": "is", "object": "mock claim", "description": "deterministic"}
]
}`, id), nil
	}

	if mentions("community") || mentions("summar") {
		// parseCommunityReport looks for the "TITLE:" and "SUMMARY:" prefixes.
		return "TITLE: Mock community\nSUMMARY: A deterministic, test-only paragraph describing the community of entities in scope.", nil
	}

	// Unrecognised prompt: an empty JSON object lets the caller carry on
	// without a parse error.
	return `{}`, nil
}

// Embed maps text to a Dims-length vector computed by hashEmbedding, so
// identical text always produces an identical vector. When ctx is already
// done, its error is returned and no vector is computed.
func (p *Provider) Embed(ctx context.Context, text string) ([]float32, error) {
	err := ctx.Err()
	if err != nil {
		return nil, err
	}
	vec := hashEmbedding(text, p.Dims)
	return vec, nil
}

// EmbedBatch embeds every entry of texts with Embed and returns the
// vectors in input order. The first error reported by Embed aborts the
// batch and is returned together with a nil result.
func (p *Provider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	vectors := make([][]float32, 0, len(texts))
	for idx := range texts {
		vec, err := p.Embed(ctx, texts[idx])
		if err != nil {
			return nil, err
		}
		vectors = append(vectors, vec)
	}
	return vectors, nil
}

// hashEmbedding derives a deterministic, L2-normalised vector of length
// dims from text. A non-positive dims falls back to DefaultDims.
//
// Output block k is taken from SHA-256(text || k), where k is a
// little-endian uint32 counter starting at 0. Each digest yields eight
// components: every 4-byte little-endian word is reinterpreted as an int32
// and divided by float32(math.MaxInt32), which rounds to 2^31, so each
// component lies in [-1, 1]. The vector is then scaled to unit length so
// cosine similarity stays well defined.
//
// Cost is one SHA-256 over len(text)+4 bytes per eight components. A
// single scratch buffer (text plus counter slot) is reused for every
// digest instead of building a new hasher and digest slice per block.
func hashEmbedding(text string, dims int) []float32 {
	if dims <= 0 {
		dims = DefaultDims
	}
	out := make([]float32, dims)

	// msg holds text followed by a 4-byte counter slot that is rewritten
	// in place before each digest.
	msg := make([]byte, len(text)+4)
	copy(msg, text)
	ctr := msg[len(text):]

	i := 0
	for counter := uint32(0); i < dims; counter++ {
		binary.LittleEndian.PutUint32(ctr, counter)
		sum := sha256.Sum256(msg)
		// 32 digest bytes become up to 8 components; the i < dims guard
		// stops mid-digest when dims is not a multiple of 8.
		for j := 0; j < len(sum) && i < dims; j += 4 {
			u := binary.LittleEndian.Uint32(sum[j : j+4])
			out[i] = float32(int32(u)) / float32(math.MaxInt32)
			i++
		}
	}

	// L2-normalise; accumulate in float64 to limit rounding error.
	var norm float64
	for _, v := range out {
		norm += float64(v) * float64(v)
	}
	if norm == 0 {
		// Every component hashed to zero: return a fixed unit vector
		// rather than dividing by zero.
		out[0] = 1
		return out
	}
	inv := float32(1.0 / math.Sqrt(norm))
	for k := range out {
		out[k] *= inv
	}
	return out
}

// hashTag returns the lower-case hex encoding of the first n bytes of
// SHA-256(s), i.e. a string of 2*n characters. It is used as a short,
// stable identifier in canned entity names. n is clamped to
// [0, sha256.Size]: a negative n yields "" and an oversized n yields the
// full 64-character digest instead of panicking.
func hashTag(s string, n int) string {
	const hexDigits = "0123456789abcdef"
	sum := sha256.Sum256([]byte(s))
	switch {
	case n < 0:
		n = 0
	case n > len(sum):
		n = len(sum)
	}
	out := make([]byte, 0, 2*n)
	for _, b := range sum[:n] {
		out = append(out, hexDigits[b>>4], hexDigits[b&0x0f])
	}
	return string(out)
}
76 changes: 76 additions & 0 deletions internal/mcp/tools_fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//go:build sqlite_fts5

package mcp

import (
"encoding/json"
"strings"
"testing"
)

// FuzzMCPToolArgs checks that the argument-coercion helpers stringArg,
// intArg and projectArg (the `project` shortcut) never panic, whatever
// JSON payload an MCP client sends. Each fuzz input is decoded into
// map[string]any (the type the real handlers receive via
// mcpgo.CallToolRequest.GetArguments()) and every helper is then called
// with each known key plus one key that is deliberately absent.
func FuzzMCPToolArgs(f *testing.F) {
	// Seed corpus: the value shapes that reach the tool registrations in
	// tools.go (strings, numbers, which are float64 after a JSON round
	// trip, booleans, nulls, nested objects, arrays) plus malformed
	// input. Malformed entries take the skip path in the fuzz callback,
	// so their unmarshal error is not reported as a crash.
	for _, seed := range []string{
		`{}`,
		`{"query":"hello"}`,
		`{"query":""}`,
		`{"top_k":5}`,
		`{"top_k":5.5}`,
		`{"top_k":-1}`,
		`{"top_k":"not a number"}`,
		`{"project":null}`,
		`{"project":true}`,
		`{"project":["nested","array"]}`,
		`{"project":{"nested":"object"}}`,
		`{"entity_name":"foo","depth":2}`,
		`{"community_level":0}`,
		`{"` + strings.Repeat("a", 1024) + `":"long-key"}`,
		`not json at all`,
		``,
	} {
		f.Add(seed)
	}

	// Argument keys used across internal/mcp/tools.go and notes_tools.go.
	// The list may lag behind newly added tools; the helpers themselves
	// are still exercised either way.
	argKeys := []string{
		"query", "top_k", "doc_type", "project",
		"community_level", "entity_name", "depth",
		"from", "to", "predicate",
		"note_key", "content", "tags", "limit",
		"max_nodes", "graph_depth", "doc_id", "type",
		"nonexistent_key_for_default_path",
	}

	f.Fuzz(func(t *testing.T, payload string) {
		var decoded map[string]any
		err := json.Unmarshal([]byte(payload), &decoded)
		if err != nil {
			// Invalid JSON is out of scope: the MCP transport layer
			// rejects it before any tool handler runs.
			t.Skip()
		}
		if decoded == nil {
			// Top-level JSON "null": there is nothing to coerce.
			return
		}

		for i := range argKeys {
			key := argKeys[i]
			_ = stringArg(decoded, key, "default")
			_ = intArg(decoded, key, 0)
		}
		// projectArg lives in server.go and wraps stringArg for "project".
		_ = projectArg(decoded)
	})
}
1 change: 1 addition & 0 deletions internal/notes/history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
// skipIfNoGit skips the calling test when no git executable can be found
// on PATH. It is marked as a helper so the skip is attributed to the
// caller's line.
func skipIfNoGit(t *testing.T) {
	t.Helper()
	_, lookErr := exec.LookPath("git")
	if lookErr == nil {
		return
	}
	// TODO(#65): environmental skip (git binary missing); tracked in flake-register.
	t.Skip("git not available")
}
Expand Down
1 change: 1 addition & 0 deletions internal/notes/notes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ func TestUnicodeKey(t *testing.T) {

func TestScale_1000Notes(t *testing.T) {
if testing.Short() {
// TODO(#63): 1000-note scale test skipped under -short; tracked in flake-register.
t.Skip("skipping 1000-note scale test in -short mode")
}
dir := t.TempDir()
Expand Down
Loading
Loading