Skip to content

Commit a5a2a8f

Browse files
authored
Merge branch 'main' into dependabot/go_modules/github.com/spf13/cobra-1.10.2
2 parents aab8736 + 84f23f5 commit a5a2a8f

6 files changed

Lines changed: 73 additions & 9 deletions

File tree

internal/crawler/crawler.go

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -274,8 +274,7 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string {
274274
}
275275

276276
func resolveURL(base, href string) string {
277-
if strings.HasPrefix(href, "#") || strings.HasPrefix(href, "mailto:") ||
278-
strings.HasPrefix(href, "javascript:") {
277+
if strings.HasPrefix(href, "#") {
279278
return ""
280279
}
281280
b, err := url.Parse(base)
@@ -286,6 +285,15 @@ func resolveURL(base, href string) string {
286285
if err != nil {
287286
return ""
288287
}
288+
// Reject any href with an explicit scheme other than http/https.
289+
// The http/https allow-list thereby excludes mailto:, javascript:, data:, vbscript:,
290+
// tel:, file:, blob:, and anything else we don't crawl.
291+
if h.Scheme != "" {
292+
s := strings.ToLower(h.Scheme)
293+
if s != "http" && s != "https" {
294+
return ""
295+
}
296+
}
289297
resolved := b.ResolveReference(h)
290298
resolved.Fragment = ""
291299
resolved.RawQuery = ""

internal/crawler/crawler_test.go

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
package crawler
2+
3+
import "testing"
4+
5+
func TestResolveURL_SchemeAllowList(t *testing.T) {
6+
const base = "https://example.com/docs/"
7+
cases := []struct {
8+
name string
9+
href string
10+
want string
11+
}{
12+
{"relative path", "guide.html", "https://example.com/docs/guide.html"},
13+
{"absolute http", "http://example.com/x", "http://example.com/x"},
14+
{"absolute https", "https://example.com/x", "https://example.com/x"},
15+
{"fragment only", "#anchor", ""},
16+
{"mailto", "mailto:a@b.c", ""},
17+
{"javascript", "javascript:alert(1)", ""},
18+
{"javascript case", "JavaScript:alert(1)", ""},
19+
{"data uri", "data:text/html,<script>alert(1)</script>", ""},
20+
{"vbscript", "vbscript:msgbox(1)", ""},
21+
{"tel", "tel:+15555555555", ""},
22+
{"file", "file:///etc/passwd", ""},
23+
{"blob", "blob:https://example.com/abc", ""},
24+
{"ftp", "ftp://example.com/file", ""},
25+
}
26+
for _, tc := range cases {
27+
t.Run(tc.name, func(t *testing.T) {
28+
got := resolveURL(base, tc.href)
29+
if got != tc.want {
30+
t.Errorf("resolveURL(%q, %q) = %q, want %q", base, tc.href, got, tc.want)
31+
}
32+
})
33+
}
34+
}

internal/hookinstaller/installer.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ func writeJSONMapAtomic(path string, data map[string]json.RawMessage) error {
131131
cleanup()
132132
return fmt.Errorf("close temp: %w", err)
133133
}
134-
// 0o644 — standard config perms. Writable by user only.
135-
if err := os.Chmod(tmpPath, 0o644); err != nil {
134+
// 0o600 — user-only read/write. No group/world bits.
135+
if err := os.Chmod(tmpPath, 0o600); err != nil {
136136
cleanup()
137137
return fmt.Errorf("chmod temp: %w", err)
138138
}
@@ -163,19 +163,19 @@ func validateHookPath(p string) error {
163163
return nil
164164
}
165165

166-
// ExtractHookScript writes the embedded hook.sh to dest with 0o755 perms.
166+
// ExtractHookScript writes the embedded hook.sh to dest with 0o700 perms.
167167
// Creates the parent directory if missing. Overwrites existing files —
168168
// callers that want to preserve a user-modified hook should check first.
169169
func ExtractHookScript(dest string) error {
170-
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
170+
if err := os.MkdirAll(filepath.Dir(dest), 0o700); err != nil {
171171
return fmt.Errorf("mkdir %s: %w", filepath.Dir(dest), err)
172172
}
173-
if err := os.WriteFile(dest, HookScript, 0o755); err != nil {
173+
if err := os.WriteFile(dest, HookScript, 0o700); err != nil {
174174
return fmt.Errorf("write %s: %w", dest, err)
175175
}
176176
// WriteFile masks perms through umask on create; reset explicitly so
177-
// the resulting file is reliably executable.
178-
if err := os.Chmod(dest, 0o755); err != nil {
177+
// the resulting file is reliably executable for the owner only.
178+
if err := os.Chmod(dest, 0o700); err != nil {
179179
return fmt.Errorf("chmod %s: %w", dest, err)
180180
}
181181
return nil

internal/vectorindex/hnsw_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,13 @@ func TestHNSW_Recall10k(t *testing.T) {
215215
if testing.Short() {
216216
t.Skip("skipping 10k benchmark in -short")
217217
}
218+
if raceEnabled {
219+
// Workload is fully sequential (no goroutines), so the race
220+
// detector has nothing to catch here — it just adds ~10× overhead
221+
// that dominates CI. Concurrency correctness is covered by
222+
// TestHNSW_ConcurrentAddSearch, which DOES run under -race.
223+
t.Skip("skipping 10k recall benchmark under -race (sequential workload)")
224+
}
218225
const (
219226
n = 10_000
220227
dim = 384

internal/vectorindex/race_off.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
//go:build !race
2+
3+
package vectorindex
4+
5+
const raceEnabled = false

internal/vectorindex/race_on.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
//go:build race
2+
3+
package vectorindex
4+
5+
// raceEnabled reports whether the package was compiled with -race.
6+
// Some long-running deterministic benchmarks (e.g. TestHNSW_Recall10k)
7+
// gain nothing from the race detector — they're sequential — and pay
8+
// ~10× overhead that dominates CI time. Those tests skip when this is
9+
// true, leaving the explicitly concurrent tests to exercise the detector.
10+
const raceEnabled = true

0 commit comments

Comments
 (0)