diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go
index 785b097..bb04cdd 100644
--- a/internal/crawler/crawler.go
+++ b/internal/crawler/crawler.go
@@ -274,8 +274,7 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string {
 }
 
 func resolveURL(base, href string) string {
-	if strings.HasPrefix(href, "#") || strings.HasPrefix(href, "mailto:") ||
-		strings.HasPrefix(href, "javascript:") {
+	if strings.HasPrefix(href, "#") {
 		return ""
 	}
 	b, err := url.Parse(base)
@@ -286,6 +285,15 @@ func resolveURL(base, href string) string {
 	if err != nil {
 		return ""
 	}
+	// Scheme allow-list: an href with an explicit scheme is kept only when
+	// that scheme is http or https. mailto:, javascript:, data:, vbscript:,
+	// tel:, file:, blob:, ftp: and any other scheme resolve to "".
+	if h.Scheme != "" {
+		s := strings.ToLower(h.Scheme)
+		if s != "http" && s != "https" {
+			return ""
+		}
+	}
 	resolved := b.ResolveReference(h)
 	resolved.Fragment = ""
 	resolved.RawQuery = ""
diff --git a/internal/crawler/crawler_test.go b/internal/crawler/crawler_test.go
new file mode 100644
index 0000000..6b614fd
--- /dev/null
+++ b/internal/crawler/crawler_test.go
@@ -0,0 +1,34 @@
+package crawler
+
+import "testing"
+
+func TestResolveURL_SchemeAllowList(t *testing.T) {
+	const base = "https://example.com/docs/"
+	cases := []struct {
+		name string
+		href string
+		want string
+	}{
+		{"relative path", "guide.html", "https://example.com/docs/guide.html"},
+		{"absolute http", "http://example.com/x", "http://example.com/x"},
+		{"absolute https", "https://example.com/x", "https://example.com/x"},
+		{"fragment only", "#anchor", ""},
+		{"mailto", "mailto:a@b.c", ""},
+		{"javascript", "javascript:alert(1)", ""},
+		{"javascript case", "JavaScript:alert(1)", ""},
+		{"data uri", "data:text/html,", ""},
+		{"vbscript", "vbscript:msgbox(1)", ""},
+		{"tel", "tel:+15555555555", ""},
+		{"file", "file:///etc/passwd", ""},
+		{"blob", "blob:https://example.com/abc", ""},
+		{"ftp", "ftp://example.com/file", ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := resolveURL(base, tc.href)
+			if got != tc.want {
+				t.Errorf("resolveURL(%q, %q) = %q, want %q", base, tc.href, got, tc.want)
+			}
+		})
+	}
+}
diff --git a/internal/hookinstaller/installer.go b/internal/hookinstaller/installer.go
index 2bc3714..23aca52 100644
--- a/internal/hookinstaller/installer.go
+++ b/internal/hookinstaller/installer.go
@@ -131,8 +131,8 @@ func writeJSONMapAtomic(path string, data map[string]json.RawMessage) error {
 		cleanup()
 		return fmt.Errorf("close temp: %w", err)
 	}
-	// 0o644 — standard config perms. Writable by user only.
-	if err := os.Chmod(tmpPath, 0o644); err != nil {
+	// 0o600 — owner read/write only. No group/world bits on the temp file.
+	if err := os.Chmod(tmpPath, 0o600); err != nil {
 		cleanup()
 		return fmt.Errorf("chmod temp: %w", err)
 	}
@@ -163,19 +163,19 @@ func validateHookPath(p string) error {
 	return nil
 }
 
-// ExtractHookScript writes the embedded hook.sh to dest with 0o755 perms.
+// ExtractHookScript writes the embedded hook.sh to dest with owner-only 0o700 perms.
 // Creates the parent directory if missing. Overwrites existing files —
 // callers that want to preserve a user-modified hook should check first.
 func ExtractHookScript(dest string) error {
-	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
+	if err := os.MkdirAll(filepath.Dir(dest), 0o700); err != nil {
 		return fmt.Errorf("mkdir %s: %w", filepath.Dir(dest), err)
 	}
-	if err := os.WriteFile(dest, HookScript, 0o755); err != nil {
+	if err := os.WriteFile(dest, HookScript, 0o700); err != nil {
 		return fmt.Errorf("write %s: %w", dest, err)
 	}
 	// WriteFile masks perms through umask on create; reset explicitly so
-	// the resulting file is reliably executable.
-	if err := os.Chmod(dest, 0o755); err != nil {
+	// dest ends up executable by its owner only (0o700).
+	if err := os.Chmod(dest, 0o700); err != nil {
 		return fmt.Errorf("chmod %s: %w", dest, err)
 	}
 	return nil