Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions internal/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,7 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string {
}

func resolveURL(base, href string) string {
if strings.HasPrefix(href, "#") || strings.HasPrefix(href, "mailto:") ||
strings.HasPrefix(href, "javascript:") {
if strings.HasPrefix(href, "#") {
return ""
}
b, err := url.Parse(base)
Expand All @@ -286,6 +285,15 @@ func resolveURL(base, href string) string {
if err != nil {
return ""
}
// Reject any href with an explicit scheme other than http/https.
// Allow-listing only http/https rejects mailto:, javascript:, data:,
// vbscript:, tel:, file:, blob:, and anything else we don't crawl.
if h.Scheme != "" {
s := strings.ToLower(h.Scheme)
if s != "http" && s != "https" {
return ""
}
}
resolved := b.ResolveReference(h)
resolved.Fragment = ""
resolved.RawQuery = ""
Expand Down
34 changes: 34 additions & 0 deletions internal/crawler/crawler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package crawler

import "testing"

// TestResolveURL_SchemeAllowList is a table-driven check of resolveURL
// against a fixed base URL. Relative and http/https hrefs are expected to
// resolve to absolute URLs; fragment-only hrefs and hrefs carrying any other
// explicit scheme are expected to yield the empty string.
func TestResolveURL_SchemeAllowList(t *testing.T) {
	const base = "https://example.com/docs/"

	type testCase struct {
		name string // subtest name
		href string // raw href passed to resolveURL
		want string // expected result; "" means the link is rejected
	}

	tests := []testCase{
		// Links that should survive resolution.
		{name: "relative path", href: "guide.html", want: "https://example.com/docs/guide.html"},
		{name: "absolute http", href: "http://example.com/x", want: "http://example.com/x"},
		{name: "absolute https", href: "https://example.com/x", want: "https://example.com/x"},

		// Links that should be dropped.
		{name: "fragment only", href: "#anchor", want: ""},
		{name: "mailto", href: "mailto:a@b.c", want: ""},
		{name: "javascript", href: "javascript:alert(1)", want: ""},
		{name: "javascript case", href: "JavaScript:alert(1)", want: ""},
		{name: "data uri", href: "data:text/html,<script>alert(1)</script>", want: ""},
		{name: "vbscript", href: "vbscript:msgbox(1)", want: ""},
		{name: "tel", href: "tel:+15555555555", want: ""},
		{name: "file", href: "file:///etc/passwd", want: ""},
		{name: "blob", href: "blob:https://example.com/abc", want: ""},
		{name: "ftp", href: "ftp://example.com/file", want: ""},
	}

	for i := range tests {
		tt := tests[i]
		t.Run(tt.name, func(t *testing.T) {
			if got := resolveURL(base, tt.href); got != tt.want {
				t.Errorf("resolveURL(%q, %q) = %q, want %q", base, tt.href, got, tt.want)
			}
		})
	}
}
14 changes: 7 additions & 7 deletions internal/hookinstaller/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ func writeJSONMapAtomic(path string, data map[string]json.RawMessage) error {
cleanup()
return fmt.Errorf("close temp: %w", err)
}
// 0o644 — standard config perms. Writable by user only.
if err := os.Chmod(tmpPath, 0o644); err != nil {
// 0o600 — user-only read/write. No group/world bits.
if err := os.Chmod(tmpPath, 0o600); err != nil {
cleanup()
return fmt.Errorf("chmod temp: %w", err)
}
Expand Down Expand Up @@ -163,19 +163,19 @@ func validateHookPath(p string) error {
return nil
}

// ExtractHookScript writes the embedded hook.sh to dest with 0o755 perms.
// ExtractHookScript writes the embedded hook.sh to dest with 0o700 perms.
// Creates the parent directory if missing. Overwrites existing files —
// callers that want to preserve a user-modified hook should check first.
func ExtractHookScript(dest string) error {
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
if err := os.MkdirAll(filepath.Dir(dest), 0o700); err != nil {
return fmt.Errorf("mkdir %s: %w", filepath.Dir(dest), err)
}
if err := os.WriteFile(dest, HookScript, 0o755); err != nil {
if err := os.WriteFile(dest, HookScript, 0o700); err != nil {
return fmt.Errorf("write %s: %w", dest, err)
}
// WriteFile masks perms through umask on create; reset explicitly so
// the resulting file is reliably executable.
if err := os.Chmod(dest, 0o755); err != nil {
// the resulting file is reliably executable for the owner only.
if err := os.Chmod(dest, 0o700); err != nil {
return fmt.Errorf("chmod %s: %w", dest, err)
}
return nil
Expand Down
Loading