Skip to content

Commit 29682a8

Browse files
aksOpsclaude
andcommitted
Add .codeignore + .gitignore support, fix node_modules exclude bug
Bug: fnmatch-based exclude patterns missed root-level node_modules/ (pattern **/node_modules/** required a parent directory prefix). Fix: Replace fnmatch with pathspec (gitwildmatch) — the same gitignore matching engine used by pip, black, and setuptools. New features: - .codeignore file support (gitignore syntax, any directory) - .gitignore auto-read from repo root - Config exclude_patterns still work (combined with ignore files) - pathspec added to dependencies All 1,662 tests pass. Benchmark: 3,844 nodes, 4.9s (no regression). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5f4c37c commit 29682a8

2 files changed

Lines changed: 57 additions & 5 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies = [
2020
"pyyaml>=6.0",
2121
"sqlparse>=0.5",
2222
"pydantic>=2.0",
23+
"pathspec>=0.11",
2324
]
2425

2526
[project.optional-dependencies]

src/code_intelligence/discovery/file_discovery.py

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44

55
import fnmatch
66
import hashlib
7+
import logging
78
import os
89
import re
910
import subprocess
1011
from dataclasses import dataclass
1112
from enum import Enum
1213
from pathlib import Path
1314

15+
import pathspec
16+
17+
logger = logging.getLogger(__name__)
18+
1419
from code_intelligence.config import Config
1520

1621

@@ -156,6 +161,52 @@ def _compile_exclude_patterns(patterns: list[str]) -> re.Pattern[str] | None:
156161
return re.compile("|".join(fnmatch.translate(p) for p in patterns))
157162

158163

164+
def _build_ignore_spec(repo_path: Path, config_patterns: list[str]) -> pathspec.PathSpec:
165+
"""Build a combined ignore spec from config patterns + ignore files.
166+
167+
Reads .codeignore and .gitignore files from the repo root and any
168+
subdirectory, combining them with the config exclude_patterns.
169+
Uses gitignore-style matching (handles node_modules at any depth).
170+
"""
171+
all_patterns: list[str] = []
172+
173+
# 1. Config exclude patterns (convert ** glob to gitignore style)
174+
for p in config_patterns:
175+
# Strip leading **/ — gitignore patterns match at any depth by default
176+
cleaned = p.replace("**/", "").rstrip("/**")
177+
all_patterns.append(cleaned)
178+
# Also keep original for explicit **/ matching
179+
all_patterns.append(p)
180+
181+
# 2. Read .codeignore from repo root
182+
codeignore = repo_path / ".codeignore"
183+
if codeignore.is_file():
184+
try:
185+
lines = codeignore.read_text().splitlines()
186+
for line in lines:
187+
line = line.strip()
188+
if line and not line.startswith("#"):
189+
all_patterns.append(line)
190+
logger.debug("Loaded %d patterns from .codeignore", len(lines))
191+
except OSError:
192+
pass
193+
194+
# 3. Read .gitignore from repo root (supplementary)
195+
gitignore = repo_path / ".gitignore"
196+
if gitignore.is_file():
197+
try:
198+
lines = gitignore.read_text().splitlines()
199+
for line in lines:
200+
line = line.strip()
201+
if line and not line.startswith("#"):
202+
all_patterns.append(line)
203+
logger.debug("Loaded %d patterns from .gitignore", len(lines))
204+
except OSError:
205+
pass
206+
207+
return pathspec.PathSpec.from_lines("gitwildmatch", all_patterns)
208+
209+
159210
def _compute_sha256(file_path: Path) -> str:
160211
"""Compute SHA-256 hex digest for a file.
161212
@@ -198,13 +249,17 @@ def discover(
198249
self._current_commit = None
199250
relative_paths = self._walk_files(repo_path)
200251

201-
exclude_re = _compile_exclude_patterns(discovery_cfg.exclude_patterns)
252+
ignore_spec = _build_ignore_spec(repo_path, discovery_cfg.exclude_patterns)
202253

203254
result: list[DiscoveredFile] = []
204255
for rel in relative_paths:
205256
abs_path = repo_path / rel
206257
rel_path = Path(rel)
207258

259+
# Check ignore patterns first (fastest rejection)
260+
if ignore_spec.match_file(str(rel_path)):
261+
continue
262+
208263
# Extension filter
209264
lang = _map_extension_to_language(rel_path)
210265
if lang is None:
@@ -217,10 +272,6 @@ def discover(
217272
):
218273
continue
219274

220-
# Check exclude patterns
221-
if exclude_re and exclude_re.match(str(rel_path)):
222-
continue
223-
224275
# Size guard
225276
try:
226277
size = abs_path.stat().st_size

0 commit comments

Comments
 (0)