Skip to content

Commit cf47ac8

Browse files
aksOps and claude committed
Phase 2: Tech debt cleanup — auto-discovery, utils, split imports, dispatch table, linker protocol, extensions, tests
2a. Auto-discover detectors — replaced the 75-entry hardcoded registry list with pkgutil.walk_packages() scanning. New detector = create file, done.
2b. Shared utils — created detectors/utils.py with decode_text, iter_lines, find_line_number, filename, matches_filename.
2c. Split imports_detector.py (723→~200 lines) — extracted dedicated kotlin_structures (fixed sealed/enum/annotation class regexes), rust_structures (fixed inherent impl, async fn, macro_rules!), and scala_structures into separate files.
2d. Parser dispatch table — replaced the 11-branch elif chain in _parse_structured() with a _STRUCTURED_PARSERS dict.
2e. Fixed Linker protocol — introduced LinkResult dataclass, removed the _new_module_nodes private attribute hack.
2f. Added 16 missing extensions (.html, .css, .mjs, .cjs, .jsonc, .groovy, .pyi, .razor, .cshtml, .adoc, etc.) plus extensionless filename support (Dockerfile, Makefile, go.mod, Jenkinsfile).
2g. Added 54 new tests for 10 previously untested detectors.

75 detectors, 35 languages, 415 tests, all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6c4fd92 commit cf47ac8

28 files changed

Lines changed: 1465 additions & 481 deletions

src/code_intelligence/analyzer.py

Lines changed: 78 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
3535
"scala", "swift", "r", "perl", "lua", "dart",
3636
"dockerfile", "toml", "ini", "dotenv", "csv",
3737
"vue", "svelte",
38+
"html", "css", "scss", "less", "razor", "cshtml", "asciidoc",
39+
"makefile", "gomod", "gosum", "groovy",
3840
}
3941

4042

@@ -52,57 +54,85 @@ class AnalysisResult:
5254
files_without_detectors: int
5355

5456

57+
def _parse_toml(content: bytes, file_path: str) -> dict:
58+
"""Parse TOML content."""
59+
try:
60+
import tomllib
61+
except ModuleNotFoundError:
62+
import tomli as tomllib # type: ignore[no-redef]
63+
try:
64+
text = content.decode("utf-8", errors="replace")
65+
data = tomllib.loads(text)
66+
except Exception as exc:
67+
return {"error": "invalid_toml", "file": file_path, "detail": str(exc)}
68+
return {"type": "toml", "file": file_path, "data": data}
69+
70+
71+
def _parse_ini(content: bytes, file_path: str) -> dict:
72+
"""Parse INI content."""
73+
import configparser
74+
try:
75+
text = content.decode("utf-8", errors="replace")
76+
parser = configparser.ConfigParser()
77+
parser.read_string(text)
78+
data = {section: dict(parser[section]) for section in parser.sections()}
79+
except Exception as exc:
80+
return {"error": "invalid_ini", "file": file_path, "detail": str(exc)}
81+
return {"type": "ini", "file": file_path, "data": data}
82+
83+
84+
def _text_passthrough(lang: str):
85+
"""Return a parser that passes through raw text for regex-based detection."""
86+
def _parse(content: bytes, file_path: str) -> dict:
87+
return {"type": lang, "file": file_path, "data": content.decode("utf-8", errors="replace")}
88+
return _parse
89+
90+
91+
def _class_parser(module_path: str, class_name: str):
92+
"""Return a parser that lazily imports and delegates to a structured parser class."""
93+
def _parse(content: bytes, file_path: str):
94+
mod = __import__(module_path, fromlist=[class_name])
95+
cls = getattr(mod, class_name)
96+
return cls().parse(content, file_path)
97+
return _parse
98+
99+
100+
# Dispatch table for structured parsers. Keyed by language identifier; each
# value is a callable invoked as parser(content, file_path).
_STRUCTURED_PARSERS: dict[str, Any] = {
    # Languages handled by a dedicated parser class in
    # code_intelligence.parsing.structured.<language>_parser.
    **{
        lang: _class_parser(f"code_intelligence.parsing.structured.{lang}_parser", cls_name)
        for lang, cls_name in (
            ("xml", "XmlParser"),
            ("yaml", "YamlParser"),
            ("json", "JsonParser"),
            ("properties", "PropertiesParser"),
            ("gradle", "GradleParser"),
            ("sql", "SqlParser"),
        )
    },
    # Formats parsed with standard-library (or tomli) parsers.
    "toml": _parse_toml,
    "ini": _parse_ini,
    # Formats handed to detectors as raw decoded text.
    **{
        lang: _text_passthrough(lang)
        for lang in (
            "markdown",
            "proto",
            "vue",
            "svelte",
            "html",
            "css",
            "scss",
            "less",
            "razor",
            "cshtml",
            "asciidoc",
            "makefile",
            "gomod",
            "gosum",
            "groovy",
        )
    },
}
126+
127+
55128
def _parse_structured(language: str, content: bytes, file_path: str) -> Any:
    """Look up the structured parser for *language* and run it on *content*.

    Returns the parser's result, or ``None`` when no parser is registered for
    the language or when the parser raises. A raised exception is logged at
    debug level with its traceback and is not propagated.
    """
    handler = _STRUCTURED_PARSERS.get(language)
    if handler is None:
        return None
    try:
        return handler(content, file_path)
    except Exception:
        logger.debug("Structured parse failed for %s", file_path, exc_info=True)
        return None
107137

108138

src/code_intelligence/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ class DiscoveryConfig(BaseModel):
2626
".toml", ".ini", ".cfg", ".conf",
2727
".env", ".csv", ".dockerfile",
2828
".vue", ".svelte",
29+
".html", ".htm", ".css", ".scss", ".less",
30+
".mjs", ".cjs", ".mts", ".cts", ".jsonc",
31+
".groovy", ".pyi", ".razor", ".cshtml", ".adoc",
2932
])
3033
exclude_patterns: list[str] = Field(default_factory=lambda: [
3134
"**/node_modules/**",

0 commit comments

Comments
 (0)