3535 "scala" , "swift" , "r" , "perl" , "lua" , "dart" ,
3636 "dockerfile" , "toml" , "ini" , "dotenv" , "csv" ,
3737 "vue" , "svelte" ,
38+ "html" , "css" , "scss" , "less" , "razor" , "cshtml" , "asciidoc" ,
39+ "makefile" , "gomod" , "gosum" , "groovy" ,
3840}
3941
4042
@@ -52,57 +54,85 @@ class AnalysisResult:
5254 files_without_detectors : int
5355
5456
57+ def _parse_toml (content : bytes , file_path : str ) -> dict :
58+ """Parse TOML content."""
59+ try :
60+ import tomllib
61+ except ModuleNotFoundError :
62+ import tomli as tomllib # type: ignore[no-redef]
63+ try :
64+ text = content .decode ("utf-8" , errors = "replace" )
65+ data = tomllib .loads (text )
66+ except Exception as exc :
67+ return {"error" : "invalid_toml" , "file" : file_path , "detail" : str (exc )}
68+ return {"type" : "toml" , "file" : file_path , "data" : data }
69+
70+
71+ def _parse_ini (content : bytes , file_path : str ) -> dict :
72+ """Parse INI content."""
73+ import configparser
74+ try :
75+ text = content .decode ("utf-8" , errors = "replace" )
76+ parser = configparser .ConfigParser ()
77+ parser .read_string (text )
78+ data = {section : dict (parser [section ]) for section in parser .sections ()}
79+ except Exception as exc :
80+ return {"error" : "invalid_ini" , "file" : file_path , "detail" : str (exc )}
81+ return {"type" : "ini" , "file" : file_path , "data" : data }
82+
83+
84+ def _text_passthrough (lang : str ):
85+ """Return a parser that passes through raw text for regex-based detection."""
86+ def _parse (content : bytes , file_path : str ) -> dict :
87+ return {"type" : lang , "file" : file_path , "data" : content .decode ("utf-8" , errors = "replace" )}
88+ return _parse
89+
90+
91+ def _class_parser (module_path : str , class_name : str ):
92+ """Return a parser that lazily imports and delegates to a structured parser class."""
93+ def _parse (content : bytes , file_path : str ):
94+ mod = __import__ (module_path , fromlist = [class_name ])
95+ cls = getattr (mod , class_name )
96+ return cls ().parse (content , file_path )
97+ return _parse
98+
99+
# Dispatch table for structured parsers, keyed by language identifier.
# Insertion order: class-based parsers, then the inline TOML/INI parsers,
# then the raw-text passthrough languages.
_STRUCTURED_PARSERS: dict[str, Any] = {
    # Class-based parsers; each module is imported only when the parser runs.
    **{
        lang: _class_parser(f"code_intelligence.parsing.structured.{lang}_parser", class_name)
        for lang, class_name in (
            ("xml", "XmlParser"),
            ("yaml", "YamlParser"),
            ("json", "JsonParser"),
            ("properties", "PropertiesParser"),
            ("gradle", "GradleParser"),
            ("sql", "SqlParser"),
        )
    },
    "toml": _parse_toml,
    "ini": _parse_ini,
    # Languages whose decoded text is passed through for regex-based detection.
    **{
        lang: _text_passthrough(lang)
        for lang in (
            "markdown",
            "proto",
            "vue",
            "svelte",
            "html",
            "css",
            "scss",
            "less",
            "razor",
            "cshtml",
            "asciidoc",
            "makefile",
            "gomod",
            "gosum",
            "groovy",
        )
    },
}
126+
127+
def _parse_structured(language: str, content: bytes, file_path: str) -> Any:
    """Run the structured parser registered for *language*, if any.

    Looks *language* up in ``_STRUCTURED_PARSERS`` and calls the matching
    parser with ``(content, file_path)``. Returns the parser's result, or
    ``None`` when no parser is registered or the parser raises (the failure
    is logged at debug level with the traceback).
    """
    handler = _STRUCTURED_PARSERS.get(language)
    if handler is None:
        return None
    try:
        return handler(content, file_path)
    except Exception:
        logger.debug("Structured parse failed for %s", file_path, exc_info=True)
        return None
107137
108138
0 commit comments