1616import java .util .List ;
1717import java .util .Map ;
1818import java .util .TreeMap ;
19+ import java .util .concurrent .Executors ;
20+ import java .util .concurrent .Future ;
21+ import java .util .concurrent .TimeUnit ;
1922
2023/**
2124 * Runs all {@link LanguageExtractor} beans after {@link io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher}
@@ -80,44 +83,106 @@ public void enrich(List<CodeNode> nodes, List<CodeEdge> edges, Path rootPath) {
8083 }
8184 }
8285
83- int edgesAdded = 0 ;
84- int typeHintsAdded = 0 ;
85-
86+ // Collect files that have a matching extractor
87+ record FileTask ( String filePath , List < CodeNode > fileNodes , LanguageExtractor extractor , String language ) {}
88+ List < FileTask > tasks = new ArrayList <>();
8689 for (Map .Entry <String , List <CodeNode >> entry : nodesByFile .entrySet ()) {
8790 String filePath = entry .getKey ();
88- List <CodeNode > fileNodes = entry .getValue ();
89-
9091 String language = detectLanguage (filePath );
9192 if (language == null ) continue ;
92-
9393 String resolvedLanguage = LANGUAGE_ALIASES .getOrDefault (language , language );
9494 LanguageExtractor extractor = extractorByLanguage .get (resolvedLanguage );
9595 if (extractor == null ) continue ;
96+ tasks .add (new FileTask (filePath , entry .getValue (), extractor , language ));
97+ }
9698
97- String content = readFile (rootPath , filePath );
98- if (content == null ) continue ;
99+ if (tasks .isEmpty ()) {
100+ log .info ("Language enrichment: no files matched any extractor" );
101+ return ;
102+ }
99103
100- DetectorContext ctx = new DetectorContext (filePath , language , content , nodeRegistry , null );
104+ // Process files in parallel with per-file timeout
105+ var newEdges = java .util .Collections .synchronizedList (new ArrayList <CodeEdge >());
106+ var edgesAdded = new java .util .concurrent .atomic .AtomicInteger (0 );
107+ var typeHintsAdded = new java .util .concurrent .atomic .AtomicInteger (0 );
108+
109+ var executor = Executors .newVirtualThreadPerTaskExecutor ();
110+ try {
111+ List <Future <?>> futures = new ArrayList <>(tasks .size ());
112+ for (FileTask task : tasks ) {
113+ futures .add (executor .submit (() -> {
114+ if (Thread .interrupted ()) {
115+ Thread .currentThread ().interrupt ();
116+ return null ;
117+ }
118+ String content = readFile (rootPath , task .filePath ());
119+ if (content == null ) return null ;
120+
121+ // Skip minified files — they hang parsers and contain no useful structure
122+ if (isLikelyMinified (task .filePath (), content )) {
123+ log .debug ("Skipping minified file for enrichment: {}" , task .filePath ());
124+ return null ;
125+ }
101126
102- for (CodeNode node : fileNodes ) {
127+ DetectorContext ctx = new DetectorContext (
128+ task .filePath (), task .language (), content , nodeRegistry , null );
129+
130+ for (CodeNode node : task .fileNodes ()) {
131+ if (Thread .interrupted ()) {
132+ Thread .currentThread ().interrupt ();
133+ break ;
134+ }
135+ try {
136+ LanguageExtractionResult result = task .extractor ().extract (ctx , node );
137+ newEdges .addAll (result .callEdges ());
138+ newEdges .addAll (result .symbolReferences ());
139+ edgesAdded .addAndGet (result .callEdges ().size () + result .symbolReferences ().size ());
140+ for (Map .Entry <String , String > hint : result .typeHints ().entrySet ()) {
141+ node .getProperties ().put (hint .getKey (), hint .getValue ());
142+ typeHintsAdded .incrementAndGet ();
143+ }
144+ } catch (Exception e ) {
145+ log .warn ("LanguageExtractor {} failed on node {} in {}: {}" ,
146+ task .extractor ().getClass ().getSimpleName (),
147+ node .getId (), task .filePath (), e .getMessage ());
148+ }
149+ }
150+ return null ;
151+ }));
152+ }
153+
154+ // Collect with per-file timeout
155+ for (int i = 0 ; i < futures .size (); i ++) {
103156 try {
104- LanguageExtractionResult result = extractor .extract (ctx , node );
105- edges .addAll (result .callEdges ());
106- edges .addAll (result .symbolReferences ());
107- edgesAdded += result .callEdges ().size () + result .symbolReferences ().size ();
108- for (Map .Entry <String , String > hint : result .typeHints ().entrySet ()) {
109- node .getProperties ().put (hint .getKey (), hint .getValue ());
110- typeHintsAdded ++;
157+ futures .get (i ).get (30 , TimeUnit .SECONDS );
158+ } catch (java .util .concurrent .TimeoutException e ) {
159+ futures .get (i ).cancel (true );
160+ log .warn ("Language enrichment timed out for {} (30s), skipping" , tasks .get (i ).filePath ());
161+ } catch (java .util .concurrent .ExecutionException e ) {
162+ log .warn ("Language enrichment failed for {}: {}" , tasks .get (i ).filePath (), e .getMessage ());
163+ } catch (InterruptedException e ) {
164+ Thread .currentThread ().interrupt ();
165+ break ;
166+ }
167+ }
168+ } finally {
169+ executor .shutdown ();
170+ try {
171+ if (!executor .awaitTermination (10 , TimeUnit .SECONDS )) {
172+ executor .shutdownNow ();
173+ if (!executor .awaitTermination (5 , TimeUnit .SECONDS )) {
174+ log .warn ("Language enrichment executor did not terminate cleanly" );
111175 }
112- } catch (Exception e ) {
113- log .warn ("LanguageExtractor {} failed on node {} in {}: {}" ,
114- extractor .getClass ().getSimpleName (), node .getId (), filePath , e .getMessage ());
115176 }
177+ } catch (InterruptedException e ) {
178+ executor .shutdownNow ();
179+ Thread .currentThread ().interrupt ();
116180 }
117181 }
118182
119- log .info ("Language enrichment: {} edges added, {} type hints added across {} extractors" ,
120- edgesAdded , typeHintsAdded , extractorByLanguage .size ());
183+ edges .addAll (newEdges );
184+ log .info ("Language enrichment: {} edges added, {} type hints added across {} extractors ({} files)" ,
185+ edgesAdded .get (), typeHintsAdded .get (), extractorByLanguage .size (), tasks .size ());
121186 }
122187
123188 private Map <String , CodeNode > buildRegistry (List <CodeNode > nodes ) {
@@ -144,6 +209,23 @@ private String readFile(Path rootPath, String filePath) {
144209 }
145210 }
146211
212+ /**
213+ * Check if a file is likely minified (long lines, large size) to skip enrichment.
214+ */
215+ private static boolean isLikelyMinified (String filePath , String content ) {
216+ if (content .length () < 50_000 ) return false ;
217+ String name = filePath .contains ("/" ) ? filePath .substring (filePath .lastIndexOf ('/' ) + 1 ) : filePath ;
218+ boolean jsOrCss = name .endsWith (".js" ) || name .endsWith (".mjs" ) || name .endsWith (".cjs" )
219+ || name .endsWith (".css" ) || name .endsWith (".jsx" ) || name .endsWith (".ts" );
220+ if (!jsOrCss && !name .endsWith (".min.js" ) && !name .endsWith (".bundle.js" )) return false ;
221+ int newlines = 0 ;
222+ for (int i = 0 ; i < content .length (); i ++) {
223+ if (content .charAt (i ) == '\n' ) newlines ++;
224+ }
225+ if (newlines == 0 ) newlines = 1 ;
226+ return content .length () / newlines > 1000 ;
227+ }
228+
147229 /**
148230 * Map file extension to language string (mirrors FileDiscovery conventions).
149231 */
0 commit comments