77import org .slf4j .Logger ;
88import org .slf4j .LoggerFactory ;
99
10+ import java .io .IOException ;
11+ import java .nio .charset .StandardCharsets ;
12+ import java .nio .file .Files ;
1013import java .nio .file .Path ;
1114import java .util .ArrayList ;
1215import java .util .LinkedHashMap ;
1316import java .util .List ;
1417import java .util .Map ;
1518import java .util .TreeMap ;
16- import java .util .UUID ;
19+ import java .util .regex .Matcher ;
20+ import java .util .regex .Pattern ;
1721
1822/**
1923 * Detects service boundaries by scanning the graph for build file nodes
2226 * <p>
2327 * Creates SERVICE nodes and sets the {@code service} property on all
2428 * child nodes (nodes whose filePath starts with the module directory).
29+ * <p>
30+ * Supported build systems:
31+ * <ul>
32+ * <li>Maven (pom.xml) -- extracts artifactId</li>
33+ * <li>Gradle (build.gradle, build.gradle.kts)</li>
34+ * <li>npm (package.json) -- extracts name field</li>
35+ * <li>Go (go.mod) -- extracts module name</li>
36+ * <li>Cargo (Cargo.toml) -- extracts package name</li>
37+ * <li>.NET (.csproj)</li>
38+ * <li>Python (requirements.txt, setup.py, pyproject.toml, manage.py)</li>
39+ * <li>Docker (Dockerfile) -- supplemental indicator</li>
40+ * </ul>
2541 */
2642public class ServiceDetector {
2743
@@ -31,18 +47,42 @@ public class ServiceDetector {
3147 * Build file patterns that indicate module boundaries.
3248 * Maps filename to build tool name.
3349 */
34- private static final Map <String , String > BUILD_FILES = Map .of (
35- "pom.xml" , "maven" ,
36- "package.json" , "npm" ,
37- "go.mod" , "go" ,
38- "build.gradle" , "gradle" ,
39- "build.gradle.kts" , "gradle" ,
40- "Cargo.toml" , "cargo"
50+ private static final Map <String , String > BUILD_FILES = Map .ofEntries (
51+ Map .entry ("pom.xml" , "maven" ),
52+ Map .entry ("package.json" , "npm" ),
53+ Map .entry ("go.mod" , "go" ),
54+ Map .entry ("build.gradle" , "gradle" ),
55+ Map .entry ("build.gradle.kts" , "gradle" ),
56+ Map .entry ("Cargo.toml" , "cargo" ),
57+ Map .entry ("requirements.txt" , "python" ),
58+ Map .entry ("setup.py" , "python" ),
59+ Map .entry ("pyproject.toml" , "python" ),
60+ Map .entry ("manage.py" , "django" ),
61+ Map .entry ("Dockerfile" , "docker" )
4162 );
4263
4364 /** File extension for .csproj files (matched by suffix). */
4465 private static final String CSPROJ_EXTENSION = ".csproj" ;
4566
67+ /** Python build files ranked by priority (first match wins for a directory). */
68+ private static final List <String > PYTHON_BUILD_FILES = List .of (
69+ "pyproject.toml" , "setup.py" , "requirements.txt" , "manage.py"
70+ );
71+
72+ /** Regex patterns for extracting names from build file contents. */
73+ private static final Pattern POM_ARTIFACT_ID = Pattern .compile (
74+ "<artifactId>\\ s*([^<]+?)\\ s*</artifactId>" );
75+ private static final Pattern PACKAGE_JSON_NAME = Pattern .compile (
76+ "\" name\" \\ s*:\\ s*\" ([^\" ]+)\" " );
77+ private static final Pattern GO_MOD_MODULE = Pattern .compile (
78+ "^module\\ s+(\\ S+)" , Pattern .MULTILINE );
79+ private static final Pattern CARGO_PACKAGE_NAME = Pattern .compile (
80+ "^name\\ s*=\\ s*\" ([^\" ]+)\" " , Pattern .MULTILINE );
81+ private static final Pattern PYPROJECT_NAME = Pattern .compile (
82+ "^name\\ s*=\\ s*\" ([^\" ]+)\" " , Pattern .MULTILINE );
83+ private static final Pattern SETUP_PY_NAME = Pattern .compile (
84+ "name\\ s*=\\ s*['\" ]([^'\" ]+)['\" ]" );
85+
4686 /**
4787 * Detect service boundaries from the graph's nodes and create SERVICE nodes.
4888 *
@@ -53,6 +93,20 @@ public class ServiceDetector {
5393 * the service property assignments for existing nodes
5494 */
5595 public ServiceDetectionResult detect (List <CodeNode > nodes , List <CodeEdge > edges , String projectDir ) {
96+ return detect (nodes , edges , projectDir , null );
97+ }
98+
99+ /**
100+ * Detect service boundaries with optional project root for reading build file contents.
101+ *
102+ * @param nodes all current nodes in the graph
103+ * @param edges all current edges in the graph
104+ * @param projectDir the project root directory name (used as fallback service name)
105+ * @param projectRoot optional absolute path to the project root (for reading build files)
106+ * @return result containing new SERVICE nodes, CONTAINS edges
107+ */
108+ public ServiceDetectionResult detect (List <CodeNode > nodes , List <CodeEdge > edges ,
109+ String projectDir , Path projectRoot ) {
56110 // 1. Find module boundaries by scanning node file paths for build files
57111 // Use TreeMap for deterministic ordering (sorted by directory path)
58112 Map <String , ModuleInfo > modules = new TreeMap <>();
@@ -67,14 +121,42 @@ public ServiceDetectionResult detect(List<CodeNode> nodes, List<CodeEdge> edges,
67121 // Check known build files
68122 String buildTool = BUILD_FILES .get (fileName );
69123 if (buildTool != null ) {
70- modules .putIfAbsent (dirPath , new ModuleInfo (dirPath , buildTool , fileName ));
124+ // For Python: only register if no better build tool already present
125+ ModuleInfo existing = modules .get (dirPath );
126+ if (existing != null && isPythonTool (buildTool ) && !isPythonTool (existing .buildTool ())) {
127+ continue ; // Don't override a non-Python build tool with Python
128+ }
129+ // For Docker: only register if no other build tool present
130+ if ("docker" .equals (buildTool ) && existing != null ) {
131+ // Add docker as supplemental info but don't override
132+ continue ;
133+ }
134+ // For Python files: prefer higher-priority ones
135+ if (isPythonTool (buildTool ) && existing != null && isPythonTool (existing .buildTool ())) {
136+ if (pythonPriority (fileName ) >= pythonPriority (existing .buildFile ())) {
137+ continue ; // Current is same or lower priority
138+ }
139+ }
140+ modules .put (dirPath , new ModuleInfo (dirPath , buildTool , fileName ));
71141 }
72142 // Check .csproj files
73143 if (fileName .endsWith (CSPROJ_EXTENSION )) {
74144 modules .putIfAbsent (dirPath , new ModuleInfo (dirPath , "dotnet" , fileName ));
75145 }
76146 }
77147
148+ // 1b. Check for Dockerfile as supplemental indicator -- create service
149+ // only if no other build file was found for that directory
150+ for (CodeNode node : nodes ) {
151+ String filePath = node .getFilePath ();
152+ if (filePath == null ) continue ;
153+ String fileName = Path .of (filePath ).getFileName ().toString ();
154+ if ("Dockerfile" .equals (fileName )) {
155+ String dirPath = parentDir (filePath );
156+ modules .putIfAbsent (dirPath , new ModuleInfo (dirPath , "docker" , fileName ));
157+ }
158+ }
159+
78160 // 2. If no modules detected, create one service for the whole project
79161 if (modules .isEmpty ()) {
80162 modules .put ("" , new ModuleInfo ("" , "unknown" , "" ));
@@ -95,7 +177,7 @@ public ServiceDetectionResult detect(List<CodeNode> nodes, List<CodeEdge> edges,
95177 String dir = entry .getKey ();
96178 ModuleInfo info = entry .getValue ();
97179
98- String serviceName = deriveServiceName (dir , projectDir );
180+ String serviceName = extractServiceName (dir , info , projectDir , projectRoot );
99181
100182 CodeNode service = new CodeNode ();
101183 service .setId ("service:" + serviceName );
@@ -141,6 +223,10 @@ public ServiceDetectionResult detect(List<CodeNode> nodes, List<CodeEdge> edges,
141223 CodeNode serviceNode = serviceByDir .get (matchedDir );
142224 if (serviceNode != null ) {
143225 String serviceName = serviceNode .getLabel ();
226+ // Ensure properties map is mutable before modifying
227+ if (!(node .getProperties () instanceof java .util .HashMap )) {
228+ node .setProperties (new java .util .HashMap <>(node .getProperties ()));
229+ }
144230 node .getProperties ().put ("service" , serviceName );
145231
146232 // Create CONTAINS edge
@@ -177,6 +263,104 @@ public ServiceDetectionResult detect(List<CodeNode> nodes, List<CodeEdge> edges,
177263 return new ServiceDetectionResult (serviceNodes , serviceEdges );
178264 }
179265
266+ /**
267+ * Extract service name from build file contents if possible, otherwise use directory name.
268+ */
269+ private String extractServiceName (String dir , ModuleInfo info , String projectDir , Path projectRoot ) {
270+ // Try to read the build file and extract the real name
271+ if (projectRoot != null && !info .buildFile ().isEmpty ()) {
272+ String nameFromFile = readNameFromBuildFile (projectRoot , dir , info );
273+ if (nameFromFile != null && !nameFromFile .isBlank ()) {
274+ return nameFromFile ;
275+ }
276+ }
277+ // Fallback to directory-based naming
278+ return deriveServiceName (dir , projectDir );
279+ }
280+
281+ /**
282+ * Read the build file and extract the project/module/package name.
283+ */
284+ private String readNameFromBuildFile (Path projectRoot , String dir , ModuleInfo info ) {
285+ Path buildFile = dir .isEmpty ()
286+ ? projectRoot .resolve (info .buildFile ())
287+ : projectRoot .resolve (dir ).resolve (info .buildFile ());
288+
289+ if (!Files .isRegularFile (buildFile )) {
290+ return null ;
291+ }
292+
293+ try {
294+ String content = Files .readString (buildFile , StandardCharsets .UTF_8 );
295+ return switch (info .buildTool ()) {
296+ case "maven" -> extractFromPom (content );
297+ case "npm" -> extractFromPackageJson (content );
298+ case "go" -> extractFromGoMod (content );
299+ case "cargo" -> extractFromCargoToml (content );
300+ case "python" -> extractFromPythonBuild (content , info .buildFile ());
301+ case "django" -> null ; // manage.py doesn't contain the name
302+ default -> null ;
303+ };
304+ } catch (IOException e ) {
305+ log .debug ("Could not read build file {}: {}" , buildFile , e .getMessage ());
306+ return null ;
307+ }
308+ }
309+
310+ private String extractFromPom (String content ) {
311+ // Find the first <artifactId> that is a direct child of <project>
312+ // (not inside <parent> or <dependency>). Simple heuristic: skip
313+ // artifactIds that appear inside a <parent> block.
314+ int parentEnd = content .indexOf ("</parent>" );
315+ String searchContent = parentEnd > 0 ? content .substring (parentEnd ) : content ;
316+ Matcher m = POM_ARTIFACT_ID .matcher (searchContent );
317+ return m .find () ? m .group (1 ).trim () : null ;
318+ }
319+
320+ private String extractFromPackageJson (String content ) {
321+ Matcher m = PACKAGE_JSON_NAME .matcher (content );
322+ if (m .find ()) {
323+ String name = m .group (1 ).trim ();
324+ // Strip npm scope prefix (@org/name -> name)
325+ if (name .contains ("/" )) {
326+ name = name .substring (name .lastIndexOf ('/' ) + 1 );
327+ }
328+ return name ;
329+ }
330+ return null ;
331+ }
332+
333+ private String extractFromGoMod (String content ) {
334+ Matcher m = GO_MOD_MODULE .matcher (content );
335+ if (m .find ()) {
336+ String module = m .group (1 ).trim ();
337+ // Use last path segment (github.com/org/repo -> repo)
338+ if (module .contains ("/" )) {
339+ module = module .substring (module .lastIndexOf ('/' ) + 1 );
340+ }
341+ return module ;
342+ }
343+ return null ;
344+ }
345+
346+ private String extractFromCargoToml (String content ) {
347+ Matcher m = CARGO_PACKAGE_NAME .matcher (content );
348+ return m .find () ? m .group (1 ).trim () : null ;
349+ }
350+
351+ private String extractFromPythonBuild (String content , String fileName ) {
352+ if ("pyproject.toml" .equals (fileName )) {
353+ Matcher m = PYPROJECT_NAME .matcher (content );
354+ return m .find () ? m .group (1 ).trim () : null ;
355+ }
356+ if ("setup.py" .equals (fileName )) {
357+ Matcher m = SETUP_PY_NAME .matcher (content );
358+ return m .find () ? m .group (1 ).trim () : null ;
359+ }
360+ // requirements.txt has no name
361+ return null ;
362+ }
363+
180364 /**
181365 * Derive a human-readable service name from a directory path.
182366 */
@@ -200,6 +384,18 @@ private static String parentDir(String filePath) {
200384 return normalized .substring (0 , lastSlash );
201385 }
202386
387+ private static boolean isPythonTool (String buildTool ) {
388+ return "python" .equals (buildTool ) || "django" .equals (buildTool );
389+ }
390+
391+ /**
392+ * Priority index for Python build files (lower = higher priority).
393+ */
394+ private static int pythonPriority (String fileName ) {
395+ int idx = PYTHON_BUILD_FILES .indexOf (fileName );
396+ return idx < 0 ? PYTHON_BUILD_FILES .size () : idx ;
397+ }
398+
203399 /**
204400 * Internal record for module metadata.
205401 */
0 commit comments