Skip to content

Commit dddddb1

Browse files
aksOps and claude committed
Rewrite 11 Python detectors from regex to ANTLR AST with regex fallback
All Python detectors now extend AbstractAntlrDetector instead of AbstractRegexDetector. Each implements parse() using AntlrParserFactory, detectWithAst() for AST-based detection, and detectWithRegex() as a fallback when parsing fails. KafkaPythonDetector delegates its AST path to the regex implementation, because ANTLR's getText() strips the whitespace that its patterns need. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cdf947b commit dddddb1

11 files changed

Lines changed: 1422 additions & 96 deletions

src/main/java/io/github/randomcodespace/iq/detector/python/CeleryTaskDetector.java

Lines changed: 120 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
package io.github.randomcodespace.iq.detector.python;
22

3-
import io.github.randomcodespace.iq.detector.AbstractRegexDetector;
3+
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.detector.DetectorContext;
55
import io.github.randomcodespace.iq.detector.DetectorResult;
6+
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
7+
import io.github.randomcodespace.iq.grammar.python.Python3Parser;
8+
import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener;
69
import io.github.randomcodespace.iq.model.CodeEdge;
710
import io.github.randomcodespace.iq.model.CodeNode;
811
import io.github.randomcodespace.iq.model.EdgeKind;
912
import io.github.randomcodespace.iq.model.NodeKind;
13+
import org.antlr.v4.runtime.tree.ParseTree;
14+
import org.antlr.v4.runtime.tree.ParseTreeWalker;
1015
import org.springframework.stereotype.Component;
1116

1217
import java.util.ArrayList;
@@ -16,20 +21,23 @@
1621
import java.util.regex.Pattern;
1722

1823
@Component
19-
public class CeleryTaskDetector extends AbstractRegexDetector {
24+
public class CeleryTaskDetector extends AbstractAntlrDetector {
2025

21-
// @app.task or @shared_task or @celery.task with optional name param
26+
// --- Regex patterns ---
2227
private static final Pattern TASK_DECORATOR = Pattern.compile(
2328
"@(?:\\w+\\.)?(?:task|shared_task)\\(?"
2429
+ "(?:.*?name\\s*=\\s*['\"]([^'\"]+)['\"])?"
2530
+ "[^)]*\\)?\\s*\\n\\s*def\\s+(\\w+)",
2631
Pattern.DOTALL
2732
);
28-
29-
// task.delay(...) or task.apply_async(...)
3033
private static final Pattern TASK_CALL = Pattern.compile(
3134
"(\\w+)\\.(delay|apply_async|s|si|signature)\\("
3235
);
36+
private static final Pattern NAME_KWARG_RE = Pattern.compile(
37+
"name\\s*=\\s*['\"]([^'\"]+)['\"]"
38+
);
39+
40+
private static final Set<String> TASK_DECORATORS = Set.of("task", "shared_task");
3341

3442
@Override
3543
public String getName() {
@@ -42,7 +50,113 @@ public Set<String> getSupportedLanguages() {
4250
}
4351

4452
@Override
45-
public DetectorResult detect(DetectorContext ctx) {
53+
protected ParseTree parse(DetectorContext ctx) {
54+
return AntlrParserFactory.parse("python", ctx.content());
55+
}
56+
57+
@Override
58+
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
59+
List<CodeNode> nodes = new ArrayList<>();
60+
List<CodeEdge> edges = new ArrayList<>();
61+
String filePath = ctx.filePath();
62+
String moduleName = ctx.moduleName();
63+
String text = ctx.content();
64+
65+
// Walk for decorated functions (task definitions)
66+
ParseTreeWalker.DEFAULT.walk(new Python3ParserBaseListener() {
67+
@Override
68+
public void enterDecorated(Python3Parser.DecoratedContext decorated) {
69+
if (decorated.decorators() == null) return;
70+
71+
String funcName = null;
72+
if (decorated.funcdef() != null && decorated.funcdef().name() != null) {
73+
funcName = decorated.funcdef().name().getText();
74+
} else if (decorated.async_funcdef() != null
75+
&& decorated.async_funcdef().funcdef() != null
76+
&& decorated.async_funcdef().funcdef().name() != null) {
77+
funcName = decorated.async_funcdef().funcdef().name().getText();
78+
}
79+
if (funcName == null) return;
80+
81+
for (var dec : decorated.decorators().decorator()) {
82+
if (dec.dotted_name() == null) continue;
83+
var names = dec.dotted_name().name();
84+
String lastPart = names.get(names.size() - 1).getText();
85+
if (!TASK_DECORATORS.contains(lastPart)) continue;
86+
87+
// Extract task name from name=... kwarg
88+
String taskName = null;
89+
if (dec.arglist() != null) {
90+
String argText = dec.arglist().getText();
91+
Matcher nm = NAME_KWARG_RE.matcher(argText);
92+
if (nm.find()) {
93+
taskName = nm.group(1);
94+
}
95+
}
96+
if (taskName == null) {
97+
taskName = funcName;
98+
}
99+
100+
int line = lineOf(dec);
101+
102+
String queueId = "queue:" + (moduleName != null ? moduleName : "") + ":celery:" + taskName;
103+
CodeNode queueNode = new CodeNode();
104+
queueNode.setId(queueId);
105+
queueNode.setKind(NodeKind.QUEUE);
106+
queueNode.setLabel("celery:" + taskName);
107+
queueNode.setModule(moduleName);
108+
queueNode.setFilePath(filePath);
109+
queueNode.setLineStart(line);
110+
queueNode.getProperties().put("broker", "celery");
111+
queueNode.getProperties().put("task_name", taskName);
112+
queueNode.getProperties().put("function", funcName);
113+
nodes.add(queueNode);
114+
115+
String methodId = "method:" + filePath + "::" + funcName;
116+
CodeNode methodNode = new CodeNode();
117+
methodNode.setId(methodId);
118+
methodNode.setKind(NodeKind.METHOD);
119+
methodNode.setLabel(funcName);
120+
methodNode.setFqn(filePath + "::" + funcName);
121+
methodNode.setModule(moduleName);
122+
methodNode.setFilePath(filePath);
123+
methodNode.setLineStart(line);
124+
nodes.add(methodNode);
125+
126+
CodeEdge consumesEdge = new CodeEdge();
127+
consumesEdge.setId(methodId + "->consumes->" + queueId);
128+
consumesEdge.setKind(EdgeKind.CONSUMES);
129+
consumesEdge.setSourceId(methodId);
130+
edges.add(consumesEdge);
131+
}
132+
}
133+
134+
@Override
135+
public void enterAtom_expr(Python3Parser.Atom_exprContext atomExpr) {
136+
// Detect task.delay() / task.apply_async() calls
137+
String exprText = atomExpr.getText();
138+
Matcher callMatcher = TASK_CALL.matcher(exprText);
139+
if (callMatcher.find()) {
140+
String taskRef = callMatcher.group(1);
141+
int line = lineOf(atomExpr);
142+
143+
String queueId = "queue:" + (moduleName != null ? moduleName : "") + ":celery:" + taskRef;
144+
String callerId = "method:" + filePath + "::caller_l" + line;
145+
146+
CodeEdge producesEdge = new CodeEdge();
147+
producesEdge.setId(callerId + "->produces->" + queueId);
148+
producesEdge.setKind(EdgeKind.PRODUCES);
149+
producesEdge.setSourceId(callerId);
150+
edges.add(producesEdge);
151+
}
152+
}
153+
}, tree);
154+
155+
return DetectorResult.of(nodes, edges);
156+
}
157+
158+
@Override
159+
protected DetectorResult detectWithRegex(DetectorContext ctx) {
46160
List<CodeNode> nodes = new ArrayList<>();
47161
List<CodeEdge> edges = new ArrayList<>();
48162
String text = ctx.content();
@@ -52,7 +166,6 @@ public DetectorResult detect(DetectorContext ctx) {
52166
String filePath = ctx.filePath();
53167
String moduleName = ctx.moduleName();
54168

55-
// Detect task definitions
56169
Matcher taskMatcher = TASK_DECORATOR.matcher(text);
57170
while (taskMatcher.find()) {
58171
String taskName = taskMatcher.group(1) != null ? taskMatcher.group(1) : taskMatcher.group(2);
@@ -90,11 +203,9 @@ public DetectorResult detect(DetectorContext ctx) {
90203
edges.add(consumesEdge);
91204
}
92205

93-
// Detect task invocations
94206
Matcher callMatcher = TASK_CALL.matcher(text);
95207
while (callMatcher.find()) {
96208
String taskRef = callMatcher.group(1);
97-
String callType = callMatcher.group(2);
98209
int line = findLineNumber(text, callMatcher.start());
99210

100211
String queueId = "queue:" + (moduleName != null ? moduleName : "") + ":celery:" + taskRef;

src/main/java/io/github/randomcodespace/iq/detector/python/DjangoAuthDetector.java

Lines changed: 135 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
package io.github.randomcodespace.iq.detector.python;
22

3-
import io.github.randomcodespace.iq.detector.AbstractRegexDetector;
3+
import io.github.randomcodespace.iq.detector.AbstractAntlrDetector;
44
import io.github.randomcodespace.iq.detector.DetectorContext;
55
import io.github.randomcodespace.iq.detector.DetectorResult;
6+
import io.github.randomcodespace.iq.grammar.AntlrParserFactory;
7+
import io.github.randomcodespace.iq.grammar.python.Python3Parser;
8+
import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener;
69
import io.github.randomcodespace.iq.model.CodeNode;
710
import io.github.randomcodespace.iq.model.NodeKind;
11+
import org.antlr.v4.runtime.tree.ParseTree;
12+
import org.antlr.v4.runtime.tree.ParseTreeWalker;
813
import org.springframework.stereotype.Component;
914

1015
import java.util.ArrayList;
@@ -15,18 +20,16 @@
1520
import java.util.regex.Pattern;
1621

1722
@Component
18-
public class DjangoAuthDetector extends AbstractRegexDetector {
23+
public class DjangoAuthDetector extends AbstractAntlrDetector {
1924

25+
// --- Regex fallback patterns ---
2026
private static final Pattern LOGIN_REQUIRED_RE = Pattern.compile("@login_required\\b");
21-
2227
private static final Pattern PERMISSION_REQUIRED_RE = Pattern.compile(
2328
"@permission_required\\(\\s*[\"']([^\"']*)[\"']"
2429
);
25-
2630
private static final Pattern USER_PASSES_TEST_RE = Pattern.compile(
2731
"@user_passes_test\\(\\s*([^,)\\s]+)"
2832
);
29-
3033
private static final Pattern MIXIN_RE = Pattern.compile(
3134
"class\\s+(\\w+)\\s*\\(([^)]*)\\):"
3235
);
@@ -48,7 +51,116 @@ public Set<String> getSupportedLanguages() {
4851
}
4952

5053
@Override
51-
public DetectorResult detect(DetectorContext ctx) {
54+
protected ParseTree parse(DetectorContext ctx) {
55+
return AntlrParserFactory.parse("python", ctx.content());
56+
}
57+
58+
@Override
59+
protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) {
60+
List<CodeNode> nodes = new ArrayList<>();
61+
String filePath = ctx.filePath();
62+
String moduleName = ctx.moduleName();
63+
64+
ParseTreeWalker.DEFAULT.walk(new Python3ParserBaseListener() {
65+
@Override
66+
public void enterDecorated(Python3Parser.DecoratedContext decorated) {
67+
if (decorated.decorators() == null) return;
68+
for (var dec : decorated.decorators().decorator()) {
69+
if (dec.dotted_name() == null) continue;
70+
String decoratorName = dec.dotted_name().getText();
71+
72+
// @login_required
73+
if ("login_required".equals(decoratorName)) {
74+
int line = lineOf(dec);
75+
CodeNode node = new CodeNode();
76+
node.setId("auth:" + filePath + ":login_required:" + line);
77+
node.setKind(NodeKind.GUARD);
78+
node.setLabel("@login_required");
79+
node.setModule(moduleName);
80+
node.setFilePath(filePath);
81+
node.setLineStart(line);
82+
node.setAnnotations(List.of("@login_required"));
83+
node.getProperties().put("auth_type", "django");
84+
node.getProperties().put("permissions", List.of());
85+
node.getProperties().put("auth_required", true);
86+
nodes.add(node);
87+
}
88+
89+
// @permission_required("perm")
90+
if ("permission_required".equals(decoratorName) && dec.arglist() != null) {
91+
int line = lineOf(dec);
92+
String permission = extractFirstStringArg(dec.arglist());
93+
if (permission == null) permission = "";
94+
CodeNode node = new CodeNode();
95+
node.setId("auth:" + filePath + ":permission_required:" + line);
96+
node.setKind(NodeKind.GUARD);
97+
node.setLabel("@permission_required(" + permission + ")");
98+
node.setModule(moduleName);
99+
node.setFilePath(filePath);
100+
node.setLineStart(line);
101+
node.setAnnotations(List.of("@permission_required"));
102+
node.getProperties().put("auth_type", "django");
103+
node.getProperties().put("permissions", List.of(permission));
104+
node.getProperties().put("auth_required", true);
105+
nodes.add(node);
106+
}
107+
108+
// @user_passes_test(fn)
109+
if ("user_passes_test".equals(decoratorName) && dec.arglist() != null) {
110+
int line = lineOf(dec);
111+
String testFunc = extractFirstArgName(dec.arglist());
112+
if (testFunc == null) testFunc = "";
113+
CodeNode node = new CodeNode();
114+
node.setId("auth:" + filePath + ":user_passes_test:" + line);
115+
node.setKind(NodeKind.GUARD);
116+
node.setLabel("@user_passes_test(" + testFunc + ")");
117+
node.setModule(moduleName);
118+
node.setFilePath(filePath);
119+
node.setLineStart(line);
120+
node.setAnnotations(List.of("@user_passes_test"));
121+
node.getProperties().put("auth_type", "django");
122+
node.getProperties().put("permissions", List.of());
123+
node.getProperties().put("test_function", testFunc);
124+
node.getProperties().put("auth_required", true);
125+
nodes.add(node);
126+
}
127+
}
128+
}
129+
130+
@Override
131+
public void enterClassdef(Python3Parser.ClassdefContext classCtx) {
132+
if (classCtx.name() == null) return;
133+
String className = classCtx.name().getText();
134+
if (classCtx.arglist() == null) return;
135+
136+
for (var arg : classCtx.arglist().argument()) {
137+
String base = arg.getText().trim();
138+
if (AUTH_MIXINS.containsKey(base)) {
139+
int line = lineOf(classCtx);
140+
CodeNode node = new CodeNode();
141+
node.setId("auth:" + filePath + ":" + base + ":" + line);
142+
node.setKind(NodeKind.GUARD);
143+
node.setLabel(className + "(" + base + ")");
144+
node.setModule(moduleName);
145+
node.setFilePath(filePath);
146+
node.setLineStart(line);
147+
node.setAnnotations(List.of("mixin:" + base));
148+
node.getProperties().put("auth_type", "django");
149+
node.getProperties().put("permissions", List.of());
150+
node.getProperties().put("mixin", base);
151+
node.getProperties().put("class_name", className);
152+
node.getProperties().put("auth_required", true);
153+
nodes.add(node);
154+
}
155+
}
156+
}
157+
}, tree);
158+
159+
return DetectorResult.of(nodes, List.of());
160+
}
161+
162+
@Override
163+
protected DetectorResult detectWithRegex(DetectorContext ctx) {
52164
List<CodeNode> nodes = new ArrayList<>();
53165
String text = ctx.content();
54166
if (text == null || text.isEmpty()) {
@@ -57,7 +169,6 @@ public DetectorResult detect(DetectorContext ctx) {
57169
String filePath = ctx.filePath();
58170
String moduleName = ctx.moduleName();
59171

60-
// @login_required
61172
Matcher m = LOGIN_REQUIRED_RE.matcher(text);
62173
while (m.find()) {
63174
int line = findLineNumber(text, m.start());
@@ -75,7 +186,6 @@ public DetectorResult detect(DetectorContext ctx) {
75186
nodes.add(node);
76187
}
77188

78-
// @permission_required("perm")
79189
m = PERMISSION_REQUIRED_RE.matcher(text);
80190
while (m.find()) {
81191
int line = findLineNumber(text, m.start());
@@ -94,7 +204,6 @@ public DetectorResult detect(DetectorContext ctx) {
94204
nodes.add(node);
95205
}
96206

97-
// @user_passes_test(fn)
98207
m = USER_PASSES_TEST_RE.matcher(text);
99208
while (m.find()) {
100209
int line = findLineNumber(text, m.start());
@@ -114,7 +223,6 @@ public DetectorResult detect(DetectorContext ctx) {
114223
nodes.add(node);
115224
}
116225

117-
// Class-based views with auth mixins
118226
m = MIXIN_RE.matcher(text);
119227
while (m.find()) {
120228
String className = m.group(1);
@@ -144,4 +252,21 @@ public DetectorResult detect(DetectorContext ctx) {
144252

145253
return DetectorResult.of(nodes, List.of());
146254
}
255+
256+
private static String extractFirstStringArg(Python3Parser.ArglistContext arglist) {
257+
if (arglist == null) return null;
258+
for (var arg : arglist.argument()) {
259+
String argText = arg.getText();
260+
if ((argText.startsWith("\"") && argText.endsWith("\""))
261+
|| (argText.startsWith("'") && argText.endsWith("'"))) {
262+
return argText.substring(1, argText.length() - 1);
263+
}
264+
}
265+
return null;
266+
}
267+
268+
private static String extractFirstArgName(Python3Parser.ArglistContext arglist) {
269+
if (arglist == null || arglist.argument().isEmpty()) return null;
270+
return arglist.argument(0).getText();
271+
}
147272
}

0 commit comments

Comments
 (0)