Skip to content

Commit 19c6619

Browse files
fix(security): enforce max-bytes cap on /api/file + MCP read_file (RAN-9) (#67)
Without a size guard, /api/file and the read_file MCP tool loaded entire files into the JVM heap before the optional line slice was applied. A multi-GB file in an indexed codebase trivially OOM'd the serving process, giving anyone with HTTP access a DoS lever against the read-only API and MCP endpoints. Adds a configurable cap (default 5 MiB, key: serving.max_file_bytes) and routes both entry points through a single SafeFileReader: - No line range → check on-disk size first; reject before reading. - With a line range → stream via BufferedReader, apply start/end filter, and track accumulated UTF-8 byte count against the cap. REST returns HTTP 413 (CONTENT_TOO_LARGE); MCP returns the usual JSON {"error": ...} payload. Both emit a message shaped like "File exceeds max size: N bytes (max M bytes)". Config is plumbed end-to-end through the unified config stack: ServingConfig.maxFileBytes, ConfigDefaults.builtIn() (5 MiB), ConfigMerger, EnvVarOverlay (CODEIQ_SERVING_MAXFILEBYTES), snake_case YAML key with camelCase alias, UnifiedConfigAdapter → legacy CodeIqConfig.maxFileBytes (with a >=1 clamp in the setter). Tests added: - SafeFileReaderTest: whole-file reject, line-range streaming, line-range reject mid-stream, negative startLine clamping. - GraphControllerTest: 413 on oversize whole-file read, 200 on narrow line range when the whole file exceeds cap. - McpToolsTest: "exceeds max size" error for oversize read, line-range passthrough, line-range reject. - UnifiedConfigAdapterTest: explicit maxFileBytes overrides default; absent value falls back to CodeIqConfig default (5 MiB). - ConfigResolverTest / ConfigValidatorTest / ConfigExplainSubcommandTest: updated for the new ServingConfig record shape. Default documented in docs/codeiq.yml.example. Co-authored-by: Paperclip <noreply@paperclip.ing>
1 parent f7c4401 commit 19c6619

19 files changed

Lines changed: 311 additions & 42 deletions

docs/codeiq.yml.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ serving:
5454
port: 8080 # HTTP port for REST + MCP + UI
5555
bind_address: 0.0.0.0 # interface to bind; 127.0.0.1 for localhost-only
5656
read_only: false # must be false in non-prod; CI gate enforces this
57+
max_file_bytes: 5242880 # cap on /api/file + MCP read_file (bytes); 5 MiB default — rejects with HTTP 413
5758
neo4j:
5859
dir: .codeiq/graph/graph.db # embedded Neo4j data directory
5960
page_cache_mb: 256 # Neo4j page cache (MB)

src/main/java/io/github/randomcodespace/iq/api/GraphController.java

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import io.github.randomcodespace.iq.model.NodeKind;
1818

1919
import java.io.IOException;
20-
import java.nio.charset.StandardCharsets;
2120
import java.nio.file.Files;
2221
import java.nio.file.NoSuchFileException;
2322
import java.nio.file.Path;
@@ -291,23 +290,14 @@ public ResponseEntity<String> readFile(
291290
return ResponseEntity.notFound().build();
292291
}
293292
try {
294-
String content = Files.readString(resolvedReal, StandardCharsets.UTF_8);
295-
if (startLine != null || endLine != null) {
296-
String[] lines = content.split("\n", -1);
297-
int start = (startLine != null ? startLine : 1);
298-
int end = (endLine != null ? endLine : lines.length);
299-
start = Math.max(1, Math.min(start, lines.length));
300-
end = Math.max(start, Math.min(end, lines.length));
301-
StringBuilder sb = new StringBuilder();
302-
for (int i = start - 1; i < end; i++) {
303-
if (i > start - 1) sb.append('\n');
304-
sb.append(lines[i]);
305-
}
306-
content = sb.toString();
307-
}
293+
String content = SafeFileReader.read(resolvedReal, startLine, endLine, config.getMaxFileBytes());
308294
return ResponseEntity.ok()
309295
.contentType(MediaType.TEXT_PLAIN)
310296
.body(content);
297+
} catch (SafeFileReader.FileTooLargeException tooLarge) {
298+
return ResponseEntity.status(HttpStatus.CONTENT_TOO_LARGE)
299+
.contentType(MediaType.TEXT_PLAIN)
300+
.body(tooLarge.getMessage());
311301
} catch (IOException e) {
312302
return ResponseEntity.status(500)
313303
.contentType(MediaType.TEXT_PLAIN)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package io.github.randomcodespace.iq.api;
2+
3+
import java.io.BufferedReader;
4+
import java.io.IOException;
5+
import java.nio.charset.StandardCharsets;
6+
import java.nio.file.Files;
7+
import java.nio.file.Path;
8+
9+
/**
 * Reads files for the read-only serving layer with a hard byte cap.
 *
 * <p>The two entry points that surface repo content over HTTP — {@code GET /api/file}
 * and the {@code read_file} MCP tool — must never load unbounded content into the JVM
 * heap; a multi-GB file would OOM the serving process and become a trivial DoS vector.
 *
 * <p>Behaviour:
 * <ul>
 *   <li>Without a line range, the file's on-disk size is checked first and the read
 *       is rejected if it exceeds the cap.</li>
 *   <li>With a {@code startLine}/{@code endLine} range, the file is scanned character
 *       by character through a {@link BufferedReader}. Only characters of in-range
 *       lines are retained, and only while the accumulated UTF-8 byte count is within
 *       the cap, so neither a skipped line nor a single over-long line (for example a
 *       file with no newline at all) is ever materialised on the heap.</li>
 * </ul>
 */
public final class SafeFileReader {

    /**
     * Signals that a read was refused because it would exceed the configured cap.
     *
     * <p>{@link #size()} is the on-disk size for whole-file reads, and the accumulated
     * UTF-8 byte count up to and including the offending line for line-range reads.
     */
    public static final class FileTooLargeException extends RuntimeException {
        private final long size;
        private final long max;

        public FileTooLargeException(long size, long max) {
            super("File exceeds max size: " + size + " bytes (max " + max + " bytes)");
            this.size = size;
            this.max = max;
        }

        public long size() { return size; }
        public long max() { return max; }
    }

    private SafeFileReader() {}

    /**
     * Reads {@code path} as UTF-8, optionally restricted to a 1-based inclusive line range.
     *
     * @param path      file to read
     * @param startLine first line to return; {@code null} or values below 1 mean line 1
     * @param endLine   last line to return; {@code null} means "to end of file", and values
     *                  below the effective start are raised to it
     * @param maxBytes  maximum number of bytes the result may occupy
     * @return the whole file when both bounds are {@code null}; otherwise the selected lines
     *         joined with {@code '\n'} and without a trailing separator
     * @throws IOException           if the file cannot be read or is not valid UTF-8
     * @throws FileTooLargeException if the file (or the selected range) exceeds {@code maxBytes}
     */
    public static String read(Path path, Integer startLine, Integer endLine, long maxBytes)
            throws IOException {
        if (startLine == null && endLine == null) {
            // NOTE(review): the size check and the read are two separate calls, so a file
            // that grows in between is not caught by this guard.
            long size = Files.size(path);
            if (size > maxBytes) {
                throw new FileTooLargeException(size, maxBytes);
            }
            return Files.readString(path, StandardCharsets.UTF_8);
        }
        int start = Math.max(1, startLine != null ? startLine : 1);
        int end = endLine != null ? Math.max(start, endLine) : Integer.MAX_VALUE;
        StringBuilder sb = new StringBuilder();
        long accumulated = 0;
        boolean first = true;
        try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
            // long index: with end == Integer.MAX_VALUE an int counter could wrap around.
            for (long idx = 1; idx <= end; idx++) {
                boolean inRange = idx >= start;
                boolean needsSeparator = inRange && !first;
                long lineBytes = consumeLine(
                        reader, inRange ? sb : null, needsSeparator, maxBytes - accumulated);
                if (lineBytes < 0) {
                    break; // end of file
                }
                if (!inRange) {
                    continue;
                }
                long add = lineBytes + (needsSeparator ? 1L : 0L);
                if (accumulated + add > maxBytes) {
                    throw new FileTooLargeException(accumulated + add, maxBytes);
                }
                accumulated += add;
                first = false;
            }
        }
        return sb.toString();
    }

    /**
     * Consumes one line (terminated by {@code \n}, {@code \r}, {@code \r\n} or end of file)
     * from {@code reader} without ever buffering it as a whole.
     *
     * <p>When {@code sink} is non-null, the optional {@code '\n'} separator and the line's
     * characters are appended to it only while the running byte count stays within
     * {@code budget}; past that point the line is still measured but no longer stored, so
     * memory use is bounded by the budget while the caller can still report the full size.
     *
     * @return the UTF-8 byte length of the line (terminator excluded), or {@code -1} when the
     *         reader was already at end of file
     */
    private static long consumeLine(
            BufferedReader reader, StringBuilder sink, boolean separator, long budget)
            throws IOException {
        int c = reader.read();
        if (c < 0) {
            return -1L;
        }
        long separatorBytes = separator ? 1L : 0L;
        if (sink != null && separator && separatorBytes <= budget) {
            sink.append('\n');
        }
        long lineBytes = 0;
        while (c >= 0 && c != '\n' && c != '\r') {
            lineBytes += utf8Length((char) c);
            if (sink != null && separatorBytes + lineBytes <= budget) {
                sink.append((char) c);
            }
            c = reader.read();
        }
        if (c == '\r') {
            // Treat "\r\n" as a single terminator: peek one char and un-read it if it is not '\n'.
            reader.mark(1);
            if (reader.read() != '\n') {
                reader.reset();
            }
        }
        return lineBytes;
    }

    /**
     * Returns the number of UTF-8 bytes contributed by one UTF-16 code unit. Each half of a
     * surrogate pair counts as 2, giving 4 for the supplementary code point as a whole.
     */
    private static int utf8Length(char ch) {
        if (ch < 0x80) {
            return 1;
        }
        if (ch < 0x800 || Character.isSurrogate(ch)) {
            return 2;
        }
        return 3;
    }
}

src/main/java/io/github/randomcodespace/iq/config/CodeIqConfig.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ public class CodeIqConfig {
5050
/** Maximum lines per snippet returned in evidence packs (default 50). */
5151
private int maxSnippetLines = 50;
5252

53+
/** Maximum bytes read by the serving layer's /api/file and MCP read_file (default 5 MiB). */
54+
private long maxFileBytes = 5L * 1024L * 1024L;
55+
5356
public static class Graph {
5457
private String path = ".codeiq/graph/graph.db";
5558

@@ -152,4 +155,12 @@ public int getMaxSnippetLines() {
152155
void setMaxSnippetLines(int maxSnippetLines) {
153156
this.maxSnippetLines = Math.max(1, maxSnippetLines);
154157
}
158+
159+
public long getMaxFileBytes() {
160+
return maxFileBytes;
161+
}
162+
163+
void setMaxFileBytes(long maxFileBytes) {
164+
this.maxFileBytes = Math.max(1L, maxFileBytes);
165+
}
155166
}

src/main/java/io/github/randomcodespace/iq/config/UnifiedConfigAdapter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ public static CodeIqConfig toCodeIqConfig(CodeIqUnifiedConfig u) {
5858
if (u.serving().readOnly() != null) {
5959
c.setReadOnly(u.serving().readOnly());
6060
}
61+
if (u.serving().maxFileBytes() != null) {
62+
c.setMaxFileBytes(u.serving().maxFileBytes());
63+
}
6164
if (u.serving().neo4j() != null && u.serving().neo4j().dir() != null) {
6265
CodeIqConfig.Graph graph = new CodeIqConfig.Graph();
6366
graph.setPath(u.serving().neo4j().dir());

src/main/java/io/github/randomcodespace/iq/config/unified/ConfigDefaults.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public static CodeIqUnifiedConfig builtIn() {
3030
8080,
3131
"0.0.0.0",
3232
false,
33+
5L * 1024L * 1024L, // maxFileBytes — 5 MiB cap on /api/file + read_file
3334
new Neo4jConfig(
3435
".codeiq/graph/graph.db",
3536
256, 256, 1024

src/main/java/io/github/randomcodespace/iq/config/unified/ConfigMerger.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ private IndexingConfig mergeIndexing(IndexingConfig lo, IndexingConfig hi, Input
6666

6767
private ServingConfig mergeServing(ServingConfig lo, ServingConfig hi, Input l, Map<String,ConfigProvenance> p) {
6868
return new ServingConfig(
69-
take("serving.port", lo.port(), hi.port(), l, p),
70-
take("serving.bind_address", lo.bindAddress(), hi.bindAddress(), l, p),
71-
take("serving.read_only", lo.readOnly(), hi.readOnly(), l, p),
69+
take("serving.port", lo.port(), hi.port(), l, p),
70+
take("serving.bind_address", lo.bindAddress(), hi.bindAddress(), l, p),
71+
take("serving.read_only", lo.readOnly(), hi.readOnly(), l, p),
72+
take("serving.max_file_bytes", lo.maxFileBytes(), hi.maxFileBytes(), l, p),
7273
new Neo4jConfig(
7374
take("serving.neo4j.dir", lo.neo4j().dir(), hi.neo4j().dir(), l, p),
7475
take("serving.neo4j.page_cache_mb", lo.neo4j().pageCacheMb(), hi.neo4j().pageCacheMb(), l, p),

src/main/java/io/github/randomcodespace/iq/config/unified/EnvVarOverlay.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static CodeIqUnifiedConfig from(Map<String, String> env) {
2121
pageMb = null, heapInit = null, heapMax = null,
2222
maxDepth = null, maxRadius = null, maxFiles = null, maxSnippetLines = null,
2323
parallelism = null;
24-
Long maxPayload = null;
24+
Long maxPayload = null, servingMaxFileBytes = null;
2525
Boolean readOnly = null, incremental = null, metrics = null, tracing = null, mcpEnabled = null;
2626
String cacheDir = null, bindAddr = null, projectName = null, projectRoot = null,
2727
projectServiceName = null,
@@ -56,6 +56,7 @@ public static CodeIqUnifiedConfig from(Map<String, String> env) {
5656
case "SERVING_PORT" -> port = Integer.parseInt(v);
5757
case "SERVING_BINDADDRESS" -> bindAddr = v;
5858
case "SERVING_READONLY" -> readOnly = Boolean.parseBoolean(v);
59+
case "SERVING_MAXFILEBYTES" -> servingMaxFileBytes = Long.parseLong(v);
5960
case "SERVING_NEO4J_DIR" -> neo4jDir = v;
6061
case "SERVING_NEO4J_PAGECACHEMB" -> pageMb = Integer.parseInt(v);
6162
case "SERVING_NEO4J_HEAPINITIALMB" -> heapInit = Integer.parseInt(v);
@@ -90,7 +91,7 @@ public static CodeIqUnifiedConfig from(Map<String, String> env) {
9091
new ProjectConfig(projectName, projectRoot, projectServiceName, List.of()),
9192
new IndexingConfig(languages, include, exclude, incremental, cacheDir, parallelism, batch,
9293
maxDepth, maxRadius, maxFiles, maxSnippetLines, parsers),
93-
new ServingConfig(port, bindAddr, readOnly,
94+
new ServingConfig(port, bindAddr, readOnly, servingMaxFileBytes,
9495
new Neo4jConfig(neo4jDir, pageMb, heapInit, heapMax)),
9596
new McpConfig(mcpEnabled, mcpTransport, mcpBasePath,
9697
new McpAuthConfig(mcpMode, mcpTokenEnv),
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
package io.github.randomcodespace.iq.config.unified;
2-
public record ServingConfig(Integer port, String bindAddress, Boolean readOnly, Neo4jConfig neo4j) {
3-
public static ServingConfig empty() { return new ServingConfig(null, null, null, Neo4jConfig.empty()); }
2+
public record ServingConfig(Integer port, String bindAddress, Boolean readOnly, Long maxFileBytes, Neo4jConfig neo4j) {
3+
public static ServingConfig empty() { return new ServingConfig(null, null, null, null, Neo4jConfig.empty()); }
44
}

src/main/java/io/github/randomcodespace/iq/config/unified/UnifiedConfigLoader.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ private static ServingConfig servingFrom(Map<String, Object> m, Path path, Set<S
126126
requireIntOrNull(m.get("port"), path, "serving.port"),
127127
(String) pick(m, "serving", "bind_address", "bindAddress", path, warned),
128128
(Boolean) pick(m, "serving", "read_only", "readOnly", path, warned),
129+
requireLongOrNull(pick(m, "serving", "max_file_bytes", "maxFileBytes", path, warned),
130+
path, "serving.max_file_bytes"),
129131
n4j);
130132
}
131133

0 commit comments

Comments
 (0)