Skip to content

Commit d1365fe

Browse files
solodmd and altaywtf authored
fix: handle ENOSPC file watcher errors gracefully (openclaw#73357)
Merged via squash. Prepared head SHA: ce2dd6e Co-authored-by: solodmd <51304754+solodmd@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf
1 parent 4781b46 commit d1365fe

5 files changed

Lines changed: 230 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ Docs: https://docs.openclaw.ai
114114
- Parallels/Windows update smoke: escape the stale post-swap import regex in the generated PowerShell script so expected `ERR_MODULE_NOT_FOUND` update handoffs continue to post-update health checks. (#75315)
115115
- Slack: allow draft preview streaming in top-level DMs when `replyToMode` is `off` while keeping Slack native streaming and assistant thread status gated on reply threads. Fixes #56480. (#56544) Thanks @HangGlidersRule.
116116
- Control UI/chat: remove the delete-confirm popover outside-click listener on every dismiss path, so Cancel, Delete, outside clicks, and same-button toggles no longer leave stale document listeners behind. Refs #75590 and #69982. Thanks @Ricardo-M-L.
117+
- Memory-core: treat exhausted file watcher limits as non-fatal for builtin memory auto-sync while preserving fatal handling for unrelated disk-full errors. (#73357) Thanks @solodmd.
117118

118119
## 2026.5.2
119120

extensions/memory-core/src/memory/manager-sync-ops.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,12 @@ export abstract class MemoryManagerSyncOps {
446446
this.watcher.on("change", markDirty);
447447
this.watcher.on("unlink", markDirty);
448448
this.watcher.on("unlinkDir", markDirty);
449+
this.watcher.on("error", (err) => {
450+
// File watcher errors (e.g., ENOSPC) should not crash the gateway.
451+
// Log the error and continue - memory search still works without auto-sync.
452+
const message = err instanceof Error ? err.message : String(err);
453+
log.warn(`memory watcher error: ${message}`);
454+
});
449455
}
450456

451457
protected ensureSessionListener() {

extensions/memory-core/src/memory/manager.watcher-config.test.ts

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vites
99

1010
type WatchIgnoredFn = (watchPath: string, stats?: { isDirectory?: () => boolean }) => boolean;
1111

12-
const { createdWatchers, watchMock } = vi.hoisted(() => {
13-
type WatchEvent = "add" | "change" | "unlink" | "unlinkDir";
14-
type WatchCallback = () => void;
12+
const { createdWatchers, memoryLoggerWarn, watchMock } = vi.hoisted(() => {
13+
type WatchEvent = "add" | "change" | "unlink" | "unlinkDir" | "error";
14+
type WatchCallback = (value?: unknown) => void;
1515
function createMockWatcher() {
1616
const handlers = new Map<WatchEvent, WatchCallback[]>();
1717
const watcher = {
@@ -20,9 +20,9 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
2020
return watcher;
2121
}),
2222
close: vi.fn(async () => undefined),
23-
emit: (event: WatchEvent) => {
23+
emit: (event: WatchEvent, value?: unknown) => {
2424
for (const callback of handlers.get(event) ?? []) {
25-
callback();
25+
callback(value);
2626
}
2727
},
2828
};
@@ -31,6 +31,7 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
3131
const watchers: Array<ReturnType<typeof createMockWatcher>> = [];
3232
const result = {
3333
createdWatchers: watchers,
34+
memoryLoggerWarn: vi.fn(),
3435
watchMock: vi.fn(() => {
3536
const watcher = createMockWatcher();
3637
watchers.push(watcher);
@@ -42,6 +43,18 @@ const { createdWatchers, watchMock } = vi.hoisted(() => {
4243
return result;
4344
});
4445

46+
// Keep the real foundation module, but have every subsystem logger it creates route
// `warn` into the hoisted `memoryLoggerWarn` spy so tests can assert on warnings.
vi.mock("openclaw/plugin-sdk/memory-core-host-engine-foundation", async (importOriginal) => {
  type FoundationModule = typeof import("openclaw/plugin-sdk/memory-core-host-engine-foundation");
  const foundation = await importOriginal<FoundationModule>();

  const createSubsystemLogger = (subsystem: string) => {
    const realLogger = foundation.createSubsystemLogger(subsystem);
    return { ...realLogger, warn: memoryLoggerWarn };
  };

  return { ...foundation, createSubsystemLogger };
});
57+
4558
vi.mock("./sqlite-vec.js", () => ({
4659
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
4760
}));
@@ -246,4 +259,16 @@ describe("memory watcher config", () => {
246259
expect(syncSpy).toHaveBeenCalledWith({ reason: "watch" });
247260
},
248261
);
262+
263+
it("attaches a logging non-throwing watcher error listener", async () => {
  await setupWatcherWorkspace({ name: "notes.md", contents: "hello" });
  await expectWatcherManager(createWatcherConfig());

  // The first watcher created by the mocked watch() call.
  const [firstWatcher] = createdWatchers;
  const emitWatcherError = () => firstWatcher?.emit("error", new Error("watcher error: ENOSPC"));

  // An "error" listener must be registered, must not throw when invoked,
  // and must report the failure through the subsystem logger's warn().
  expect(firstWatcher?.on).toHaveBeenCalledWith("error", expect.any(Function));
  expect(emitWatcherError).not.toThrow();
  expect(memoryLoggerWarn).toHaveBeenCalledWith("memory watcher error: watcher error: ENOSPC");
});
249274
});

src/infra/unhandled-rejections.test.ts

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
22
import {
33
isAbortError,
44
isBenignUncaughtExceptionError,
5+
isTransientFileWatchError,
56
isTransientNetworkError,
67
isTransientSqliteError,
78
isTransientUnhandledRejectionError,
@@ -258,6 +259,104 @@ describe("isTransientSqliteError", () => {
258259
});
259260
});
260261

262+
describe("isTransientFileWatchError", () => {
  /** Builds an Error carrying a Node-style `code` property. */
  const errorWithCode = (message: string, code: string) =>
    Object.assign(new Error(message), { code });

  it.each([
    "inotify watches exhausted",
    "System limit for number of file watchers reached",
    "watcher error: ENOSPC",
  ])("returns true for ENOSPC with watch indicator: %s", (message) => {
    expect(isTransientFileWatchError(errorWithCode(message, "ENOSPC"))).toBe(true);
  });

  it.each(["write failed: no space left on device", "ENOSPC: disk full"])(
    "returns false for ENOSPC without watch indicator (general disk full): %s",
    (message) => {
      expect(isTransientFileWatchError(errorWithCode(message, "ENOSPC"))).toBe(false);
    },
  );

  it.each(["write failed: no space left on device", "ENOSPC: no space left on device"])(
    "returns false for message-only disk full without watch indicator: %s",
    (message) => {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
    },
  );

  // Code-less errors are only accepted when the message itself describes watcher
  // exhaustion, or pairs "no space left on device" with a watcher indicator.
  it.each([
    "file watcher: no space left on device",
    "inotify watches exhausted",
    "System limit for number of file watchers reached",
  ])("returns true for code-less watcher exhaustion message: %s", (message) => {
    expect(isTransientFileWatchError(new Error(message))).toBe(true);
  });

  it.each(["file watcher failed", "watcher error: boom", "watcher error: ENOSPC"])(
    "returns false for generic code-less watcher message: %s",
    (message) => {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
      expect(isTransientUnhandledRejectionError(new Error(message))).toBe(false);
    },
  );

  it("returns true for ENOSPC with cause chain containing watch indicator", () => {
    const cause = errorWithCode("inotify watches exhausted", "ENOSPC");
    const error = Object.assign(new Error("watcher failed"), { cause });
    expect(isTransientFileWatchError(error)).toBe(true);
  });

  it.each(["watchdog timeout", "cannot watch process"])(
    "returns false for unrelated watch error: %s",
    (message) => {
      expect(isTransientFileWatchError(new Error(message))).toBe(false);
    },
  );

  it("returns false for regular errors without file watch indicators", () => {
    const unrelatedErrors = [
      new Error("Something went wrong"),
      new TypeError("Cannot read property"),
      new RangeError("Invalid array length"),
    ];
    for (const error of unrelatedErrors) {
      expect(isTransientFileWatchError(error)).toBe(false);
    }
  });

  it("returns false for other disk errors without ENOSPC", () => {
    expect(isTransientFileWatchError(new Error("disk quota exceeded"))).toBe(false);
    expect(isTransientFileWatchError(errorWithCode("read only file system", "EROFS"))).toBe(false);
  });

  it.each([null, undefined, "string error", 42, { message: "plain object" }])(
    "returns false for non-file-watch input %#",
    (value) => {
      expect(isTransientFileWatchError(value)).toBe(false);
    },
  );
});
359+
261360
describe("isTransientUnhandledRejectionError", () => {
262361
it("treats raw pre-connect network uncaught exceptions as benign", () => {
263362
const epipe = Object.assign(new Error("write EPIPE"), { code: "EPIPE" });
@@ -287,4 +386,30 @@ describe("isTransientUnhandledRejectionError", () => {
287386

288387
expect(isTransientUnhandledRejectionError(error)).toBe(true);
289388
});
389+
390+
it("returns true for transient file watcher errors (ENOSPC + inotify)", () => {
  // Watch-limit exhaustion carrying both the ENOSPC code and an inotify message.
  const watchLimitError = Object.assign(new Error("inotify watches exhausted"), {
    code: "ENOSPC",
  });
  expect(isTransientUnhandledRejectionError(watchLimitError)).toBe(true);
});

it("returns true for file watcher errors with message only", () => {
  // No error code at all: the exhaustion wording alone must be enough.
  const messageOnlyError = new Error("System limit for number of file watchers reached");
  expect(isTransientUnhandledRejectionError(messageOnlyError)).toBe(true);
});

it("returns false for ENOSPC without watch indicator (general disk full)", () => {
  // A genuine disk-full failure must stay fatal even though it shares the ENOSPC code.
  const diskFullError = Object.assign(new Error("write failed: no space left on device"), {
    code: "ENOSPC",
  });
  expect(isTransientUnhandledRejectionError(diskFullError)).toBe(false);
});

it("returns false for code-less disk full messages without watch indicator", () => {
  const diskFullMessages = [
    "write failed: no space left on device",
    "ENOSPC: no space left on device",
  ];
  for (const message of diskFullMessages) {
    expect(isTransientUnhandledRejectionError(new Error(message))).toBe(false);
  }
});
290415
});

src/infra/unhandled-rejections.ts

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,75 @@ export function isTransientSqliteError(err: unknown): boolean {
350350
return false;
351351
}
352352

353+
/**
 * Checks if an error is a transient file watcher error that shouldn't crash the gateway.
 * These are typically resource exhaustion issues (e.g., inotify watches exhausted) that
 * can be recovered from by degrading to manual sync mode.
 *
 * ENOSPC is a general POSIX error code (disk full, write failures, etc.), so the code
 * alone is never sufficient. Each candidate produced by
 * `collectNestedUnhandledErrorCandidates(err)` is classified as follows:
 *
 * - Code is `ENOSPC`: transient only if the message also contains a watcher/inotify
 *   indicator; otherwise the candidate is skipped.
 * - Any other code, or no code: transient only if the message explicitly describes
 *   watcher resource exhaustion, or combines "no space left on device" with a
 *   watcher/inotify indicator. Generic labels such as "file watcher failed" are not
 *   matched because they can wrap permission/config/runtime failures.
 *
 * Messages are matched after `normalizeLowercaseStringOrEmpty`, so the markers below
 * are written in lowercase.
 *
 * @param err - Any thrown or rejected value; falsy and non-object values yield `false`.
 * @returns `true` when at least one candidate matches the rules above.
 */
export function isTransientFileWatchError(err: unknown): boolean {
  if (!err) {
    return false;
  }

  // Markers showing the message concerns a file watcher at all.
  // "watcher" already matches "file watcher", so no separate entry is needed.
  const watchContextMarkers: readonly string[] = [
    "inotify",
    "watcher",
    "watch limit",
    "max watches",
  ];
  // Markers that on their own describe watcher resource exhaustion.
  // "inotify watch" already matches "inotify watches".
  const watchExhaustionMarkers: readonly string[] = [
    "inotify watch",
    "system limit for number of file watchers",
    "watch limit",
    "max watches",
  ];
  const containsAny = (message: string, markers: readonly string[]) =>
    markers.some((marker) => message.includes(marker));

  for (const candidate of collectNestedUnhandledErrorCandidates(err)) {
    // Only object candidates can carry a code or message.
    if (!candidate || typeof candidate !== "object") {
      continue;
    }

    const code = extractErrorCodeOrErrno(candidate);
    const rawMessage =
      "message" in candidate && typeof candidate.message === "string" ? candidate.message : "";
    const message = normalizeLowercaseStringOrEmpty(rawMessage);
    const hasWatchContext = containsAny(message, watchContextMarkers);

    // ENOSPC requires a watch/inotify indicator in the message to avoid
    // misclassifying general disk-full errors as transient watcher errors.
    if (code === "ENOSPC") {
      if (hasWatchContext) {
        return true;
      }
      continue;
    }

    // Without an ENOSPC code, only explicit watcher resource exhaustion qualifies.
    if (
      (message.includes("no space left on device") && hasWatchContext) ||
      containsAny(message, watchExhaustionMarkers)
    ) {
      return true;
    }
  }

  return false;
}
417+
353418
export function isTransientUnhandledRejectionError(err: unknown): boolean {
354-
return isTransientNetworkError(err) || isTransientSqliteError(err);
419+
return (
420+
isTransientNetworkError(err) || isTransientSqliteError(err) || isTransientFileWatchError(err)
421+
);
355422
}
356423

357424
function isBenignUncaughtNetworkException(err: unknown): boolean {

0 commit comments

Comments
 (0)