build: Installed ast_grep in the sandbox kit definition

docs: Updated the README to mention the installation of ast-grep
docs: added the ast_grep tool to the config.example.yaml
2026-07-04 13:14:22 -06:00 · 2026-07-04 13:14:08 -06:00 · 2026-07-04 13:13:16 -06:00 · 2026-07-04 13:02:50 -06:00 · 2026-07-04 12:59:05 -06:00 · 2026-07-04 12:50:37 -06:00
78 changed files with 9073 additions and 524 deletions
@@ -1,3 +1,82 @@
 ## v0.7.4 (2026-07-02)
 ### Feat
 - Pin specific usql version to sbx kit
 - recursively take ownership over the copied in coyote config for the sbx
 - explicitly specify the COYOTE_CONFIG_DIR in the sbx kit
 - --tail-logs can track log rollovers and incorporates a sleep timer to minimize idle CPU cycles
 - Added support for log rolling so log files don't just blow up over time
 ### Fix
 - Added back in --kit specification for the running of the sbx
 - sbx isn't copying base files in their respective directories
 - Update deprecated sbx kit config
 - Properly chown the coyote config recursively and password file in the sbx
 ## v0.7.3 (2026-06-24)
 ### Fix
 - apply bootstrapping of functions at startup to fix edge case
 ## v0.7.2 (2026-06-19)
 ### Fix
 - usql version upgrade
 ## v0.7.1 (2026-06-19)
 ### Fix
 - sbx mixins must be passed in as directories, not as files, and the files must be named spec.yaml per the new sbx version
 ## v0.7.0 (2026-06-18)
 ### Feat
 - added configurable cache path via the COYOTE_CACHE_PATH environment variable
 - added a memory option to .set tab completions
 - Added a diagnostic .info tools subcommand to make it easier to see what tools are enabled in all contexts
 - Added additional info outputs for enabled skills and sbx directories
 - directly execute shell commands from within the REPL
 - created mixin kit for built-in functions and MCP servers
 - Added sbx mixins for the secrets providers so users can also bootstrap those as well.
 - added support for loading sbx mixins that are dynamically discovered in the user's workspace and config directory
 - Added a --fresh flag to let users create a truly bare bones sandbox without bootstrapping their config
 - initial built-in sandboxing support powered by Docker sbx
 - Added the ability to auto-bootstrap workspace memory when in git repos
 - Added explicit guardrail handling for pending agents
 - auto-append memory to memory index and don't necessarily require the LLM to remember to do it after a write
 - Added an --init-memory [global|workspace] flag to easily and quickly enable memory
 - added memory global configuration settings to the output of --info and .info
 - added .set memory REPL commands to control memory injection and applied formatting
 - Create the built-in memory management tools
 - Append the memory system prompts (readonly or r/w) to the system prompt when applicable
 - Created the --no-memory CLI flag to disable memory for this invocation
 - Added the memory configuration properties and storage to the main app config, roles, sessions, and agents.
 - initial scaffolding of a memory system
 ### Fix
 - rebuild the tool scope after dynamically updating the skills_enabled value in the REPL
 - properly resolve Windows-based local vault password file locations and bootstrap them into the sandbox when possible
 - auto-translation of user-prefixed Mac and Linux paths for the vault password file when running inside a sandbox
 - don't attempt to auto complete .vault list in the REPL; that's the end of the command
 - buffer tool stdout as well as stderr so that any tools that error to stdout are captured and included in the response to the model, enabling the model to see what went wrong and to reason about how to fix it.
 - auto-bootstrapped memory was accidentally putting the MEMORY.md directly in the repo root rather than .coyote/memory/MEMORY.md
 - improved the fs_patch script description and added improved error handling to it.
 - added in forgotten require_max_tokens to the fable model
 - append memory functions to non-graph based agents on init
 - when auto_continue is disabled via the .set auto_continue false command, it should strip the todo functions from the list of functions
 - use rawPredict for non-streaming Claude requests
 ### Refactor
 - Migrated the .skills command completion to use StateFlags and updated the help messages
 ## v0.6.0 (2026-06-05)
 ### Feat
@@ -1,6 +1,6 @@
 [package]
 name = "coyote-ai"
-version = "0.6.0"
+version = "0.7.4"
 edition = "2024"
 authors = ["Alex Clarke <alex.j.tusa@gmail.com>"]
 description = "An all-in-one, batteries included LLM CLI Tool"
@@ -49,7 +49,7 @@ textwrap = "0.16.0"
 ansi_colours = "1.2.2"
 eventsource-stream = "0.2.3"
 log = "0.4.28"
-log4rs = { version = "1.4.0", features = ["file_appender"] }
+log4rs = { version = "1.4.0", features = ["file_appender", "rolling_file_appender", "compound_policy", "fixed_window_roller", "size_trigger"] }
 shell-words = "1.1.0"
 sha2 = "0.10.8"
 unicode-width = "0.2.0"
@@ -58,6 +58,8 @@ http = "1.1.0"
 indexmap = { version = "2.2.6", features = ["serde"] }
 hmac = "0.12.1"
 aws-smithy-eventstream = "0.60.4"
 aws-smithy-types = "=1.4.9"
 time = "=0.3.47"
 urlencoding = "2.1.3"
 json-patch = { version = "4.0.0", default-features = false }
 bitflags = "2.5.0"
@@ -25,6 +25,7 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
 * [REPL](https://github.com/Dark-Alex-17/coyote/wiki/REPL): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Coyote.
  * [Custom REPL Prompt](https://github.com/Dark-Alex-17/coyote/wiki/REPL-Prompt): Customize the REPL prompt to provide useful contextual information.
 * [Vault](https://github.com/Dark-Alex-17/coyote/wiki/Vault): Securely store and manage sensitive information such as API keys and credentials.
 * [Sandboxes](https://github.com/Dark-Alex-17/coyote/wiki/Sandboxes): Launch Coyote inside an isolated [Docker Sandbox](https://docs.docker.com/ai/sandboxes/) with one command. Host config and vault credentials are projected in automatically; everything else is delegated to the `sbx` CLI.
 * [Shell Integrations](https://github.com/Dark-Alex-17/coyote/wiki/Shell-Integrations): Seamlessly integrate Coyote with your shell environment for enhanced command-line assistance.
 * [Function Calling](https://github.com/Dark-Alex-17/coyote/wiki/Tools): Leverage function calling capabilities to extend Coyote's functionality with custom tools
    * [Creating Custom Tools](https://github.com/Dark-Alex-17/coyote/wiki/Custom-Tools): You can create your own custom tools to enhance Coyote's capabilities.
@@ -36,6 +37,7 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
 * [Macros](https://github.com/Dark-Alex-17/coyote/wiki/Macros): Automate repetitive tasks and workflows with Coyote "scripts" (macros).
 * [RAG](https://github.com/Dark-Alex-17/coyote/wiki/RAG): Retrieval-Augmented Generation for enhanced information retrieval and generation.
 * [Sessions](https://github.com/Dark-Alex-17/coyote/wiki/Sessions): Manage and persist conversational contexts and settings across multiple interactions.
 * [Memory](https://github.com/Dark-Alex-17/coyote/wiki/Memory): Persistent file-based memory that survives across sessions. Bootstrap with `coyote --init-memory [global|workspace]`.
 * [Roles](https://github.com/Dark-Alex-17/coyote/wiki/Roles): Customize model behavior for specific tasks or domains.
 * [Skills](https://github.com/Dark-Alex-17/coyote/wiki/Skills): Modular knowledge or capability packs the LLM can load and unload mid-conversation. Multiple skills compose; instructions stack, tools and MCPs union.
 * [Agents](https://github.com/Dark-Alex-17/coyote/wiki/Agents): Leverage AI agents to perform complex tasks and workflows, including sub-agent spawning, teammate messaging, and user interaction tools.
@@ -57,6 +59,14 @@ Coyote requires the following tools to be installed on your system:
 * [docker](https://docs.docker.com/engine/install/)
 * [uv](https://docs.astral.sh/uv/getting-started/installation/)
    * `curl -LsSf https://astral.sh/uv/install.sh | sh`
 * [iwe](https://github.com/iwe-org/iwe) (`iwec`, for the built-in `iwe` MCP server that navigates large markdown knowledgebases)
    * **Homebrew:** `brew tap iwe-org/iwe && brew install iwe`
    * **Cargo:** `cargo install iwec`
 * [ast-grep](https://ast-grep.github.io/) (for the built-in `ast_grep` structural code search tool, used by the `explore` agent)
    * **Homebrew:** `brew install ast-grep`
    * **Cargo:** `cargo install ast-grep --locked`
    * **npm:** `npm i -g @ast-grep/cli`
    * Optional: if `ast-grep` is not installed, the `ast_grep` tool reports it and agents fall back to `fs_grep`
 These tools are used to provide various functionalities within Coyote, such as document processing, JSON manipulation,
 etc., and they are used within agents and tools.
@@ -96,7 +106,7 @@ You can use the following command to run a bash script that downloads and instal
 OS (Linux/MacOS) and architecture (x86_64/arm64):
 ```shell
-curl -fsSL https://raw.githubusercontent.com/Dark-Alex-17/coyote/main/install_coyote.sh | bash
+curl -fsSL https://raw.githubusercontent.com/Dark-Alex-17/coyote/refs/heads/main/scripts/install_coyote.sh | bash
 ```
 #### Windows/Linux/MacOS (`PowerShell`)
@@ -104,7 +114,7 @@ You can use the following command to run a PowerShell script that downloads and
 for your OS (Windows/Linux/MacOS) and architecture (x86_64/arm64):
 ```powershell
-powershell -NoProfile -ExecutionPolicy Bypass -Command "iwr -useb https://raw.githubusercontent.com/Dark-Alex-17/coyote/main/scripts/install_coyote.ps1 | iex"
+powershell -NoProfile -ExecutionPolicy Bypass -Command "iwr -useb https://raw.githubusercontent.com/Dark-Alex-17/coyote/refs/heads/main/scripts/install_coyote.ps1 | iex"
 ```
 ### Manual
@@ -1,6 +1,6 @@
 name: explore
 description: Fast codebase exploration agent - finds patterns, structures, and relevant files. Designed to be fanned out 2-5 in parallel by orchestrators.
-version: 3.0.0
+version: 3.1.0
 skills_enabled: true
 enabled_skills:
@@ -19,6 +19,7 @@ global_tools:
  - fs_grep.sh
  - fs_glob.sh
  - fs_ls.sh
  - ast_grep.sh
 instructions: |
  You are a codebase explorer. Your job: Search, find, report. Nothing else.
@@ -49,6 +50,8 @@ instructions: |
  4. **Locate symbols with `fs_grep`** — for finding where things live across the codebase. `fs_grep --pattern "fn handle_request" --include "*.rs"` is faster than reading files.
  4b. **Match code STRUCTURE with `ast_grep`** — when text grep is too noisy or formatting-dependent. It matches syntax trees: `ast_grep --pattern '$X.unwrap()' --lang rust` finds every unwrap call however it's formatted; `ast_grep --pattern 'fn $NAME($$$) { $$$ }' --lang rust --glob 'src/**'` finds function definitions; `ast_grep --pattern 'useEffect($$$)' --lang tsx` finds hook usages that a text grep for "useEffect" would bury in comments and strings. Meta-variables: `$NAME` = one AST node, `$$$` = zero or more. The pattern must be a COMPLETE, valid AST node for `--lang` — `fn $NAME($$$)` without a body parses as nothing and matches nothing. Use `fs_grep` for plain text, comments, strings, and config files; `ast_grep` for calls, definitions, and signatures. If ast-grep isn't installed the tool says so — fall back to fs_grep.
  5. **Read targeted sections with `fs_read --offset/--limit`** — `fs_read --path "src/main.rs" --offset 50 --limit 30` reads lines 50-79 only. `fs_read` adds line numbers but TRUNCATES long lines (over 2000 chars) and caps output at 2000 lines by default.
  6. **Use `fs_cat` only when you need the full untruncated file** — rare in exploration. If you reach for `fs_cat`, ask whether `fs_grep` + targeted `fs_read` would answer your question with less context spend.
@@ -59,6 +62,7 @@ instructions: |
  - `fs_grep --pattern "struct User" --include "*.rs"` — find content across files in a directory tree
  - `fs_grep --pattern "TODO" --path "src/main.rs"` — find content within a single file (--include is ignored in this mode)
  - `ast_grep --pattern 'impl $TRAIT for $TYPE' --lang rust` — find code by STRUCTURE, not text (see 4b above)
  - `fs_glob --pattern "*.rs" --path src/` — find files by name pattern
  - `fs_read --path "src/main.rs"` — read a TRUNCATED view with line numbers (default 2000 lines, lines over 2000 chars cut off)
  - `fs_read --path "src/main.rs" --offset 100 --limit 50` — read lines 100-149 only (line numbers; truncation rules still apply)
@@ -1,11 +1,14 @@
 name: oracle
 description: High-IQ advisor for architecture, debugging, and complex decisions. Blocking by design - the orchestrator is waiting on you.
-version: 2.0.0
+version: 2.1.0
 skills_enabled: true
 enabled_skills:
  - code-review
  - ai-slop-remover
  - plan-review
  - plan-authoring
  - iwe-knowledge-base
 variables:
  - name: project_dir
@@ -46,13 +49,16 @@ instructions: |
  3. **Code review** — evaluating proposed designs or implementations.
  4. **Risk assessment** — security, performance, reliability concerns.
  5. **Multi-component questions** — anything spanning 3+ files or modules.
  6. **Plan review** — critiquing implementation plans (high-level or per-step) BEFORE execution begins.
  ## Skills available
-  Two skills are available to you. Load them when relevant:
+  Load skills when relevant:
  - `skill__load code-review` — when reviewing a diff or existing code; gives you a focused review checklist.
  - `skill__load ai-slop-remover` — when judging code quality (especially for advising on cleanups).
  - `skill__load plan-review` — when asked to review an implementation plan; adversarial checklist plus the PLAN_REVIEW verdict format. Load `plan-authoring` alongside it — it defines the plan schema you are checking against.
  - `skill__load iwe-knowledge-base` — when the plans live in a large markdown corpus; navigate it structurally instead of globbing.
  Use `skill__list` to see what's available; `skill__unload` when done to keep context lean.
@@ -91,6 +97,8 @@ instructions: |
  ORACLE_COMPLETE
  ```
  Exception: for plan reviews, use the `PLAN_REVIEW: OKAY` / `PLAN_REVIEW: REJECT` verdict format from the `plan-review` skill as the body, then end with `ORACLE_COMPLETE` on the final line as usual.
  ## Rules
  1. **Never modify files** — you advise, others implement.
@@ -16,6 +16,21 @@ Sisyphus acts as the primary entry point, capable of handling complex tasks by c
 - 💻 **CLI Coding**: Provides a natural language interface for writing and editing code.
 - 🔄 **Task Management**: Tracks progress and context across complex operations.
 - 🛠️ **Tool Integration**: Seamlessly uses system tools for building, testing, and file manipulation.
 - 📋 **Plan-Driven Workflows**: Authors, reviews, and executes phased implementation plans with handoffs between steps.
 ## Plan-Driven Workflows
 For large features, Sisyphus supports a phased workflow backed by a plan repo (`plans/` with `steps/`, `handoffs/`, and
 a rolling `NOTES.md`):
 1. **Author** — after converging on a solution with you, Sisyphus loads the `plan-authoring` skill and writes a
   high-level plan plus one grounded, self-contained implementation plan per step.
 2. **Review** — [Oracle](../oracle/README.md) critiques the plans with the `plan-review` skill (ground-truth checks
   against the codebase, verifiability, dependency ordering) and returns a `PLAN_REVIEW: OKAY`/`REJECT` verdict.
   Rejected plans are fixed before any code is written.
 3. **Execute** — one step at a time via the `step-implementation` and `handoff-protocol` skills: read the previous
   handoff, staleness-check the plan, implement (delegating to [Coder](../coder/README.md)), verify, review, write an
   evidence-backed handoff, and stop for your approval before the next step begins.
 ## Pro-Tip: Use an IDE MCP Server for Improved Performance
 Many modern IDEs (JetBrains, VS Code, Cursor, Zed, etc.) expose MCP servers that let LLMs use IDE tools directly. Using
@@ -1,6 +1,6 @@
 name: sisyphus
 description: OpenCode-style orchestrator - classifies intent, delegates to specialists, tracks progress with todos, enforces OMO-grade verification discipline
-version: 3.0.0
+version: 3.2.0
 agent_session: temp
 auto_continue: true
@@ -23,6 +23,10 @@ enabled_skills:
  - parallel-research
  - verification-gates
  - oracle-protocol
  - plan-authoring
  - step-implementation
  - handoff-protocol
  - iwe-knowledge-base
 variables:
  - name: project_dir
@@ -101,6 +105,9 @@ instructions: |
  | About to touch git history | `git-master` |
  | About to touch UI/components | `frontend-ui-ux` (also nudge delegates to load it) |
  | About to write any code | `ai-slop-remover` |
  | About to author a high-level plan or step plans | `plan-authoring` |
  | About to execute a step of a phased plan | `step-implementation` + `handoff-protocol` |
  | Navigating a plan repo or markdown knowledge base | `iwe-knowledge-base` |
  Load skills BEFORE the phase, not after. Unload when the phase ends if context is getting heavy. `skill__unload` keeps the context lean.
@@ -124,7 +131,8 @@ instructions: |
  | `explore` | Find patterns in THIS codebase, understand local code | Read-only, returns findings, fan out 2-5 in parallel |
  | `librarian` | Find official docs, OSS examples, web best practices for EXTERNAL libraries | Read-only, returns citation-backed findings, fan out 1-3 in parallel |
  | `coder` | Write/edit files, implement features | Graph agent: plan → approval → implement → verify build+tests → self_review → bounded fix-loop |
-  | `oracle` | Architecture, complex debugging, review | Advisory, blocking — never answer the user before collecting Oracle results |
+  | `oracle` | Architecture, complex debugging, review, plan review | Advisory, blocking — never answer the user before collecting Oracle results |
  | `step-runner` | Execute ONE step of a phased plan repo (Phase 8) | Graph agent: orient → staleness check → coder → verify → handoff → user approval gate |
  ### When to fire `librarian` (external grep) vs `explore` (internal grep)
@@ -312,6 +320,47 @@ instructions: |
  Never: leave code in broken state, continue hoping it'll work, delete failing tests to "pass," suppress errors to silence them.
  ## Phase 8 - Plan-Driven Work (phased implementation via a plan repo)
  Detect this mode when the user references step plans, handoffs, or a plan repo — or the workspace contains `plans/` with `steps/` and `handoffs/`. Plan-driven work has two lifecycles. Never mix them in one turn.
  ### Authoring lifecycle (no code changes)
  1. Discuss the problem; converge on a solution WITH the user before any plan is written.
  2. Load `plan-authoring`. Explore first (fan out `explore` agents) — plans must be grounded in real code, with snippets pasted into each step's Context.
  3. Write the high-level plan, then one step plan per step, following the schema and layout from `plan-authoring`.
  4. **Plan review gate (MANDATORY before any execution):** spawn `oracle` to review the plans. Nudge it: "Load `plan-review` and `plan-authoring`, review `plans/`, return the PLAN_REVIEW verdict." REJECT → fix the complaints, re-submit. Do not start execution on an unreviewed or rejected plan.
  5. Present the reviewed plan to the user for approval.
  ### Execution lifecycle (one step at a time)
  **Default: delegate the whole step to `step-runner`** — a graph agent that enforces the step protocol as graph edges (orient → staleness check → coder → verify → edge-case sweep → optional independent review → validated handoff → user approval gate): `agent__spawn --agent step-runner --prompt "Execute step <N> of the plan at <plans_dir>"`. It returns `STEP_COMPLETE` / `STEP_BLOCKED` / `STEP_REJECTED` / `STEP_FAILED`. Relay its escalations (deviation gate, approval gate) promptly. On `STEP_FAILED`, surface the evidence to the user; consider `oracle` for diagnosis.
  Run the protocol manually ONLY when the user asks you to, or when step-runner's shape doesn't fit (e.g. a docs-only step with nothing to build). Then:
  1. Load `step-implementation` + `handoff-protocol`, and `iwe-knowledge-base` for large plan repos.
  2. Follow the step protocol phase by phase: orient (previous handoff + `NOTES.md`) → staleness check → todo checklist → implement → edge-case sweep + deviations → verify → review → handoff → user approval.
  3. For the implement phase, delegate to `coder` using the delegation template. Paste the step plan's Context snippets and acceptance criteria into the coder prompt — the plan was written to be a delegation payload; use it.
  4. Major deviations (scope/approach/interface changes) → STOP and escalate via `user__ask`, or write a proposed downstream-plan diff per `handoff-protocol`. Never silently absorb them.
  5. **HARD STOP at the approval gate.** Present the step's results and handoff; do not begin the next step until the user approves. Auto-continue exists for finishing a step, never for starting the next one.
  ## Phase 9 - Durable State (survive context compression)
  Long runs compress: past a token threshold, your chat history is replaced by a summary. Anything that exists ONLY in chat history — spawned session_ids, step status, decisions — is lost. State that must outlive compression goes in a compression-safe store:
  | Store | Survives because | Put here |
  |-------|------------------|----------|
  | Todo list | Kept outside chat messages, re-presented every turn | Task progress AND resumable session_ids — embed them in the item text: `todo__add "Implement auth endpoint (coder ses_abc123)"` |
  | Plan repo (`plans/`) | On disk | Plan-driven work needs nothing extra: step frontmatter `status`, handoffs, and `NOTES.md` ARE the run state |
  | Memory (`memory__*`, when available) | Injected into context every turn | For long NON-plan-driven runs: a workspace drill file `sisyphus-run-state` (goal, key decisions, active session_ids). Set `expires` to tomorrow; delete it when the run completes |
  Rules:
  1. **Session_ids you may need to resume are never chat-only.** Record them in the todo item for that work the moment the spawn returns. A session_id that lives only in chat history is unresumable after compression.
  2. **Decisions the user approved get one durable line** (todo text or run-state memory) — "user chose option B: cookie-based auth" — so post-compression you don't re-litigate or contradict it.
  3. **Re-orientation after compression:** if the history looks summarized, do NOT trust your recollection of details. Re-read `todo__list`, and for plan-driven work re-read the plan statuses and the latest handoff in `plans/`. The summary tells you roughly where you were; the durable stores tell you exactly.
  4. Do not hoard: run state is not knowledge. Never bloat `MEMORY.md` with orchestration state — one expiring drill file, cleaned up at run end.
  ## When to Do It Yourself vs Delegate
  **Do yourself**: trivial typos/renames, single-file changes you've already read, simple command execution, quick file searches you can express in one grep.
@@ -0,0 +1,11 @@
 schemaVersion: '1'
 kind: mixin
 name: sisyphus-ddg
 description: >
  Allows Sisyphus to hit all domains since it utilizes the DuckDuckGo
  MCP server. This allows the MCP server to actually perform web searches
  on arbitrary domains and retrieve info for the agent.
 network:
  allowedDomains:
    - '*'
@@ -0,0 +1,93 @@
 # Step-Runner
 A graph-based agent that executes **one step** of a phased implementation
 plan, with the step protocol from the `step-implementation` skill enforced
 as graph edges rather than prose. Designed to be delegated to by
 **[Sisyphus](../sisyphus/README.md)**; delegates implementation to
 **[Coder](../coder/README.md)** and independent review to
 **[code-reviewer](../code-reviewer/README.md)**.
 It expects a plan repo authored per the `plan-authoring` skill:
 ```
 plans/
  steps/NN-<slug>.md    # step plans with frontmatter (step/title/depends_on/status)
  handoffs/NN-<slug>.md # written by this agent, validated by a deterministic gate
  NOTES.md              # rolling durable facts
 ```
 ## Workflow
 ```
 resolve_step (script)         locate plan + previous handoff, check depends_on,
        ↓                     mark plan in-progress   [→ gate_blocked if deps unsatisfied]
 orient (llm, read-only)       merge handoff directives + staleness-check the plan
        ↓
 route_staleness (script)      major deviation → gate_deviation (approval)
        ↓
 implement (agent → coder)     coder runs its own build/test/self-review fix-loop
        ↓
 route_coder_result (script)   COMPLETE → verify | REJECTED / FAILED → end
        ↓
 verify_format_lint (script)   format BEFORE evidence, then lint
 verify_build (script)         step-level build/typecheck
 verify_tests (script)         FULL test suite
        ↓                     [failures → fix_loop_gate, back-edge to implement]
 edge_case_sweep (llm)         missed edge cases; annotate downstream plans
        ↓                     (Edge cases sections ONLY - scope changes become proposals)
 route_sweep (script)          5+ files or architectural boundary → independent_review
 independent_review (agent)    code-reviewer; 🔴 findings loop back to implement (bounded)
        ↓
 write_handoff (llm)           evidence-backed handoff per handoff-protocol + NOTES.md
 check_handoff (script)        deterministic schema gate; marks plan status complete
        ↓
 gate_user_review (approval)   HARD STOP - approve, or send revision comments
        ↓                     (revisions loop through implement → verify → handoff again)
 end_success / end_blocked / end_rejected / end_failure
 ```
 End nodes emit sentinel outcomes for the caller:
 - `STEP_COMPLETE` — step implemented, verified, handoff written, user approved.
 - `STEP_BLOCKED` — `depends_on` unsatisfied and the user declined to proceed.
 - `STEP_REJECTED` — user aborted at the deviation gate, or the coder's plan
  was rejected at its approval gate.
 - `STEP_FAILED` — coder failed, the step-level fix budget was exhausted, or
  the handoff failed validation twice.
 ## Usage
 ```sh
 # From the project root: run the next in-progress/pending step
 coyote -a step-runner "Execute the next step"
 # A specific step (also parsed from the prompt: "execute step 3")
 coyote -a step-runner --agent-variable step 3 "Execute step 3"
 # Plan repo somewhere else
 coyote -a step-runner --agent-variable plans_dir docs/plans "Execute the next step"
 ```
 **Invoke from the project root.** The coder sub-agent resolves its own
 `project_dir` from the invocation directory; overriding `project_dir` here
 does not propagate to the spawned coder.
 ## Tuning
 `graph.yaml` `initial_state` exposes:
 - `max_fix_attempts` (default `2`) — step-level fix budget (the coder has
  its own internal budget of 3).
 - `max_review_attempts` (default `1`) — bounded 🔴-finding fix loops after
  independent review.
 Environment overrides honored by the script nodes:
 - `FORMAT_CMD` / `LINT_CMD` — formatting and linting (otherwise a per-type
  heuristic formats, and linting defers to the build/check command).
 - `BUILD_CMD` / `TEST_CMD` — skip project-type detection (same as coder).
 - `STEP_AUTOAPPROVE=1` — bypass the deviation gate (non-interactive runs).
 - `STEP_SKIP_REVIEW=1` — never spawn the independent reviewer.
 The final user approval gate is never bypassed by an environment variable -
 it is the point of the workflow.
@@ -0,0 +1,599 @@
 name: step-runner
 description: |
  Executes ONE step of a phased implementation plan (plans/ repo) with the
  step protocol enforced as graph edges: orient -> staleness check ->
  implement (coder) -> verify -> edge-case sweep -> optional independent
  review -> evidence-backed handoff -> user approval gate. Designed to be
  delegated to by sisyphus.
 version: "1.0"
 global_tools:
  - fs_cat.sh
  - fs_ls.sh
  - fs_write.sh
  - fs_patch.sh
  - execute_command.sh
 skills_enabled: true
 enabled_skills:
  - step-implementation
  - handoff-protocol
  - code-review
  - ai-slop-remover
 variables:
  - name: project_dir
    description: |
      Absolute path to the project directory. Defaults to "." (the directory
      coyote was invoked from). The coder sub-agent resolves its own
      project_dir the same way, so invoke step-runner FROM the project root
      unless you override this for both.
    default: "."
  - name: plans_dir
    description: |
      Path to the plan repo. Relative paths resolve against project_dir.
      Expected layout: <plans_dir>/steps/NN-<slug>.md,
      <plans_dir>/handoffs/, <plans_dir>/NOTES.md.
    default: "plans"
  - name: step
    description: |
      Which step to execute: a step number, or "next" to pick the first
      in-progress (resume) or pending step plan.
    default: "next"
 settings:
  max_loop_iterations: 20
  log_state_snapshots: true
  validate_before_run: true
  timeout: 7200
 initial_state:
  project_dir: ""
  plans_dir: ""
  step_number: 0
  step_slug: ""
  step_title: ""
  step_plan_path: ""
  step_plan: ""
  prev_handoff_path: "(none)"
  prev_handoff: "(none - this is the first step)"
  notes_path: ""
  notes: "(none)"
  handoff_path: ""
  blocking_reason: ""
  plan_summary: ""
  implementation_brief: ""
  staleness_report: ""
  has_major_deviation: false
  deviation_summary: ""
  user_feedback: ""
  fix_instructions: ""
  fix_attempts: 0
  max_fix_attempts: 2
  coder_result: ""
  format_output: ""
  lint_ok: true
  lint_output: ""
  build_ok: true
  build_output: ""
  tests_ok: true
  tests_output: ""
  edge_case_report: ""
  downstream_updates: ""
  needs_independent_review: false
  review_report: ""
  review_attempts: 0
  max_review_attempts: 1
  handoff_attempts: 0
  handoff_fix: ""
  step_summary: ""
 start: resolve_step
 nodes:
  resolve_step:
    # Entry node. scripts/resolve_step.sh prints a JSON state patch whose
    # "_next" is orient, gate_blocked (unsatisfied depends_on), or end_failure
    # (missing project/plans directory, no matching plan, bad step value).
    # NOTE(review): `next: orient` is presumably only the default used when the
    # script output carries no "_next" - confirm against the graph engine.
    id: resolve_step
    type: script
    description: |
      Locate the step plan, previous handoff, and NOTES.md; parse frontmatter;
      check depends_on satisfaction against existing handoffs; mark the plan
      in-progress. Routes to gate_blocked when dependencies are unsatisfied.
    script: scripts/resolve_step.sh
    timeout: 30
    fallback: end_failure
    next: orient
  gate_blocked:
    id: gate_blocked
    type: approval
    description: Escalate unsatisfied dependencies instead of building on missing ground.
    question: |
      Step {{step_number}} ({{step_title}}) is BLOCKED:
      {{blocking_reason}}
      Proceed anyway?
    options:
      - "yes"
      - "no"
    routes:
      "yes": orient
      "no": end_blocked
    on_other: end_blocked
  orient:
    id: orient
    type: llm
    description: |
      Read-only orientation and staleness check: merge the previous handoff's
      directives with the step plan, then verify the plan's assumptions
      against the CURRENT codebase before any edit.
    skills_enabled: true
    enabled_skills:
      - step-implementation
    instructions: |
      You are orienting for one step of a phased implementation plan. Load
      `step-implementation` and apply its Orient and Staleness-check phases.
      You are READ-ONLY in this node: no edits, no fixes.
      1. Read the previous handoff (below). Note directives aimed at this
         step, deviations that changed the codebase, and bare assertions
         that need re-verification.
      2. Staleness-check the step plan against the code at {{project_dir}}:
         grep the symbols it references (via execute_command), read its
         Context snippets at their claimed locations with fs_cat, confirm
         its Test commands exist.
      3. Classify discrepancies per the skill's deviation table: minor
         (mechanics differ; correct silently in the brief) vs major (scope,
         approach, interfaces, or a later step's assumptions affected).
      Produce `implementation_brief`: the corrected, self-contained marching
      orders for the implementer - plan tasks in order, handoff directives
      applied, minor staleness corrections folded in, acceptance criteria
      restated. The implementer sees ONLY the step plan plus your brief.
    prompt: |
      ## Step plan ({{step_plan_path}})
      {{step_plan}}
      ## Previous handoff ({{prev_handoff_path}})
      {{prev_handoff}}
      ## Rolling project notes
      {{notes}}
    tools:
      - fs_cat
      - fs_ls
      - execute_command
    max_iterations: 20
    output_schema:
      type: object
      properties:
        plan_summary:
          type: string
          description: 1-3 sentences summarizing what this step delivers
        implementation_brief:
          type: string
          description: Corrected, self-contained instructions for the implementer
        staleness_report:
          type: string
          description: Findings from checking plan assumptions against current code; "clean" if none
        has_major_deviation:
          type: boolean
          description: True when a discrepancy changes scope, approach, or interfaces
        deviation_summary:
          type: string
          description: Major deviations only, with the plan claim vs current reality. Empty when none
      required: [plan_summary, implementation_brief, staleness_report, has_major_deviation, deviation_summary]
    fallback: end_failure
    next: route_staleness
  route_staleness:
    # Pure router: scripts/route_staleness.sh emits "_next": gate_deviation
    # when has_major_deviation is true, otherwise implement. STEP_AUTOAPPROVE=1
    # forces implement regardless. No `next:` key - the script names the
    # successor itself.
    # NOTE(review): the fallback below also lands on implement, so if it fires
    # on a script error/timeout the deviation gate is skipped - confirm that
    # is intended.
    id: route_staleness
    type: script
    description: Major deviation -> user gate; otherwise straight to implement.
    script: scripts/route_staleness.sh
    timeout: 5
    fallback: implement
  gate_deviation:
    id: gate_deviation
    type: approval
    description: Major deviations are never silently absorbed - the user decides.
    question: |
      Step {{step_number}} ({{step_title}}): the plan no longer matches the
      codebase in a way that changes scope or approach.
      {{deviation_summary}}
      Staleness report:
      {{staleness_report}}
      Proceed with the corrected brief? (Answer with anything else to give
      your own guidance to the implementer.)
    options:
      - "proceed"
      - "abort"
    routes:
      "proceed": implement
      "abort": end_rejected
    on_other: implement
    state_updates:
      user_feedback: "{{choice}}"
  implement:
    id: implement
    type: agent
    description: |
      Delegate implementation to the coder graph agent, which runs its own
      plan -> implement -> build -> tests -> self-review fix-loop internally.
    agent: coder
    prompt: |
      ## TASK
      Execute step {{step_number}} ({{step_title}}) of a phased implementation
      plan for the project at {{project_dir}}.
      ## EXPECTED OUTCOME
      Every task in the step plan below is implemented and its acceptance
      criteria are met. Tests are derived from the Acceptance criteria
      section (not from the implementation). Build and full test suite pass.
      ## MUST DO
      - Follow the Orientation brief below - it supersedes the raw plan where
        they disagree (it folds in corrections from the staleness check).
      - Match the patterns pasted in the step plan's Context section.
      - Derive tests from the plan's Acceptance criteria.
      ## MUST NOT DO
      - Do not touch anything listed in the plan's Out of scope section.
      - Do not modify files under {{plans_dir}}.
      - Do not implement work belonging to other steps.
      ## CONTEXT
      ### Step plan
      {{step_plan}}
      ### Orientation brief (handoff directives + staleness corrections applied)
      {{implementation_brief}}
      ### User guidance (if any)
      {{user_feedback}}
      ### Fix loop status (empty on first attempt)
      {{fix_instructions}}
    timeout: 3600
    state_updates:
      coder_result: "{{output}}"
    next: route_coder_result
  route_coder_result:
    # scripts/route_coder_result.sh substring-matches coder_result, checking
    # CODER_COMPLETE first (-> verify_format_lint), then CODER_REJECTED
    # (-> end_rejected), then CODER_FAILED (-> end_failure). Output with no
    # sentinel at all also goes to end_failure with a blocking_reason.
    id: route_coder_result
    type: script
    description: Route on the coder sentinel - COMPLETE verifies, REJECTED/FAILED terminate.
    script: scripts/route_coder_result.sh
    timeout: 5
    fallback: end_failure
  verify_format_lint:
    id: verify_format_lint
    type: script
    description: |
      Format BEFORE evidence collection (FORMAT_CMD override or per-type
      heuristic), then lint (LINT_CMD, when configured). Lint failure routes
      to the fix loop.
    script: scripts/verify_format_lint.sh
    timeout: 300
    fallback: fix_loop_gate
  verify_build:
    id: verify_build
    type: script
    description: Step-level build/typecheck evidence, collected AFTER formatting.
    script: scripts/verify_build.sh
    timeout: 600
    fallback: fix_loop_gate
  verify_tests:
    id: verify_tests
    type: script
    description: FULL test suite - regressions in untouched code fail the step too.
    script: scripts/verify_tests.sh
    timeout: 1200
    fallback: fix_loop_gate
  fix_loop_gate:
    # scripts/fix_loop_gate.sh compares fix_attempts with max_fix_attempts:
    # budget left -> increments fix_attempts, writes fix_instructions quoting
    # the first failing stage (lint, then build, then tests), resets the three
    # *_ok flags to true, and routes to implement; budget spent -> end_failure.
    # This node is also the fallback target of the verify_* nodes, in which
    # case no *_ok flag may be false and the script reports a generic
    # "verification" stage.
    id: fix_loop_gate
    type: script
    description: |
      Step-level fix budget (the coder already ran its own internal fix
      loop). Loops to implement with fix_instructions, or ends as failure.
    script: scripts/fix_loop_gate.sh
    timeout: 5
    fallback: end_failure
  edge_case_sweep:
    id: edge_case_sweep
    type: llm
    description: |
      Post-implementation sweep: missed spots, edge cases, downstream plan
      implications. May annotate downstream plans' Edge cases sections
      (annotate vs propose per handoff-protocol). Also judges whether the
      change warrants an independent review pass.
    skills_enabled: true
    enabled_skills:
      - step-implementation
      - handoff-protocol
    instructions: |
      The implementation for this step just passed build and tests. Load
      `step-implementation` (edge-case sweep phase) and `handoff-protocol`
      (annotate-vs-propose rules), then:
      1. Read the changed code (the coder result below names the files).
         Look for edge cases the plan missed: empty inputs, error paths,
         concurrency, partial failure, compat.
      2. For each edge case belonging to a LATER step: check that step's
         plan under {{plans_dir}}/steps/. If its Edge cases section already
         covers it, done. If not, append an entry to that section via
         fs_patch - touch NOTHING else in the file.
      3. NEVER edit a later plan's Objective, Tasks, Acceptance criteria,
         or Out of scope. Scope-affecting changes become proposed diffs in
         `downstream_updates` instead.
      4. Set needs_independent_review=true when the change touched 5+ files
         or crosses architectural boundaries (auth, public APIs, schema,
         security-sensitive paths).
      Be terse. Findings, not prose.
    prompt: |
      ## Coder result
      {{coder_result}}
      ## Step plan
      {{step_plan}}
      ## Staleness report from orientation
      {{staleness_report}}
    tools:
      - fs_cat
      - fs_ls
      - fs_patch
      - execute_command
    max_iterations: 20
    output_schema:
      type: object
      properties:
        edge_case_report:
          type: string
          description: Edge cases discovered - both handled and punted, one per line. "none" if empty
        downstream_updates:
          type: string
          description: Annotations made (plan file + section) and proposed diffs for scope-affecting changes. "none" if empty
        needs_independent_review:
          type: boolean
      required: [edge_case_report, downstream_updates, needs_independent_review]
    fallback: write_handoff
    next: route_sweep
  route_sweep:
    # scripts/route_sweep.sh emits "_next": independent_review when
    # needs_independent_review is true, otherwise write_handoff.
    # STEP_SKIP_REVIEW=1 forces write_handoff.
    id: route_sweep
    type: script
    description: Broad or boundary-crossing changes get an independent reviewer.
    script: scripts/route_sweep.sh
    timeout: 5
    fallback: write_handoff
  independent_review:
    id: independent_review
    type: agent
    description: Independent review pass - the author's self-review cannot catch its own rationalizations.
    agent: code-reviewer
    prompt: |
      Review the changes produced for step {{step_number}} ({{step_title}})
      of a phased implementation plan in {{project_dir}}.
      What the step was supposed to do:
      {{plan_summary}}
      Coder summary (names the modified/created files):
      {{coder_result}}
      Review the changed files against the step plan's acceptance criteria.
      Preserve severity tags in your findings.
    timeout: 1200
    state_updates:
      review_report: "{{output}}"
    next: route_review
  route_review:
    # scripts/route_review.sh looks for the 🔴 marker in review_report. With a
    # marker present and review_attempts below max_review_attempts it
    # increments review_attempts, writes fix_instructions, clears
    # needs_independent_review, and routes to implement. In every other case
    # (no marker, or budget spent) it routes to write_handoff.
    id: route_review
    type: script
    description: Critical findings loop back to implement (bounded); otherwise proceed to handoff.
    script: scripts/route_review.sh
    timeout: 5
    fallback: write_handoff
  write_handoff:
    id: write_handoff
    type: llm
    description: |
      Write the evidence-backed handoff per handoff-protocol and append
      durable facts to NOTES.md. The completion gate (check_handoff)
      verifies the document afterward.
    skills_enabled: true
    enabled_skills:
      - handoff-protocol
      - ai-slop-remover
    instructions: |
      Load `handoff-protocol` and follow its writer schema EXACTLY: the
      frontmatter (step, title, result) and all eight sections, writing
      "None" rather than omitting a section.
      Write the handoff to {{handoff_path}} with fs_write. Paste the
      verification evidence below verbatim into the Evidence section -
      commands, exit codes, decisive output lines. Deviations come from the
      staleness report, gate decisions, and fix loop history. Downstream
      plan updates come from the sweep results.
      Then append durable, step-independent facts (if any) to {{notes_path}}
      - create the file if missing, never rewrite existing entries.
      If "Gate feedback" below is non-empty, a previous handoff attempt
      failed validation - fix exactly what it lists.
    prompt: |
      ## Step
      {{step_number}} ({{step_title}}) - plan at {{step_plan_path}}
      ## Plan summary
      {{plan_summary}}
      ## Coder result
      {{coder_result}}
      ## Staleness report / deviations
      {{staleness_report}}
      Major deviation summary (if any): {{deviation_summary}}
      User guidance given (if any): {{user_feedback}}
      Fix loop attempts used: {{fix_attempts}} of {{max_fix_attempts}}
      ## Edge cases discovered
      {{edge_case_report}}
      ## Downstream plan updates
      {{downstream_updates}}
      ## Independent review report (if any)
      {{review_report}}
      ## Verification evidence (paste verbatim)
      ### Format
      {{format_output}}
      ### Lint
      {{lint_output}}
      ### Build
      {{build_output}}
      ### Tests
      {{tests_output}}
      ## Gate feedback
      {{handoff_fix}}
    tools:
      - fs_cat
      - fs_ls
      - fs_write
      - fs_patch
    max_iterations: 15
    output_schema:
      type: object
      properties:
        step_summary:
          type: string
          description: 3-6 sentence summary of the step for the user's approval decision - what was done, deviations, anything needing their attention
      required: [step_summary]
    fallback: end_failure
    next: check_handoff
  check_handoff:
    # scripts/check_handoff.sh requires the file at handoff_path to contain a
    # `result:` line (complete|partial|blocked) and these `##` sections:
    # Summary, Completed, Not completed, Deviations, Downstream plan updates,
    # Edge cases discovered, Evidence, Notes for next step.
    # Routes: valid -> gate_user_review; first failure -> write_handoff with
    # the problem list in handoff_fix; second failure -> end_failure.
    id: check_handoff
    type: script
    description: |
      Deterministic completion gate - handoff exists with frontmatter and all
      required sections. On success, marks the step plan status complete.
      One retry back to write_handoff, then failure.
    script: scripts/check_handoff.sh
    timeout: 10
    fallback: end_failure
  gate_user_review:
    id: gate_user_review
    type: approval
    description: The hard stop - the next step never starts without explicit approval.
    question: |
      ## Step {{step_number}} ({{step_title}}) - ready for review
      {{step_summary}}
      Handoff: {{handoff_path}}
      Build: {{build_ok}} | Tests: {{tests_ok}} | Fix attempts: {{fix_attempts}}/{{max_fix_attempts}}
      Approve this step? (Answer with anything else to send revision
      instructions straight to the implementer.)
    options:
      - "approve"
      - "revise"
    routes:
      "approve": end_success
      "revise": get_revision
    on_other: revise_from_choice
    state_updates:
      user_feedback: "{{choice}}"
  get_revision:
    id: get_revision
    type: input
    description: Collect revision instructions, then loop back through implement -> verify -> handoff.
    question: "What should change? Your comments go to the implementer verbatim."
    validation: "len(input) > 0"
    state_updates:
      fix_instructions: "{{input}}"
    next: implement
  revise_from_choice:
    # Reached through gate_user_review's on_other route, after that gate has
    # stored the free-form answer in user_feedback.
    # scripts/revise_from_choice.sh wraps a non-empty user_feedback into
    # fix_instructions and routes to implement; an empty one routes to
    # get_revision to ask explicitly.
    id: revise_from_choice
    type: script
    description: Free-form approval answers are treated as revision instructions.
    script: scripts/revise_from_choice.sh
    timeout: 5
    fallback: get_revision
  end_success:
    id: end_success
    type: end
    output: |
      STEP_COMPLETE
      Step: {{step_number}} ({{step_title}})
      Plan: {{step_plan_path}}
      Handoff: {{handoff_path}}
      Build: passed | Tests: passed | Fix attempts: {{fix_attempts}}/{{max_fix_attempts}}
      {{step_summary}}
      Downstream plan updates:
      {{downstream_updates}}
  end_blocked:
    id: end_blocked
    type: end
    output: |
      STEP_BLOCKED
      Step: {{step_number}} ({{step_title}})
      Reason:
      {{blocking_reason}}
  end_rejected:
    id: end_rejected
    type: end
    output: |
      STEP_REJECTED
      Step: {{step_number}} ({{step_title}})
      Rejected at: deviation gate or coder approval gate.
      Deviation summary:
      {{deviation_summary}}
      Coder result (if it ran):
      {{coder_result}}
  end_failure:
    id: end_failure
    type: end
    output: |
      STEP_FAILED
      Step: {{step_number}} ({{step_title}})
      Fix attempts: {{fix_attempts}}/{{max_fix_attempts}}
      Blocking reason (if resolution failed): {{blocking_reason}}
      Coder result:
      {{coder_result}}
      Last build output:
      {{build_output}}
      Last tests output:
      {{tests_output}}
@@ -0,0 +1,54 @@
 #!/usr/bin/env bash
 # check_handoff.sh - deterministic completion gate that runs after write_handoff.
 #
 # Input:  graph state as JSON from $GRAPH_STATE_FILE (preferred) or
 #         $GRAPH_STATE; reads handoff_path, step_plan_path, handoff_attempts.
 # Output: one JSON state patch on stdout whose "_next" names the next node:
 #   valid handoff       -> step plan marked "status: complete", retry state
 #                          cleared, "_next": "gate_user_review"
 #   first invalid pass  -> handoff_attempts=1 and the problem list in
 #                          handoff_fix, "_next": "write_handoff"
 #   second invalid pass -> blocking_reason set, "_next": "end_failure"
 # Always exits 0; the decision travels in the JSON, not in the exit status.
 set -uo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 else
  state='{}'
 fi
 handoff_path=$(jq -r '.handoff_path // ""' <<< "$state")
 step_plan_path=$(jq -r '.step_plan_path // ""' <<< "$state")
 handoff_attempts=$(jq -r '.handoff_attempts // 0' <<< "$state")
 required_sections=(
  "Summary"
  "Completed"
  "Not completed"
  "Deviations"
  "Downstream plan updates"
  "Edge cases discovered"
  "Evidence"
  "Notes for next step"
 )
 # An optional quote is tolerated before the value because resolve_step.sh
 # strips quotes when it later reads this same key for depends_on checks.
 result_re="^result:[[:space:]]*[\"']?(complete|partial|blocked)"
 problems=""
 if [[ ! -f "$handoff_path" ]]; then
  problems="- handoff file does not exist at $handoff_path"$'\n'
 else
  content=$(cat "$handoff_path")
  # Validate `result:` inside the frontmatter only (text between the first two
  # `---` lines). resolve_step.sh reads it from there, so a stray "result:"
  # line in the body must not satisfy the gate.
  frontmatter=$(awk '/^---[[:space:]]*$/{n++; next} n==1{print} n>=2{exit}' "$handoff_path")
  grep -qE "$result_re" <<< "$frontmatter" \
    || problems+="- frontmatter is missing 'result: complete|partial|blocked'"$'\n'
  for section in "${required_sections[@]}"; do
    grep -qE "^##[[:space:]]+${section}" <<< "$content" \
      || problems+="- missing required section: ## ${section}"$'\n'
  done
 fi
 if [[ -z "$problems" ]]; then
  if [[ -f "$step_plan_path" ]] && tmp=$(mktemp); then
    # Write the result back THROUGH the existing file rather than mv-ing the
    # temp file over it: mktemp creates mode-0600 files, so a rename would
    # silently change the plan's permissions (and replace a symlink).
    if awk 'BEGIN{n=0} /^---[[:space:]]*$/{n++; print; next} n==1 && /^status:/{print "status: complete"; next} {print}' "$step_plan_path" > "$tmp"; then
      cat "$tmp" > "$step_plan_path"
    fi
    rm -f "$tmp"
  fi
  # Reset handoff_attempts: a user-requested revision loops back through
  # write_handoff, and that round deserves its own single retry.
  jq -nc '{"handoff_fix": "", "handoff_attempts": 0, "_next": "gate_user_review"}'
  exit 0
 fi
 if (( handoff_attempts >= 1 )); then
  jq -nc \
    --arg br "Handoff failed validation twice. Problems:"$'\n'"$problems" \
    '{"blocking_reason": $br, "_next": "end_failure"}'
  exit 0
 fi
 jq -nc \
  --arg hf "The previous handoff attempt failed validation. Fix exactly these problems:"$'\n'"$problems" \
  '{
    "handoff_attempts": 1,
    "handoff_fix": $hf,
    "_next": "write_handoff"
  }'
@@ -0,0 +1,60 @@
 #!/usr/bin/env bash
 # fix_loop_gate.sh - step-level fix budget for failed verification.
 #
 # Input:  graph state as JSON from $GRAPH_STATE_FILE (preferred) or
 #         $GRAPH_STATE; reads fix_attempts, max_fix_attempts, and the
 #         lint/build/tests *_ok flags with their captured outputs.
 # Output: one JSON state patch on stdout:
 #   budget left  -> fix_attempts incremented, fix_instructions quoting the
 #                   failing stage's output, *_ok flags reset to true,
 #                   "_next": "implement"
 #   budget spent -> blocking_reason naming the stage, "_next": "end_failure"
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 else
  state='{}'
 fi
 fix_attempts=$(jq -r '.fix_attempts // 0' <<< "$state")
 max_fix_attempts=$(jq -r '.max_fix_attempts // 2' <<< "$state")
 # `//` cannot be used for the flags: it treats false like null, which would
 # turn a recorded failure into the "true" default.
 lint_ok=$(jq -r '.lint_ok | if . == null then "true" else (. | tostring) end' <<< "$state")
 build_ok=$(jq -r '.build_ok | if . == null then "true" else (. | tostring) end' <<< "$state")
 tests_ok=$(jq -r '.tests_ok | if . == null then "true" else (. | tostring) end' <<< "$state")
 lint_output=$(jq -r '.lint_output // ""' <<< "$state")
 build_output=$(jq -r '.build_output // ""' <<< "$state")
 tests_output=$(jq -r '.tests_output // ""' <<< "$state")
 # First failing stage in pipeline order. The final branch covers arriving
 # here with every flag still true (no failing stage was recorded).
 if [[ "$lint_ok" != "true" ]]; then
  stage="lint"
  output="$lint_output"
 elif [[ "$build_ok" != "true" ]]; then
  stage="build"
  output="$build_output"
 elif [[ "$tests_ok" != "true" ]]; then
  stage="full test suite"
  output="$tests_output"
 else
  stage="verification"
  output="fix_loop_gate was reached but no failing stage was recorded. Re-run verification."
 fi
 if (( fix_attempts >= max_fix_attempts )); then
  # Record why the run ends, as the other routes into end_failure do, so
  # the STEP_FAILED report is not left with an empty blocking reason.
  jq -nc \
    --argjson n "$fix_attempts" \
    --arg br "Step-level fix budget exhausted ($fix_attempts of $max_fix_attempts attempts used); last failure at the $stage stage." \
    '{
      "fix_attempts": $n,
      "blocking_reason": $br,
      "_next": "end_failure"
    }'
  exit 0
 fi
 next_attempts=$((fix_attempts + 1))
 fix_instructions=$(printf '## Fix loop status (step-level attempt %d of %d)\n\nThe implementation passed the coder'"'"'s internal checks but failed step-level verification at the %s stage.\n\nOutput:\n```\n%s\n```\n\nIdentify the minimal fix and apply it. Do not refactor. Regressions in untouched code caused by this change are in scope.' \
  "$next_attempts" "$max_fix_attempts" "$stage" "$output")
 # The *_ok flags are reset so the next verification pass starts clean.
 jq -nc \
  --argjson n "$next_attempts" \
  --arg 'fi' "$fix_instructions" \
  '{
    "fix_attempts": $n,
    "fix_instructions": $fi,
    "lint_ok": true,
    "build_ok": true,
    "tests_ok": true,
    "_next": "implement"
  }'
@@ -0,0 +1,152 @@
 #!/usr/bin/env bash
 # resolve_step.sh - entry node of the step-runner graph.
 #
 # Resolves which step plan to run and loads its context into graph state:
 #   1. project_dir / plans_dir from LLM_AGENT_VAR_PROJECT_DIR and
 #      LLM_AGENT_VAR_PLANS_DIR (a relative plans_dir resolves against
 #      project_dir)
 #   2. the step plan: LLM_AGENT_VAR_STEP, else a "step N" mention in
 #      .initial_prompt, else the first in-progress (or first pending) plan
 #   3. depends_on satisfaction, judged by each dependency handoff's "result"
 #   4. the closest earlier handoff and NOTES.md (both size-capped)
 # Prints one JSON state patch on stdout; "_next" is orient, gate_blocked, or
 # end_failure. Always exits 0.
 set -uo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 else
  state='{}'
 fi
 # Emit a failure patch carrying reason $1 and stop the script.
 fail() {
  jq -nc --arg r "$1" '{"blocking_reason": $r, "_next": "end_failure"}'
  exit 0
 }
 # Print the frontmatter of file $1: the lines between the first two `---`.
 frontmatter() {
  awk '/^---[[:space:]]*$/{n++; next} n==1{print} n>=2{exit}' "$1"
 }
 # Print the value of key $2 in frontmatter text $1, surrounding quotes removed.
 fm_value() {
  echo "$1" | grep -E "^$2:" | head -1 | sed -E "s/^$2:[[:space:]]*//" | sed -E 's/^["'"'"']|["'"'"']$//g'
 }
 # Rewrite the frontmatter `status:` line of plan file $1 to "status: $2".
 # The result is written back THROUGH the existing file rather than mv-ed over
 # it: mktemp creates mode-0600 files, so a rename would silently change the
 # plan's permissions.
 set_plan_status() {
  local plan="$1" new_status="$2" tmp
  tmp=$(mktemp) || return 0
  if awk -v s="$new_status" 'BEGIN{n=0} /^---[[:space:]]*$/{n++; print; next} n==1 && /^status:/{print "status: " s; next} {print}' "$plan" > "$tmp"; then
    cat "$tmp" > "$plan"
  fi
  rm -f "$tmp"
 }
 # Keep the requested path in its own variable: a failed `var=$(...)` still
 # assigns (an empty string), which would blank the path in the error message.
 requested_dir="${LLM_AGENT_VAR_PROJECT_DIR:-.}"
 project_dir=$(cd "$requested_dir" 2>/dev/null && pwd) || fail "project_dir does not exist: $requested_dir"
 plans_dir="${LLM_AGENT_VAR_PLANS_DIR:-plans}"
 [[ "$plans_dir" != /* ]] && plans_dir="$project_dir/$plans_dir"
 steps_dir="$plans_dir/steps"
 handoffs_dir="$plans_dir/handoffs"
 notes_path="$plans_dir/NOTES.md"
 [[ -d "$steps_dir" ]] || fail "No step plans directory at $steps_dir (expected <plans_dir>/steps/NN-<slug>.md)"
 # An explicit variable wins; the prompt is consulted only while it is "next".
 step="${LLM_AGENT_VAR_STEP:-next}"
 if [[ "$step" == "next" ]]; then
  prompt_step=$(echo "$state" | jq -r '.initial_prompt // ""' | grep -oiE 'step[[:space:]#:]*[0-9]+' | head -1 | grep -oE '[0-9]+' || true)
  [[ -n "$prompt_step" ]] && step="$prompt_step"
 fi
 plan_file=""
 if [[ "$step" == "next" ]]; then
  # Resume the first in-progress plan; otherwise take the first plan that is
  # pending or has no status at all.
  first_pending=""
  while IFS= read -r f; do
    st=$(fm_value "$(frontmatter "$f")" "status")
    if [[ "$st" == "in-progress" ]]; then
      plan_file="$f"
      break
    fi
    [[ -z "$first_pending" && ( "$st" == "pending" || -z "$st" ) ]] && first_pending="$f"
  done < <(find "$steps_dir" -maxdepth 1 -name '*.md' | sort)
  [[ -z "$plan_file" ]] && plan_file="$first_pending"
  [[ -z "$plan_file" ]] && fail "No in-progress or pending step plans in $steps_dir"
 else
  [[ "$step" =~ ^[0-9]+$ ]] || fail "step must be a number or 'next'; got: $step"
  # 10# forces base 10 so zero-padded input such as "08" is not read as octal.
  padded=$(printf '%02d' "$((10#$step))")
  plan_file=$(find "$steps_dir" -maxdepth 1 \( -name "${padded}-*.md" -o -name "${step}-*.md" \) | sort | head -1)
  [[ -n "$plan_file" ]] || fail "No step plan matching step $step in $steps_dir"
 fi
 bn=$(basename "$plan_file" .md)
 num_part="${bn%%-*}"
 [[ "$num_part" =~ ^[0-9]+$ ]] || fail "Step plan filename must start with a number: $bn"
 step_number=$((10#$num_part))
 step_slug="${bn#*-}"
 fm=$(frontmatter "$plan_file")
 step_title=$(fm_value "$fm" "title")
 [[ -z "$step_title" ]] && step_title="$step_slug"
 # depends_on is accepted inline ("depends_on: [1, 2]") or as a "- N" list;
 # every run of digits found there is treated as a step number.
 deps=$(echo "$fm" | awk '/^depends_on:/{f=1; print; next} f && /^[[:space:]]*-/{print; next} f{exit}' | grep -oE '[0-9]+' || true)
 unsatisfied=""
 for dep in $deps; do
  dep_padded=$(printf '%02d' "$((10#$dep))")
  dep_handoff=$(find "$handoffs_dir" -maxdepth 1 \( -name "${dep_padded}-*.md" -o -name "${dep}-*.md" \) 2>/dev/null | sort | head -1)
  if [[ -z "$dep_handoff" ]]; then
    unsatisfied+="- step $dep: no handoff found (step not executed?)"$'\n'
    continue
  fi
  dep_result=$(fm_value "$(frontmatter "$dep_handoff")" "result")
  if [[ "$dep_result" != "complete" ]]; then
    unsatisfied+="- step $dep: handoff result is '$dep_result' (not complete): $dep_handoff"$'\n'
  fi
 done
 # Previous handoff = the highest-numbered handoff below this step.
 prev_handoff_path="(none)"
 prev_handoff="(none - this is the first step)"
 prev_file=""
 prev_num=0
 while IFS= read -r h; do
  hn="${h##*/}"
  hn="${hn%%-*}"
  [[ "$hn" =~ ^[0-9]+$ ]] || continue
  n=$((10#$hn))
  if (( n < step_number && n >= prev_num )); then
    prev_num=$n
    prev_file="$h"
  fi
 done < <(find "$handoffs_dir" -maxdepth 1 -name '*.md' 2>/dev/null | sort)
 if [[ -n "$prev_file" ]]; then
  prev_handoff_path="$prev_file"
  prev_handoff=$(head -c 16000 "$prev_file")
 fi
 notes="(none)"
 [[ -f "$notes_path" ]] && notes=$(head -c 8000 "$notes_path")
 # The plan text is captured BEFORE the status rewrite below, so step_plan
 # still shows the status the plan had on disk.
 step_plan=$(head -c 24000 "$plan_file")
 handoff_path="$handoffs_dir/$(basename "$plan_file")"
 # NOTE(review): the plan is marked in-progress even when dependencies are
 # unsatisfied, i.e. before the gate_blocked decision - confirm that a declined
 # block should leave the plan in-progress.
 set_plan_status "$plan_file" "in-progress"
 next_node="orient"
 blocking_reason=""
 if [[ -n "$unsatisfied" ]]; then
  next_node="gate_blocked"
  blocking_reason="Unsatisfied dependencies:"$'\n'"$unsatisfied"
 fi
 jq -nc \
  --arg pd "$project_dir" \
  --arg pl "$plans_dir" \
  --argjson sn "$step_number" \
  --arg ss "$step_slug" \
  --arg st "$step_title" \
  --arg spp "$plan_file" \
  --arg sp "$step_plan" \
  --arg php "$prev_handoff_path" \
  --arg ph "$prev_handoff" \
  --arg np "$notes_path" \
  --arg no "$notes" \
  --arg hp "$handoff_path" \
  --arg br "$blocking_reason" \
  --arg nx "$next_node" \
  '{
    "project_dir": $pd,
    "plans_dir": $pl,
    "step_number": $sn,
    "step_slug": $ss,
    "step_title": $st,
    "step_plan_path": $spp,
    "step_plan": $sp,
    "prev_handoff_path": $php,
    "prev_handoff": $ph,
    "notes_path": $np,
    "notes": $no,
    "handoff_path": $hp,
    "blocking_reason": $br,
    "_next": $nx
  }'
@@ -0,0 +1,27 @@
 #!/usr/bin/env bash
 # revise_from_choice.sh - turn a free-form answer at the approval gate into
 # revision instructions.
 #
 # Reads user_feedback from the graph state ($GRAPH_STATE_FILE, else
 # $GRAPH_STATE). Empty feedback routes to get_revision; anything else is
 # wrapped into fix_instructions and routed to implement.
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  graph_state=$(<"$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  graph_state="$GRAPH_STATE"
 else
  graph_state='{}'
 fi
 user_comments=$(jq -r '.user_feedback // ""' <<< "$graph_state")
 if [[ -n "$user_comments" ]]; then
  # The comments are appended verbatim below a fixed heading.
  printf -v revision_brief '## Revision requested by the user at the step approval gate\n\nAddress these comments with minimal edits, then the step re-verifies and the handoff is rewritten:\n\n%s' \
    "$user_comments"
  jq -nc \
    --arg 'fi' "$revision_brief" \
    '{
      "fix_instructions": $fi,
      "_next": "implement"
    }'
 else
  # Nothing usable was typed: ask for the revision explicitly.
  jq -nc '{"_next": "get_revision"}'
 fi
@@ -0,0 +1,27 @@
 #!/usr/bin/env bash
 # route_coder_result.sh - route on the sentinel found in the coder's output.
 #
 # Reads coder_result from the graph state ($GRAPH_STATE_FILE, else
 # $GRAPH_STATE) and substring-matches it, in this priority order:
 #   CODER_COMPLETE -> verify_format_lint
 #   CODER_REJECTED -> end_rejected
 #   CODER_FAILED   -> end_failure (with blocking_reason)
 #   none of them   -> end_failure (with blocking_reason)
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  graph_state=$(<"$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  graph_state="$GRAPH_STATE"
 else
  graph_state='{}'
 fi
 coder_output=$(jq -r '.coder_result // ""' <<< "$graph_state")
 failure_reason=""
 if [[ "$coder_output" == *CODER_COMPLETE* ]]; then
  target="verify_format_lint"
 elif [[ "$coder_output" == *CODER_REJECTED* ]]; then
  target="end_rejected"
 elif [[ "$coder_output" == *CODER_FAILED* ]]; then
  target="end_failure"
  failure_reason="coder fix-loop exhausted; see coder result"
 else
  target="end_failure"
  failure_reason="coder returned no recognizable sentinel (expected CODER_COMPLETE / CODER_REJECTED / CODER_FAILED)"
 fi
 # blocking_reason is only part of the patch on the two failure routes.
 if [[ -n "$failure_reason" ]]; then
  jq -nc --arg br "$failure_reason" --arg nx "$target" '{"blocking_reason": $br, "_next": $nx}'
 else
  jq -nc --arg nx "$target" '{"_next": $nx}'
 fi
@@ -0,0 +1,38 @@
 #!/usr/bin/env bash
 # route_review.sh - decide what happens after the independent review.
 #
 # Reads review_report, review_attempts, and max_review_attempts from the
 # graph state ($GRAPH_STATE_FILE, else $GRAPH_STATE). A report containing the
 # 🔴 marker loops back to implement while review attempts remain; every
 # other case proceeds to write_handoff.
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  graph_state=$(<"$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  graph_state="$GRAPH_STATE"
 else
  graph_state='{}'
 fi
 report=$(jq -r '.review_report // ""' <<< "$graph_state")
 attempts_used=$(jq -r '.review_attempts // 0' <<< "$graph_state")
 attempts_allowed=$(jq -r '.max_review_attempts // 1' <<< "$graph_state")
 # No critical marker, or the bounded review loop is spent: go write the handoff.
 if [[ "$report" != *"🔴"* ]] || (( attempts_used >= attempts_allowed )); then
  jq -nc '{"_next": "write_handoff"}'
  exit 0
 fi
 this_attempt=$((attempts_used + 1))
 printf -v review_brief '## Independent review findings (attempt %d of %d)\n\nAn independent reviewer flagged CRITICAL (🔴) findings. Address ONLY the 🔴 findings with minimal edits. Do not refactor unrelated code.\n\n%s' \
  "$this_attempt" "$attempts_allowed" "$report"
 # needs_independent_review is cleared along with the loop back to implement.
 jq -nc \
  --argjson n "$this_attempt" \
  --arg 'fi' "$review_brief" \
  '{
    "review_attempts": $n,
    "fix_instructions": $fi,
    "needs_independent_review": false,
    "_next": "implement"
  }'
@@ -0,0 +1,23 @@
 #!/usr/bin/env bash
 # route_staleness.sh - route after orientation.
 #
 # Reads has_major_deviation from the graph state ($GRAPH_STATE_FILE, else
 # $GRAPH_STATE). A major deviation goes to gate_deviation unless
 # STEP_AUTOAPPROVE=1 is set; everything else goes straight to implement.
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  graph_state=$(<"$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  graph_state="$GRAPH_STATE"
 else
  graph_state='{}'
 fi
 major_deviation=$(jq -r '.has_major_deviation // false' <<< "$graph_state")
 target="implement"
 if [[ "${STEP_AUTOAPPROVE:-0}" != "1" && "$major_deviation" == "true" ]]; then
  target="gate_deviation"
 fi
 jq -nc --arg nx "$target" '{"_next": $nx}'
@@ -0,0 +1,23 @@
 #!/usr/bin/env bash
 # route_sweep.sh - route after the edge-case sweep.
 #
 # Reads needs_independent_review from the graph state ($GRAPH_STATE_FILE,
 # else $GRAPH_STATE). A requested review goes to independent_review unless
 # STEP_SKIP_REVIEW=1 is set; everything else goes to write_handoff.
 set -euo pipefail
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  graph_state=$(<"$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  graph_state="$GRAPH_STATE"
 else
  graph_state='{}'
 fi
 review_requested=$(jq -r '.needs_independent_review // false' <<< "$graph_state")
 target="write_handoff"
 if [[ "${STEP_SKIP_REVIEW:-0}" != "1" && "$review_requested" == "true" ]]; then
  target="independent_review"
 fi
 jq -nc --arg nx "$target" '{"_next": $nx}'
@@ -0,0 +1,57 @@
 #!/usr/bin/env bash
 # verify_build.sh - collect step-level build/typecheck evidence.
 #
 # The command is $BUILD_CMD when set, otherwise the "check" (else "build")
 # entry reported by detect_project for project_dir. It is run from
 # project_dir with stderr folded into the captured output.
 # Emits build_ok, build_output, and "_next": verify_tests on success (or when
 # there is no command to run), fix_loop_gate on a non-zero exit.
 set -uo pipefail
 # shellcheck disable=SC1091
 source "$(dirname "$0")/../../.shared/utils.sh"
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 else
  state='{}'
 fi
 project_dir=$(jq -r '.project_dir // "."' <<< "$state")
 cmd="${BUILD_CMD:-}"
 if [[ -z "$cmd" ]]; then
  cmd=$(detect_project "$project_dir" | jq -r '.check // .build // ""')
 fi
 # Emit the state patch: $1 = build_ok (true|false), $2 = report, $3 = next node.
 emit() {
  jq -nc \
    --argjson ok "$1" \
    --arg out "$2" \
    --arg nx "$3" \
    '{
      "build_ok": $ok,
      "build_output": $out,
      "_next": $nx
    }'
 }
 if [[ -z "$cmd" || "$cmd" == "null" ]]; then
  emit true "(no build/check command available for this project type)" "verify_tests"
  exit 0
 fi
 exit_code=0
 output=$(cd "$project_dir" && eval "$cmd" 2>&1) || exit_code=$?
 if (( exit_code != 0 )); then
  # Failures additionally record the exit code for the fix loop.
  emit false "Ran: $cmd"$'\n'"Exit code: $exit_code"$'\n'"$output" "fix_loop_gate"
 else
  emit true "Ran: $cmd"$'\n'"$output" "verify_tests"
 fi
@@ -0,0 +1,79 @@
 #!/usr/bin/env bash
 # Format + lint step: runs a formatter (best effort, never gating) and then the
 # optional LINT_CMD, printing a JSON object with format_output, lint_ok,
 # lint_output and the next graph node (_next).
 set -uo pipefail
 # shellcheck disable=SC1091
 source "$(dirname "$0")/../../.shared/utils.sh"
 # Graph state: GRAPH_STATE_FILE wins, then GRAPH_STATE, else an empty object.
 state='{}'
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 fi
 project_dir=$(jq -r '.project_dir // "."' <<< "$state")
 project_type=$(detect_project "$project_dir" | jq -r '.type // "unknown"')
 # FORMAT_CMD overrides the per-language default; python only gets a default
 # when ruff is actually installed.
 format_cmd="${FORMAT_CMD:-}"
 if [[ -z "$format_cmd" ]]; then
  if [[ "$project_type" == "rust" ]]; then
   format_cmd="cargo fmt"
  elif [[ "$project_type" == "go" ]]; then
   format_cmd="gofmt -w ."
  elif [[ "$project_type" == "python" ]] && command -v ruff &>/dev/null; then
   format_cmd="ruff format ."
  fi
 fi
 # The formatter's exit code is only recorded in the report; it does not change
 # lint_ok or the routing decision below.
 if [[ -n "$format_cmd" ]]; then
  fmt_out=$(cd "$project_dir" && eval "$format_cmd" 2>&1)
  fmt_rc=$?
  format_output="Ran: $format_cmd
 Exit code: $fmt_rc
 $fmt_out"
 else
  format_output="(no format command configured for project type '$project_type'; skipped. Set FORMAT_CMD to enable.)"
 fi
 lint_cmd="${LINT_CMD:-}"
 # Without a lint command the step passes straight through to the build check.
 if [[ -z "$lint_cmd" ]]; then
  jq -nc \
    --arg fo "$format_output" \
    '{
      "format_output": $fo,
      "lint_ok": true,
      "lint_output": "(no LINT_CMD configured; linting is covered by the build/check command)",
      "_next": "verify_build"
    }'
  exit 0
 fi
 lint_out=$(cd "$project_dir" && eval "$lint_cmd" 2>&1)
 lint_rc=$?
 if (( lint_rc != 0 )); then
  lint_ok=false
  next_step="fix_loop_gate"
  lint_report="Ran: $lint_cmd
 Exit code: $lint_rc
 $lint_out"
 else
  lint_ok=true
  next_step="verify_build"
  lint_report="Ran: $lint_cmd
 $lint_out"
 fi
 jq -nc \
   --arg fo "$format_output" \
   --argjson ok "$lint_ok" \
   --arg lo "$lint_report" \
   --arg next "$next_step" \
   '{
     "format_output": $fo,
     "lint_ok": $ok,
     "lint_output": $lo,
     "_next": $next
   }'
@@ -0,0 +1,57 @@
 #!/usr/bin/env bash
 # Test verification step: runs the project's test command and prints a JSON
 # object with tests_ok, tests_output and the next graph node (_next).
 set -uo pipefail
 # shellcheck disable=SC1091
 source "$(dirname "$0")/../../.shared/utils.sh"
 # Graph state: GRAPH_STATE_FILE wins, then GRAPH_STATE, else an empty object.
 state='{}'
 if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
 elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
 fi
 project_dir=$(jq -r '.project_dir // "."' <<< "$state")
 # An explicit TEST_CMD overrides whatever detect_project reports.
 cmd="${TEST_CMD:-}"
 if [[ -z "$cmd" ]]; then
  cmd=$(detect_project "$project_dir" | jq -r '.test // ""')
 fi
 # Nothing to run: report success so the graph can move on.
 case "$cmd" in
  "" | null)
   jq -nc '{
     "tests_ok": true,
     "tests_output": "(no test command available for this project type)",
     "_next": "edge_case_sweep"
   }'
   exit 0
   ;;
 esac
 # Run inside the project directory, capturing stdout and stderr together.
 output=$(cd "$project_dir" && eval "$cmd" 2>&1)
 exit_code=$?
 if (( exit_code != 0 )); then
  tests_ok=false
  next_step="fix_loop_gate"
  report="Ran: $cmd
 Exit code: $exit_code
 $output"
 else
  tests_ok=true
  next_step="edge_case_sweep"
  report="Ran: $cmd
 $output"
 fi
 jq -nc \
   --argjson ok "$tests_ok" \
   --arg out "$report" \
   --arg next "$next_step" \
   '{
     "tests_ok": $ok,
     "tests_output": $out,
     "_next": $next
   }'
@@ -18,6 +18,11 @@
      "type": "stdio",
      "command": "uvx",
      "args": ["duckduckgo-mcp-server"]
    },
    "iwe": {
      "type": "stdio",
      "command": "iwec",
      "args": ["--project", "."]
    }
  }
 }
@@ -0,0 +1,44 @@
 schemaVersion: "1"
 kind: mixin
 name: built-in-tools
 description: >
  Installs binaries and allows network domains required by Coyote's built-in
  global tools and the default MCP server set. Auto-applied by Coyote's sbx
  mixin discovery when running `coyote --sandbox`.
 network:
  allowedDomains:
    # fetch_url_via_jina + jina reader fallback
    - "r.jina.ai:443"
    # get_current_weather (.sh, .py, .ts)
    - "wttr.in:443"
    # search_arxiv (the .sh tool still uses http://, so :80 is required until fixed)
    - "export.arxiv.org:443"
    - "export.arxiv.org:80"
    # search_arxiv + search_wikipedia may follow DOI redirects
    - "doi.org:443"
    # search_wikipedia
    - "en.wikipedia.org:443"
    # search_wolframalpha
    - "api.wolframalpha.com:443"
    # web_search_perplexity
    - "api.perplexity.ai:443"
    # web_search_tavily
    - "api.tavily.com:443"
    # send_twilio
    - "api.twilio.com:443"
    # MCP: github (built-in mcp.json: api.githubcopilot.com)
    - "api.githubcopilot.com:443"
    # MCP: atlassian (built-in mcp.json: mcp-remote -> mcp.atlassian.com)
    - "mcp.atlassian.com:443"
    # MCP: ddg-search (built-in mcp.json: uvx duckduckgo-mcp-server)
    - "duckduckgo.com:443"
    - "html.duckduckgo.com:443"
    - "lite.duckduckgo.com:443"
    # MCP: npx-based servers (mcp-remote) pull from npm
    - "registry.npmjs.org:443"
    # MCP: docker server may pull images from common registries
    - "ghcr.io:443"
    - "registry-1.docker.io:443"
    - "auth.docker.io:443"
    - "production.cloudflare.docker.com:443"
@@ -0,0 +1,81 @@
 #!/usr/bin/env bash
 set -e
 # @describe Structural code search using AST patterns (ast-grep). Matches syntax trees, not text,
 # so it finds code regardless of formatting: function calls with any arguments, definitions, etc.
 # Use meta-variables in patterns: $NAME matches one AST node, $$$ matches zero or more nodes.
 # Patterns must be COMPLETE, valid AST nodes in the target language: 'fn $NAME($$$) { $$$ }'
 # matches Rust fn definitions (with body - 'fn $NAME($$$)' alone parses as nothing and matches
 # nothing), 'foo($$$)' matches all calls to foo, '$X.unwrap()' matches all unwrap calls.
 # Prefer this over fs_grep when searching for code STRUCTURE (calls, definitions, signatures);
 # use fs_grep for plain text, comments, or strings.
 # @option --pattern! The AST pattern to search for (must parse as valid code in the target language)
 # @option --lang The target language (e.g. rust, typescript, tsx, javascript, python, go, java, c, cpp, kotlin, swift, ruby, php, css, html, yaml, json). Strongly recommended; without it files of every supported language are scanned
 # @option --path The directory OR file to search in (defaults to current working directory)
 # @option --glob File glob to narrow the search (e.g. "src/**/*.rs", "!**/tests/**")
 # @env LLM_OUTPUT=/dev/stdout The output path
 MAX_RESULTS=100
 MAX_OUTPUT_BYTES=32768
 resolve_binary() {
    # Prints the name of the ast-grep executable to use and returns 0, or
    # returns 1 (printing nothing) when no usable binary is on PATH.
    if command -v ast-grep &>/dev/null; then
        printf '%s\n' "ast-grep"
    elif command -v sg &>/dev/null && sg --version 2>/dev/null | grep -qi 'ast-grep'; then
        # `sg` may be an unrelated program, so it is only accepted when its
        # version banner identifies it as ast-grep.
        printf '%s\n' "sg"
    else
        return 1
    fi
 }
 main() {
    # Runs an ast-grep structural search and appends a bounded report to $LLM_OUTPUT.
    #
    # Inputs (provided as argc_* variables):
    #   argc_pattern  AST pattern to search for (required)
    #   argc_lang     target language (optional)
    #   argc_path     file or directory to search (defaults to ".")
    #   argc_glob     glob used to narrow the search (optional)
    #
    # Returns 1 only when the search path does not exist. A missing binary, an
    # ast-grep failure and "no matches" are all reported as text and return 0.
    # shellcheck disable=SC2154
    local pattern="$argc_pattern"
    local lang="${argc_lang:-}"
    local search_path="${argc_path:-.}"
    local glob="${argc_glob:-}"
    local bin
    if ! bin=$(resolve_binary); then
        printf 'ast-grep is not installed. Fall back to fs_grep for this search.\nTo enable structural search, install ast-grep:\n  cargo install ast-grep --locked\n  brew install ast-grep\n  npm i -g @ast-grep/cli\n' >> "$LLM_OUTPUT"
        return 0
    fi
    if [[ ! -e "$search_path" ]]; then
        echo "Error: path not found: $search_path" >> "$LLM_OUTPUT"
        return 1
    fi
    local args=(run --pattern "$pattern" --color never --heading never)
    [[ -n "$lang" ]] && args+=(--lang "$lang")
    [[ -n "$glob" ]] && args+=(--globs "$glob")
    args+=("$search_path")
    local output exit_code=0
    output=$("$bin" "${args[@]}" 2>&1) || exit_code=$?
    # Exit codes above 1 are treated as failures. This is checked before the
    # empty-output case so a failure that printed nothing is not misreported
    # as "no matches".
    if (( exit_code > 1 )); then
        printf 'ast-grep failed (exit %s):\n%s\n\nHint: the pattern must be valid %s syntax. Meta-variables: $NAME (one node), $$$ (zero or more).\n' \
            "$exit_code" "${output:-(no output)}" "${lang:-source}" >> "$LLM_OUTPUT"
        return 0
    fi
    if [[ -z "$output" ]]; then
        echo "No structural matches found for: $pattern" >> "$LLM_OUTPUT"
        return 0
    fi
    local total line_capped shown
    total=$(wc -l <<< "$output")
    # Some wc implementations pad the count with spaces; strip them so the
    # summary message prints a clean number.
    total=${total//[[:space:]]/}
    # Apply the line cap and the byte cap separately so a byte cut can be detected.
    line_capped=$(head -n "$MAX_RESULTS" <<< "$output")
    shown=$(head -c "$MAX_OUTPUT_BYTES" <<< "$line_capped")
    printf '%s\n' "$shown" >> "$LLM_OUTPUT"
    if (( total > MAX_RESULTS )); then
        printf '\n(Showing %s of %s matching lines. Narrow with --glob, --lang, or a more specific pattern.)\n' \
            "$MAX_RESULTS" "$total" >> "$LLM_OUTPUT"
    fi
    # Tell the reader when the byte cap cut the output, otherwise a result that
    # ends mid-line looks complete.
    if [[ "$shown" != "$line_capped" ]]; then
        printf '\n(Output truncated to %s bytes. Narrow with --glob, --lang, or a more specific pattern.)\n' \
            "$MAX_OUTPUT_BYTES" >> "$LLM_OUTPUT"
    fi
 }
@@ -5,6 +5,23 @@ set -e
 # PREFERRED way to modify a file. Prefer this over fs_write whenever the file already exists: it sends less data,
 # preserves unchanged content automatically, and is less prone to accidental data loss from full rewrites.
 # Use fs_write only when you are creating a new file or doing a complete rewrite where most of the content changes.
 #
 # CRITICAL — the patch is matched byte-for-byte. There is no fuzzy matching, no whitespace tolerance, and no context shift:
 # - Context lines (prefixed with a single space) and removed lines (prefixed with '-') must equal the file content exactly.
 #   If unsure, fs_cat the file first and copy the bytes verbatim into your patch.
 # - JSON-escape the contents string ONCE. Each literal backslash in the file becomes \\ in the JSON contents string. So a
 #   shell line containing s|\\"|"|g must appear in JSON as s|\\\\\"|\"|g — NOT s|\\\\\\\"|\\\"|g. Over-escaping backslashes
 #   is the most common cause of "unable to apply patch" failures, especially in files with sed/jq/regex pipelines or
 #   embedded Python with quoted strings.
 # - Hunks are applied in order; the first hunk that fails aborts the whole patch — later hunks are NOT attempted.
 # - If you've edited this file in earlier tool calls, fs_cat it again before composing the patch. A stale view of the file
 #   produces context lines that no longer match.
 # - On failure the error message names the failing hunk and shows the expected-vs-actual line. Fix that specific line and
 #   retry — do not blindly resend a near-identical patch.
 #
 # For files with heavy escaping (sed/jq/regex pipelines, shell with embedded heredocs, deeply quoted strings), prefer
 # fs_write over chained fs_patch hunks to replace the entire file with the full new contents (i.e. original content +
 # your changes).
 # @option --path! The path of the file to apply the patch to
 # @option --contents! The patch to apply to the file
@@ -600,6 +600,14 @@ patch_file() {
          for (i = 2; i <= hunkTotalOriginalLines[hunkIndex]; i++) {
            if (lines[nextLineIndex] != hunkOriginalLines[hunkIndex,i]) {
              if (i - 1 > bestPartialLen[hunkIndex]) {
                bestPartialLen[hunkIndex] = i - 1
                bestPartialAnchorLine[hunkIndex] = lineIndex
                bestPartialHunkPos[hunkIndex] = i
                bestPartialDivergeLine[hunkIndex] = nextLineIndex
                bestPartialExpected[hunkIndex] = hunkOriginalLines[hunkIndex,i]
                bestPartialActual[hunkIndex] = lines[nextLineIndex]
              }
              nextLineIndex = 0
              break
            }
@@ -621,7 +629,32 @@ patch_file() {
        }
        if (hunkIndex != totalHunks + 1) {
          failingHunk = hunkIndex
          print "error: unable to apply patch" > "/dev/stderr"
          print "" > "/dev/stderr"
          print "Hunk " failingHunk " of " totalHunks " did not match the file." > "/dev/stderr"
          if (bestPartialLen[failingHunk] == 0) {
            print "" > "/dev/stderr"
            print "The first context/removed line of hunk " failingHunk " was not found anywhere in the file:" > "/dev/stderr"
            print "  expected: " hunkOriginalLines[failingHunk, 1] > "/dev/stderr"
          } else {
            print "" > "/dev/stderr"
            print "Closest match: anchored at file line " bestPartialAnchorLine[failingHunk] ", matched " bestPartialLen[failingHunk] " of " hunkTotalOriginalLines[failingHunk] " original lines before diverging." > "/dev/stderr"
            print "" > "/dev/stderr"
            print "At file line " bestPartialDivergeLine[failingHunk] " (hunk original line " bestPartialHunkPos[failingHunk] "):" > "/dev/stderr"
            print "  expected: " bestPartialExpected[failingHunk] > "/dev/stderr"
            print "  actual:   " bestPartialActual[failingHunk] > "/dev/stderr"
          }
          print "" > "/dev/stderr"
          print "Lines must match byte-for-byte (no fuzzy matching). Check escaping, whitespace, and quoting." > "/dev/stderr"
          if (failingHunk < totalHunks) {
            print "" > "/dev/stderr"
            print (totalHunks - failingHunk) " subsequent hunk(s) were not attempted (patcher aborts on first failure)." > "/dev/stderr"
          }
          exit 1
        }
    }
@@ -0,0 +1,93 @@
 ---
 name: diagnose
 temperature: 0.2
 enabled_tools:
  - execute_command
  - fs_cat
  - fs_ls
  - web_search_coyote
 skills_enabled: false
 auto_continue: true
 max_auto_continues: 10
 ---
 You are an expert systems troubleshooter: equal parts SRE, sysadmin, network engineer, and homelab tinkerer. Your job
 is to diagnose and fix technical problems of any kind: services that won't start, networking failures, container
 issues, driver problems, permission errors, misbehaving hardware, broken configs, or anything else. You are not limited
 to code.
 <system>
 os: {{__os__}}
 distro: {{__os_distro__}}
 arch: {{__arch__}}
 shell: {{__shell__}}
 cwd: {{__cwd__}}
 now: {{__now__}}
 </system>
 ## Prime Directive
 **You run the diagnostics yourself.** Never tell the user to run a command and paste the output back. Use the
 `execute_command` tool to gather evidence directly, then interpret the results for them. The user should watch you
 work, not act as your terminal.
 ## Diagnostic Loop
 Work the loop until the problem is solved or genuinely blocked:
 1. **Reproduce & observe.** Run the failing thing (or inspect its state) to see the actual error with your own eyes.
   Never diagnose from the user's paraphrase alone.
 2. **Establish what changed.** Most breakage follows a change: updates, config edits, reboots, new hardware, expired
   certs/leases. Check timestamps, package logs, and recent history early.
 3. **Check the dumb stuff first.** Is the service running? Is it enabled? Is the interface up? Is the disk full? Is
   DNS resolving? Is the clock right? Cheap checks before deep theories.
 4. **Isolate by layer.** Split the problem space in half with each test:
   - Networking: bottom-up — link → IP/DHCP → routing → DNS → transport → application.
   - Software: process alive? → logs → config → dependencies/permissions → environment → binary itself.
   - Containers: daemon → image → container state → logs → mounts/networks → host resources.
 5. **Hypothesize, then test.** State your current best hypothesis in one line before each test, and change ONE
   variable at a time. If a test disproves the hypothesis, say so and pivot; don't quietly move on.
 6. **Fix the root cause, not the symptom.** A restart that "fixes" it without explanation is a data point, not a fix.
 7. **Verify.** After any fix, re-run the original failing operation and confirm it now works. No verification, no
   victory declaration.
 ## Evidence Gathering
 - Primary sources, in rough order of value: exit codes and stderr, service/app logs (`journalctl`, `docker logs`,
  files under `/var/log`), kernel messages (`dmesg`), state inspection (`systemctl status`, `ip`, `ss`, `df`, `free`,
  `lsblk`, `nmcli`, `docker ps/inspect`), then config files.
 - Make every command non-interactive and bounded: `--no-pager` for `journalctl`/`systemctl`, `-n`/`--since` to limit
  log output, `timeout 10 ...` for anything that might hang, `-c` counts for `ping`. Never launch interactive TUIs
  (top, htop, lazydocker) — use their batch/one-shot modes or underlying CLIs instead.
 - Prefer unprivileged commands. When root is genuinely required, say why and use `sudo` (the user may get a password
  prompt in their terminal — that's expected).
 - Search the web for exact error strings (quoted, with software name and version) when an error is unfamiliar or
  smells like a known bug or recent regression. Distro wikis, GitHub issues, and bug trackers beat guessing.
 ## Safety Rules
 Commands fall into three tiers:
 1. **Read-only / inspection** (status, logs, listing, ping, dig, cat): run freely, no permission needed.
 2. **Reversible state changes** (restart a service, bounce an interface, recreate a container, edit a config after
   backing it up): announce what you're about to do and why in one sentence, then do it. Back up any file before
   modifying it (`cp file file.bak.$(date +%s)`).
 3. **Destructive or hard-to-reverse actions** (deleting data or volumes, formatting, `dd`, partitioning, package
   removal, firewall flushes, forced resets): STOP and ask for explicit confirmation first, including the exact
   command and a rollback plan. Never run these on your own judgment.
 Additional hard rules:
 - Never print or transmit secrets. If command output contains tokens, keys, or passwords, redact them in your response.
 - Never disable security controls (firewalls, SELinux/AppArmor, certificate validation) as a "fix" — at most as a
  temporary, clearly-labeled isolation test, restored immediately after.
 - If the evidence points to failing hardware or risk of data loss, stop, say so plainly, and present options before
  touching anything else.
 ## Communication
 - Lead with what you found, not what you did. Then show the key evidence: the command and the relevant lines of its
  output (trimmed — never dump walls of text).
 - When the problem is multi-step, keep a running todo list so the user can follow the investigation.
 - On resolution, close with a short summary: **root cause → fix applied → how it was verified → how to prevent it**.
 - If you're blocked (needs physical access, a password you don't have, a reboot decision), say exactly what you need
  and what you'll do once you have it.
@@ -0,0 +1,346 @@
 # Docker sbx agent kit for Coyote
 #
 # Setup (paths use $HOME so commands work in bash/zsh/PowerShell/Git Bash):
 #   sbx create --kit ./sbx-kit/ coyote --name testing .
 #   sbx cp $HOME/.config/coyote/ testing:/home/agent/.config/
 #   sbx cp $HOME/.coyote_password testing:/home/agent/
 #   sbx run testing --kit ./sbx-kit/
 schemaVersion: "1"
 kind: sandbox
 name: coyote
 displayName: Coyote
 description: >
  An all-in-one, batteries-included LLM CLI tool featuring Shell Assistant,
  CLI & REPL mode, RAG, AI tools & agents, MCP servers, skills, and macros.
 sandbox:
  image: "docker/sandbox-templates:shell-docker"
  aiFilename: COYOTE.md
  entrypoint:
    run: ["bash", "-lc", "exec /home/agent/.cargo/bin/coyote"]
 network:
  # Proxy-managed LLM providers: the proxy substitutes `proxy-managed` for
  # the env var inside the sandbox and rewrites the auth header per
  # serviceAuth at request time. Multiple domains may map to one service
  # (e.g. jina) so they share a single credential.
  serviceDomains:
    api.openai.com: openai
    api.anthropic.com: anthropic
    generativelanguage.googleapis.com: gemini
    api.cohere.ai: cohere
    api.groq.com: groq
    openrouter.ai: openrouter
    api.ai21.com: ai21
    api.cloudflare.com: cloudflare
    api.deepinfra.com: deepinfra
    api.deepseek.com: deepseek
    api.mistral.ai: mistral
    api.perplexity.ai: perplexity
    api.voyageai.com: voyageai
    api.x.ai: xai
    api.jina.ai: jina
    r.jina.ai: jina
    qianfan.baidubce.com: ernie
    api.hunyuan.cloud.tencent.com: hunyuan
    api.minimax.chat: minimax
    api.moonshot.cn: moonshot
    dashscope.aliyuncs.com: qianwen
    open.bigmodel.cn: zhipuai
  serviceAuth:
    openai:
      headerName: Authorization
      valueFormat: "Bearer %s"
    anthropic:
      headerName: x-api-key
      valueFormat: "%s"
    gemini:
      headerName: x-goog-api-key
      valueFormat: "%s"
    cohere:
      headerName: Authorization
      valueFormat: "Bearer %s"
    groq:
      headerName: Authorization
      valueFormat: "Bearer %s"
    openrouter:
      headerName: Authorization
      valueFormat: "Bearer %s"
    ai21:
      headerName: Authorization
      valueFormat: "Bearer %s"
    cloudflare:
      headerName: Authorization
      valueFormat: "Bearer %s"
    deepinfra:
      headerName: Authorization
      valueFormat: "Bearer %s"
    deepseek:
      headerName: Authorization
      valueFormat: "Bearer %s"
    mistral:
      headerName: Authorization
      valueFormat: "Bearer %s"
    perplexity:
      headerName: Authorization
      valueFormat: "Bearer %s"
    voyageai:
      headerName: Authorization
      valueFormat: "Bearer %s"
    xai:
      headerName: Authorization
      valueFormat: "Bearer %s"
    jina:
      headerName: Authorization
      valueFormat: "Bearer %s"
    ernie:
      headerName: Authorization
      valueFormat: "Bearer %s"
    hunyuan:
      headerName: Authorization
      valueFormat: "Bearer %s"
    minimax:
      headerName: Authorization
      valueFormat: "Bearer %s"
    moonshot:
      headerName: Authorization
      valueFormat: "Bearer %s"
    qianwen:
      headerName: Authorization
      valueFormat: "Bearer %s"
    zhipuai:
      headerName: Authorization
      valueFormat: "Bearer %s"
  allowedDomains:
    # Coyote release + self-update + model-registry sync
    - "github.com:443"
    - "api.github.com:443"
    - "raw.githubusercontent.com:443"
    - "objects.githubusercontent.com:443"
    - "*.githubusercontent.com:443"
    # Coyote install paths (cargo install + uv + rustup + Python tool deps at runtime)
    - "crates.io:443"
    - "static.crates.io:443"
    - "pypi.org:443"
    - "files.pythonhosted.org:443"
    - "astral.sh:443"
    - "sh.rustup.rs:443"
    - "static.rust-lang.org:443"
    # LLM model OAuth + API endpoints
    - "claude.ai:443"
    - "console.anthropic.com:443"
    - "accounts.google.com:443"
    # *.googleapis.com covers oauth2 + userinfo + VertexAI regional endpoints
    # (*-aiplatform.googleapis.com). Do not narrow without re-checking VertexAI.
    - "*.googleapis.com:443"
    # Bedrock and GitHub Models use signed / GitHub-PAT auth that the proxy
    # cannot rewrite. Domains are allow-listed; credentials must be injected
    # separately (see README "Extending").
    - "*.amazonaws.com:443"
    - "models.inference.ai.azure.com:443"
 credentials:
  sources:
    openai:
      env:
        - OPENAI_API_KEY
    anthropic:
      env:
        - ANTHROPIC_API_KEY
    gemini:
      env:
        - GEMINI_API_KEY
        - GOOGLE_API_KEY
    cohere:
      env:
        - COHERE_API_KEY
    groq:
      env:
        - GROQ_API_KEY
    openrouter:
      env:
        - OPENROUTER_API_KEY
    ai21:
      env:
        - AI21_API_KEY
    cloudflare:
      env:
        - CLOUDFLARE_API_KEY
    deepinfra:
      env:
        - DEEPINFRA_API_KEY
    deepseek:
      env:
        - DEEPSEEK_API_KEY
    mistral:
      env:
        - MISTRAL_API_KEY
    perplexity:
      env:
        - PERPLEXITY_API_KEY
    voyageai:
      env:
        - VOYAGE_API_KEY
    xai:
      env:
        - XAI_API_KEY
    jina:
      env:
        - JINA_API_KEY
    ernie:
      env:
        - ERNIE_API_KEY
    hunyuan:
      env:
        - HUNYUAN_API_KEY
    minimax:
      env:
        - MINIMAX_API_KEY
    moonshot:
      env:
        - MOONSHOT_API_KEY
    qianwen:
      env:
        - DASHSCOPE_API_KEY
    zhipuai:
      env:
        - ZHIPUAI_API_KEY
 environment:
  variables:
    IS_SANDBOX: "1"
    COYOTE_LOG_LEVEL: INFO
    COYOTE_CONFIG_DIR: /home/agent/.config/coyote
  proxyManaged:
    - OPENAI_API_KEY
    - ANTHROPIC_API_KEY
    - GEMINI_API_KEY
    - GOOGLE_API_KEY
    - COHERE_API_KEY
    - GROQ_API_KEY
    - OPENROUTER_API_KEY
    - AI21_API_KEY
    - CLOUDFLARE_API_KEY
    - DEEPINFRA_API_KEY
    - DEEPSEEK_API_KEY
    - MISTRAL_API_KEY
    - PERPLEXITY_API_KEY
    - VOYAGE_API_KEY
    - XAI_API_KEY
    - JINA_API_KEY
    - ERNIE_API_KEY
    - HUNYUAN_API_KEY
    - MINIMAX_API_KEY
    - MOONSHOT_API_KEY
    - DASHSCOPE_API_KEY
    - ZHIPUAI_API_KEY
 commands:
  install:
    - command: |
        sudo apt-get update &&
        sudo apt-get install -y \
          jq curl git \
          build-essential pkg-config \
          cmake \
          clang libclang-dev \
          musl-tools \
          libssl-dev \
          pandoc \
          bzip2
      user: "1000"
      description: Install system prerequisites (including pandoc for fetch_url_via_curl)
    - command: |
        curl -LsSf https://astral.sh/uv/install.sh | sh
        if [ -f "$HOME/.local/bin/uv" ]; then
          printf '#!/bin/sh\nexec uv tool run "$@"\n' > "$HOME/.local/bin/uvx"
          chmod +x "$HOME/.local/bin/uvx"
        fi
      user: "1000"
      description: Install uv and write a uvx shell wrapper (the installer may place a macOS binary at this path on Docker-for-Mac hosts, which the Linux container cannot execute)
    - command: |
        set -euo pipefail
        USQL_VERSION=0.21.4
        ARCH=$(uname -m)
        case "$ARCH" in
          x86_64) USQL_ARCH=amd64 ;;
          aarch64) USQL_ARCH=arm64 ;;
          *) echo "Unsupported arch for usql install: $ARCH" >&2; exit 1 ;;
        esac
        TMPDIR=$(mktemp -d)
        trap 'rm -rf "$TMPDIR"' EXIT
        curl -fsSL --retry 3 "https://github.com/xo/usql/releases/download/v${USQL_VERSION}/usql_static-${USQL_VERSION}-linux-${USQL_ARCH}.tar.bz2" -o "$TMPDIR/usql.tar.bz2"
        tar -xjf "$TMPDIR/usql.tar.bz2" -C "$TMPDIR"
        sudo install -m 0755 "$TMPDIR/usql_static" /usr/local/bin/usql
      user: "1000"
      description: Install the usql universal SQL CLI (used by the built-in sql agent and execute_sql_code tool)
    - command: |
        # Installs the stable Rust toolchain (minimal profile) and then builds
        # Coyote from crates.io. The musl target follows the sandbox CPU
        # (uname -m reports x86_64 or aarch64 on Linux) instead of assuming
        # x86_64, so arm64 sandboxes get a target that matches the host.
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
          sh -s -- -y \
          --default-toolchain stable \
          --profile minimal \
          --target "$(uname -m)-unknown-linux-musl"
        . "$HOME/.cargo/env"
        cargo install --locked coyote-ai
      user: "1000"
      description: Install Coyote AI CLI via Rust's Cargo
    - command: |
        . "$HOME/.cargo/env"
        cargo install --locked iwec
      user: "1000"
      description: Install the IWE MCP server binary (iwec) used by the built-in iwe MCP server and iwe-knowledge-base skill
    - command: |
        . "$HOME/.cargo/env"
        cargo install --locked ast-grep
      user: "1000"
      description: Install ast-grep, used by the built-in ast_grep structural code search tool (and the explore agent)
  startup:
    - command:
        # Runs `coyote --info` once when no config.yaml exists yet. The binary
        # is called by absolute path because this non-login `sh -c` may not
        # have ~/.cargo/bin on PATH, and the trailing `|| true` would hide a
        # "command not found" failure.
        [
          "sh",
          "-c",
          'test -f "$HOME/.config/coyote/config.yaml" || "$HOME/.cargo/bin/coyote" --info >/dev/null 2>&1 || true',
        ]
      user: "1000"
      background: false
      description: Bootstrap Coyote config directory on first sandbox start
 agentContext: |
  ## Sandbox environment
  You are running inside a Docker sandbox launched via `sbx run coyote`. The
  user's project workspace is mounted at its absolute host path and is the
  current working directory. `sudo` is passwordless; use it for system
  package installs.
  Coyote's configuration lives at `~/.config/coyote/` and logs at
  `~/.cache/coyote/coyote.log`. Persistence is enabled, so config, sessions,
  vault state, OAuth tokens, and installed tools survive sandbox restarts.
  LLM provider credentials are forwarded by the sandbox HTTP proxy. The
  following provider env vars are recognized - export the ones you use on
  the host before running `sbx run coyote`:
    OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY / GOOGLE_API_KEY,
    COHERE_API_KEY, GROQ_API_KEY, OPENROUTER_API_KEY, AI21_API_KEY,
    CLOUDFLARE_API_KEY, DEEPINFRA_API_KEY, DEEPSEEK_API_KEY,
    MISTRAL_API_KEY, PERPLEXITY_API_KEY, VOYAGE_API_KEY, XAI_API_KEY,
    JINA_API_KEY, ERNIE_API_KEY, HUNYUAN_API_KEY, MINIMAX_API_KEY,
    MOONSHOT_API_KEY, DASHSCOPE_API_KEY (Qwen), ZHIPUAI_API_KEY
  Inside the sandbox these appear as the placeholder string `proxy-managed`;
  the proxy substitutes the real value at request time. OAuth flows for
  Claude Pro/Max and Gemini are also allow-listed.
  Bedrock (AWS) and VertexAI (Google Cloud) use signed/OAuth-token requests
  that the proxy cannot rewrite. Their domains are allow-listed but you must
  inject credentials yourself via `sbx run --env AWS_ACCESS_KEY_ID=...` or
  a mixin kit that mounts a service-account JSON.
  Useful first-run commands:
  - `coyote --info`          # show config paths and resolved settings
  - `coyote --list-secrets`  # initialise the local vault
  - `coyote --authenticate <client>`  # OAuth flow (Claude Pro/Max, Gemini)
@@ -0,0 +1,33 @@
 schemaVersion: "1"
 kind: mixin
 name: vault-aws-secrets-manager
 description: >
  Installs the AWS CLI v2 so the Coyote vault can read secrets from AWS
  Secrets Manager inside the sandbox. The AWS Rust SDK does not strictly
  require the CLI, but most users authenticate via `aws sso login` or
  `aws configure`, which need the CLI to be installed. After install, run
  the appropriate auth command in the sandbox; cached credentials persist
  for the lifetime of the sandbox.
 network:
  allowedDomains:
    - "awscli.amazonaws.com:443"
    - "sts.amazonaws.com:443"
    - "*.sts.amazonaws.com:443"
    - "*.secretsmanager.amazonaws.com:443"
    - "*.amazonaws.com:443"
    - "*.awsapps.com:443"
 commands:
  install:
    - command: |
        set -euo pipefail
        sudo apt-get update
        sudo apt-get install -y unzip
        # uname -m (x86_64 / aarch64) is used directly in the installer archive name.
        ARCH=$(uname -m)
        # -f turns an HTTP error into a curl failure here, instead of saving
        # an error page that unzip would then reject with a confusing message.
        curl -fsSL --retry 3 "https://awscli.amazonaws.com/awscli-exe-linux-${ARCH}.zip" -o /tmp/awscliv2.zip
        # -o overwrites leftovers from an earlier attempt without prompting.
        unzip -oq /tmp/awscliv2.zip -d /tmp
        sudo /tmp/aws/install
        rm -rf /tmp/awscliv2.zip /tmp/aws
      user: "1000"
      description: Install AWS CLI v2 from the official installer
@@ -0,0 +1,24 @@
 schemaVersion: "1"
 kind: mixin
 name: vault-azure-key-vault
 description: >
  Installs the Azure CLI (`az`) so the Coyote vault can read secrets from
  Azure Key Vault inside the sandbox. After install, run `az login` in the
  sandbox to authenticate; the session token persists for the lifetime of
  the sandbox.
 network:
  allowedDomains:
    - "aka.ms:443"
    - "packages.microsoft.com:443"
    - "azurecliprod.blob.core.windows.net:443"
    - "login.microsoftonline.com:443"
    - "graph.microsoft.com:443"
    - "management.azure.com:443"
    - "*.vault.azure.net:443"
 commands:
  install:
    - command: "curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash"
      user: "1000"
      description: Install Azure CLI via Microsoft's official install script
@@ -0,0 +1,34 @@
 schemaVersion: "1"
 kind: mixin
 name: vault-gcp-secret-manager
 description: >
  Installs the Google Cloud CLI (`gcloud`) so the Coyote vault can read
  secrets from GCP Secret Manager inside the sandbox. The GCP Rust SDK does
  not strictly require the CLI, but most users authenticate via
  `gcloud auth application-default login`, which needs the CLI to be
  installed. After install, run that command in the sandbox; the ADC file
  persists for the lifetime of the sandbox.
 network:
  allowedDomains:
    - "packages.cloud.google.com:443"
    - "accounts.google.com:443"
    - "oauth2.googleapis.com:443"
    - "secretmanager.googleapis.com:443"
    - "cloudresourcemanager.googleapis.com:443"
    - "*.googleapis.com:443"
 commands:
  install:
    - command: |
        set -euo pipefail
        sudo apt-get update
        sudo apt-get install -y apt-transport-https ca-certificates gnupg
        # Register Google's apt repository, pinned to the keyring written below.
        echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
          | sudo tee /etc/apt/sources.list.d/google-cloud-sdk.list >/dev/null
        # -f makes an HTTP error fail the pipeline instead of feeding gpg an
        # error page; --batch --yes lets gpg overwrite an existing keyring so
        # the step can be re-run without prompting.
        curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
          | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/cloud.google.gpg
        sudo apt-get update
        sudo apt-get install -y google-cloud-cli
      user: "1000"
      description: Install gcloud CLI from Google's official apt repository
@@ -0,0 +1,30 @@
 # Sandbox mixin spec: installs gopass and gpg (see `description`).
 schemaVersion: "1"
 kind: mixin
 name: vault-gopass
 description: >
  Installs `gopass` and `gpg` so the Coyote vault can read secrets from a
  gopass store inside the sandbox. The store must be cloned manually
  (gopass walks a user-specific git remote, so v1 only allowlists github.com
  and gitlab.com; add other hosts via a user mixin if needed). After install,
  run `gopass setup` or `gopass clone <remote>` in the sandbox.
 # Hosts allowlisted for this mixin (release download plus the default git remotes).
 network:
  allowedDomains:
    - "github.com:443"
    - "api.github.com:443"
    - "objects.githubusercontent.com:443"
    - "gitlab.com:443"
 commands:
  install:
    - command: |
        set -euo pipefail
        sudo apt-get update
        sudo apt-get install -y gnupg2 git
        # Pinned release; ARCH comes from dpkg and is substituted into the .deb asset name.
        GOPASS_VERSION="1.15.13"
        ARCH=$(dpkg --print-architecture)
        # -f: exit non-zero on an HTTP error (e.g. 404 for an unpublished arch) instead of
        # saving the error page as gopass.deb and failing later inside dpkg.
        curl -fsSL "https://github.com/gopasspw/gopass/releases/download/v${GOPASS_VERSION}/gopass_${GOPASS_VERSION}_linux_${ARCH}.deb" -o /tmp/gopass.deb
        sudo dpkg -i /tmp/gopass.deb
        rm -f /tmp/gopass.deb
      user: "1000"
      description: Install gnupg2, git, and gopass from the official .deb release
@@ -0,0 +1,31 @@
 # Sandbox mixin spec: installs the 1Password CLI (see `description`).
 schemaVersion: "1"
 kind: mixin
 name: vault-one-password
 description: >
  Installs the 1Password CLI (`op`) so the Coyote vault can decrypt secrets
  inside the sandbox. After install, run `op signin` in the sandbox to
  authenticate; credentials persist for the lifetime of the sandbox.
 # Hosts allowlisted for this mixin (CLI download and 1Password account endpoints).
 network:
  allowedDomains:
    - "downloads.1password.com:443"
    - "cache.agilebits.com:443"
    - "my.1password.com:443"
    - "my.1password.eu:443"
    - "my.1password.ca:443"
    - "events.1password.com:443"
 commands:
  install:
    - command: |
        set -euo pipefail
        sudo apt-get update
        sudo apt-get install -y unzip
        # Pinned release; ARCH comes from dpkg and is substituted into the archive name.
        OP_VERSION="v2.30.3"
        ARCH=$(dpkg --print-architecture)
        # -f: exit non-zero on an HTTP error instead of saving the error page as op.zip
        # and failing later inside unzip.
        curl -fsSL "https://cache.agilebits.com/dist/1P/op2/pkg/${OP_VERSION}/op_linux_${ARCH}_${OP_VERSION}.zip" -o /tmp/op.zip
        # Extract only the `op` binary, overwriting any existing copy (-o) in /usr/local/bin (-d).
        sudo unzip -od /usr/local/bin /tmp/op.zip op
        sudo chmod +x /usr/local/bin/op
        rm -f /tmp/op.zip
      user: "1000"
      description: Install 1Password CLI from the official archive
@@ -37,7 +37,7 @@ Every `agent__spawn` result includes a session_id. **Use it.**
 Starting a fresh agent for a follow-up forces it to re-read every file it already read. That's 70%+ wasted tokens, plus the agent loses the reasoning it built up.
-After every delegation, **store the session_id** for potential continuation.
+After every delegation, **store the session_id somewhere compression-safe** for potential continuation. Long sessions compress: chat history gets replaced by a summary, and a session_id that exists only in chat history is unresumable afterward. Embed it in the todo item for that work — `todo__add "Implement auth endpoint (coder ses_abc123)"` — or in your run-state memory file. The todo list and memory survive compression; the conversation does not.
 ## Skill nudges to delegates
@@ -0,0 +1,40 @@
 ---
 description: Systematic troubleshooting of technical issues (services, networking, containers, OS) by running diagnostic commands directly instead of asking the user to.
 enabled_tools: execute_command
 ---
 A technical problem needs diagnosing. Apply this methodology strictly. Use the `execute_command` tool to gather
 evidence yourself — never ask the user to run commands and paste output back.
 ## Loop
 1. **Reproduce first.** Run the failing thing and read the actual error before theorizing.
 2. **Ask "what changed?"** Updates, config edits, reboots, expirations. Check recent history early.
 3. **Cheap checks first.** Service running/enabled? Interface up? Disk full? DNS resolving? Clock right?
 4. **Isolate by layer, one variable at a time.** Network: link → IP → routing → DNS → transport → app.
   Software: process → logs → config → deps/permissions → environment. Containers: daemon → image → container →
   logs → mounts/networks → host.
 5. **State each hypothesis in one line before testing it.** Pivot openly when disproved.
 6. **Fix root cause, then verify** by re-running the original failing operation. No verification, no fix.
 ## Command Discipline
 - Non-interactive and bounded, always: `--no-pager`, `-n`/`--since` on logs, `timeout 10` on anything that might
  hang, `-c` on ping. No TUIs — use batch modes.
 - Unprivileged first; `sudo` only when required, stating why.
 - Web-search exact quoted error strings (with software name + version) for unfamiliar errors.
 ## Safety Tiers
 1. **Read-only** (status, logs, ls, cat, ping, dig): run freely.
 2. **Reversible changes** (service restart, interface bounce, config edit): announce in one sentence, back up files
   first (`cp file file.bak.$(date +%s)`), then do it.
 3. **Destructive** (data/volume deletion, formatting, `dd`, package removal, firewall flush): require explicit user
   confirmation with the exact command and a rollback plan. Never on your own judgment.
 Redact any secrets appearing in command output. Never disable security controls as a "fix". Stop and present options
 if evidence suggests failing hardware or data-loss risk.
 ## Reporting
 Lead with findings, show trimmed key evidence, and close resolved issues with: root cause → fix → verification →
 prevention.
@@ -0,0 +1,78 @@
 ---
 description: Schema and discipline for writing and reading step handoff documents - the only channel between implementation steps. Evidence must be pasted, downstream plan changes proposed not imposed. Grants filesystem access for reading and writing handoffs.
 enabled_tools: fs_read, fs_cat, fs_ls, fs_write
 ---
 A handoff is the ONLY channel between step N and step N+1. The next executor runs in a fresh session: it sees the plan repo, the code, and this document — nothing else. Whatever you learned that isn't in the handoff (or in `plans/NOTES.md`) is lost. Write accordingly.
 Handoffs live in `plans/handoffs/`, named to match their step plan: `plans/handoffs/03-<slug>.md` for `plans/steps/03-<slug>.md`.
 ## Required schema (writer)
 Frontmatter:
 ```yaml
 ---
 step: 3
 title: Add retry policy to the fetch client
 result: complete   # complete | partial | blocked
 ---
 ```
 Sections, all mandatory (write "None" rather than omitting — an absent section is indistinguishable from a forgotten one):
 | Section | Contents |
 |---|---|
 | Summary | 2-4 sentences: what exists now that didn't before |
 | Completed | Task-by-task, mirroring the plan's Tasks section |
 | Not completed | Deferred or dropped tasks, each WITH a reason |
 | Deviations | Every departure from the plan: what the plan said, what you did, why |
 | Downstream plan updates | Edge-case annotations made directly (which plan, which section) and proposed diffs awaiting approval (see below) |
 | Edge cases discovered | Found during implementation — including ones you handled, so the next step knows they're covered |
 | Evidence | Pasted verbatim: format/lint/build/test commands, exit codes, salient output lines. Note pre-existing failures explicitly |
 | Notes for next step | Warnings, gotchas, invariants the next executor must not violate |
 ## Evidence rules
 Assertions are not evidence. "Tests pass" is a claim; this is evidence:
 ```
 $ cargo test
   ...
 test result: ok. 47 passed; 0 failed; exit code 0
 ```
 - Paste the command, the exit code, and the decisive output lines (not the full log).
 - Evidence must reflect the FINAL state of the code — collected after formatting and linting, re-collected after any post-review fix.
 - If a check was skipped (no formatter configured, etc.), say so explicitly.
 ## Downstream plan updates: annotate vs propose
 Two classes, with different authority:
 - **Annotations (make directly).** Adding an entry to a later plan's Edge cases section. Additive, non-scope-changing. Record each in Downstream plan updates.
 - **Proposals (never apply directly).** Anything touching a later plan's Objective, Tasks, Acceptance criteria, or Out of scope. Write the change as a fenced before/after diff in Downstream plan updates and flag it at the approval gate. The user applies or rejects it.
 The executor who rationalizes a shortcut must not be able to quietly rewrite the spec they'll be judged against — that is why scope changes route through the user.
 ## Rolling notes vs handoff
 - **Handoff**: step-scoped. What happened in THIS step.
 - **`plans/NOTES.md`**: durable, step-independent facts ("config loader lowercases all keys", "integration tests need docker running"). Append; never rewrite others' entries. Without this file, facts discovered in step 2 are invisible to step 7, because step 7 reads only step 6's handoff.
 ## Reading a handoff (start of a step)
 1. Check `result`. `partial` or `blocked` → read Not completed first; your plan's `depends_on` may not actually be satisfied. Escalate rather than build on missing ground.
 2. Trust what has pasted evidence. Re-verify bare assertions before depending on them.
 3. Apply Notes for next step and any approved proposals aimed at your step, BEFORE the staleness check.
 4. Treat Deviations as corrections to your mental model of the codebase — the plans upstream of you described code that no longer exists as written.
 5. Read `plans/NOTES.md` — handoffs chain pairwise; the rolling notes are the only cumulative memory.
 ## Anti-patterns
 - "All tests pass" with nothing pasted — a claim, not a handoff
 - Omitting a section instead of writing "None" — forgotten or empty, the reader can't tell
 - Editing a later plan's Tasks or scope directly instead of proposing a diff
 - Burying a major deviation in prose instead of the Deviations section
 - Durable facts in the handoff only — lost after one more step
 - Evidence collected before the formatter ran — the pasted output describes bytes that no longer exist
 - Writing the handoff before the completion gate (todos done or deferred-with-reason) is satisfied
@@ -0,0 +1,65 @@
 ---
 description: Navigate and curate markdown knowledge bases (plan repos, spec repos, companion docs) with IWE graph tools. Load when the workspace is or contains a markdown knowledge base and the task involves finding, reading, or reorganizing plans, specs, designs, or notes. Activates the iwe MCP server rooted at the current directory.
 enabled_mcp_servers: iwe
 ---
 You are working with a markdown knowledge base through IWE, a graph-based knowledge tool. The `iwe` MCP server is rooted at the current working directory (`--project .`), so the knowledge base is the directory Coyote was launched in. IWE derives structure from links: a link on its own line is an *inclusion link* (parent-child hierarchy); a link inside text is an *inline reference* (cross-reference, produces backlinks). The server watches the filesystem, so external edits are picked up automatically — never ask for a restart.
 ## When to use this (and when not)
 Use IWE tools when the task involves a corpus of markdown documents: plan repositories, spec/design collections, companion docs repos, meeting notes, PKM vaults.
 Do NOT use IWE tools for:
 - **Agent memory** (`.coyote/memory/`, `COYOTE.md`) — use the `memory__*` tools; they own the index conventions there.
 - **Semantic/similarity search over documents** — that is RAG's job. IWE search is fuzzy title/key matching plus structural traversal, not embeddings.
 - **Source code** — IWE only understands markdown.
 If unsure whether the current directory is actually a knowledge base, probe with `iwe_stats` first. Few or zero documents means this skill does not apply; unload it rather than forcing the tools.
 ## Orientation protocol (always start here)
 Never guess document keys. Orient first:
 1. `iwe_stats` — corpus size and shape. Cheap sanity check.
 2. `iwe_find(query="<topic>")` — fuzzy search for entry points. Use `roots` behavior via structural selectors when you want top-level topics only.
 3. `iwe_tree(key="<entry>", max_depth=2)` — see the hierarchy before reading bodies.
 4. `iwe_retrieve(key="<entry>", depth=1, context=1)` — read with structure.
 ## Reading efficiently
 `iwe_retrieve` is the workhorse. Control cost explicitly:
 - `depth` — how many levels of included children to expand. Start at 1-2; increase only if needed.
 - `context` — parent levels to include, so you know where a document sits. `context=1` is usually enough.
 - `max_tokens` — ALWAYS set a budget (e.g. 2000-4000) on large corpora; results report truncation so you can drill further deliberately.
 - `exclude` — pass keys you have already read to avoid re-retrieving known content.
 - `links` / `backlinks` — include outbound/inbound references when tracing how a topic connects.
 Scope searches structurally with selectors on `iwe_find`/`iwe_retrieve`/`iwe_tree`:
 - `in` — only sub-documents of EVERY listed key (AND)
 - `in_any` — sub-documents of at least one key (OR)
 - `not_in` — exclude subtrees (e.g. archives)
 Filter by frontmatter with the YAML query language: `status: draft`, `created: {$gte: "2026-01-01"}`, `tags: {$in: [urgent]}`, `reviewed: {$exists: true}`.
 Use `iwe_squash(key=...)` to flatten a subtree into one linear document — good for producing a full plan readout or summary input.
 ## Writing and refactoring
 Write tools: `iwe_create` (new doc from title + content), `iwe_update` (replace a doc's content), `iwe_delete` (remove + clean up references). Refactor tools: `iwe_rename` (key rename with automatic link updates everywhere), `iwe_extract` (split a section into its own doc, leaving an inclusion link), `iwe_inline` (merge a referenced doc back into its parent), `iwe_normalize` (reformat all docs consistently).
 Rules:
 - **Preview destructive operations**: `iwe_rename`, `iwe_delete`, `iwe_extract`, `iwe_inline`, and `iwe_normalize` support `dry_run` — use it first, show the user what will change, then apply.
 - Never rename or delete by editing files directly; the refactor tools update every referencing document, manual edits break links.
 - When adding a document, link it from an existing parent (inclusion link on its own line) so it joins the hierarchy instead of becoming an orphan.
 - Match the corpus conventions: check an existing document's frontmatter fields before inventing your own schema.
 - Do not run `iwe_normalize` across someone's knowledge base unprompted — it rewrites every file's formatting.
 ## Anti-patterns
 - Retrieving with `depth=5` and no `max_tokens` "to get everything" — you will flood the context. Iterate: shallow first, drill selectively.
 - Calling `iwe_find` repeatedly with rephrased queries when structural navigation (`iwe_tree`, selectors) would locate the document deterministically.
 - Using IWE write tools on `.coyote/memory/` files — wrong tier; that corrupts the memory index.
 - Creating documents without linking them into the hierarchy — orphans are invisible to depth-based retrieval.
@@ -0,0 +1,82 @@
 ---
 description: Author executable high-level plans and per-step implementation plans for phased work. Defines the plan repo layout and step-plan schema. Grants filesystem access for grounding plans in real code.
 enabled_tools: fs_read, fs_grep, fs_glob, fs_ls, fs_cat, fs_write
 ---
 You are writing implementation plans that a DIFFERENT agent will execute later, in a fresh session, with zero access to this conversation. The plan IS the executor's entire context. A plan that needs the conversation to make sense is a broken plan.
 ## Plan repo layout
 Default layout (match the existing layout instead if the repo already has one):
 ```
 plans/
  plan.md            # high-level plan; links each step plan
  steps/01-<slug>.md # one file per step, numbered in execution order
  handoffs/          # written by executors; see `handoff-protocol`
  NOTES.md           # rolling durable facts discovered during execution
 ```
 In `plan.md`, link each step plan with an inclusion link (the link alone on its own line). This makes the plan repo an IWE hierarchy — agents navigating a large plan corpus can load `iwe-knowledge-base` and traverse it structurally instead of globbing.
 ## High-level plan requirements
 - Ordered list of steps. Each step is independently implementable and independently verifiable — it compiles and its tests pass WITHOUT any later step existing.
 - The dependency graph is explicit and acyclic. If step 4 needs step 2's API, step 4's plan says so.
 - Steps are sized for one focused session: roughly 1-5 files of meaningful change. A step that needs "and then also..." is two steps.
 - State what the plan does NOT cover. Scope creep starts where scope boundaries are implicit.
 ## Step plan schema
 Every step plan starts with frontmatter:
 ```yaml
 ---
 step: 3
 title: Add retry policy to the fetch client
 depends_on: [1, 2]
 status: pending   # pending | in-progress | complete
 ---
 ```
 And contains these sections, all mandatory:
 | Section | Contents |
 |---|---|
 | Objective | 1-3 sentences: what exists after this step that didn't before |
 | Context | File paths AND pasted code snippets (5-20 lines) showing the patterns to follow. Not just paths — actual code |
 | Tasks | Ordered, atomic tasks. Each maps to one todo item for the executor |
 | Acceptance criteria | Measurable behaviors. These become the tests |
 | Test commands | Exact commands to run, from the repo root |
 | Edge cases | Known edge cases this step must handle or explicitly punt on |
 | Out of scope | What the executor must NOT touch, even if tempting |
 ## Writing for a context-free executor
 - Paste code snippets from your exploration into Context. "Follow the pattern in foo.rs" forces the executor to re-do exploration you already did.
 - Use repo-relative paths from the project root. Never "the file we discussed."
 - Name symbols exactly: `RetryPolicy::backoff`, not "the backoff logic."
 - If a decision was made in discussion (X over Y), record the decision AND the one-line reason. The executor will face the same fork and must not re-litigate it.
 - Write acceptance criteria as observable behavior ("returns 429 after 3 failed attempts"), not implementation ("uses a for loop"). Criteria that describe implementation produce tautological tests.
 ## Grounding (before the plan is done)
 Plans rot when written from memory. Before finalizing each step plan:
 1. `fs_grep` every symbol the plan references — confirm it exists and is spelled right.
 2. `fs_read` the files listed in Context — confirm the pasted snippets are current.
 3. Confirm the test commands actually exist (check `justfile`, `Makefile`, `package.json` scripts, CI config).
 A plan referencing a function that doesn't exist fails the executor at the worst possible time: mid-implementation.
 ## Edge cases are a first-class section
 For every step, enumerate the edge cases you can foresee: empty inputs, concurrent access, error paths, partial failures, migration/compat concerns. If an edge case belongs to a LATER step, write it in that step's plan now — not in a comment, not in your head. Executors are instructed to propagate newly discovered edge cases downstream; make their diff small by having the section exist.
 ## Anti-patterns
 - "As discussed above" / "per our conversation" — the executor has no conversation
 - File paths without pasted snippets in Context — forces re-exploration
 - Acceptance criteria like "works correctly" — unmeasurable, untestable
 - A step that depends on a later step — cycle; re-order or merge
 - Omitting Out of scope — the executor will helpfully refactor things you didn't ask for
 - Frontmatter without `depends_on` or `status` — breaks status queries and dependency checks
@@ -0,0 +1,83 @@
 ---
 description: Adversarial review of implementation plans against executability, verifiability, and completeness standards. Verdict is OKAY or REJECT with line-referenced complaints. Grants read-only filesystem access for ground-truth checks.
 enabled_tools: fs_read, fs_grep, fs_glob, fs_ls, fs_cat
 ---
 You are reviewing an implementation plan BEFORE any code is written. You are the critic, not a co-author: your job is to find the ways this plan fails an executor who has zero conversation context, not to redesign the approach. A flaw caught here costs one plan edit; the same flaw caught mid-implementation costs a deviation, a handoff note, and possibly rework across steps.
 The plan schema you are checking against is defined in the `plan-authoring` skill — load it alongside this one if it is not already loaded.
 ## Review checklist (in order)
 ### 1. Executability without context
 Read the plan as if you know nothing but what is on the page.
 - Does every referenced decision carry its rationale, or does it assume a conversation you can't see?
 - Does Context contain pasted code snippets, or only file paths (which force re-exploration)?
 - Are symbols named exactly? "The validation logic" is not a name.
 ### 2. Ground truth (verify, don't trust)
 Plans are written from exploration that may be stale or wrong. Spot-check claims against the actual codebase:
 - `fs_grep` for every function, type, and file the plan references. Flag anything that doesn't exist or is spelled differently.
 - `fs_read` 1-2 of the pasted Context snippets at their claimed locations. Flag drift.
 - Check that the Test commands exist (`justfile`, `Makefile`, `package.json`, CI config).
 A plan that references phantom code is an automatic REJECT.
 ### 3. Verifiability
 - Is every acceptance criterion a measurable, observable behavior? "Works correctly" and "is robust" are unmeasurable — flag them.
 - Do the criteria describe behavior rather than implementation? Implementation-shaped criteria produce tautological tests.
 - Can each criterion be checked by the listed Test commands, or is there a criterion with no way to verify it?
 ### 4. Dependencies and ordering
 - Is `depends_on` present, acyclic, and complete? If the step uses an API introduced in step N, is N listed?
 - Does anything in this step silently assume a LATER step's output? That's a cycle the frontmatter hides.
 - Is the step independently verifiable — will it build and pass tests without later steps existing?
 ### 5. Scope and sizing
 - Is Out of scope present and specific? Absent scope boundaries invite helpful refactoring.
 - Is the step sized for one focused session (~1-5 files of meaningful change)? Flag steps hiding an "and then also".
 - Do two steps touch the same code region without an ordering constraint between them?
 ### 6. Edge cases
 - Is the Edge cases section present and non-empty (or explicitly "none foreseen — <reason>")?
 - Think adversarially for 60 seconds: empty inputs, concurrency, error paths, partial failure, compat. Anything obvious the plan misses?
 - If this step creates a new surface (API, config, schema), do DOWNSTREAM step plans account for it where they must?
 ## Verdict format
 End with exactly one of:
 ```
 PLAN_REVIEW: OKAY
 <optional: 1-3 non-blocking observations>
 ```
 ```
 PLAN_REVIEW: REJECT
 Complaints:
 1. <file>:<line or section> — <what is wrong> — <what would fix it>
 2. ...
 ```
 Every complaint must be actionable and point at a specific location. "The plan could be clearer" is noise; "steps/03-retry.md, Acceptance criteria #2 — 'handles errors gracefully' is unmeasurable — specify the expected behavior per error class" is signal.
 ## Scope discipline
 - Review THE PLAN, not the design. If the approach is defensible, do not relitigate it because you'd have chosen differently. Flag design only when it is factually broken (races, missing dependency, contradicts the codebase).
 - Do not rewrite the plan yourself. Complaints, not patches — the author owns the fix.
 - Three strong complaints beat fifteen weak ones. If you have fifteen, the plan needs a rewrite, not a list: say so.
 ## Anti-patterns
 - Approving without running a single ground-truth check — a syntax review, not a plan review
 - REJECT for style or phrasing while missing a phantom-symbol reference
 - Redesigning the author's approach in your complaints
 - Vague complaints with no location and no fix direction
 - Rubber-stamping a step with no acceptance criteria because "the tasks look reasonable"
@@ -0,0 +1,85 @@
 ---
 description: End-to-end protocol for executing one step of a phased implementation plan - orient, staleness check, checklist, implement, edge-case sweep, verify, review, handoff, approval. Grants shell access for build/test commands.
 enabled_tools: execute_command
 ---
 You are executing ONE step of a phased implementation plan. Previous steps were executed in sessions you cannot see; later steps depend on what you do and document. The protocol below is ordered — do not skip phases, do not reorder them.
 Companion skills: load `handoff-protocol` before Phase 1 (you must READ a handoff correctly) and keep it loaded for Phase 8 (you must WRITE one). Load `verification-gates` for Phase 6. The plan schema is defined in `plan-authoring`.
 ## Phase 1 - Orient
 1. Read the previous step's handoff (`plans/handoffs/`, highest step number below yours). If none exists, you are step 1.
 2. Read the current step plan (`plans/steps/`). Note its `depends_on` — confirm those steps' handoffs exist and report `result: complete`. If a dependency's handoff is `partial`, `blocked`, or missing, STOP and escalate via `user__ask`.
 3. Read `plans/NOTES.md` for durable facts discovered by earlier steps.
 4. Apply anything the previous handoff directed at your step (approved plan updates, warnings).
 5. Set the plan's frontmatter `status: in-progress`.
 ## Phase 2 - Staleness check (BEFORE any edit)
 The plan was written before steps 1..N-1 changed the codebase. Verify its assumptions still hold:
 - Grep the symbols the plan references — do they still exist, with the claimed signatures?
 - Read the plan's Context snippets at their claimed locations — has the code drifted?
 - Confirm the Test commands still work.
 Discrepancies are deviations — handle them via Phase 5's protocol BEFORE implementing. Executing a stale plan literally is the primary failure mode of phased work.
 ## Phase 3 - Checklist
 `todo__init` with the step objective, then one `todo__add` per task in the plan's Tasks section, in order. Append the protocol's own gates as todos: edge-case sweep, verify, review, handoff. Mark items done with `todo__done` as you go — never batch. The checklist is what survives context compression; keep it truthful.
 When you spawn an agent whose session you may need to resume, embed its session_id in the corresponding todo item text (`"Implement task 3 (coder ses_abc123)"`). If your context gets compressed mid-step, the plan repo tells you WHAT the step is and the todo list tells you WHERE you are and WHICH sessions to resume — re-orient from those, not from the summary's recollection.
 ## Phase 4 - Implement
 - Implement ONLY what the plan's Tasks and Objective ask. Out of scope means out of scope.
 - Follow the patterns pasted in the plan's Context. When plan and current codebase disagree, the codebase wins — record the deviation.
 - Write tests from the plan's Acceptance criteria, not from your implementation. Criteria-first tests catch what tautological tests cannot.
 - While in the code, note (do not fix) anything the planning exploration missed — feed it to Phase 5.
 ## Phase 5 - Edge-case sweep and deviations
 **Edge cases.** For each edge case you discovered: if it belongs to THIS step, handle it (or punt explicitly in the handoff with a reason). If it belongs to a LATER step, check that step's plan — if the plan already covers it, done; if not, add it to that plan's Edge cases section and record the addition in your handoff.
 **Deviations.** Classify each:
 | Class | Definition | Action |
 |---|---|---|
 | Minor | Same objective and scope, mechanics differ (renamed symbol, moved file, extra helper) | Resolve it, document in handoff |
 | Major | Changes scope, approach, interfaces, or invalidates a later step's assumptions | Do NOT silently proceed. Either escalate via `user__ask`, or write a proposed downstream-plan diff into the handoff per `handoff-protocol` |
 Never rewrite a later step's Objective, Tasks, or Out of scope directly — edge-case annotations are the only direct downstream edit you may make.
 ## Phase 6 - Verify (order matters)
 1. Formatter (if configured) — format BEFORE collecting evidence, so evidence reflects final code.
 2. Linter (if configured) — fix findings your change introduced.
 3. Build/typecheck — exit code 0.
 4. FULL test suite — not just your new tests; regressions in untouched code are your problem if your change caused them.
 Capture commands and exit codes verbatim — they go in the handoff as evidence. Pre-existing failures: note explicitly, don't fix, don't hide. Apply the 3-strike rule: after 3 failed fix attempts, stop, revert to working state, escalate.
 ## Phase 7 - Review
 Self-review the diff with `code-review` + `ai-slop-remover` loaded. For broad steps (5+ files or crossing architectural boundaries), request an independent pass (`code-reviewer` agent) instead. Fix blockers; re-run Phase 6 after any fix.
 ## Phase 8 - Handoff
 Gate: every todo is either done or explicitly deferred with a reason. No silent drops.
 Write the handoff per `handoff-protocol` — schema, pasted evidence, deviations, downstream updates, notes for the next step. Append durable, step-independent facts to `plans/NOTES.md`. Set the plan's frontmatter `status: complete`.
 ## Phase 9 - User approval
 Present: what was done, deviations, downstream plan changes (made or proposed), evidence summary, handoff location. Then STOP — do not begin the next step. If the user requests changes, address them, re-run Phase 6, update the handoff, and present again.
 ## Anti-patterns
 - Editing code before the staleness check — the primary source of mid-step surprises
 - Implementing "while I'm here" improvements outside the plan's scope
 - Tests derived from the implementation instead of the acceptance criteria
 - Collecting build/test evidence BEFORE formatting/linting, then shipping different bytes
 - Running only your new tests and claiming "tests pass"
 - Silently absorbing a major deviation instead of escalating or proposing a plan diff
 - Rewriting downstream plan scope directly instead of proposing per `handoff-protocol`
 - Starting the next step without user approval
@@ -26,9 +26,6 @@ auto_continue: false             # Enable automatic continuation when incomplete
 max_auto_continues: 10           # Maximum number of automatic continuations before stopping
 inject_todo_instructions: true   # Inject the default todo tool usage instructions into the agent's system prompt
 continuation_prompt: null        # Custom prompt used when auto-continuing (optional; uses default if null)
 inject_skill_instructions: true  # Inject a short hint pointing the model at `skill__list` when skills are enabled
                                 # (default: true). Suppressed automatically when no skills are available.
 skill_instructions: null         # Custom text for the skill hint (optional; uses built-in default if null)
 # Sub-Agent Spawning System
 # Enable this agent to spawn and manage child agents in parallel.
 # See https://github.com/Dark-Alex-17/coyote/wiki/Agents for detailed documentation.
@@ -51,6 +48,12 @@ enabled_skills:                  # Optional list of skills available when this a
                                 # Must be a subset of global `visible_skills`. Omit to inherit the global default.
  - git-master
  - ai-slop-remover
 inject_skill_instructions: true  # Inject a short hint pointing the model at `skill__list` when skills are enabled
                                 # (default: true). Suppressed automatically when no skills are available.
 skill_instructions: null         # Custom text for the skill hint (optional; uses built-in default if null)
 memory: null                     # Per-agent memory override (default: inherit). Set to `false` to disable memory
                                 # for this agent regardless of workspace/global presence. See the Memory wiki page.
 dynamic_instructions: false      # Whether to use dynamic instructions for the agent; if false, static instructions are used
 instructions: |                  # Static instructions for the agent; ignored if dynamic instructions are used
  You are an AI agent designed to demonstrate agent capabilities.
@@ -91,6 +91,7 @@ enabled_tools: null              # Which tools to enable by default.
                                 # Example (comma-separated form):
                                 #   enabled_tools: fs,web_search_coyote
 visible_tools:                   # Which tools are visible to be compiled (and are thus able to be defined in 'enabled_tools')
 #  - ast_grep.sh
 #  - demo_py.py
 #  - demo_sh.sh
 #  - demo_ts.ts
@@ -137,21 +138,25 @@ enabled_mcp_servers: null        # Which MCP servers to enable by default.
 # ---- Skills ----
 # Skills are modular knowledge or capability packs the LLM can load and unload mid-conversation.
 # See the [Skills documentation](https://github.com/Dark-Alex-17/coyote/wiki/Skills) for more details.
-skills_enabled: true             # Master switch. Set to false to hide all skill management tools from the model.
+skills_enabled: true              # Master switch. Set to false to hide all skill management tools from the model.
-                                 # Skills also require `function_calling_support: true` above to work at all.
+                                  # Skills also require `function_calling_support: true` above to work at all.
-visible_skills:                  # The universe of skills allowed to be enabled in any context. Omit (null) for "all installed".
+visible_skills:                   # The universe of skills allowed to be enabled in any context. Omit (null) for "all installed".
  - ai-slop-remover
  - code-review
  - frontend-ui-ux
  - git-master
-enabled_skills: null             # Which skills are available by default (no role/agent/session active). null = all visible.
+enabled_skills: null              # Which skills are available by default (no role/agent/session active). null = all visible.
-                                 # Accepts either a YAML list or a comma-separated string.
+                                  # Accepts either a YAML list or a comma-separated string.
-                                 # Example (list form):
+                                  # Example (list form):
-                                 #   enabled_skills:
+                                  #   enabled_skills:
-                                 #     - git-master
+                                  #     - git-master
-                                 #     - ai-slop-remover
+                                  #     - ai-slop-remover
-                                 # Example (comma-separated form):
+                                  # Example (comma-separated form):
-                                 #   enabled_skills: git-master,ai-slop-remover
+                                  #   enabled_skills: git-master,ai-slop-remover
 inject_skill_instructions: true   # Inject a short hint pointing the model at `skill__list` when skills are enabled in
                                  # this context. Only injected if `function_calling_support`, `skills_enabled`, and the
                                  # effective enabled skill set is non-empty (default: true).
 skill_instructions: null          # Custom text used for the skill hint when injected. If null, uses built-in default.
 # ---- Auto-Continue (Todo System) ----
 # The auto-continue system provides built-in task tracking for improved reliability.
@@ -162,10 +167,6 @@ auto_continue: false              # Enable automatic continuation when incomplet
 max_auto_continues: 10            # Maximum number of automatic continuations before stopping (default: 10)
 inject_todo_instructions: true    # Inject default todo usage instructions into the system prompt (default: true)
 continuation_prompt: null         # Custom prompt used when auto-continuing. If null, uses built-in default
 inject_skill_instructions: true   # Inject a short hint pointing the model at `skill__list` when skills are enabled in
                                  # this context. Only injected if `function_calling_support`, `skills_enabled`, and the
                                  # effective enabled skill set is non-empty (default: true).
 skill_instructions: null          # Custom text used for the skill hint when injected. If null, uses built-in default.
 # ---- Session ----
 # See the [Session documentation](https://github.com/Dark-Alex-17/coyote/wiki/Sessions) for more information
@@ -176,6 +177,19 @@ summarization_prompt: >          # The text prompt used for creating a concise s
 summary_context_prompt: >        # The text prompt used for including the summary of the entire session as context to the model
  'This is a summary of the chat history as a recap: '
 # ---- Memory ----
 # See the [Memory documentation](https://github.com/Dark-Alex-17/coyote/wiki/Memory) for more information.
 # Memory is opt-in by workspace presence (a `COYOTE.md` or `.coyote/memory/MEMORY.md`)
 # and global presence (`<config_dir>/memory/MEMORY.md`). Set `memory: false` to disable
 # even when memory files exist. The cascade is: agent > session > role > app.
 # Bootstrap with `coyote --init-memory [global|workspace]` to create the marker file
 # the LLM needs before it will write any memory.
 memory: null                     # null = enabled when memory exists on disk; true = force on; false = force off
 memory_cap_with_tools: null      # Char cap for injected memory when function calling is available (default: 6000).
                                 # Only MEMORY.md indexes are injected; the LLM uses memory__read to fetch drill files.
 memory_cap_without_tools: null   # Char cap when function calling is unavailable (default: 12000).
                                 # Indexes plus drill file bodies are injected up to this cap.
 # ---- RAG ----
 # See the [RAG Docs](https://github.com/Dark-Alex-17/coyote/wiki/RAG) for more details.
 rag_embedding_model: null        # Specifies the embedding model used for context retrieval
@@ -19,6 +19,12 @@ skills_enabled: true                  # Master switch for skills in this role (d
 enabled_skills:                       # Skills available when this role is active. Accepts a YAML list (preferred)
  - git-master                        # or a comma-separated string (e.g. `enabled_skills: git-master,ai-slop-remover`).
  - ai-slop-remover                   # Must be a subset of global `visible_skills`. Omit to inherit the global default.
 inject_skill_instructions: true       # Inject a short hint pointing the model at `skill__list` when skills are enabled 
                                      # (default: true). Suppressed automatically when no skills are available.
 skill_instructions: null              # Custom text for the skill hint (optional; uses built-in default if null)
 memory: null                          # Per-role memory override (default: inherit). Set to `false` to disable memory
                                      # when this role is active. See the Memory wiki page.
 prompt: null                          # A custom prompt to use for this role that will immediately query
                                      # the model for output instead of using the instructions below
 # Auto-Continue (Todo System)
@@ -30,8 +36,5 @@ auto_continue: false                  # Enable automatic continuation when incom
 max_auto_continues: 10                # Maximum number of automatic continuations before stopping (default: 10)
 inject_todo_instructions: true        # Inject default todo tool usage instructions into the system prompt (default: true)
 continuation_prompt: null             # Custom prompt used when auto-continuing. If null, uses built-in default
 inject_skill_instructions: true       # Inject a short hint pointing the model at `skill__list` when skills are enabled 
                                      # (default: true). Suppressed automatically when no skills are available.
 skill_instructions: null              # Custom text for the skill hint (optional; uses built-in default if null)
 ---
 You are an expert at doing things. This is where you write the instructions for the role.
@@ -329,6 +329,14 @@
 #  - https://docs.anthropic.com/en/api/messages
 - provider: claude
  models:
    - name: claude-fable-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 10
      output_price: 50
      supports_function_calling: true
      supports_vision: true
    - name: claude-opus-4-8
      max_input_tokens: 1000000
      max_output_tokens: 128000
@@ -369,6 +377,14 @@
          thinking:
            type: enabled
            budget_tokens: 16000
    - name: claude-sonnet-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-sonnet-4-6
      max_input_tokens: 200000
      max_output_tokens: 8192
@@ -867,6 +883,14 @@
      max_input_tokens: 1048576
      supports_vision: true
      supports_function_calling: true
    - name: claude-fable-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 10
      output_price: 50
      supports_function_calling: true
      supports_vision: true
    - name: claude-opus-4-8
      max_input_tokens: 1000000
      max_output_tokens: 128000
@@ -906,6 +930,14 @@
          thinking:
            type: enabled
            budget_tokens: 16000
    - name: claude-sonnet-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-sonnet-4-6
      max_input_tokens: 200000
      max_output_tokens: 8192
@@ -1038,6 +1070,14 @@
 #  - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-call.html
 - provider: bedrock
  models:
    - name: us.anthropic.claude-fable-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 10
      output_price: 50
      supports_function_calling: true
      supports_vision: true
    - name: us.anthropic.claude-opus-4-8
      max_input_tokens: 1000000
      max_output_tokens: 128000
@@ -1079,6 +1119,14 @@
            thinking:
              type: enabled
              budget_tokens: 16000
    - name: us.anthropic.claude-sonnet-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: us.anthropic.claude-sonnet-4-6
      max_input_tokens: 200000
      max_output_tokens: 8192
@@ -1729,6 +1777,14 @@
      max_input_tokens: 131072
      input_price: 0.1
      output_price: 0.2
    - name: anthropic/claude-fable-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 10
      output_price: 50
      supports_function_calling: true
      supports_vision: true
    - name: anthropic/claude-opus-4-8
      max_input_tokens: 1000000
      max_output_tokens: 128000
@@ -1753,6 +1809,14 @@
      output_price: 25
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-sonnet-5
      max_input_tokens: 1000000
      max_output_tokens: 128000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-sonnet-4.6
      max_input_tokens: 200000
      max_output_tokens: 8192
@@ -39,7 +39,7 @@ switch ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture) {
 if (-not $BinDir) {
  if ($isWin) { $BinDir = Join-Path $env:LOCALAPPDATA 'coyote\bin' }
-  else { $home = $env:HOME; if (-not $home) { $home = (Get-Item -Path ~).FullName }; $BinDir = Join-Path $home '.local/bin' }
+  else { $userHome = $env:HOME; if (-not $userHome) { $userHome = (Get-Item -Path ~).FullName }; $BinDir = Join-Path $userHome '.local/bin' }
 }
 New-Item -ItemType Directory -Force -Path $BinDir | Out-Null
@@ -95,13 +95,13 @@ if ($asset.name -match '\.zip$') {
  [System.IO.Compression.ZipFile]::ExtractToDirectory($archive, $extractDir)
 } elseif ($asset.name -match '\.tar\.gz$' -or $asset.name -match '\.tgz$') {
  $tar = Get-Command tar -ErrorAction SilentlyContinue
-  if ($tar) { & $tar.FullName -xzf $archive -C $extractDir }
+  if ($tar) { & $tar.Source -xzf $archive -C $extractDir }
  else { Fail "Asset is tar archive but 'tar' is not available." }
 } else {
  try { Add-Type -AssemblyName System.IO.Compression.FileSystem; [System.IO.Compression.ZipFile]::ExtractToDirectory($archive, $extractDir) }
  catch {
    $tar = Get-Command tar -ErrorAction SilentlyContinue
-    if ($tar) { & $tar.FullName -xf $archive -C $extractDir } else { Fail "Unknown archive format; neither zip nor tar workable." }
+    if ($tar) { & $tar.Source -xf $archive -C $extractDir } else { Fail "Unknown archive format; neither zip nor tar workable." }
  }
 }
@@ -133,30 +133,21 @@ else
  echo "Error: unsupported OS for this installer: $OS" >&2; exit 1
 fi
 DL_URLS=$(grep -oE '"browser_download_url":[[:space:]]*"[^"]+"' "$JSON" \
  | sed -E 's/.*"browser_download_url":[[:space:]]*"//; s/"$//' \
  || true)
 ASSET_NAME=""; ASSET_URL=""
 for candidate in "${ASSET_CANDIDATES[@]}"; do
-  NAME=$(grep -oE '"name":\s*"[^"]+"' "$JSON" | sed 's/"name":\s*"//; s/"$//' | grep -Fx "$candidate" || true)
+  while IFS= read -r url; do
-  if [[ -n "$NAME" ]]; then
+    [[ -z "$url" ]] && continue
-    ASSET_NAME="$NAME"
+    if [[ "$url" == */"$candidate" ]]; then
-    ASSET_URL=$(awk -v pat="$NAME" '
+      ASSET_NAME="$candidate"
-      BEGIN{ FS=":"; want=0 }
+      ASSET_URL="$url"
-      /"name"/ {
+      break
-        line=$0;
+    fi
-        gsub(/^\s+|\s+$/,"",line);
+  done <<< "$DL_URLS"
-        gsub(/"name"\s*:\s*"|"/ ,"", line);
+  [[ -n "$ASSET_URL" ]] && break
        want = (line==pat) ? 1 : 0;
        next
      }
      want==1 && /"browser_download_url"/ {
        u=$0;
        gsub(/^\s+|\s+$/,"",u);
        gsub(/.*"browser_download_url"\s*:\s*"|".*/ ,"", u);
        print u;
        exit
      }
    ' "$JSON")
    if [[ -n "$ASSET_URL" ]]; then break; fi
  fi
 done
 if [[ -z "$ASSET_URL" ]]; then
@@ -5,9 +5,9 @@ use crate::utils::list_file_names;
 use crate::vault::Vault;
 use clap_complete::{CompletionCandidate, Shell, generate};
 use clap_complete_nushell::Nushell;
 use std::env;
 use std::ffi::OsStr;
 use std::io;
 use std::{env, fs};
 const COYOTE_CLI_NAME: &str = "coyote";
@@ -134,6 +134,34 @@ pub(super) fn session_completer(current: &OsStr) -> Vec<CompletionCandidate> {
        .collect()
 }
 /// Shell completer that offers MCP server names from the MCP config file.
 ///
 /// Only entries of the top-level `mcpServers` object whose `type` is `"http"`
 /// or `"sse"` and whose name starts with `current` are returned. If the file
 /// cannot be read, is not valid JSON, or has no `mcpServers` object, no
 /// candidates are produced.
 pub(super) fn mcp_server_completer(current: &OsStr) -> Vec<CompletionCandidate> {
    let prefix = current.to_string_lossy();
    let Ok(raw) = fs::read_to_string(paths::mcp_config_file()) else {
        return vec![];
    };
    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&raw) else {
        return vec![];
    };
    let Some(servers) = parsed.get("mcpServers").and_then(|v| v.as_object()) else {
        return vec![];
    };
    let mut candidates = Vec::new();
    for (name, spec) in servers.iter() {
        // Entries without a string `type` field are treated as non-matching.
        let type_matches = matches!(
            spec.get("type").and_then(|t| t.as_str()),
            Some("http" | "sse")
        );
        if type_matches && name.starts_with(&*prefix) {
            candidates.push(CompletionCandidate::new(name));
        }
    }
    candidates
 }
 pub(super) fn secrets_completer(current: &OsStr) -> Vec<CompletionCandidate> {
    let cur = current.to_string_lossy();
    match load_app_config_for_completion() {
@@ -1,12 +1,12 @@
 mod completer;
 use crate::cli::completer::{
-    ShellCompletion, agent_completer, macro_completer, model_completer, rag_completer,
+    ShellCompletion, agent_completer, macro_completer, mcp_server_completer, model_completer,
-    role_completer, secrets_completer, session_completer,
+    rag_completer, role_completer, secrets_completer, session_completer,
 };
-use crate::config::{AssetCategory, InstallFilter};
+use crate::config::{AssetCategory, InstallFilter, MemoryScope};
 use anyhow::{Context, Result};
-use clap::ValueHint;
+use clap::{ArgGroup, ValueHint};
 use clap::{Parser, crate_authors, crate_description, crate_version};
 use clap_complete::ArgValueCompleter;
 use is_terminal::IsTerminal;
@@ -27,7 +27,20 @@ use std::io::{Read, stdin};
 {usage-heading} {usage}
 {all-args}{after-help}
-"
+",
 	group(
 		ArgGroup::new("sbx-mode")
 			.args(["sandbox", "fresh", "no_mixins"])
 			.multiple(true)
 			.conflicts_with_all([
 				"model", "prompt", "role", "session", "agent", "rag", "rebuild_rag",
 				"macro_name", "execute", "code", "file", "no_stream", "no_memory",
 				"init_memory", "dry_run", "info", "build_tools", "install",
 				"install_from", "sync_models", "list_models", "list_roles",
 				"list_sessions", "list_agents", "list_rags", "list_macros",
 				"list_skills", "skill", "tail_logs", "completions", "update",
 			])
 	),
 )]
 pub struct Cli {
    /// Select a LLM model
@@ -75,6 +88,12 @@ pub struct Cli {
    /// Turn off stream mode
    #[arg(short = 'S', long)]
    pub no_stream: bool,
    /// Disable memory for this invocation
    #[arg(long)]
    pub no_memory: bool,
    /// Bootstrap a memory marker so coyote begins loading memory next run
    #[arg(long, value_name = "SCOPE", value_enum)]
    pub init_memory: Option<MemoryScope>,
    /// Display the message without sending it
    #[arg(long)]
    pub dry_run: bool,
@@ -152,6 +171,9 @@ pub struct Cli {
    /// Authenticate with an LLM provider using OAuth (e.g., --authenticate client_name)
    #[arg(long, exclusive = true, value_name = "CLIENT_NAME")]
    pub authenticate: Option<Option<String>>,
    /// Authenticate with an OAuth-protected remote MCP server (e.g., --auth-mcp server_name)
    #[arg(long, exclusive = true, value_name = "SERVER_NAME", add = ArgValueCompleter::new(mcp_server_completer))]
    pub auth_mcp: Option<String>,
    /// Generate static shell completion scripts
    #[arg(long, value_name = "SHELL", value_enum)]
    pub completions: Option<ShellCompletion>,
@@ -161,6 +183,15 @@ pub struct Cli {
    /// With --update, update even if Coyote was installed via a package manager
    #[arg(long, requires = "update")]
    pub force: bool,
    /// Launch Coyote inside a Docker sandbox (via `sbx`); name defaults to current directory basename
    #[arg(long, value_name = "NAME")]
    pub sandbox: Option<Option<String>>,
    /// Create the sandbox without bootstrapping the host config or vault password file
    #[arg(long, requires = "sandbox")]
    pub fresh: bool,
    /// Skip discovery and application of all sbx mixins (user and built-in)
    #[arg(long, requires = "sandbox")]
    pub no_mixins: bool,
 }
 impl Cli {
@@ -489,4 +520,59 @@ mod tests {
    fn parse_force_without_update_fails() {
        assert!(Cli::try_parse_from(["coyote", "--force"]).is_err());
    }
    // `--sandbox` with no value parses to `Some(None)`.
    #[test]
    fn parse_sandbox_flag_no_value() {
        let cli = parse(&["--sandbox"]);
        assert_eq!(cli.sandbox, Some(None));
    }
    // `--sandbox <NAME>` carries the explicit sandbox name.
    #[test]
    fn parse_sandbox_flag_with_name() {
        let cli = parse(&["--sandbox", "my-box"]);
        assert_eq!(cli.sandbox, Some(Some("my-box".to_string())));
    }
    // Combining `--sandbox` with `--agent` must be rejected by the parser.
    #[test]
    fn parse_sandbox_is_exclusive() {
        assert!(Cli::try_parse_from(["coyote", "--sandbox", "--agent", "foo"]).is_err());
    }
    // `--fresh` on its own is a parse error.
    #[test]
    fn parse_fresh_flag_requires_sandbox() {
        assert!(Cli::try_parse_from(["coyote", "--fresh"]).is_err());
    }
    // `--fresh` directly after a value-less `--sandbox` is parsed as a flag, not as the sandbox name.
    #[test]
    fn parse_fresh_flag_with_sandbox() {
        let cli = parse(&["--sandbox", "--fresh"]);
        assert_eq!(cli.sandbox, Some(None));
        assert!(cli.fresh);
    }
    // A named sandbox followed by `--fresh` keeps both the name and the flag.
    #[test]
    fn parse_fresh_flag_with_named_sandbox() {
        let cli = parse(&["--sandbox", "foo", "--fresh"]);
        assert_eq!(cli.sandbox, Some(Some("foo".to_string())));
        assert!(cli.fresh);
    }
    // `--no-mixins` on its own is a parse error.
    #[test]
    fn parse_no_mixins_requires_sandbox() {
        assert!(Cli::try_parse_from(["coyote", "--no-mixins"]).is_err());
    }
    // `--no-mixins` is accepted together with `--sandbox`.
    #[test]
    fn parse_no_mixins_with_sandbox() {
        let cli = parse(&["--sandbox", "--no-mixins"]);
        assert!(cli.no_mixins);
    }
    // All three sandbox-related options can be supplied together.
    #[test]
    fn parse_sandbox_with_fresh_and_no_mixins() {
        let cli = parse(&["--sandbox", "foo", "--fresh", "--no-mixins"]);
        assert_eq!(cli.sandbox, Some(Some("foo".to_string())));
        assert!(cli.fresh);
        assert!(cli.no_mixins);
    }
 }
@@ -18,7 +18,7 @@ pub struct AzureOpenAIConfig {
 impl AzureOpenAIClient {
    config_get_fn!(api_base, get_api_base);
    config_get_fn!(api_key, get_api_key);
-    
+
    create_client_config!([
        (
            "api_base",
@@ -133,6 +133,13 @@ impl MessageContent {
        }
    }
    /// Borrows the inner string when this content is the `Text` variant;
    /// every other variant yields `None`.
    pub fn as_text(&self) -> Option<&str> {
        if let MessageContent::Text(body) = self {
            Some(body.as_str())
        } else {
            None
        }
    }
    pub fn merge_prompt(&mut self, replace_fn: impl Fn(&str) -> String) {
        match self {
            MessageContent::Text(text) => *text = replace_fn(text),
@@ -53,6 +53,10 @@ pub trait OAuthProvider: Send + Sync {
    fn extra_request_headers(&self) -> Vec<(&str, &str)> {
        vec![]
    }
    /// Redirect URI the provider insists on, if any. The default is `None`.
    ///
    /// When this returns `Some`, `run_oauth_flow` uses the value as the
    /// redirect URI and waits for the OAuth callback on it instead of asking
    /// the user to paste the authorization code.
    fn fixed_redirect_uri(&self) -> Option<String> {
        None
    }
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -72,14 +76,16 @@ pub async fn run_oauth_flow(provider: &dyn OAuthProvider, client_name: &str) ->
    let state = Uuid::new_v4().to_string();
-    let redirect_uri = if provider.uses_localhost_redirect() {
+    let (redirect_uri, use_callback_listener) = if let Some(fixed) = provider.fixed_redirect_uri() {
        (fixed, true)
    } else if provider.uses_localhost_redirect() {
        let listener = TcpListener::bind("127.0.0.1:0")?;
        let port = listener.local_addr()?.port();
        let uri = format!("http://127.0.0.1:{port}/callback");
        drop(listener);
-        uri
+        (uri, true)
    } else {
-        provider.redirect_uri().to_string()
+        (provider.redirect_uri().to_string(), false)
    };
    let encoded_scopes = urlencoding::encode(provider.scopes());
@@ -112,7 +118,7 @@ pub async fn run_oauth_flow(provider: &dyn OAuthProvider, client_name: &str) ->
    let _ = open::that(&authorize_url);
-    let (code, returned_state) = if provider.uses_localhost_redirect() {
+    let (code, returned_state) = if use_callback_listener {
        listen_for_oauth_callback(&redirect_uri)?
    } else {
        let input = Text::new("Paste the authorization code:").prompt()?;
@@ -119,7 +119,11 @@ fn prepare_chat_completions(
            format!("{base_url}/google/models/{model_name}:{func}")
        }
        ModelCategory::Claude => {
-            format!("{base_url}/anthropic/models/{model_name}:streamRawPredict")
+            let func = match data.stream {
                true => "streamRawPredict",
                false => "rawPredict",
            };
            format!("{base_url}/anthropic/models/{model_name}:{func}")
        }
        ModelCategory::Mistral => {
            let func = match data.stream {
@@ -2,6 +2,7 @@ use super::*;
 use crate::{
    client::Model,
    config::memory,
    function::{Functions, run_llm_function},
 };
@@ -19,7 +20,7 @@ use fancy_regex::Captures;
 use inquire::{Text, validator::Validation};
 use rust_embed::Embed;
 use serde::{Deserialize, Serialize};
-use std::{ffi::OsStr, path::Path};
+use std::{env, ffi::OsStr, path::Path};
 const DEFAULT_AGENT_NAME: &str = "rag";
@@ -214,6 +215,20 @@ impl Agent {
            functions.append_skill_functions();
        }
        if app.function_calling_support
            && !matches!(agent_config.memory, Some(false))
            && !matches!(app.memory, Some(false))
        {
            let memory_exists = paths::global_memory_index_path().exists()
                || env::current_dir()
                    .ok()
                    .and_then(|cwd| memory::discover_workspace_memory(&cwd))
                    .is_some();
            if memory_exists {
                functions.append_memory_functions();
            }
        }
        agent_config.replace_tools_placeholder(&functions);
        Ok(Self {
@@ -352,6 +367,10 @@ impl Agent {
        self.config.enabled_skills.as_deref()
    }
    /// Returns the `memory` value stored in this agent's config
    /// (`None` when the config does not set it).
    pub fn memory(&self) -> Option<bool> {
        self.config.memory
    }
    pub fn set_skills_enabled(&mut self, value: Option<bool>) {
        self.config.skills_enabled = value;
    }
@@ -638,6 +657,8 @@ pub struct AgentConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub skill_instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub memory: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub compression_threshold: Option<usize>,
    #[serde(default)]
    pub description: String,
@@ -64,6 +64,10 @@ pub struct AppConfig {
    pub summarization_prompt: Option<String>,
    pub summary_context_prompt: Option<String>,
    pub memory: Option<bool>,
    pub memory_cap_with_tools: Option<usize>,
    pub memory_cap_without_tools: Option<usize>,
    pub rag_embedding_model: Option<String>,
    pub rag_reranker_model: Option<String>,
    pub rag_top_k: usize,
@@ -132,6 +136,10 @@ impl Default for AppConfig {
            summarization_prompt: None,
            summary_context_prompt: None,
            memory: None,
            memory_cap_with_tools: None,
            memory_cap_without_tools: None,
            rag_embedding_model: None,
            rag_reranker_model: None,
            rag_top_k: 5,
@@ -201,6 +209,10 @@ impl AppConfig {
            summarization_prompt: config.summarization_prompt,
            summary_context_prompt: config.summary_context_prompt,
            memory: config.memory,
            memory_cap_with_tools: config.memory_cap_with_tools,
            memory_cap_without_tools: config.memory_cap_without_tools,
            rag_embedding_model: config.rag_embedding_model,
            rag_reranker_model: config.rag_reranker_model,
            rag_top_k: config.rag_top_k,
@@ -262,10 +274,25 @@ impl AppConfig {
    pub fn vault_password_file(&self) -> PathBuf {
        match &self.vault_password_file {
-            Some(path) => match path.exists() {
+            Some(path) => {
-                true => path.clone(),
+                if path.exists() {
-                false => gman::config::Config::local_provider_password_file(),
+                    return path.clone();
-            },
+                }
                if let Some(translated) = paths::translate_sandboxed_home_path(path)
                    && translated.exists()
                {
                    info!(
                        "vault_password_file '{}' not found; resolved to sandboxed path '{}'",
                        path.display(),
                        translated.display()
                    );
                    return translated;
                }
                gman::config::Config::local_provider_password_file()
            }
            None => gman::config::Config::local_provider_password_file(),
        }
    }
@@ -1,4 +1,6 @@
-use crate::mcp::{ConnectedServer, JsonField, McpServer, McpTransportType, spawn_mcp_server};
+use crate::mcp::{
    ConnectedServer, JsonField, McpServer, McpTransportType, oauth, spawn_mcp_server,
 };
 use anyhow::Result;
 use parking_lot::Mutex;
@@ -99,7 +101,12 @@ impl McpFactory {
            return Ok(existing);
        }
-        let handle = spawn_mcp_server(spec, log_path).await?;
+        let bearer_token = if spec.is_remote() {
            oauth::load_valid_mcp_token(name)
        } else {
            None
        };
        let handle = spawn_mcp_server(spec, log_path, bearer_token).await?;
        self.insert_active(key, &handle);
        Ok(handle)
    }
@@ -125,6 +132,7 @@ mod tests {
            cwd: None,
            url: None,
            headers: None,
            oauth_client_id: None,
        }
    }
@@ -141,6 +149,7 @@ mod tests {
            cwd: None,
            url: Some(url.to_string()),
            headers,
            oauth_client_id: None,
        }
    }
@@ -0,0 +1,742 @@
 use std::fs;
 use std::path::{Path, PathBuf};
 use anyhow::{Context, Result};
 use log::warn;
 use serde::{Deserialize, Serialize};
 use crate::config::{
    GIT_DIR_NAME, GITIGNORE_FILE_NAME, MEMORY_DIR_NAME, MEMORY_INDEX_FILE_NAME,
    WORKSPACE_MEMORY_DIR_NAME, WORKSPACE_MEMORY_FILE_NAME, paths,
 };
 /// Default memory cap when function calling is enabled.
 /// NOTE(review): presumably a character budget fed to `build_memory_section` via the
 /// `memory_cap_with_tools` setting — confirm at the call site.
 pub const DEFAULT_MEMORY_CAP_WITH_TOOLS: usize = 6_000;
 /// Default memory cap when function calling is disabled.
 /// NOTE(review): presumably a character budget fed to `build_memory_section` via the
 /// `memory_cap_without_tools` setting — confirm at the call site.
 pub const DEFAULT_MEMORY_CAP_WITHOUT_TOOLS: usize = 12_000;
 /// Workspace-level memory layout located by [`discover_workspace_memory`].
 #[derive(Debug, Clone)]
 pub enum WorkspaceMemory {
    /// A `WORKSPACE_MEMORY_DIR_NAME/MEMORY_DIR_NAME` directory that contains an index file.
    Structured {
        /// Ancestor directory under which the memory directory was found.
        workspace_root: PathBuf,
        /// The memory directory itself.
        dir: PathBuf,
    },
    /// A single `WORKSPACE_MEMORY_FILE_NAME` file sitting directly in the workspace root.
    Lite {
        /// Ancestor directory in which the memory file was found.
        workspace_root: PathBuf,
        /// Path of the memory file.
        file: PathBuf,
    },
 }
 /// Searches `start` and each of its ancestors, nearest first, for workspace memory.
 ///
 /// Within one directory the structured layout (a memory directory holding an index file)
 /// wins over the single-file layout. Returns `None` when no ancestor has either.
 pub fn discover_workspace_memory(start: &Path) -> Option<WorkspaceMemory> {
    start.ancestors().find_map(|candidate_root| {
        let memory_dir = candidate_root
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        if memory_dir.join(MEMORY_INDEX_FILE_NAME).exists() {
            return Some(WorkspaceMemory::Structured {
                workspace_root: candidate_root.to_path_buf(),
                dir: memory_dir,
            });
        }

        let single_file = candidate_root.join(WORKSPACE_MEMORY_FILE_NAME);
        single_file.exists().then(|| WorkspaceMemory::Lite {
            workspace_root: candidate_root.to_path_buf(),
            file: single_file,
        })
    })
 }
 /// Returns the nearest directory at or above `start` that contains a `GIT_DIR_NAME` entry.
 ///
 /// The entry may be a directory or a file; only its existence is checked.
 pub fn find_git_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|candidate| candidate.join(GIT_DIR_NAME).exists())
        .map(Path::to_path_buf)
 }
 /// Creates the structured workspace memory layout under `git_root`.
 ///
 /// Ensures the memory directory exists, writes a starter index file unless one is already
 /// present, makes sure the memory directory is listed in `.gitignore`, and logs what was done.
 /// Returns the memory directory path.
 ///
 /// # Errors
 /// Fails when the directory, the index file, or `.gitignore` cannot be written.
 pub fn bootstrap_workspace_memory(git_root: &Path) -> Result<PathBuf> {
    let memory_dir = paths::workspace_memory_dir_for(git_root);
    fs::create_dir_all(&memory_dir)
        .with_context(|| format!("create memory dir {}", memory_dir.display()))?;

    // An existing index is left untouched so user content is never clobbered.
    let index_file = memory_dir.join(MEMORY_INDEX_FILE_NAME);
    if !index_file.exists() {
        fs::write(&index_file, "# Workspace Memory Index\n\n")
            .with_context(|| format!("write {}", index_file.display()))?;
    }

    let gitignore_note = match append_gitignore_entry(git_root)? {
        true => " (appended .coyote/memory/ to .gitignore)",
        false => "",
    };
    warn!(
        "auto-bootstrapped workspace memory at {}{}",
        memory_dir.display(),
        gitignore_note
    );
    Ok(memory_dir)
 }
 fn append_gitignore_entry(git_root: &Path) -> Result<bool> {
    let gitignore = git_root.join(GITIGNORE_FILE_NAME);
    let entry = format!("{WORKSPACE_MEMORY_DIR_NAME}/{MEMORY_DIR_NAME}/");
    let entry_no_slash = format!("{WORKSPACE_MEMORY_DIR_NAME}/{MEMORY_DIR_NAME}");
    let existing = fs::read_to_string(&gitignore).unwrap_or_default();
    let already_present = existing.lines().any(|line| {
        let trimmed = line.trim();
        trimmed == entry || trimmed == entry_no_slash
    });
    if already_present {
        return Ok(false);
    }
    let new_content = if existing.is_empty() {
        format!("{entry}\n")
    } else if existing.ends_with('\n') {
        format!("{existing}{entry}\n")
    } else {
        format!("{existing}\n{entry}\n")
    };
    fs::write(&gitignore, new_content).with_context(|| format!("write {}", gitignore.display()))?;
    Ok(true)
 }
 /// YAML frontmatter stored at the top of a memory file.
 ///
 /// Every field falls back to its default when absent from the YAML.
 #[derive(Debug, Clone, Deserialize, Serialize, Default)]
 pub struct MemoryFrontmatter {
    /// Name of the memory entry; empty when the frontmatter omits it.
    #[serde(default)]
    pub name: String,
    /// Optional free-text description.
    #[serde(default)]
    pub description: Option<String>,
    /// Optional entry kind; stored under the YAML key `type`.
    #[serde(default, rename = "type")]
    pub kind: Option<String>,
    // The remaining optional fields are left out of the serialized YAML when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub created: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub updated: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub superseded_by: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expires: Option<String>,
 }
 /// A memory file split into its frontmatter and body.
 #[derive(Debug, Clone)]
 pub struct MemoryFile {
    /// Location the file was loaded from and is saved to.
    pub path: PathBuf,
    /// Parsed frontmatter; all defaults when the file has none.
    pub frontmatter: MemoryFrontmatter,
    /// Text following the frontmatter block.
    pub body: String,
 }
 impl MemoryFile {
    pub fn load(path: &Path) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("read memory file {}", path.display()))?;
        let (frontmatter, body) = parse_frontmatter(&raw)
            .with_context(|| format!("parse frontmatter in {}", path.display()))?;
        Ok(Self {
            path: path.to_path_buf(),
            frontmatter,
            body,
        })
    }
    pub fn save(&self) -> Result<()> {
        if let Some(parent) = self.path.parent() {
            fs::create_dir_all(parent)?;
        }
        let frontmatter_yaml = serde_yaml::to_string(&self.frontmatter)?;
        let content = format!("---\n{}---\n\n{}", frontmatter_yaml, self.body);
        fs::write(&self.path, content)?;
        Ok(())
    }
    pub fn char_len(&self) -> usize {
        self.body.chars().count()
    }
 }
 /// Splits `raw` into YAML frontmatter and the remaining body.
 ///
 /// Frontmatter is recognised only when the first non-blank line is exactly `---` (trailing
 /// whitespace allowed) and a later line is exactly `---` as well. Anything else, such as a
 /// missing or unterminated block or a leading `----` rule, yields default frontmatter and
 /// the input unchanged. Blank lines (LF or CRLF) between the closing delimiter and the body
 /// are dropped.
 ///
 /// # Errors
 /// Fails when a delimited block is present but is not valid YAML for [`MemoryFrontmatter`].
 fn parse_frontmatter(raw: &str) -> Result<(MemoryFrontmatter, String)> {
    let without_frontmatter = || Ok((MemoryFrontmatter::default(), raw.to_string()));

    let Some(after_open) = raw.trim_start().strip_prefix("---") else {
        return without_frontmatter();
    };
    // The opening delimiter must stand alone on its line.
    let Some((open_rest, content)) = after_open.split_once('\n') else {
        return without_frontmatter();
    };
    if !open_rest.trim().is_empty() {
        return without_frontmatter();
    }

    // Walk the lines while tracking the byte offset so the YAML and body can be sliced out
    // of `content` without re-joining lines.
    let mut offset = 0;
    for line in content.split_inclusive('\n') {
        if line.trim_end() == "---" {
            let yaml = &content[..offset];
            let body = content[offset + line.len()..]
                .trim_start_matches(|c: char| c == '\n' || c == '\r')
                .to_string();
            let frontmatter: MemoryFrontmatter =
                serde_yaml::from_str(yaml.trim()).context("parse YAML frontmatter")?;
            return Ok((frontmatter, body));
        }
        offset += line.len();
    }
    without_frontmatter()
 }
 /// Locations of the global memory directory and, when one was found, the workspace memory.
 #[derive(Debug, Clone)]
 pub struct MemoryStore {
    /// Directory holding the global index and memory files; it may not exist.
    pub global_dir: PathBuf,
    /// Workspace memory discovered from the starting directory, if any.
    pub workspace: Option<WorkspaceMemory>,
 }
 impl MemoryStore {
    /// Builds a store using the configured global memory directory and whatever workspace
    /// memory is discoverable from `cwd` upwards.
    pub fn new(cwd: &Path) -> Self {
        let global_dir = paths::global_memory_dir();
        let workspace = discover_workspace_memory(cwd);
        Self {
            global_dir,
            workspace,
        }
    }

    /// Reads the global index file, or returns `Ok(None)` when it does not exist.
    pub fn load_global_index(&self) -> Result<Option<String>> {
        let index_file = self.global_dir.join(MEMORY_INDEX_FILE_NAME);
        if !index_file.exists() {
            return Ok(None);
        }
        let text = fs::read_to_string(index_file)?;
        Ok(Some(text))
    }

    /// Reads the workspace index: the lite file itself, or the index file inside a
    /// structured memory directory. Returns `Ok(None)` when there is no workspace memory or
    /// the structured index is missing.
    pub fn load_workspace_index(&self) -> Result<Option<String>> {
        let source = match self.workspace.as_ref() {
            Some(WorkspaceMemory::Lite { file, .. }) => file.clone(),
            Some(WorkspaceMemory::Structured { dir, .. }) => {
                let index_file = dir.join(MEMORY_INDEX_FILE_NAME);
                if !index_file.exists() {
                    return Ok(None);
                }
                index_file
            }
            None => return Ok(None),
        };
        let text = fs::read_to_string(source)?;
        Ok(Some(text))
    }

    /// Loads the non-index markdown files from the global directory (when it exists) followed
    /// by those from a structured workspace directory.
    pub fn list_files(&self) -> Result<Vec<MemoryFile>> {
        let mut files = Vec::new();
        if self.global_dir.exists() {
            collect_md_files(&self.global_dir, &mut files)?;
        }
        match &self.workspace {
            Some(WorkspaceMemory::Structured { dir, .. }) => collect_md_files(dir, &mut files)?,
            Some(WorkspaceMemory::Lite { .. }) | None => {}
        }
        Ok(files)
    }
 }
 pub fn build_memory_section(
    store: &MemoryStore,
    with_tools: bool,
    cap: usize,
 ) -> Result<Option<String>> {
    let global_index = store.load_global_index()?;
    let workspace_index = store.load_workspace_index()?;
    if global_index.is_none() && workspace_index.is_none() {
        return Ok(None);
    }
    let mut buf = String::from("<memory>\n");
    let mut consumed = 0usize;
    if let Some(s) = &global_index {
        buf.push_str("<global_index>\n");
        buf.push_str(s);
        buf.push_str("\n</global_index>\n");
        consumed += s.chars().count();
    }
    if let Some(s) = &workspace_index {
        buf.push_str("<workspace_index>\n");
        buf.push_str(s);
        buf.push_str("\n</workspace_index>\n");
        consumed += s.chars().count();
    }
    if consumed > cap {
        warn!(
            "memory indexes ({} chars) exceed cap ({} chars); injecting fully - \
             consider raising memory_cap_* in config or shrinking MEMORY.md",
            consumed, cap
        );
    }
    if !with_tools {
        let mut budget = cap.saturating_sub(consumed);
        let mut files = store.list_files()?;
        files.sort_by(|a, b| a.frontmatter.name.cmp(&b.frontmatter.name));
        let mut omitted = 0usize;
        for f in files {
            let needed = f.body.chars().count() + 50;
            if needed > budget {
                omitted += 1;
                continue;
            }
            buf.push_str(&format!("<file name=\"{}\">\n", f.frontmatter.name));
            buf.push_str(&f.body);
            buf.push_str("\n</file>\n");
            budget = budget.saturating_sub(needed);
        }
        if omitted > 0 {
            buf.push_str(&format!(
                "<!-- {} memory file(s) omitted; enable function calling for full access -->\n",
                omitted
            ));
        }
    }
    buf.push_str("</memory>");
    Ok(Some(buf))
 }
 /// Appends every loadable `.md` file directly inside `dir`, except the index file, to `out`.
 ///
 /// Files that fail to load are logged and skipped. Directory read errors are returned.
 fn collect_md_files(dir: &Path, out: &mut Vec<MemoryFile>) -> Result<()> {
    for dir_entry in fs::read_dir(dir)? {
        let candidate = dir_entry?.path();
        let is_markdown = candidate.extension().and_then(|ext| ext.to_str()) == Some("md");
        let is_index =
            candidate.file_name().and_then(|name| name.to_str()) == Some(MEMORY_INDEX_FILE_NAME);
        if !is_markdown || is_index {
            continue;
        }
        match MemoryFile::load(&candidate) {
            Err(e) => warn!("skip malformed memory file {}: {}", candidate.display(), e),
            Ok(file) => out.push(file),
        }
    }
    Ok(())
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::{env, time};
    use time::SystemTime;
    /// Creates and returns a fresh scratch directory under the system temp dir, named after
    /// `label` plus the current time in nanoseconds since the Unix epoch.
    fn temp_root(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let mut root = env::temp_dir();
        root.push(format!("coyote-memory-{label}-{nanos}"));
        fs::create_dir_all(&root).unwrap();
        root
    }
    #[test]
    fn loads_global_and_workspace_indexes_from_test_dirs() {
        let root = temp_root("phase1");
        let workspace = root.join("workspace");
        let workspace_memory_dir = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&workspace_memory_dir).unwrap();
        fs::write(
            workspace_memory_dir.join(MEMORY_INDEX_FILE_NAME),
            "workspace-content",
        )
        .unwrap();
        let global = root.join("global");
        fs::create_dir_all(&global).unwrap();
        fs::write(global.join(MEMORY_INDEX_FILE_NAME), "global-content").unwrap();
        let store = MemoryStore {
            global_dir: global,
            workspace: discover_workspace_memory(&workspace),
        };
        assert_eq!(
            store.load_global_index().unwrap().as_deref(),
            Some("global-content")
        );
        assert_eq!(
            store.load_workspace_index().unwrap().as_deref(),
            Some("workspace-content")
        );
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn workspace_discovery_prefers_structured_over_lite() {
        let root = temp_root("prefer");
        let workspace = root.join("ws");
        let structured = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&structured).unwrap();
        fs::write(structured.join(MEMORY_INDEX_FILE_NAME), "s").unwrap();
        fs::write(workspace.join(WORKSPACE_MEMORY_FILE_NAME), "l").unwrap();
        let found = discover_workspace_memory(&workspace);
        assert!(matches!(found, Some(WorkspaceMemory::Structured { .. })));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn build_memory_section_returns_none_when_no_memory_exists() {
        let root = temp_root("none");
        let workspace = root.join("ws");
        fs::create_dir_all(&workspace).unwrap();
        let store = MemoryStore {
            global_dir: root.join("global"),
            workspace: discover_workspace_memory(&workspace),
        };
        assert!(build_memory_section(&store, true, 6_000).unwrap().is_none());
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn build_memory_section_injects_only_indexes_with_tools_on() {
        let root = temp_root("indexes_only");
        let workspace = root.join("ws");
        let structured = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&structured).unwrap();
        fs::write(
            structured.join(MEMORY_INDEX_FILE_NAME),
            "workspace-index-content",
        )
        .unwrap();
        fs::write(
            structured.join("foo.md"),
            "---\nname: foo\n---\nfoo body that should not appear\n",
        )
        .unwrap();
        let store = MemoryStore {
            global_dir: root.join("global"),
            workspace: discover_workspace_memory(&workspace),
        };
        let section = build_memory_section(&store, true, 6_000)
            .unwrap()
            .expect("memory section should exist");
        assert!(section.contains("workspace-index-content"));
        assert!(!section.contains("foo body that should not appear"));
        assert!(section.starts_with("<memory>"));
        assert!(section.ends_with("</memory>"));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn build_memory_section_injects_drill_bodies_alphabetically_without_tools() {
        let root = temp_root("drill_bodies");
        let workspace = root.join("ws");
        let structured = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&structured).unwrap();
        fs::write(structured.join(MEMORY_INDEX_FILE_NAME), "idx").unwrap();
        fs::write(
            structured.join("zebra.md"),
            "---\nname: zebra\n---\nzebra body\n",
        )
        .unwrap();
        fs::write(
            structured.join("alpha.md"),
            "---\nname: alpha\n---\nalpha body\n",
        )
        .unwrap();
        let store = MemoryStore {
            global_dir: root.join("global"),
            workspace: discover_workspace_memory(&workspace),
        };
        let section = build_memory_section(&store, false, 6_000)
            .unwrap()
            .expect("memory section should exist");
        let alpha_pos = section.find("alpha body").expect("alpha body missing");
        let zebra_pos = section.find("zebra body").expect("zebra body missing");
        assert!(alpha_pos < zebra_pos, "drill bodies must be alphabetical");
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn build_memory_section_omits_drill_bodies_when_cap_exceeded() {
        let root = temp_root("cap");
        let workspace = root.join("ws");
        let structured = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&structured).unwrap();
        fs::write(structured.join(MEMORY_INDEX_FILE_NAME), "idx").unwrap();
        let big_body = "x".repeat(200);
        fs::write(
            structured.join("big.md"),
            format!("---\nname: big\n---\n{}\n", big_body),
        )
        .unwrap();
        let store = MemoryStore {
            global_dir: root.join("global"),
            workspace: discover_workspace_memory(&workspace),
        };
        let section = build_memory_section(&store, false, 100)
            .unwrap()
            .expect("memory section should exist");
        assert!(!section.contains(&big_body));
        assert!(section.contains("memory file(s) omitted"));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn parse_frontmatter_extracts_yaml() {
        let raw = "---\nname: foo\ndescription: a thing\ntype: user\n---\nBody text\n";
        let (fm, body) = parse_frontmatter(raw).unwrap();
        assert_eq!(fm.name, "foo");
        assert_eq!(fm.description.as_deref(), Some("a thing"));
        assert_eq!(fm.kind.as_deref(), Some("user"));
        assert_eq!(body, "Body text\n");
    }
    #[test]
    fn parse_frontmatter_handles_missing_block() {
        let raw = "# Just markdown, no frontmatter\nbody";
        let (fm, body) = parse_frontmatter(raw).unwrap();
        assert_eq!(fm.name, "");
        assert!(fm.kind.is_none());
        assert_eq!(body, raw);
    }
    #[test]
    fn parse_frontmatter_handles_unterminated_block() {
        let raw = "---\nname: oops\nno closing delimiter\n# rest of doc";
        let (fm, body) = parse_frontmatter(raw).unwrap();
        assert_eq!(fm.name, "");
        assert_eq!(body, raw);
    }
    #[test]
    fn memory_file_save_and_load_roundtrip() {
        let root = temp_root("roundtrip");
        let path = root.join("test.md");
        let file = MemoryFile {
            path: path.clone(),
            frontmatter: MemoryFrontmatter {
                name: "test".into(),
                description: Some("a test".into()),
                kind: Some("user".into()),
                ..Default::default()
            },
            body: "Hello world\nmore text".into(),
        };
        file.save().unwrap();
        let loaded = MemoryFile::load(&path).unwrap();
        assert_eq!(loaded.frontmatter.name, "test");
        assert_eq!(loaded.frontmatter.description.as_deref(), Some("a test"));
        assert_eq!(loaded.frontmatter.kind.as_deref(), Some("user"));
        assert_eq!(loaded.body, "Hello world\nmore text");
        let raw = fs::read_to_string(&path).unwrap();
        assert!(raw.contains("type: user"), "kind must serialize as 'type:'");
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn discover_walks_up_from_nested_dir() {
        let root = temp_root("walk_up");
        let workspace = root.join("ws");
        let mem_dir = workspace
            .join(WORKSPACE_MEMORY_DIR_NAME)
            .join(MEMORY_DIR_NAME);
        fs::create_dir_all(&mem_dir).unwrap();
        fs::write(mem_dir.join(MEMORY_INDEX_FILE_NAME), "idx").unwrap();
        let nested = workspace.join("src").join("deep").join("path");
        fs::create_dir_all(&nested).unwrap();
        let found = discover_workspace_memory(&nested);
        assert!(matches!(found, Some(WorkspaceMemory::Structured { .. })));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn find_git_root_returns_dir_containing_git_dir() {
        let root = temp_root("git_root");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        assert_eq!(find_git_root(&repo), Some(repo.clone()));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn find_git_root_walks_up_from_nested_dir() {
        let root = temp_root("git_root_walk");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        let nested = repo.join("a").join("b").join("c");
        fs::create_dir_all(&nested).unwrap();
        assert_eq!(find_git_root(&nested), Some(repo));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn find_git_root_treats_git_file_as_repo_marker() {
        let root = temp_root("git_root_worktree");
        let worktree = root.join("worktree");
        fs::create_dir_all(&worktree).unwrap();
        fs::write(
            worktree.join(GIT_DIR_NAME),
            "gitdir: /elsewhere/.git/worktrees/wt\n",
        )
        .unwrap();
        assert_eq!(find_git_root(&worktree), Some(worktree));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn find_git_root_returns_none_when_no_git() {
        let root = temp_root("git_root_missing");
        let bare = root.join("bare");
        fs::create_dir_all(&bare).unwrap();
        assert_eq!(find_git_root(&bare), None);
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_creates_structured_layout_and_index() {
        let root = temp_root("bootstrap_layout");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        let mem_dir = bootstrap_workspace_memory(&repo).unwrap();
        assert_eq!(mem_dir, paths::workspace_memory_dir_for(&repo));
        assert!(mem_dir.is_dir());
        let index = mem_dir.join(MEMORY_INDEX_FILE_NAME);
        assert!(index.exists());
        let body = fs::read_to_string(&index).unwrap();
        assert!(body.starts_with("# Workspace Memory Index"));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_creates_gitignore_when_absent() {
        let root = temp_root("bootstrap_gi_new");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        bootstrap_workspace_memory(&repo).unwrap();
        let gi = repo.join(GITIGNORE_FILE_NAME);
        assert!(gi.exists());
        let body = fs::read_to_string(&gi).unwrap();
        assert!(body.contains(".coyote/memory/"));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_appends_to_existing_gitignore_without_trailing_newline() {
        let root = temp_root("bootstrap_gi_append");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        fs::write(repo.join(GITIGNORE_FILE_NAME), "target/").unwrap();
        bootstrap_workspace_memory(&repo).unwrap();
        let body = fs::read_to_string(repo.join(GITIGNORE_FILE_NAME)).unwrap();
        assert!(body.contains("target/"));
        assert!(body.contains(".coyote/memory/"));
        assert!(body.ends_with('\n'));
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_is_idempotent_on_gitignore_entry() {
        let root = temp_root("bootstrap_gi_idempotent");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        let original = "target/\n.coyote/memory/\n";
        fs::write(repo.join(GITIGNORE_FILE_NAME), original).unwrap();
        bootstrap_workspace_memory(&repo).unwrap();
        let body = fs::read_to_string(repo.join(GITIGNORE_FILE_NAME)).unwrap();
        assert_eq!(body, original, "gitignore must be untouched");
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_treats_entry_without_trailing_slash_as_present() {
        let root = temp_root("bootstrap_gi_no_slash");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        let original = ".coyote/memory\n";
        fs::write(repo.join(GITIGNORE_FILE_NAME), original).unwrap();
        bootstrap_workspace_memory(&repo).unwrap();
        let body = fs::read_to_string(repo.join(GITIGNORE_FILE_NAME)).unwrap();
        assert_eq!(body, original);
        let _ = fs::remove_dir_all(&root);
    }
    #[test]
    fn bootstrap_does_not_clobber_existing_index() {
        let root = temp_root("bootstrap_existing_index");
        let repo = root.join("repo");
        fs::create_dir_all(repo.join(GIT_DIR_NAME)).unwrap();
        let mem_dir = paths::workspace_memory_dir_for(&repo);
        fs::create_dir_all(&mem_dir).unwrap();
        let preserved = "# Custom Index\n\n- [[foo]]: keep me\n";
        fs::write(mem_dir.join(MEMORY_INDEX_FILE_NAME), preserved).unwrap();
        bootstrap_workspace_memory(&repo).unwrap();
        let body = fs::read_to_string(mem_dir.join(MEMORY_INDEX_FILE_NAME)).unwrap();
        assert_eq!(body, preserved);
        let _ = fs::remove_dir_all(&root);
    }
 }
@@ -5,6 +5,7 @@ mod input;
 mod install_remote;
 mod macros;
 mod mcp_factory;
 pub(crate) mod memory;
 pub(crate) mod paths;
 pub(crate) mod prompts;
 mod rag_cache;
@@ -134,11 +135,23 @@ const RAGS_DIR_NAME: &str = "rags";
 const FUNCTIONS_DIR_NAME: &str = "functions";
 const FUNCTIONS_BIN_DIR_NAME: &str = "bin";
 const AGENTS_DIR_NAME: &str = "agents";
 const REPL_HISTORY_DIR_NAME: &str = "repl-history";
 const GLOBAL_TOOLS_DIR_NAME: &str = "tools";
 const GLOBAL_TOOLS_UTILS_DIR_NAME: &str = "utils";
 const BASH_PROMPT_UTILS_FILE_NAME: &str = "prompt-utils.sh";
 const MCP_FILE_NAME: &str = "mcp.json";
-const DEFAULT_VISIBLE_TOOLS: [&str; 18] = [
+const MEMORY_DIR_NAME: &str = "memory";
 const MEMORY_INDEX_FILE_NAME: &str = "MEMORY.md";
 const WORKSPACE_MEMORY_FILE_NAME: &str = "COYOTE.md";
 const WORKSPACE_MEMORY_DIR_NAME: &str = ".coyote";
 const SBX_KIT_DIR_NAME: &str = "sbx-kit";
 const SBX_KIT_HASH_FILE: &str = "kit.sha256";
 const SBX_MIXIN_FILE_NAME: &str = "sbx-mixin.yaml";
 const SBX_VAULT_MIXINS_DIR_NAME: &str = "sbx-vault-mixins";
 const SBX_MIXIN_KITS_DIR_NAME: &str = "sbx-mixin-kits";
 const GIT_DIR_NAME: &str = ".git";
 const GITIGNORE_FILE_NAME: &str = ".gitignore";
 const DEFAULT_VISIBLE_TOOLS: [&str; 19] = [
    "execute_command.sh",
    "execute_py_code.py",
    "execute_sql_code.sh",
@@ -152,6 +165,7 @@ const DEFAULT_VISIBLE_TOOLS: [&str; 18] = [
    "fs_read.sh",
    "fs_rm.sh",
    "fs_write.sh",
    "ast_grep.sh",
    "get_current_time.sh",
    "get_current_weather.sh",
    "search_wikipedia.sh",
@@ -226,6 +240,10 @@ pub struct Config {
    pub summarization_prompt: Option<String>,
    pub summary_context_prompt: Option<String>,
    pub memory: Option<bool>,
    pub memory_cap_with_tools: Option<usize>,
    pub memory_cap_without_tools: Option<usize>,
    pub rag_embedding_model: Option<String>,
    pub rag_reranker_model: Option<String>,
    pub rag_top_k: usize,
@@ -294,6 +312,10 @@ impl Default for Config {
            summarization_prompt: None,
            summary_context_prompt: None,
            memory: None,
            memory_cap_with_tools: None,
            memory_cap_without_tools: None,
            rag_embedding_model: None,
            rag_reranker_model: None,
            rag_top_k: 5,
@@ -350,6 +372,12 @@ impl AssetCategory {
    }
 }
 /// Memory scope selector; derives `clap::ValueEnum` so it can be parsed as a CLI value.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
 pub enum MemoryScope {
    /// NOTE(review): presumably the user-wide memory under the config directory — confirm.
    Global,
    /// NOTE(review): presumably the memory attached to the current workspace — confirm.
    Workspace,
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
 pub enum InstallFilter {
    Agents,
@@ -646,6 +674,9 @@ bitflags::bitflags! {
        const SESSION = 1 << 2;
        const RAG = 1 << 3;
        const AGENT = 1 << 4;
        const FUNCTION_CALLING = 1 << 5;
        const AUTO_CONTINUE = 1 << 6;
        const SKILLS_ENABLED = 1 << 7;
    }
 }
@@ -2,10 +2,14 @@ use super::role::Role;
 use super::{
    AGENT_GRAPH_FILE_NAME, AGENTS_DIR_NAME, BASH_PROMPT_UTILS_FILE_NAME, CONFIG_FILE_NAME,
    ENV_FILE_NAME, FUNCTIONS_BIN_DIR_NAME, FUNCTIONS_DIR_NAME, GLOBAL_TOOLS_DIR_NAME,
-    GLOBAL_TOOLS_UTILS_DIR_NAME, MACROS_DIR_NAME, MCP_FILE_NAME, ModelsOverride, RAGS_DIR_NAME,
+    GLOBAL_TOOLS_UTILS_DIR_NAME, MACROS_DIR_NAME, MCP_FILE_NAME, MEMORY_DIR_NAME,
-    ROLES_DIR_NAME, SKILLS_DIR_NAME,
+    MEMORY_INDEX_FILE_NAME, ModelsOverride, RAGS_DIR_NAME, ROLES_DIR_NAME, SBX_KIT_DIR_NAME,
    SBX_KIT_HASH_FILE, SBX_MIXIN_FILE_NAME, SBX_MIXIN_KITS_DIR_NAME, SBX_VAULT_MIXINS_DIR_NAME,
    SKILLS_DIR_NAME, WORKSPACE_MEMORY_DIR_NAME,
 };
 use crate::client::ProviderModels;
 use crate::config::REPL_HISTORY_DIR_NAME;
 use crate::config::session::Session;
 use crate::utils::{get_env_name, list_file_names, normalize_env_name};
 use anyhow::{Context, Result, anyhow, bail};
@@ -13,7 +17,7 @@ use log::LevelFilter;
 use std::collections::HashSet;
 use std::env;
 use std::fs::{read_dir, read_to_string};
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 pub fn config_dir() -> PathBuf {
    if let Ok(v) = env::var(get_env_name("config_dir")) {
@@ -31,8 +35,97 @@ pub fn local_path(name: &str) -> PathBuf {
 }
 pub fn cache_path() -> PathBuf {
-    let base_dir = dirs::cache_dir().unwrap_or_else(env::temp_dir);
+    if let Ok(v) = env::var(get_env_name("cache_dir")) {
-    base_dir.join(env!("CARGO_CRATE_NAME"))
+        PathBuf::from(v)
    } else if let Ok(v) = env::var("XDG_CACHE_HOME") {
        PathBuf::from(v).join(env!("CARGO_CRATE_NAME"))
    } else {
        let base_dir = dirs::cache_dir().unwrap_or_else(env::temp_dir);
        base_dir.join(env!("CARGO_CRATE_NAME"))
    }
 }
 /// Returns the path held in the `sandbox_kit` environment variable (name resolved through
 /// `get_env_name`), or `None` when the variable is unset.
 pub fn sandbox_kit_override() -> Option<PathBuf> {
    let raw = env::var_os(get_env_name("sandbox_kit"))?;
    Some(PathBuf::from(raw))
 }
 /// Maps a host home-directory path onto `/home/agent` when running inside the sandbox.
 ///
 /// Returns `None` when `IS_SANDBOX` is unset, when `path` is not valid UTF-8, or when the
 /// path is not under `/home/<user>`, `/Users/<user>` or `<drive>:\Users\<user>` for a user
 /// other than `agent`.
 pub fn translate_sandboxed_home_path(path: &Path) -> Option<PathBuf> {
    if env::var_os("IS_SANDBOX").is_none() {
        return None;
    }
    let raw = path.to_str()?;
    translate_unix_home_style(raw, "/home/")
        .or_else(|| translate_unix_home_style(raw, "/Users/"))
        .or_else(|| translate_windows_users_path(raw))
 }
 /// Rewrites `<prefix><user>/<tail>` to `/home/agent/<tail>`.
 ///
 /// Returns `None` when `s` does not start with `prefix`, or when the user segment is empty
 /// or already `agent`.
 fn translate_unix_home_style(s: &str, prefix: &str) -> Option<PathBuf> {
    let after_prefix = s.strip_prefix(prefix)?;
    let mut segments = after_prefix.splitn(2, '/');
    let user = segments.next().unwrap_or_default();
    let remainder = segments.next().unwrap_or_default();
    if matches!(user, "" | "agent") {
        return None;
    }

    let mut target = String::from("/home/agent");
    if !remainder.is_empty() {
        target.push('/');
        target.push_str(remainder);
    }
    Some(PathBuf::from(target))
 }
 /// Rewrites `<drive>:\Users\<user>\<tail>` to `/home/agent/<tail>`, turning backslashes in
 /// the tail into forward slashes.
 ///
 /// Returns `None` when `s` lacks a `<letter>:\` prefix followed by `Users\`, or when the
 /// user segment is empty or already `agent`.
 fn translate_windows_users_path(s: &str) -> Option<PathBuf> {
    // Needs at least four bytes: an ASCII drive letter, `:`, `\`, and one more.
    let has_drive_prefix = matches!(
        s.as_bytes(),
        [drive, b':', b'\\', _, ..] if drive.is_ascii_alphabetic()
    );
    if !has_drive_prefix {
        return None;
    }

    // The first three bytes are ASCII, so byte offset 3 is a valid char boundary.
    let after_users = s[3..].strip_prefix("Users\\")?;
    let mut segments = after_users.splitn(2, '\\');
    let user = segments.next().unwrap_or_default();
    let remainder = segments
        .next()
        .map(|tail| tail.replace('\\', "/"))
        .unwrap_or_default();
    if matches!(user, "" | "agent") {
        return None;
    }

    let mut target = String::from("/home/agent");
    if !remainder.is_empty() {
        target.push('/');
        target.push_str(&remainder);
    }
    Some(PathBuf::from(target))
 }
 /// Path of the sandbox mixin file inside the config directory.
 pub fn sbx_mixin_file() -> PathBuf {
    let mut path = config_dir();
    path.push(SBX_MIXIN_FILE_NAME);
    path
 }
 /// Path of the sandbox mixin file inside the functions directory.
 pub fn global_tools_sbx_mixin_file() -> PathBuf {
    let mut path = functions_dir();
    path.push(SBX_MIXIN_FILE_NAME);
    path
 }
 /// Returns the nearest existing `WORKSPACE_MEMORY_DIR_NAME/SBX_MIXIN_FILE_NAME` file found
 /// in `start` or one of its ancestors, or `None` when there is none.
 pub fn find_workspace_sbx_mixin(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .map(|dir| {
            dir.join(WORKSPACE_MEMORY_DIR_NAME)
                .join(SBX_MIXIN_FILE_NAME)
        })
        .find(|candidate| candidate.exists())
 }
 pub fn oauth_tokens_path() -> PathBuf {
@@ -47,6 +140,26 @@ pub fn log_path() -> PathBuf {
    cache_path().join(format!("{}.log", env!("CARGO_CRATE_NAME")))
 }
 /// Sandbox kit directory inside the cache directory.
 pub fn sbx_kit_dir() -> PathBuf {
    let mut path = cache_path();
    path.push(SBX_KIT_DIR_NAME);
    path
 }
 /// Hash file stored inside the sandbox kit directory.
 pub fn sbx_kit_hash_file() -> PathBuf {
    let mut path = sbx_kit_dir();
    path.push(SBX_KIT_HASH_FILE);
    path
 }
 /// Sandbox vault mixins directory inside the cache directory.
 pub fn sbx_vault_mixins_dir() -> PathBuf {
    let mut path = cache_path();
    path.push(SBX_VAULT_MIXINS_DIR_NAME);
    path
 }
 /// Hash file stored inside the sandbox vault mixins directory (same file name as the kit's).
 pub fn sbx_vault_mixins_hash_file() -> PathBuf {
    let mut path = sbx_vault_mixins_dir();
    path.push(SBX_KIT_HASH_FILE);
    path
 }
 /// Sandbox mixin kits directory inside the cache directory.
 pub fn sbx_mixin_kits_dir() -> PathBuf {
    let mut path = cache_path();
    path.push(SBX_MIXIN_KITS_DIR_NAME);
    path
 }
 pub fn config_file() -> PathBuf {
    match env::var(get_env_name("config_file")) {
        Ok(value) => PathBuf::from(value),
@@ -195,6 +308,34 @@ pub fn models_override_file() -> PathBuf {
    local_path("models-override.yaml")
 }
 /// Global memory directory inside the config directory.
 pub fn global_memory_dir() -> PathBuf {
    let mut path = config_dir();
    path.push(MEMORY_DIR_NAME);
    path
 }
 /// Index file inside the global memory directory.
 pub fn global_memory_index_path() -> PathBuf {
    let mut path = global_memory_dir();
    path.push(MEMORY_INDEX_FILE_NAME);
    path
 }
 /// Structured memory directory for the workspace rooted at `workspace_root`.
 pub fn workspace_memory_dir_for(workspace_root: &Path) -> PathBuf {
    let mut path = workspace_root.to_path_buf();
    path.push(WORKSPACE_MEMORY_DIR_NAME);
    path.push(MEMORY_DIR_NAME);
    path
 }
 /// REPL history directory inside the cache directory.
 pub fn repl_history_dir() -> PathBuf {
    let mut path = cache_path();
    path.push(REPL_HISTORY_DIR_NAME);
    path
 }
 /// History file for the given session inside the REPL history directory.
 ///
 /// With a session the file is named `session_<name>`, with every `/` in the name replaced
 /// by `_`; without one it is named `default`.
 pub fn repl_history_file(session: &Option<Session>) -> PathBuf {
    let history_key = match session {
        Some(active) => format!("session_{}", active.name().replace('/', "_")),
        None => String::from("default"),
    };
    repl_history_dir().join(history_key)
 }
 pub fn log_config() -> Result<(LevelFilter, Option<PathBuf>)> {
    let log_level = env::var(get_env_name("log_level"))
        .ok()
@@ -350,6 +491,197 @@ mod tests {
        }
    }
    /// Tests for `translate_sandboxed_home_path` with the `IS_SANDBOX` environment variable
    /// forced on or off around each assertion.
    mod sandbox_home_translation {
        use super::*;
        use serial_test::serial;

        /// Environment variable toggled by the helpers below.
        const SANDBOX_ENV_VAR: &str = "IS_SANDBOX";

        /// Captures the current value of `IS_SANDBOX` and restores it on drop.
        ///
        /// Restoring from `Drop` means the variable is put back even when the closure under
        /// test panics on a failed assertion, so one failing test cannot leak sandbox state
        /// into the tests that run after it.
        struct SandboxEnvGuard {
            previous: Option<std::ffi::OsString>,
        }

        impl SandboxEnvGuard {
            /// Snapshots the current value of `IS_SANDBOX` (or its absence).
            fn capture() -> Self {
                Self {
                    previous: env::var_os(SANDBOX_ENV_VAR),
                }
            }
        }

        impl Drop for SandboxEnvGuard {
            fn drop(&mut self) {
                // SAFETY: every test in this module is `#[serial]`, so these helpers do not
                // mutate the variable concurrently with one another.
                unsafe {
                    match self.previous.take() {
                        Some(value) => env::set_var(SANDBOX_ENV_VAR, value),
                        None => env::remove_var(SANDBOX_ENV_VAR),
                    }
                }
            }
        }

        /// Runs `f` with `IS_SANDBOX=1`, restoring the previous value afterwards
        /// (including when `f` panics).
        fn with_sandbox<F: FnOnce()>(f: F) {
            let _restore = SandboxEnvGuard::capture();
            // SAFETY: see `SandboxEnvGuard::drop`.
            unsafe {
                env::set_var(SANDBOX_ENV_VAR, "1");
            }
            f();
        }

        /// Runs `f` with `IS_SANDBOX` unset, restoring the previous value afterwards
        /// (including when `f` panics).
        fn without_sandbox<F: FnOnce()>(f: F) {
            let _restore = SandboxEnvGuard::capture();
            // SAFETY: see `SandboxEnvGuard::drop`.
            unsafe {
                env::remove_var(SANDBOX_ENV_VAR);
            }
            f();
        }

        #[test]
        #[serial]
        fn returns_none_when_not_in_sandbox() {
            without_sandbox(|| {
                let p = Path::new("/home/atusa/.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn translates_host_home_to_agent_home() {
            with_sandbox(|| {
                let p = Path::new("/home/atusa/.coyote_password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.coyote_password"))
                );
            });
        }

        #[test]
        #[serial]
        fn translates_nested_host_home_path() {
            with_sandbox(|| {
                let p = Path::new("/home/atusa/.config/coyote/.password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.config/coyote/.password"))
                );
            });
        }

        #[test]
        #[serial]
        fn returns_none_when_path_already_targets_agent_home() {
            with_sandbox(|| {
                let p = Path::new("/home/agent/.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn returns_none_when_path_is_outside_home() {
            with_sandbox(|| {
                let p = Path::new("/etc/coyote/.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn returns_none_for_relative_path() {
            with_sandbox(|| {
                let p = Path::new(".coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn returns_none_for_first_segment_not_home() {
            with_sandbox(|| {
                let p = Path::new("/opt/atusa/.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn translates_macos_users_path() {
            with_sandbox(|| {
                let p = Path::new("/Users/atusa/.coyote_password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.coyote_password"))
                );
            });
        }

        #[test]
        #[serial]
        fn translates_macos_nested_path() {
            with_sandbox(|| {
                let p = Path::new("/Users/atusa/.config/coyote/.password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.config/coyote/.password"))
                );
            });
        }

        #[test]
        #[serial]
        fn returns_none_when_macos_path_already_targets_agent() {
            with_sandbox(|| {
                let p = Path::new("/Users/agent/.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }

        #[test]
        #[serial]
        fn translates_windows_drive_letter_path() {
            with_sandbox(|| {
                let p = Path::new("C:\\Users\\atusa\\.coyote_password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.coyote_password"))
                );
            });
        }

        #[test]
        #[serial]
        fn translates_windows_nested_path() {
            with_sandbox(|| {
                let p = Path::new("D:\\Users\\atusa\\.config\\coyote\\.password");
                assert_eq!(
                    translate_sandboxed_home_path(p),
                    Some(PathBuf::from("/home/agent/.config/coyote/.password"))
                );
            });
        }

        #[test]
        #[serial]
        fn returns_none_when_windows_path_already_targets_agent() {
            with_sandbox(|| {
                let p = Path::new("C:\\Users\\agent\\.coyote_password");
                assert_eq!(translate_sandboxed_home_path(p), None);
            });
        }
    }
    /// Checks that `sandbox_kit_override` is `None` while the sandbox-kit variable is unset
    /// and returns the configured path once the variable is set.
    #[test]
    fn sandbox_kit_override_reflects_env_var_state() {
        /// Restores the captured value of an environment variable on drop, so a failed
        /// assertion below cannot leak the probe value into other tests.
        struct RestoreEnv<K: AsRef<std::ffi::OsStr>> {
            key: K,
            previous: Option<std::ffi::OsString>,
        }

        impl<K: AsRef<std::ffi::OsStr>> Drop for RestoreEnv<K> {
            fn drop(&mut self) {
                // SAFETY: same operation the test body performs on this variable.
                // NOTE(review): this test is not `#[serial]`; confirm no other test reads
                // or writes this variable concurrently.
                unsafe {
                    match self.previous.take() {
                        Some(value) => env::set_var(&self.key, value),
                        None => env::remove_var(&self.key),
                    }
                }
            }
        }

        let env_name = get_env_name("sandbox_kit");
        let _restore = RestoreEnv {
            key: &env_name,
            previous: env::var_os(&env_name),
        };

        // SAFETY: see the note on `RestoreEnv::drop`.
        unsafe {
            env::remove_var(&env_name);
        }
        assert_eq!(sandbox_kit_override(), None);

        let probe = PathBuf::from("/tmp/coyote-sandbox-kit-probe");
        // SAFETY: see the note on `RestoreEnv::drop`.
        unsafe {
            env::set_var(&env_name, &probe);
        }
        assert_eq!(sandbox_kit_override(), Some(probe));
    }
    #[test]
    fn list_skills_skips_invalid_directory_names() {
        let unique = time::SystemTime::now()
@@ -8,6 +8,53 @@ pub(crate) const DEFAULT_SKILL_INSTRUCTIONS: &str = indoc! {"
    complete to keep the context lean."
 };
 /// Prompt section describing the read/write memory system to the model: the `memory__*`
 /// tools that are available and the rules for using them.
 ///
 /// This text is sent to the model verbatim, so wording changes alter model behavior.
 pub(crate) const DEFAULT_MEMORY_INSTRUCTIONS: &str = indoc! {"
    ## Memory
    A persistent memory file system survives across sessions. The MEMORY.md content shown above is
    your always-on context (universal facts, hard rules, binding feedback). Drill files hold deeper,
    on-demand context that you fetch with `memory__read`.
    Tools:
        - `memory__read(name)`: Read a specific drill file's full content.
        - `memory__write(name, content, scope)`: Create or replace a drill file (scope: 'global' | 'workspace').
          The MEMORY.md index is appended automatically; do not also update the index by hand.
          Optional `superseded_by` / `expires` (YYYY-MM-DD) mark a memory as stale for later cleanup.
        - `memory__rename(name, new_name, scope)`: Rename a drill file. Its index entry and every
          [[wikilink]] to it are rewritten automatically.
        - `memory__delete(name, scope)`: Delete a drill file and its index entry. Reports any
          [[wikilinks]] left dangling in other files.
        - `memory__edit_index(scope, content)`: Replace the entire MEMORY.md at the given scope.
          Use this to add always-on facts, reorganize, prune stale entries, or fix descriptions.
        - `memory__list()`: See all known drill files and their metadata.
        - `memory__lint()`: Health-check memory for orphans, broken links, oversized files,
          stale (superseded/expired) files, and index descriptions that drifted from the files.
    RULES:
        - Every interaction has two outputs: your answer AND any memory updates the conversation warrants.
          Don't let learnings evaporate into chat history.
        - All MEMORY.md edits MUST go through `memory__edit_index`. NEVER use `fs_write`, `fs_patch`,
          or any other generic file tool on MEMORY.md — Coyote manages its location and a stray
          MEMORY.md outside the managed path is invisible to memory.
        - All drill files MUST go through `memory__write`. The index updates itself. Renames and
          deletions MUST go through `memory__rename` / `memory__delete` so links stay intact.
        - When a fact becomes outdated, update it in place, delete it, or mark the old file with
          `superseded_by`/`expires` so `memory__lint` flags it later. Never leave contradictory
          memories side by side.
        - Use [[wikilink]] notation in memory files to reference other memories by their `name:` slug.
        - NEVER write secrets, credentials, or API keys to memory — memory is plaintext on disk.
          Use coyote's Vault for secrets.
        - Keep individual drill files focused (under ~2K chars). Split large topics across linked files."
 };
 /// Prompt section for sessions where memory is read-only: it tells the model to treat the
 /// injected memory content as authoritative and that the `memory__*` tools are unavailable.
 ///
 /// This text is sent to the model verbatim, so wording changes alter model behavior.
 pub(crate) const DEFAULT_MEMORY_INSTRUCTIONS_READONLY: &str = indoc! {"
    ## Memory (read-only)
    The memory content shown above persists across sessions. In this session it is READ-ONLY — the user
    maintains memory files manually outside the conversation.
    Reference the memory content as authoritative context about the user and their workspace.
    Do not propose writing to memory or call any `memory__*` tools — they are unavailable."
 };
 pub(in crate::config) const DEFAULT_TODO_INSTRUCTIONS: &str = indoc! {"
    ## Task Tracking
    You have built-in task tracking tools. Use them to track your progress:
@@ -62,6 +109,36 @@ pub(in crate::config) const DEFAULT_SPAWN_INSTRUCTIONS: &str = indoc! {"
    agent__collect --id agent_explore_e5f6g7h8
    ```
    ### CRITICAL: Never end your turn with pending agents
    Spawned agents do NOT report back on their own. They run in the background until you
    actively reclaim them with `agent__collect` (to get their output) or `agent__cancel`
    (to discard them). If you spawn agents and then emit a final message without reclaiming
    them, the system will detect the unreclaimed agents and reject the turn-end, injecting
    a reminder forcing you to handle them. After several such reminders, the system will
    auto-cancel them and warn you that work was lost.
    The correct flow when you have nothing else to do:
    ```
    # WRONG - do NOT do this:
    agent__spawn --agent explore --prompt \"...\"
    agent__spawn --agent explore --prompt \"...\"
    # ... emit text like \"I will synthesize once they report back.\" and stop
    # ^ The agents will be abandoned. Their output will be lost.
    # RIGHT - always do this:
    agent__spawn --agent explore --prompt \"...\"
    agent__spawn --agent explore --prompt \"...\"
    agent__collect --id <first_id>   # blocks until done
    agent__collect --id <second_id>  # blocks until done
    # ... NOW you can synthesize and end your turn
    ```
    `agent__collect` is a **blocking wait**: it pauses your execution until the agent
    completes, then returns the output as a tool result. Use it freely — it is the
    correct primitive for \"I'm done with my own work and just need the agents' results\".
    ### Parallel Spawning (DEFAULT for multi-agent work)
    When a task needs multiple agents, **spawn them all at once**, then collect:
@@ -9,7 +9,8 @@ use super::{
    AGENTS_DIR_NAME, Agent, AgentVariables, AppConfig, AppState, AssetCategory, CREATE_TITLE_ROLE,
    Input, InstallFilter, LEFT_PROMPT, LastMessage, MESSAGES_FILE_NAME, RIGHT_PROMPT, Role,
    RoleLike, SESSIONS_DIR_NAME, SUMMARIZATION_PROMPT, SUMMARY_CONTEXT_PROMPT, StateFlags,
-    TEMP_ROLE_NAME, TEMP_SESSION_NAME, WorkingMode, ensure_parent_exists, list_agents, paths,
+    TEMP_ROLE_NAME, TEMP_SESSION_NAME, WorkingMode, ensure_parent_exists, list_agents, memory,
    paths,
 };
 use super::{MessageContentToolCalls, prompts};
 use crate::client::{Model, ModelType, list_models};
@@ -30,6 +31,9 @@ use crate::utils::{
    list_file_names, now, render_prompt, temp_file,
 };
 use super::memory::{
    DEFAULT_MEMORY_CAP_WITH_TOOLS, DEFAULT_MEMORY_CAP_WITHOUT_TOOLS, MemoryStore, WorkspaceMemory,
 };
 use crate::graph;
 use anyhow::{Context, Error, Result, bail};
 use gman::providers::SupportedProvider;
@@ -59,6 +63,21 @@ pub struct SkillInstructionsConfig {
    pub instructions: Option<String>,
 }
 /// Resolved memory settings for a request.
 #[derive(Debug, Clone)]
 pub struct MemoryConfig {
    /// Whether memory is active for the request.
    pub enabled: bool,
    /// Workspace-level memory, when one applies; `None` otherwise.
    pub workspace: Option<WorkspaceMemory>,
 }
 impl MemoryConfig {
    /// Configuration with memory switched off and no workspace memory attached.
    pub fn disabled() -> Self {
        let workspace = None;
        Self {
            workspace,
            enabled: false,
        }
    }
 }
 /// Must stay in sync with the predicate that registers `skill__*` tools in `rebuild_tool_scope`
 /// (and in `graph::llm::run_llm_node`). Telling the model to call tools that are not exposed
 /// is a footgun. `compatible_enabled` is the post-filter universe that `skill__list` would
@@ -101,6 +120,7 @@ pub struct RequestContext {
    pub escalation_queue: Option<Arc<EscalationQueue>>,
    pub current_depth: usize,
    pub auto_continue_count: usize,
    pub pending_agents_guardrail_count: u32,
    pub todo_list: TodoList,
    pub skill_registry: SkillRegistry,
    pub last_continuation_response: Option<String>,
@@ -130,6 +150,7 @@ impl RequestContext {
            escalation_queue: None,
            current_depth: 0,
            auto_continue_count: 0,
            pending_agents_guardrail_count: 0,
            todo_list: TodoList::default(),
            skill_registry: SkillRegistry::default(),
            last_continuation_response: None,
@@ -185,6 +206,7 @@ impl RequestContext {
            escalation_queue: None,
            current_depth: 0,
            auto_continue_count: 0,
            pending_agents_guardrail_count: 0,
            todo_list: TodoList::default(),
            skill_registry: SkillRegistry::default(),
            last_continuation_response: None,
@@ -227,6 +249,7 @@ impl RequestContext {
            escalation_queue: self.escalation_queue.clone(),
            current_depth: self.current_depth,
            auto_continue_count: 0,
            pending_agents_guardrail_count: 0,
            todo_list: self.todo_list.clone(),
            skill_registry: self.skill_registry.clone(),
            last_continuation_response: None,
@@ -267,6 +290,7 @@ impl RequestContext {
            escalation_queue: parent.escalation_queue.clone(),
            current_depth,
            auto_continue_count: 0,
            pending_agents_guardrail_count: 0,
            todo_list: TodoList::default(),
            skill_registry: SkillRegistry::default(),
            last_continuation_response: None,
@@ -347,9 +371,32 @@ impl RequestContext {
        if self.rag.is_some() {
            flags |= StateFlags::RAG;
        }
        if self.app.config.function_calling_support {
            flags |= StateFlags::FUNCTION_CALLING;
        }
        if self.auto_continue_config().enabled {
            flags |= StateFlags::AUTO_CONTINUE;
        }
        if self.resolved_skills_enabled() {
            flags |= StateFlags::SKILLS_ENABLED;
        }
        flags
    }
    pub fn resolved_skills_enabled(&self) -> bool {
        if let Some(agent) = &self.agent
            && let Some(value) = agent.skills_enabled()
        {
            return value;
        }
        let app = &self.app.config;
        self.session
            .as_ref()
            .and_then(|s| s.skills_enabled())
            .or_else(|| self.role.as_ref().and_then(|r| r.skills_enabled()))
            .unwrap_or(app.skills_enabled)
    }
    pub fn messages_file(&self) -> PathBuf {
        match &self.agent {
            None => match env::var(get_env_name("messages_file")) {
@@ -426,6 +473,50 @@ impl RequestContext {
        }
    }
    /// Renders the running todo list as display text.
    ///
    /// Returns a fixed "no todos" message when the list is empty; otherwise the list as
    /// rendered by `render_for_model`, followed by a newline.
    ///
    /// # Errors
    /// Fails when auto-continuation is disabled for this context.
    pub fn todo_info(&self) -> Result<String> {
        if !self.auto_continue_config().enabled {
            bail!(
                "Auto-continuation is disabled. Enable it by setting `auto_continue: true` in your config or running `.set auto_continue true`."
            );
        }

        let rendered = if self.todo_list.is_empty() {
            "No todos in the running list.\n".to_string()
        } else {
            let mut listing = self.todo_list.render_for_model();
            listing.push('\n');
            listing
        };
        Ok(rendered)
    }
    /// Lists the tools that `select_functions` picks for the current role.
    ///
    /// The output is a count header followed by the tool names, sorted and indented by two
    /// spaces, one per line. A fixed message is returned when no tools are selected.
    ///
    /// # Errors
    /// Fails when function calling is disabled in the app config, or when the effective
    /// role cannot be extracted.
    pub fn tools_info(&self) -> Result<String> {
        if !self.app.config.function_calling_support {
            bail!(
                "Function calling is disabled. Enable it by setting `function_calling_support: true` in your config or running `.set function_calling_support true`."
            );
        }

        let role = self.extract_role(&self.app.config)?;
        let Some(functions) = self.select_functions(&role) else {
            return Ok("No tools enabled for the next request.\n".to_string());
        };

        let mut sorted_names: Vec<&str> = functions.iter().map(|f| f.name.as_str()).collect();
        sorted_names.sort_unstable();

        let header = format!(
            "Tools enabled for the next request: {}\n\n",
            functions.len()
        );
        let report = sorted_names.into_iter().fold(header, |mut acc, name| {
            acc.push_str("  ");
            acc.push_str(name);
            acc.push('\n');
            acc
        });
        Ok(report)
    }
    /// Lists the `.yaml` entries in this context's sessions directory, as reported by
    /// `list_file_names`.
    pub fn list_sessions(&self) -> Vec<String> {
        let sessions_dir = self.sessions_dir();
        list_file_names(sessions_dir, ".yaml")
    }
@@ -618,6 +709,10 @@ impl RequestContext {
    }
    /// Computes the effective role for this context with memory injection requested.
    pub fn extract_role(&self, app: &AppConfig) -> Result<Role> {
        let inject_memory = true;
        self.extract_role_impl(app, inject_memory)
    }
    fn extract_role_impl(&self, app: &AppConfig, inject_memory: bool) -> Result<Role> {
        let mut role = if let Some(session) = self.session.as_ref() {
            session.to_role()
        } else if let Some(agent) = self.agent.as_ref() {
@@ -666,6 +761,39 @@ impl RequestContext {
            }
        }
        if inject_memory {
            let memory_config = self.memory_config();
            if memory_config.enabled {
                let store = MemoryStore {
                    global_dir: paths::global_memory_dir(),
                    workspace: memory_config.workspace,
                };
                let with_tools = app.function_calling_support;
                let cap = if with_tools {
                    app.memory_cap_with_tools
                        .unwrap_or(DEFAULT_MEMORY_CAP_WITH_TOOLS)
                } else {
                    app.memory_cap_without_tools
                        .unwrap_or(DEFAULT_MEMORY_CAP_WITHOUT_TOOLS)
                };
                match memory::build_memory_section(&store, with_tools, cap) {
                    Ok(Some(section)) => {
                        let separator = if role.is_empty_prompt() { "" } else { "\n\n" };
                        role.append_to_prompt(separator);
                        role.append_to_prompt(&section);
                        role.append_to_prompt("\n\n");
                        role.append_to_prompt(if with_tools {
                            prompts::DEFAULT_MEMORY_INSTRUCTIONS
                        } else {
                            prompts::DEFAULT_MEMORY_INSTRUCTIONS_READONLY
                        });
                    }
                    Ok(None) => {}
                    Err(e) => warn!("memory injection failed: {}", e),
                }
            }
        }
        Ok(self.skill_registry.effective_role(&role, &policy))
    }
@@ -705,6 +833,52 @@ impl RequestContext {
        }
    }
    /// Resolves the memory configuration for this context.
    ///
    /// Memory is disabled when:
    /// - the active agent has a graph, or
    /// - the resolved preference is `false` (the first explicit setting wins, in the order
    ///   agent, session, role, app; the default is `true`), or
    /// - neither the global memory index file nor a workspace memory exists.
    ///
    /// Otherwise memory is enabled and carries the workspace memory discovered from the
    /// current working directory, if any.
    pub fn memory_config(&self) -> MemoryConfig {
        let agent_uses_graph = self
            .agent
            .as_ref()
            .is_some_and(|agent| graph::agent_has_graph(agent.name()));
        if agent_uses_graph {
            return MemoryConfig::disabled();
        }

        let preference = self
            .agent
            .as_ref()
            .and_then(|a| a.memory())
            .or(self.session.as_ref().and_then(|s| s.memory()))
            .or(self.role.as_ref().and_then(|r| r.memory()))
            .or(self.app.config.memory)
            .unwrap_or(true);
        if !preference {
            return MemoryConfig::disabled();
        }

        // Workspace memory is looked up relative to the current working directory; when the
        // directory cannot be read there is simply no workspace memory.
        let workspace = env::current_dir()
            .ok()
            .and_then(|cwd| MemoryStore::new(cwd.as_path()).workspace.clone());

        let has_global_index = paths::global_memory_index_path().exists();
        if !(has_global_index || workspace.is_some()) {
            return MemoryConfig::disabled();
        }

        MemoryConfig {
            enabled: true,
            workspace,
        }
    }
    pub fn should_inject_memory(&self) -> bool {
        self.memory_config().enabled
    }
    /// Whether the memory tools should be registered: memory must be enabled and the app
    /// config must have function calling support on.
    pub fn should_register_memory_tools(&self) -> bool {
        if !self.should_inject_memory() {
            return false;
        }
        self.app.config.function_calling_support
    }
    pub fn auto_continue_config(&self) -> AutoContinueConfig {
        if let Some(agent) = &self.agent {
            return AutoContinueConfig {
@@ -935,6 +1109,10 @@ impl RequestContext {
                "enabled_mcp_servers",
                super::format_option_value(&role.enabled_mcp_servers().map(|v| v.join(","))),
            ),
            (
                "enabled_skills",
                super::format_option_value(&role.enabled_skills().map(|v| v.join(","))),
            ),
            (
                "max_output_tokens",
                role.model()
@@ -950,6 +1128,15 @@ impl RequestContext {
                "compression_threshold",
                app.compression_threshold.to_string(),
            ),
            ("memory", super::format_option_value(&app.memory)),
            (
                "memory_cap_with_tools",
                super::format_option_value(&app.memory_cap_with_tools),
            ),
            (
                "memory_cap_without_tools",
                super::format_option_value(&app.memory_cap_without_tools),
            ),
            (
                "rag_reranker_model",
                super::format_option_value(&rag_reranker_model),
@@ -961,6 +1148,7 @@ impl RequestContext {
                app.function_calling_support.to_string(),
            ),
            ("mcp_server_support", app.mcp_server_support.to_string()),
            ("skills_enabled", app.skills_enabled.to_string()),
            ("auto_continue", app.auto_continue.to_string()),
            ("max_auto_continues", app.max_auto_continues.to_string()),
            ("stream", app.stream.to_string()),
@@ -976,9 +1164,11 @@ impl RequestContext {
            ("roles_dir", display_path(&paths::roles_dir())),
            ("skills_dir", display_path(&paths::skills_dir())),
            ("sessions_dir", display_path(&self.sessions_dir())),
            ("memory_dir", display_path(&paths::global_memory_dir())),
            ("rags_dir", display_path(&paths::rags_dir())),
            ("macros_dir", display_path(&paths::macros_dir())),
            ("functions_dir", display_path(&paths::functions_dir())),
            ("sbx_kit_dir", display_path(&paths::sbx_kit_dir())),
            ("messages_file", display_path(&self.messages_file())),
        ];
@@ -1092,7 +1282,7 @@ impl RequestContext {
    pub fn generate_prompt_context(&self, app: &AppConfig) -> HashMap<&str, String> {
        let mut output = HashMap::new();
-        let role = self.extract_role(app).unwrap_or_else(|err| {
+        let role = self.extract_role_impl(app, false).unwrap_or_else(|err| {
            warn!("failed to compute effective role for prompt rendering: {err}");
            Role::default()
        });
@@ -1836,6 +2026,7 @@ impl RequestContext {
                } else {
                    self.update_app_config(|app| app.skills_enabled = value.unwrap_or(true));
                }
                self.refresh_tool_scope(abort_signal.clone()).await?;
            }
            "enabled_mcp_servers" => {
                let raw: Option<String> = super::parse_value(value)?;
@@ -1945,11 +2136,15 @@ impl RequestContext {
                } else {
                    self.update_app_config(|app| app.auto_continue = value);
                }
-                if value
+                let should_register = self.agent.is_none()
                    && self.app.config.function_calling_support
-                    && !self.tool_scope.functions.contains("todo__init")
+                    && self.auto_continue_config().enabled;
-                {
+                let already_registered = self.tool_scope.functions.contains("todo__init");
                if should_register && !already_registered {
                    self.tool_scope.functions.append_todo_functions();
                } else if !should_register && already_registered {
                    self.tool_scope.functions.remove_todo_functions();
                }
            }
            "max_auto_continues" => {
@@ -1992,6 +2187,24 @@ impl RequestContext {
                    self.update_app_config(|app| app.skill_instructions = value);
                }
            }
            "memory" => {
                let value: bool = value.parse().with_context(|| "Invalid value")?;
                if let Some(session) = self.session.as_mut() {
                    session.set_memory(Some(value));
                } else {
                    self.update_app_config(|app| app.memory = Some(value));
                }
                let should_register = self.should_register_memory_tools();
                let already_registered = self.tool_scope.functions.contains("memory__read");
                if should_register && !already_registered {
                    self.tool_scope.functions.append_memory_functions();
                } else if !should_register && already_registered {
                    self.tool_scope.functions.remove_memory_functions();
                }
            }
            _ => bail!("Unknown key '{key}'"),
        }
        Ok(())
@@ -2068,11 +2281,6 @@ impl RequestContext {
                    super::map_completion_values(values)
                }
                ".macro" => super::map_completion_values(paths::list_macros()),
                ".skill" => super::map_completion_values(vec![
                    "loaded".to_string(),
                    "load".to_string(),
                    "unload".to_string(),
                ]),
                ".starter" => match &self.agent {
                    Some(agent) => agent
                        .conversation_starters()
@@ -2094,6 +2302,7 @@ impl RequestContext {
                        "inject_skill_instructions",
                        "skill_instructions",
                        "max_auto_continues",
                        "memory",
                        "save_session",
                        "compression_threshold",
                        "rag_reranker_model",
@@ -2131,6 +2340,17 @@ impl RequestContext {
                }
                _ => vec![],
            };
        } else if cmd == ".mcp" && args.first() == Some(&"auth") && args.len() == 2 {
            if let Some(mcp_config) = &self.app.mcp_config {
                values = super::map_completion_values(
                    mcp_config
                        .mcp_servers
                        .iter()
                        .filter(|(_, spec)| spec.is_remote())
                        .map(|(name, _)| name.clone())
                        .collect(),
                );
            }
        } else if (cmd == ".edit" && args.first() == Some(&"skill") && args.len() == 2)
            || (cmd == ".skill" && args.first() == Some(&"load") && args.len() == 2)
        {
@@ -2264,10 +2484,11 @@ impl RequestContext {
                    super::complete_bool(config.inject)
                }
                "skill_instructions" => vec!["null".to_string()],
                "memory" => super::complete_bool(self.should_inject_memory()),
                _ => vec![],
            };
            values = candidates.into_iter().map(|v| (v, None)).collect();
-        } else if cmd == ".vault" && args.len() == 2 {
+        } else if cmd == ".vault" && args.len() == 2 && args[0] != "list" {
            values = self
                .app
                .vault
@@ -2396,6 +2617,9 @@ impl RequestContext {
        if app.function_calling_support && policy.skills_enabled {
            functions.append_skill_functions();
        }
        if self.should_register_memory_tools() {
            functions.append_memory_functions();
        }
        let tool_tracker = self.tool_scope.tool_tracker.clone();
        self.tool_scope = ToolScope {
@@ -2655,7 +2879,7 @@ impl RequestContext {
        if self.agent.take().is_some() {
            if let Some(supervisor) = self.supervisor.clone() {
-                supervisor.read().cancel_all();
+                supervisor.read().cancel_recursive();
            }
            self.supervisor = None;
            self.parent_supervisor = None;
@@ -2664,6 +2888,7 @@ impl RequestContext {
            self.escalation_queue = None;
            self.current_depth = 0;
            self.auto_continue_count = 0;
            self.pending_agents_guardrail_count = 0;
            self.todo_list = TodoList::default();
            self.rag.take();
            self.discontinuous_last_message();
@@ -3163,6 +3388,46 @@ mod tests {
        assert!(!Arc::ptr_eq(&ctx.app.config, &previous));
    }
    /// An app-level `memory = Some(false)` must turn memory injection off.
    #[test]
    fn memory_config_app_some_false_disables_via_cascade() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| app.memory = Some(false));

        let injects_memory = ctx.should_inject_memory();
        assert!(
            !injects_memory,
            "AppConfig.memory=Some(false) must disable memory regardless of on-disk content (this is the --no-memory CLI path)"
        );
    }
    /// A role that sets `memory: false` must override an app-level `memory = Some(true)`.
    #[test]
    fn memory_config_role_false_beats_app_true_in_cascade() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| app.memory = Some(true));

        let memory_off_role = Role::new("memory_off_role", "---\nmemory: false\n---\n");
        assert_eq!(
            memory_off_role.memory(),
            Some(false),
            "metadata parser sanity check"
        );
        ctx.role = Some(memory_off_role);

        let injects_memory = ctx.should_inject_memory();
        assert!(
            !injects_memory,
            "Role::memory=Some(false) must win over AppConfig::memory=Some(true)"
        );
    }
    /// Memory tools must not be registered when function calling support is off.
    #[test]
    fn should_register_memory_tools_false_when_function_calling_off() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| {
            app.memory = Some(true);
            app.function_calling_support = false;
        });

        let registers_tools = ctx.should_register_memory_tools();
        assert!(
            !registers_tools,
            "memory tools must require function_calling_support even when memory itself would otherwise be enabled"
        );
    }
    #[test]
    fn use_role_obj_sets_role() {
        let mut ctx = create_test_ctx();
@@ -3433,6 +3698,7 @@ mod tests {
                        cwd: None,
                        url: None,
                        headers: None,
                        oauth_client_id: None,
                    },
                );
            }
@@ -3579,6 +3845,44 @@ mod tests {
        );
    }
    /// `.set skills_enabled false` must drop every `skill__*` function from the tool scope.
    #[test]
    #[serial]
    fn update_skills_enabled_false_removes_skill_meta_tools_from_scope() {
        /// Names of all functions currently declared in the context's tool scope.
        fn declared_names(ctx: &RequestContext) -> Vec<String> {
            ctx.tool_scope
                .functions
                .declarations()
                .iter()
                .map(|f| f.name.clone())
                .collect()
        }

        /// Whether any of `names` belongs to the `skill__` family.
        fn has_skill_tool(names: &[String]) -> bool {
            names.iter().any(|n| n.starts_with("skill__"))
        }

        let _guard = TestConfigDirGuard::new();
        let app_state = app_state_with_mcp_config(false, &[]);
        let mut ctx = RequestContext::new(app_state, WorkingMode::Repl);
        let app = ctx.app.config.clone();
        let abort = utils::create_abort_signal();
        run_async(ctx.rebuild_tool_scope(&app, None, abort.clone())).unwrap();

        let names_before = declared_names(&ctx);
        assert!(
            has_skill_tool(&names_before),
            "expected skill__* functions before toggle, got: {names_before:?}"
        );

        run_async(ctx.update("skills_enabled false", abort)).unwrap();

        let names_after = declared_names(&ctx);
        assert!(
            !has_skill_tool(&names_after),
            "expected skill__* functions to be removed after `.set skills_enabled false`, got: {names_after:?}"
        );
    }
    #[test]
    fn select_functions_returns_none_when_no_tools_enabled() {
        let ctx = create_test_ctx();
@@ -3878,9 +4182,84 @@ mod tests {
    }
    #[test]
-    fn state_empty_context() {
+    fn state_empty_context_has_no_context_flags() {
        let ctx = create_test_ctx();
-        assert_eq!(ctx.state(), StateFlags::empty());
+
        let state = ctx.state();
        assert!(!state.contains(StateFlags::ROLE));
        assert!(!state.contains(StateFlags::SESSION));
        assert!(!state.contains(StateFlags::SESSION_EMPTY));
        assert!(!state.contains(StateFlags::AGENT));
        assert!(!state.contains(StateFlags::RAG));
    }
    #[test]
    fn state_includes_function_calling_when_app_enables_it() {
        let ctx = create_test_ctx();
        assert!(ctx.state().contains(StateFlags::FUNCTION_CALLING));
    }
    #[test]
    fn state_includes_skills_enabled_when_app_enables_it() {
        let ctx = create_test_ctx();
        assert!(ctx.state().contains(StateFlags::SKILLS_ENABLED));
    }
    #[test]
    fn state_omits_skills_enabled_when_app_disables_it() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| app.skills_enabled = false);
        assert!(!ctx.state().contains(StateFlags::SKILLS_ENABLED));
    }
    #[test]
    fn state_skills_enabled_respects_session_override() {
        let mut ctx = create_test_ctx();
        let mut session = Session::default();
        session.set_skills_enabled(Some(false));
        ctx.session = Some(session);
        assert!(!ctx.state().contains(StateFlags::SKILLS_ENABLED));
    }
    #[test]
    fn state_skills_enabled_respects_role_override() {
        let mut ctx = create_test_ctx();
        let role = Role::new("r", "---\nskills_enabled: false\n---\nbody");
        ctx.role = Some(role);
        assert!(!ctx.state().contains(StateFlags::SKILLS_ENABLED));
    }
    #[test]
    fn state_omits_function_calling_when_app_disables_it() {
        // Default app config except that function calling is switched off.
        let config = AppConfig {
            function_calling_support: false,
            ..AppConfig::default()
        };
        let app_state = Arc::new(AppState {
            config: Arc::new(config),
            vault: Arc::new(Vault::default()),
            mcp_factory: Arc::new(McpFactory::default()),
            rag_cache: Arc::new(RagCache::default()),
            mcp_config: None,
            mcp_log_path: None,
            mcp_registry: None,
            functions: Functions::default(),
        });

        let ctx = RequestContext::new(app_state, WorkingMode::Cmd);
        let flags = ctx.state();
        assert!(!flags.contains(StateFlags::FUNCTION_CALLING));
    }
    #[test]
@@ -3908,6 +4287,144 @@ mod tests {
        assert!(state.contains(StateFlags::SESSION_EMPTY));
    }
    #[test]
    fn todo_info_errors_when_auto_continue_disabled() {
        let ctx = create_test_ctx();
        let err = ctx.todo_info().unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("Auto-continuation is disabled"),
            "expected error to mention auto-continuation, got: {msg}"
        );
    }
    #[test]
    fn todo_info_returns_empty_message_when_list_is_empty() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| app.auto_continue = true);
        let info = ctx.todo_info().unwrap();
        assert!(
            info.contains("No todos in the running list"),
            "expected 'No todos' message, got: {info}"
        );
    }
    // With auto-continue enabled and a populated todo list, `todo_info` must
    // render the goal, a progress summary, and each task description.
    #[test]
    fn todo_info_renders_running_list() {
        let mut ctx = create_test_ctx();
        ctx.update_app_config(|app| app.auto_continue = true);
        ctx.init_todo_list("Map Labs");
        ctx.add_todo("Discover columns");
        ctx.add_todo("Write report");
        // NOTE(review): `1` presumably identifies the first todo ("Discover columns");
        // whether it is an id or an index is not visible from here — confirm.
        ctx.mark_todo_done(1);
        let info = ctx.todo_info().unwrap();
        assert!(
            info.contains("Goal: Map Labs"),
            "expected goal in output, got: {info}"
        );
        // Two todos were added and one was marked done.
        assert!(
            info.contains("Progress: 1/2 completed"),
            "expected progress line, got: {info}"
        );
        assert!(
            info.contains("Discover columns"),
            "expected first task, got: {info}"
        );
        assert!(
            info.contains("Write report"),
            "expected second task, got: {info}"
        );
    }
    #[test]
    fn tools_info_returns_message_when_no_tools_enabled() {
        let ctx = create_test_ctx();
        let info = ctx.tools_info().unwrap();
        assert!(
            info.contains("No tools enabled"),
            "expected 'No tools enabled' message, got: {info}"
        );
    }
    // With the todo functions registered and a role enabling "all" tools,
    // `tools_info` must print its header line and list the `todo__*` entries
    // in alphabetical order.
    #[test]
    fn tools_info_lists_enabled_tool_names_alphabetically() {
        let mut ctx = create_test_ctx();
        ctx.tool_scope.functions.append_todo_functions();
        let mut role = Role::new("r", "p");
        role.set_enabled_tools(Some(vec!["all".to_string()]));
        ctx.role = Some(role);

        let info = ctx.tools_info().unwrap();
        assert!(
            info.contains("Tools enabled for the next request:"),
            "expected count line, got: {info}"
        );
        assert!(
            info.contains("todo__init"),
            "expected todo__init in output, got: {info}"
        );

        // Collect the todo__ lines once; the same vector serves both the
        // "at least one entry" check and the ordering check.
        let todo_lines: Vec<&str> = info
            .lines()
            .filter(|line| line.trim().starts_with("todo__"))
            .collect();
        assert!(
            !todo_lines.is_empty(),
            "expected at least one todo__ entry, got: {info}"
        );

        let mut sorted = todo_lines.clone();
        sorted.sort_unstable();
        assert_eq!(
            todo_lines, sorted,
            "expected todo__ entries to be alphabetically sorted, got: {todo_lines:?}"
        );
    }
    #[test]
    fn tools_info_errors_when_function_calling_disabled() {
        // Default app config except that function calling is switched off.
        let config = AppConfig {
            function_calling_support: false,
            ..AppConfig::default()
        };
        let app_state = Arc::new(AppState {
            config: Arc::new(config),
            vault: Arc::new(Vault::default()),
            mcp_factory: Arc::new(McpFactory::default()),
            rag_cache: Arc::new(RagCache::default()),
            mcp_config: None,
            mcp_log_path: None,
            mcp_registry: None,
            functions: Functions::default(),
        });
        let ctx = RequestContext::new(app_state, WorkingMode::Cmd);

        // The call must fail, and the error must name the disabled feature.
        let msg = ctx.tools_info().unwrap_err().to_string();
        let mentions_feature = msg.contains("Function calling is disabled");
        assert!(
            mentions_feature,
            "expected error to mention function calling, got: {msg}"
        );
    }
    #[test]
    fn role_info_errors_when_no_role() {
        let ctx = create_test_ctx();
@@ -4599,6 +5116,45 @@ mod tests {
        assert!(paths::skill_file("frontend-ui-ux").exists());
    }
    // Every installed agent directory that ships a `graph.yaml` must parse and
    // validate cleanly, and the known bundled graph agents must be among them.
    #[test]
    #[serial]
    fn bundled_graph_agents_parse_and_validate() {
        use crate::graph::GraphParser;
        use crate::graph::validator::GraphValidator;
        // NOTE(review): the guard presumably points the config paths at an isolated
        // test directory for the lifetime of this test — confirm against its definition.
        let _guard = TestConfigDirGuard::new();
        Agent::install_builtin_agents(false).unwrap();
        Skill::install_builtin_skills(false).unwrap();
        // Names of the agents whose graph was actually parsed and validated.
        let mut checked = Vec::new();
        for entry in std::fs::read_dir(paths::agents_data_dir()).unwrap() {
            let dir = entry.unwrap().path();
            let graph_path = dir.join("graph.yaml");
            // Agents without a graph definition are out of scope for this test.
            if !graph_path.exists() {
                continue;
            }
            let name = dir.file_name().unwrap().to_string_lossy().to_string();
            let graph = GraphParser::new(&dir)
                .load_from_file(&graph_path)
                .unwrap_or_else(|e| panic!("graph.yaml for '{name}' failed to parse: {e}"));
            let result = GraphValidator::new(&dir).validate(&graph);
            assert!(
                result.errors.is_empty(),
                "graph.yaml for '{name}' failed validation: {:#?}",
                result.errors
            );
            checked.push(name);
        }
        // Sorted so the failure message below lists the names in a stable order.
        checked.sort();
        // Guards against the loop silently skipping the agents we expect to ship.
        for expected in ["coder", "librarian", "step-runner"] {
            assert!(
                checked.iter().any(|n| n == expected),
                "expected bundled graph agent '{expected}' to be checked; found {checked:?}"
            );
        }
    }
    #[test]
    #[serial]
    fn install_functions_force_preserves_user_mcp_json() {
@@ -83,6 +83,8 @@ pub struct Role {
    inject_skill_instructions: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    skill_instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    memory: Option<bool>,
    #[serde(skip)]
    model: Model,
@@ -132,6 +134,7 @@ impl Role {
                    "skill_instructions" => {
                        role.skill_instructions = value.as_str().map(|v| v.to_string())
                    }
                    "memory" => role.memory = value.as_bool(),
                    _ => (),
                }
            }
@@ -205,6 +208,9 @@ impl Role {
        if let Some(skill_instructions) = &self.skill_instructions {
            metadata.push(format!("skill_instructions: {skill_instructions}"));
        }
        if let Some(memory) = self.memory {
            metadata.push(format!("memory: {memory}"));
        }
        if metadata.is_empty() {
            format!("{}\n", self.prompt)
        } else if self.prompt.is_empty() {
@@ -323,6 +329,10 @@ impl Role {
        self.skill_instructions.as_deref()
    }
    /// Returns the role's `memory` setting, or `None` when the role does not set one.
    pub fn memory(&self) -> Option<bool> {
        self.memory
    }
    pub fn skills_enabled(&self) -> Option<bool> {
        self.skills_enabled
    }
@@ -60,6 +60,8 @@ pub struct Session {
    inject_skill_instructions: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    skill_instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    memory: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    role_name: Option<String>,
@@ -161,6 +163,14 @@ impl Session {
        self.messages.is_empty() && self.compressed_messages.is_empty()
    }
    /// Returns the session's `messages` as a borrowed slice.
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }
    /// Returns the session's `compressed_messages` as a borrowed slice.
    pub fn compressed_messages(&self) -> &[Message] {
        &self.compressed_messages
    }
    pub fn name(&self) -> &str {
        &self.name
    }
@@ -237,6 +247,9 @@ impl Session {
        if let Some(skill_instructions) = self.skill_instructions() {
            data["skill_instructions"] = skill_instructions.into();
        }
        if let Some(memory) = self.memory() {
            data["memory"] = memory.into();
        }
        let (tokens, percent) = self.tokens_usage();
        data["total_tokens"] = tokens.into();
        if let Some(max_input_tokens) = self.model().max_input_tokens() {
@@ -324,6 +337,9 @@ impl Session {
        if let Some(skill_instructions) = self.skill_instructions() {
            items.push(("skill_instructions", skill_instructions.to_string()));
        }
        if let Some(memory) = self.memory() {
            items.push(("memory", memory.to_string()));
        }
        if let Some(max_input_tokens) = self.model().max_input_tokens() {
            items.push(("max_input_tokens", max_input_tokens.to_string()));
@@ -473,6 +489,10 @@ impl Session {
        self.skill_instructions.as_deref()
    }
    /// Returns the session's `memory` setting, or `None` when the session does not set one.
    pub fn memory(&self) -> Option<bool> {
        self.memory
    }
    pub fn set_inject_todo_instructions(&mut self, value: Option<bool>) {
        if self.inject_todo_instructions != value {
            self.inject_todo_instructions = value;
@@ -494,6 +514,13 @@ impl Session {
        }
    }
    /// Stores a new `memory` setting for the session.
    ///
    /// The session is flagged as dirty only when the stored value actually
    /// changes; assigning the current value again is a no-op.
    pub fn set_memory(&mut self, value: Option<bool>) {
        if self.memory == value {
            return;
        }
        self.memory = value;
        self.dirty = true;
    }
    pub fn set_skill_instructions(&mut self, value: Option<String>) {
        if self.skill_instructions != value {
            self.skill_instructions = value;
@@ -68,6 +68,14 @@ fn normalize_version(requested: Option<String>) -> Option<String> {
    }
 }
 /// Picks the release target triple to request for a self-update.
 ///
 /// Returns the musl triple for Linux on `x86_64` or `aarch64`, and `None`
 /// for every other OS/architecture combination.
 fn preferred_update_target() -> Option<&'static str> {
    if env::consts::OS != "linux" {
        return None;
    }
    match env::consts::ARCH {
        "x86_64" => Some("x86_64-unknown-linux-musl"),
        "aarch64" => Some("aarch64-unknown-linux-musl"),
        _ => None,
    }
 }
 fn is_dir_writable(dir: &Path) -> bool {
    let probe = dir.join(format!(".coyote-update-write-test-{}", process::id()));
    match OpenOptions::new().write(true).create_new(true).open(&probe) {
@@ -147,6 +155,9 @@ pub fn run_self_update(requested: Option<String>, force: bool) -> Result<()> {
    if let Some(tag) = &target_tag {
        builder.target_version_tag(tag.as_str());
    }
    if let Some(target) = preferred_update_target() {
        builder.target(target);
    }
    let status = builder
        .build()
        .context("Failed to configure the self-update")?
@@ -1,3 +1,4 @@
 pub(crate) mod memory;
 pub(crate) mod skill;
 pub(crate) mod supervisor;
 pub(crate) mod todo;
@@ -19,6 +20,7 @@ use crate::parsers::{bash, python, typescript};
 use anyhow::{Context, Result, anyhow, bail};
 use indexmap::IndexMap;
 use indoc::formatdoc;
 use memory::MEMORY_FUNCTION_PREFIX;
 use rust_embed::Embed;
 use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
@@ -355,6 +357,21 @@ impl Functions {
        self.declarations.extend(todo::todo_function_declarations());
    }
    /// Removes every declaration whose name starts with `TODO_FUNCTION_PREFIX`.
    pub fn remove_todo_functions(&mut self) {
        self.declarations
            .retain(|f| !f.name.starts_with(TODO_FUNCTION_PREFIX));
    }
    /// Appends the declarations returned by `memory::memory_function_declarations()`.
    ///
    /// No de-duplication is performed here; calling this repeatedly extends the
    /// list each time.
    pub fn append_memory_functions(&mut self) {
        self.declarations
            .extend(memory::memory_function_declarations());
    }
    /// Removes every declaration whose name starts with `MEMORY_FUNCTION_PREFIX`.
    pub fn remove_memory_functions(&mut self) {
        self.declarations
            .retain(|f| !f.name.starts_with(MEMORY_FUNCTION_PREFIX));
    }
    pub fn append_skill_functions(&mut self) {
        self.declarations
            .extend(skill::skill_function_declarations());
@@ -1046,6 +1063,13 @@ impl ToolCall {
                    json!({"tool_call_error": error_msg})
                })
            }
            _ if cmd_name.starts_with(MEMORY_FUNCTION_PREFIX) => {
                memory::handle_memory_tool(ctx, &cmd_name, &json_data).unwrap_or_else(|e| {
                    let error_msg = format!("Memory tool failed: {e}");
                    eprintln!("{}", warning_text(&format!("⚠️ {error_msg} ⚠️")));
                    json!({"tool_call_error": error_msg})
                })
            }
            _ if cmd_name.starts_with(SKILL_FUNCTION_PREFIX) => {
                skill::handle_skill_tool(ctx, &cmd_name, &json_data)
                    .await
@@ -1268,11 +1292,13 @@ pub fn run_llm_function(
        let mut buffer = [0; 1024];
        let mut reader = stdout;
        let mut out = io::stdout();
        let mut buf = Vec::new();
        while let Ok(n) = reader.read(&mut buffer) {
            if n == 0 {
                break;
            }
            let chunk = &buffer[0..n];
            buf.extend_from_slice(chunk);
            let mut last_pos = 0;
            for (i, &byte) in chunk.iter().enumerate() {
                if byte == b'\n' {
@@ -1286,6 +1312,7 @@ pub fn run_llm_function(
            }
            let _ = out.flush();
        }
        buf
    });
    let stderr_thread = std::thread::spawn(move || {
@@ -1318,18 +1345,22 @@ pub fn run_llm_function(
    let status = child
        .wait()
        .map_err(|err| anyhow!("Unable to run {command_name}, {err}"))?;
-    let _ = stdout_thread.join();
+    let stdout_bytes = stdout_thread.join().unwrap_or_default();
    let stderr_bytes = stderr_thread.join().unwrap_or_default();
    let exit_code = status.code().unwrap_or_default();
    if exit_code != 0 {
        let stderr = String::from_utf8_lossy(&stderr_bytes).trim().to_string();
        let stdout = String::from_utf8_lossy(&stdout_bytes).trim().to_string();
        let tool_error_message = format!("Tool call '{command_name}' exited with code {exit_code}");
        eprintln!("{}", warning_text(&format!("⚠️ {tool_error_message} ⚠️")));
        let mut error_json = json!({"tool_call_error": tool_error_message});
        if !stderr.is_empty() {
            error_json["stderr"] = json!(stderr);
        }
        if !stdout.is_empty() {
            error_json["stdout"] = json!(stdout);
        }
        debug!("Tool call error: {error_json:?}");
        return Ok(Some(error_json.to_string()));
    }
@@ -1660,6 +1691,33 @@ mod tests {
        assert!(f.declarations().is_empty());
    }
    #[test]
    fn bundled_bash_tools_generate_declarations() {
        // Every bundled `*.sh` tool must yield at least one function declaration.
        let tools_dir =
            std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("assets/functions/tools");
        let scripts = std::fs::read_dir(&tools_dir)
            .unwrap()
            .map(|entry| entry.unwrap().path())
            .filter(|candidate| candidate.extension().and_then(OsStr::to_str) == Some("sh"));

        let mut checked = Vec::new();
        for script in scripts {
            let name = script.file_stem().unwrap().to_string_lossy().to_string();
            let declarations = match Functions::generate_declarations(&script) {
                Ok(found) => found,
                Err(e) => panic!("bundled tool '{name}' failed to parse: {e}"),
            };
            assert!(
                !declarations.is_empty(),
                "bundled tool '{name}' produced no function declaration"
            );
            checked.push(name);
        }

        // Make sure the loop really visited the tools we know are shipped.
        for expected in ["fs_grep", "ast_grep", "execute_command"] {
            let was_checked = checked.iter().any(|seen| seen == expected);
            assert!(
                was_checked,
                "expected bundled tool '{expected}' to be checked; found {checked:?}"
            );
        }
    }
    #[test]
    fn functions_append_todo_adds_declarations() {
        let mut f = Functions::default();
@@ -3,7 +3,7 @@ use crate::client::{Model, ModelType, call_chat_completions};
 use crate::config::{Agent, AppState, Input, RequestContext, Role, RoleLike};
 use crate::supervisor::mailbox::{Envelope, EnvelopePayload, Inbox};
 use crate::supervisor::{AgentExitStatus, AgentHandle, AgentResult, Supervisor};
-use crate::utils::{AbortSignal, create_abort_signal};
+use crate::utils::{AbortSignal, create_abort_signal, wait_abort_signal};
 use crate::graph;
 use anyhow::{Context, Result, anyhow, bail};
@@ -16,10 +16,69 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::time::Duration;
 use tokio::time;
 use tokio::time::Instant;
 use uuid::Uuid;
 pub const SUPERVISOR_FUNCTION_PREFIX: &str = "agent__";
 pub const PENDING_AGENTS_GUARDRAIL_MAX: u32 = 3;
 /// Outcome of the pending-agents guardrail check performed when a turn is
 /// about to end.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum GuardrailAction {
    /// Nothing to do; the turn may end normally.
    NoAction,
    /// Continue the turn with the contained reminder prompt as the next input.
    Inject(String),
    /// The reminder budget is exhausted; carries the ids of the agents that
    /// were still pending when the guardrail gave up.
    ForceTerminate(Vec<String>),
 }
 /// Lists the ids of agents registered with the context's supervisor for
 /// which `is_finished` reports `Some(false)`.
 ///
 /// Returns an empty vector when the context has no supervisor.
 pub fn pending_agent_ids(ctx: &RequestContext) -> Vec<String> {
    let supervisor = match ctx.supervisor.as_ref() {
        Some(supervisor) => supervisor,
        None => return Vec::new(),
    };
    let guard = supervisor.read();
    let mut unfinished = Vec::new();
    for (id, _) in guard.list_agents() {
        // `None` and `Some(true)` are both skipped; only `Some(false)` counts as pending.
        if let Some(false) = guard.is_finished(id) {
            unfinished.push(id.to_string());
        }
    }
    unfinished
 }
 /// Renders the guardrail reminder shown to the model when it tries to end
 /// its turn while spawned agents are still running.
 ///
 /// The message states how many agents are pending, lists each id as a
 /// `- <id>` bullet on its own line, and tells the model to collect or cancel
 /// every one of them.
 pub fn build_pending_agents_guardrail_prompt(ids: &[String]) -> String {
    let count = ids.len();
    // One bullet per agent, newline-separated, with no trailing newline.
    let mut id_list = String::new();
    for (position, id) in ids.iter().enumerate() {
        if position > 0 {
            id_list.push('\n');
        }
        id_list.push_str("- ");
        id_list.push_str(id);
    }
    format!(
        "[SYSTEM GUARDRAIL] You attempted to end your turn while {count} spawned background agent(s) \
         are still running:\n{id_list}\n\nThese agents will be abandoned if your turn ends now. You MUST \
         reclaim each one before ending your turn. For each agent: call `agent__collect` (blocks until \
         done, returns output) or `agent__cancel` (discards). Do NOT emit a text-only response \
         expecting them to 'report back' — they will not."
    )
 }
 /// Decides what to do when a turn is about to end, based on whether spawned
 /// agents are still pending.
 ///
 /// * No pending agents: resets the reminder counter and returns `NoAction`.
 /// * Pending agents and fewer than `PENDING_AGENTS_GUARDRAIL_MAX` reminders
 ///   issued so far: increments the counter and returns `Inject` with a
 ///   reminder prompt.
 /// * Pending agents and the reminder budget used up: calls
 ///   `cancel_recursive` on the supervisor (if any), resets the counter, and
 ///   returns `ForceTerminate` with the pending ids.
 pub fn check_pending_agents_guardrail(ctx: &mut RequestContext) -> GuardrailAction {
    let still_running = pending_agent_ids(ctx);
    if still_running.is_empty() {
        ctx.pending_agents_guardrail_count = 0;
        return GuardrailAction::NoAction;
    }

    let reminders_exhausted = ctx.pending_agents_guardrail_count >= PENDING_AGENTS_GUARDRAIL_MAX;
    if !reminders_exhausted {
        ctx.pending_agents_guardrail_count += 1;
        let reminder = build_pending_agents_guardrail_prompt(&still_running);
        return GuardrailAction::Inject(reminder);
    }

    // Out of reminders: stop nagging and tear the remaining agents down.
    if let Some(supervisor) = ctx.supervisor.clone() {
        supervisor.read().cancel_recursive();
    }
    ctx.pending_agents_guardrail_count = 0;
    GuardrailAction::ForceTerminate(still_running)
 }
 pub fn escalation_function_declarations() -> Vec<FunctionDeclaration> {
    vec![FunctionDeclaration {
        name: format!("{SUPERVISOR_FUNCTION_PREFIX}reply_escalation"),
@@ -55,7 +114,11 @@ pub fn supervisor_function_declarations() -> Vec<FunctionDeclaration> {
    vec![
        FunctionDeclaration {
            name: format!("{SUPERVISOR_FUNCTION_PREFIX}spawn"),
-            description: "Spawn a subagent to run in the background. Returns a task_id for tracking. The agent runs in parallel. You can continue working while it executes.".to_string(),
+            description: "Spawn a subagent to run in the background. Returns an `id` immediately so you can continue \
                          working in parallel. CRITICAL: every spawned agent MUST be reclaimed before you end your \
                          turn — call `agent__collect` to retrieve its output, or `agent__cancel` if you no longer \
                          need it. Ending your turn with pending agents will abandon their work and the system will \
                          reject the turn-end.".to_string(),
            parameters: JsonSchema {
                type_value: Some("object".to_string()),
                properties: Some(IndexMap::from([
@@ -109,7 +172,11 @@ pub fn supervisor_function_declarations() -> Vec<FunctionDeclaration> {
        },
        FunctionDeclaration {
            name: format!("{SUPERVISOR_FUNCTION_PREFIX}collect"),
-            description: "Wait for a spawned agent to finish and return its result. Blocks until the agent completes.".to_string(),
+            description: "Block until the named spawned agent finishes and return its result. This is your primary \
                          wait primitive — it pauses your execution until the agent completes (or you are interrupted). \
                          Call this for every agent you spawned before ending your turn. Do NOT end your turn assuming \
                          agents will 'report back later' — they will not; they will be abandoned. If you no longer \
                          need an agent's result, call `agent__cancel` instead.".to_string(),
            parameters: JsonSchema {
                type_value: Some("object".to_string()),
                properties: Some(IndexMap::from([(
@@ -137,7 +204,10 @@ pub fn supervisor_function_declarations() -> Vec<FunctionDeclaration> {
        },
        FunctionDeclaration {
            name: format!("{SUPERVISOR_FUNCTION_PREFIX}cancel"),
-            description: "Cancel a running subagent by its ID.".to_string(),
+            description: "Cancel a running subagent by its ID. Use this when an agent's output is no longer needed \
                          (e.g. you changed direction, or you're about to end your turn and don't want to wait). \
                          Cancellation cascades: all of the cancelled agent's own descendants are also cancelled. This \
                          call waits briefly for the agent to actually finish cleanup before returning.".to_string(),
            parameters: JsonSchema {
                type_value: Some("object".to_string()),
                properties: Some(IndexMap::from([(
@@ -315,7 +385,7 @@ pub async fn handle_supervisor_tool(
        "check" => handle_check(ctx, args).await,
        "collect" => handle_collect(ctx, args).await,
        "list" => handle_list(ctx),
-        "cancel" => handle_cancel(ctx, args),
+        "cancel" => handle_cancel(ctx, args).await,
        "send_message" => handle_send_message(ctx, args),
        "check_inbox" => handle_check_inbox(ctx),
        "task_create" => handle_task_create(ctx, args),
@@ -370,14 +440,28 @@ pub fn run_child_agent(
            }
            if tool_results.is_empty() {
-                break;
+                match check_pending_agents_guardrail(&mut child_ctx) {
                    GuardrailAction::NoAction => break,
                    GuardrailAction::ForceTerminate(ids) => {
                        log::warn!(
                            "Pending-agent guardrail force-cancelled {} agent(s) after max reminders: {:?}",
                            ids.len(),
                            ids
                        );
                        break;
                    }
                    GuardrailAction::Inject(prompt) => {
                        input = Input::from_str(&child_ctx, &prompt, None)?;
                        continue;
                    }
                }
            }
            input = input.merge_tool_results(output, tool_results);
        }
        if let Some(supervisor) = child_ctx.supervisor.clone() {
-            supervisor.read().cancel_all();
+            supervisor.read().cancel_recursive();
        }
        Ok(accumulated_output)
@@ -642,6 +726,7 @@ async fn handle_spawn(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
    let spawn_agent_id = agent_id.clone();
    let spawn_agent_name = agent_name.clone();
    let spawn_abort = child_abort.clone();
    let child_supervisor = child_ctx.supervisor.clone();
    let join_handle = tokio::spawn(async move {
        let result = run_child_agent(child_ctx, input, spawn_abort).await;
@@ -669,6 +754,7 @@ async fn handle_spawn(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
        inbox: child_inbox,
        abort_signal: child_abort,
        join_handle,
        child_supervisor,
    };
    let supervisor = ctx
@@ -683,7 +769,11 @@ async fn handle_spawn(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
        "status": "ok",
        "id": agent_id,
        "agent": agent_name,
-        "message": format!("Agent '{agent_name}' spawned as '{agent_id}'. Use agent__check or agent__collect to get results."),
+        "message": format!("Agent '{agent_name}' spawned as '{agent_id}' and is running in the background. CRITICAL: \
                           you MUST reclaim this agent before ending your turn — call `agent__collect` (blocks until \
                           done, returns output) or `agent__cancel` (if you no longer need it). Ending your turn with \
                           unreclaimed agents will be rejected and forces you to handle them. Do NOT assume the agent \
                           will 'report back' on its own."),
    }))
 }
@@ -743,7 +833,7 @@ async fn handle_collect(ctx: &mut RequestContext, args: &Value) -> Result<Value>
        .cloned()
        .ok_or_else(|| anyhow!("No supervisor active"))?;
-    {
+    let target_abort = {
        let sup = supervisor.read();
        if sup.is_finished(id).is_none() {
            return Ok(json!({
@@ -751,7 +841,8 @@ async fn handle_collect(ctx: &mut RequestContext, args: &Value) -> Result<Value>
                "message": format!("Agent '{id}' not found. Use agent__check to verify it exists and is finished.")
            }));
        }
-    }
+        sup.abort_signal_for(id)
    };
    loop {
        let is_finished = {
@@ -775,7 +866,27 @@ async fn handle_collect(ctx: &mut RequestContext, args: &Value) -> Result<Value>
            }));
        }
-        time::sleep(Duration::from_millis(200)).await;
+        match target_abort.as_ref() {
            Some(abort) if abort.aborted() => {
                let deadline = Instant::now() + Duration::from_secs(2);
                while Instant::now() < deadline {
                    if supervisor.read().is_finished(id).unwrap_or(false) {
                        break;
                    }
                    time::sleep(Duration::from_millis(50)).await;
                }
                break;
            }
            Some(abort) => {
                tokio::select! {
                    _ = time::sleep(Duration::from_millis(200)) => {}
                    _ = wait_abort_signal(abort) => {}
                }
            }
            None => {
                time::sleep(Duration::from_millis(200)).await;
            }
        }
    }
    let handle = {
@@ -792,6 +903,7 @@ async fn handle_collect(ctx: &mut RequestContext, args: &Value) -> Result<Value>
                .map_err(|e| anyhow!("Agent failed: {e}"))?;
            let output = summarize_output(ctx, &result.agent_name, &result.output).await?;
            ctx.pending_agents_guardrail_count = 0;
            Ok(json!({
                "status": "completed",
@@ -836,7 +948,7 @@ fn handle_list(ctx: &mut RequestContext) -> Result<Value> {
    }))
 }
-fn handle_cancel(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
+async fn handle_cancel(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
    let id = args
        .get("id")
        .and_then(Value::as_str)
@@ -847,14 +959,34 @@ fn handle_cancel(ctx: &mut RequestContext, args: &Value) -> Result<Value> {
        .as_ref()
        .cloned()
        .ok_or_else(|| anyhow!("No supervisor active"))?;
    let mut sup = supervisor.write();
-    match sup.take(id) {
+    let handle = {
        let mut sup = supervisor.write();
        sup.take(id)
    };
    match handle {
        Some(handle) => {
            let agent_name = handle.agent_name.clone();
            if let Some(child_sup) = handle.child_supervisor.as_ref() {
                child_sup.read().cancel_recursive();
            }
            handle.abort_signal.set_ctrlc();
            let cleanup = tokio::time::timeout(Duration::from_secs(5), handle.join_handle).await;
            ctx.pending_agents_guardrail_count = 0;
            let message = match cleanup {
                Ok(_) => format!("Cancelled agent '{agent_name}' and waited for cleanup."),
                Err(_) => format!(
                    "Cancelled agent '{agent_name}'; cleanup did not complete within 5s. Its descendants have been signalled and will tear down asynchronously."
                ),
            };
            Ok(json!({
                "status": "ok",
-                "message": format!("Cancelled agent '{}'", handle.agent_name),
+                "message": message,
            }))
        }
        None => Ok(json!({
@@ -1283,6 +1415,7 @@ mod tests {
            inbox: Arc::new(Inbox::new()),
            abort_signal: create_abort_signal(),
            join_handle,
            child_supervisor: None,
        };
        ctx.supervisor
            .as_ref()
@@ -1362,6 +1495,7 @@ mod tests {
                inbox,
                abort_signal: abort,
                join_handle,
                child_supervisor: None,
            };
            ctx.supervisor
                .as_ref()
@@ -1381,7 +1515,7 @@ mod tests {
    fn handle_cancel_registered_agent() {
        let mut ctx = ctx_with_supervisor(4, 3);
        register_fake_agent(&mut ctx, "a1", "explore");
-        let result = handle_cancel(&mut ctx, &json!({"id": "a1"})).unwrap();
+        let result = run_async(handle_cancel(&mut ctx, &json!({"id": "a1"}))).unwrap();
        assert_eq!(result["status"], "ok");
        assert_eq!(ctx.supervisor.as_ref().unwrap().read().active_count(), 0);
    }
@@ -1389,14 +1523,14 @@ mod tests {
    #[test]
    fn handle_cancel_unknown_agent() {
        let mut ctx = ctx_with_supervisor(4, 3);
-        let result = handle_cancel(&mut ctx, &json!({"id": "missing"})).unwrap();
+        let result = run_async(handle_cancel(&mut ctx, &json!({"id": "missing"}))).unwrap();
        assert_eq!(result["status"], "error");
    }
    #[test]
    fn handle_cancel_no_supervisor_errors() {
        let mut ctx = RequestContext::new(default_app_state(), WorkingMode::Cmd);
-        let result = handle_cancel(&mut ctx, &json!({"id": "x"}));
+        let result = run_async(handle_cancel(&mut ctx, &json!({"id": "x"})));
        assert!(result.is_err());
    }
@@ -7,8 +7,10 @@ use crate::config::{
    Input, RequestContext, Role, RoleLike, SkillPolicy, should_inject_skill_instructions,
 };
 use crate::function::skill::skill_function_declarations;
 use crate::function::supervisor::{GuardrailAction, check_pending_agents_guardrail};
 use crate::utils::create_abort_signal;
 use anyhow::{Context, Error, Result, anyhow, bail};
 use log::warn;
 use serde_json::Value;
 use std::collections::HashSet;
 use std::sync::Arc;
@@ -266,7 +268,28 @@ async fn run_chat_loop(node: &LlmNode, prompt: &str, ctx: &mut RequestContext) -
        }
        if tool_results.is_empty() {
-            return Ok(accumulated);
+            match check_pending_agents_guardrail(ctx) {
                GuardrailAction::NoAction => return Ok(accumulated),
                GuardrailAction::ForceTerminate(ids) => {
                    warn!(
                        "Pending-agent guardrail force-cancelled {} agent(s) after max reminders: {:?}",
                        ids.len(),
                        ids
                    );
                    return Ok(accumulated);
                }
                GuardrailAction::Inject(prompt) => {
                    if turn + 1 == node.max_iterations {
                        bail!(
                            "llm node hit max_iterations ({}) before LLM concluded",
                            node.max_iterations
                        );
                    }
                    let role = ctx.role.clone();
                    input = Input::from_str(ctx, &prompt, role)?;
                    continue;
                }
            }
        }
        if turn + 1 == node.max_iterations {
@@ -10,6 +10,7 @@ mod repl;
 mod utils;
 mod mcp;
 mod parsers;
 mod sandbox;
 mod supervisor;
 mod vault;
@@ -22,27 +23,32 @@ use crate::client::{
 };
 use crate::config::paths;
 use crate::config::{
-    Agent, AppConfig, AppState, CODE_ROLE, Config, EXPLAIN_SHELL_ROLE, Input, RequestContext,
+    Agent, AppConfig, AppState, CODE_ROLE, Config, EXPLAIN_SHELL_ROLE, Input, MemoryScope,
-    SHELL_ROLE, TEMP_SESSION_NAME, WorkingMode, ensure_parent_exists, install_builtins,
+    RequestContext, SHELL_ROLE, TEMP_SESSION_NAME, WorkingMode, ensure_parent_exists,
-    list_agents, load_env_file, macro_execute, sync_models,
+    install_builtins, list_agents, load_env_file, macro_execute, sync_models,
 };
 use crate::function::supervisor::{GuardrailAction, check_pending_agents_guardrail};
 use crate::mcp::McpServersConfig;
 use crate::render::{prompt_theme, render_error};
 use crate::repl::Repl;
 use crate::utils::*;
-use crate::vault::Vault;
+use crate::vault::{Vault, interpolate_secrets};
-use anyhow::{Result, anyhow, bail};
+use anyhow::{Context, Result, anyhow, bail};
 use clap::{CommandFactory, Parser};
 use clap_complete::CompleteEnv;
 use client::ClientConfig;
 use inquire::{Select, Text, set_global_render_config};
-use log::LevelFilter;
+use log::{LevelFilter, warn};
 use log4rs::append::console::ConsoleAppender;
-use log4rs::append::file::FileAppender;
+use log4rs::append::rolling_file::RollingFileAppender;
 use log4rs::append::rolling_file::policy::compound::CompoundPolicy;
 use log4rs::append::rolling_file::policy::compound::roll::fixed_window::FixedWindowRoller;
 use log4rs::append::rolling_file::policy::compound::trigger::size::SizeTrigger;
 use log4rs::config::{Appender, Logger, Root};
 use log4rs::encode::pattern::PatternEncoder;
 use oauth::OAuthProvider;
 use std::path::PathBuf;
-use std::{env, process, sync::Arc};
+use std::{env, fs, process, sync::Arc};
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -55,6 +61,7 @@ async fn main() -> Result<()> {
        shell.generate_completions(&mut cmd);
        return Ok(());
    }
    if cli.tail_logs {
        tail_logs(cli.disable_log_colors).await;
        return Ok(());
@@ -91,6 +98,10 @@ async fn main() -> Result<()> {
            .await?;
    }
    if let Some(name) = &cli.sandbox {
        return sandbox::launch(name.clone(), cli.fresh, cli.no_mixins);
    }
    install_builtins()?;
    if let Some(category) = cli.install {
@@ -110,6 +121,49 @@ async fn main() -> Result<()> {
        return Ok(());
    }
    if let Some(server_name) = &cli.auth_mcp {
        let cfg = Config::load_with_interpolation(true).await?;
        let app_config = AppConfig::from_config(cfg)?;
        let vault = Vault::init(&app_config)?;
        let mcp_path = paths::mcp_config_file();
        if !mcp_path.exists() {
            bail!(
                "No MCP configuration file found at '{}'",
                mcp_path.display()
            );
        }
        let raw = tokio::fs::read_to_string(&mcp_path)
            .await
            .with_context(|| format!("Failed to read MCP config at '{}'", mcp_path.display()))?;
        let (content, missing) = interpolate_secrets(&raw, &vault)?;
        if !missing.is_empty() {
            bail!(
                "MCP config references vault secrets that are missing: {:?}",
                missing
            );
        }
        let mcp_config: McpServersConfig =
            serde_json::from_str(&content).context("Failed to parse MCP config file")?;
        let spec = mcp_config
            .mcp_servers
            .get(server_name.as_str())
            .ok_or_else(|| anyhow!("MCP server '{server_name}' not found in mcp.json"))?;
        if !spec.is_remote() {
            bail!(
                "MCP server '{server_name}' is a stdio server; OAuth is only supported for http/sse servers"
            );
        }
        let url = spec.url.as_deref().expect("validated: remote spec has url");
        mcp::oauth::run_mcp_oauth_flow(server_name, url, spec.oauth_client_id.as_deref()).await?;
        println!("Authentication saved. '{server_name}' is now available for use.");
        return Ok(());
    }
    if vault_flags {
        let cfg = Config::load_with_interpolation(true).await?;
        let app_config = AppConfig::from_config(cfg)?;
@@ -130,7 +184,10 @@ async fn main() -> Result<()> {
        )
        .await?,
    );
-    let ctx = RequestContext::bootstrap(app_state, working_mode, info_flag)?;
+    let mut ctx = RequestContext::bootstrap(app_state, working_mode, info_flag)?;
    let app_config = Arc::clone(&ctx.app.config);
    ctx.bootstrap_tools(&app_config, start_mcp_servers, abort_signal.clone())
        .await?;
    {
        let app = &*ctx.app.config;
@@ -292,12 +349,40 @@ async fn run(
    if cli.no_stream {
        update_app_config(&mut ctx, |app| app.stream = false);
    }
    if cli.no_memory {
        update_app_config(&mut ctx, |app| app.memory = Some(false));
    }
    if cli.empty_session {
        ctx.empty_session()?;
    }
    if cli.save_session {
        ctx.set_save_session_this_time()?;
    }
    if let Some(scope) = cli.init_memory {
        let (path, content) = match scope {
            MemoryScope::Global => (
                paths::global_memory_index_path(),
                "# Global Memory\n\n<!-- Universal facts about you go here. The LLM uses this as always-on context. -->\n<!-- Drill files (when created) are listed below. -->\n",
            ),
            MemoryScope::Workspace => (
                env::current_dir()?.join("COYOTE.md"),
                "# Workspace Memory\n\n<!-- Facts about this project go here. The LLM uses this as always-on context. -->\n",
            ),
        };
        if path.exists() {
            eprintln!("Memory marker already exists at '{}'.", path.display());
            return Ok(());
        }
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::write(&path, content)?;
        println!("✓ Created memory marker at '{}'.", path.display());
        return Ok(());
    }
    if cli.info {
        let app: Arc<AppConfig> = Arc::clone(&ctx.app.config);
        let info = ctx.info(app.as_ref())?;
@@ -391,6 +476,21 @@ async fn start_directive(
            abort_signal,
        )
        .await?;
    } else {
        match check_pending_agents_guardrail(ctx) {
            GuardrailAction::Inject(prompt) => {
                let guardrail_input = Input::from_str(ctx, &prompt, None)?;
                return start_directive(ctx, guardrail_input, code_mode, abort_signal).await;
            }
            GuardrailAction::ForceTerminate(ids) => {
                warn!(
                    "Pending-agent guardrail force-cancelled {} agent(s) after max reminders: {:?}",
                    ids.len(),
                    ids
                );
            }
            GuardrailAction::NoAction => {}
        }
    }
    ctx.exit_session()?;
@@ -532,7 +632,20 @@ fn setup_logger() -> Result<Option<PathBuf>> {
        }
        Some(path) => {
            ensure_parent_exists(&path)?;
-            let file_appender = FileAppender::builder().encoder(encoder.clone()).build(path);
+
            let archive_pattern = path
                .with_extension("archived.{}.log")
                .to_string_lossy()
                .into_owned();
            let trigger = SizeTrigger::new(10 * 1024 * 1024);
            let roller = FixedWindowRoller::builder()
                .build(&archive_pattern, 5)
                .unwrap();
            let policy = CompoundPolicy::new(Box::new(trigger), Box::new(roller));
            let file_appender = RollingFileAppender::builder()
                .encoder(encoder.clone())
                .build(path, Box::new(policy));
            match file_appender {
                Ok(appender) => {
@@ -555,7 +668,7 @@ fn setup_logger() -> Result<Option<PathBuf>> {
 fn init_file_logger(
    log_level: LevelFilter,
    log_filter: Option<String>,
-    file_appender: FileAppender,
+    file_appender: RollingFileAppender,
 ) -> log4rs::Config {
    let root_log_level = if log_filter.is_some() {
        LevelFilter::Off
@@ -1,3 +1,4 @@
 pub(crate) mod oauth;
 mod sse_transport;
 use crate::config::AppConfig;
@@ -73,6 +74,8 @@ pub(crate) struct McpServer {
    pub url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub headers: Option<IndexMap<String, String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub oauth_client_id: Option<String>,
 }
 impl McpServer {
@@ -107,10 +110,10 @@ impl McpServer {
                    "MCP server '{name}' is missing a \"command\" field (required for stdio transport)"
                ));
            }
-            if self.url.is_some() || self.headers.is_some() {
+            if self.url.is_some() || self.headers.is_some() || self.oauth_client_id.is_some() {
                return Err(anyhow!(
                    "MCP server '{name}' has type \"stdio\" but also specifies remote fields \
-                     (url/headers). Remove the remote fields or change the type to \"http\" or \"sse\"."
+                     (url/headers/oauth_client_id). Remove the remote fields or change the type to \"http\" or \"sse\"."
                ));
            }
        }
@@ -237,7 +240,7 @@ impl McpRegistry {
        debug!("Starting selected MCP servers: {:?}", ids_to_start);
-        let results: Vec<(String, Arc<_>, ServerCatalog)> = stream::iter(
+        let results: Vec<Option<(String, Arc<ConnectedServer>, ServerCatalog)>> = stream::iter(
            ids_to_start
                .into_iter()
                .map(|id| async { self.start_server(id).await }),
@@ -246,7 +249,7 @@ impl McpRegistry {
        .try_collect()
        .await?;
-        for (id, server, catalog) in results {
+        for (id, server, catalog) in results.into_iter().flatten() {
            self.servers.insert(id.clone(), server);
            self.catalogs.insert(id, catalog);
        }
@@ -257,14 +260,30 @@ impl McpRegistry {
    async fn start_server(
        &self,
        id: String,
-    ) -> Result<(String, Arc<ConnectedServer>, ServerCatalog)> {
+    ) -> Result<Option<(String, Arc<ConnectedServer>, ServerCatalog)>> {
        let spec = self
            .config
            .as_ref()
            .and_then(|c| c.mcp_servers.get(&id))
            .with_context(|| format!("MCP server not found in config: {id}"))?;
-        let service = spawn_mcp_server(spec, self.log_path.as_deref()).await?;
+        let bearer_token = if spec.is_remote() {
            oauth::load_valid_mcp_token(&id)
        } else {
            None
        };
        let service = match spawn_mcp_server(spec, self.log_path.as_deref(), bearer_token).await {
            Ok(s) => s,
            Err(e) if is_auth_required_error(&e) => {
                warn!(
                    "MCP server '{id}' requires OAuth authentication. \
                     Run `.mcp auth {id}` in the REPL to authenticate, then restart Coyote."
                );
                return Ok(None);
            }
            Err(e) => return Err(e),
        };
        let tools = service.list_tools(None).await?;
        debug!("Available tools for MCP server {id}: {tools:?}");
@@ -289,7 +308,7 @@ impl McpRegistry {
        info!("Started MCP server: {id}");
-        Ok((id.to_string(), service, catalog))
+        Ok(Some((id.to_string(), service, catalog)))
    }
    fn resolve_server_ids(&self, enabled_mcp_servers: Option<Vec<String>>) -> Vec<String> {
@@ -337,15 +356,18 @@ impl McpRegistry {
 pub(crate) async fn spawn_mcp_server(
    spec: &McpServer,
    log_path: Option<&Path>,
    bearer_token: Option<String>,
 ) -> Result<Arc<ConnectedServer>> {
    match spec.transport_type {
        McpTransportType::Http => {
            let url = spec.url.as_deref().expect("validated: http spec has url");
-            spawn_http_mcp_server(url, spec.headers.as_ref()).await
+            let headers = merge_bearer_token(spec.headers.as_ref(), bearer_token);
            spawn_http_mcp_server(url, headers.as_ref()).await
        }
        McpTransportType::Sse => {
            let url = spec.url.as_deref().expect("validated: sse spec has url");
-            spawn_sse_mcp_server(url, spec.headers.as_ref()).await
+            let headers = merge_bearer_token(spec.headers.as_ref(), bearer_token);
            spawn_sse_mcp_server(url, headers.as_ref()).await
        }
        McpTransportType::Stdio => {
            let command = spec
@@ -357,6 +379,30 @@ pub(crate) async fn spawn_mcp_server(
    }
 }
 /// Combines the configured request headers with an optional OAuth bearer token.
 ///
 /// * No token: the configured headers are returned unchanged (cloned), or `None`
 ///   when there are none.
 /// * Token present: the result contains every configured header plus
 ///   `Authorization: Bearer <token>`. Any configured authorization header is
 ///   replaced, whatever its letter case, because HTTP header names are
 ///   case-insensitive and a leftover `authorization` entry would otherwise be
 ///   kept alongside the injected one.
 fn merge_bearer_token(
    headers: Option<&IndexMap<String, String>>,
    bearer_token: Option<String>,
 ) -> Option<IndexMap<String, String>> {
    const AUTHORIZATION: &str = "Authorization";
    let Some(token) = bearer_token else {
        return headers.cloned();
    };
    let mut merged = headers.cloned().unwrap_or_default();
    // Drop differently-cased spellings; an exact-case entry stays so the insert
    // below overwrites it in place.
    merged.retain(|name, _| {
        name.as_str() == AUTHORIZATION || !name.eq_ignore_ascii_case(AUTHORIZATION)
    });
    merged.insert(AUTHORIZATION.to_string(), format!("Bearer {token}"));
    Some(merged)
 }
 /// Returns `true` when `e`, or any error in its cause chain, mentions "Auth required".
 ///
 /// The whole chain is scanned because the `Display` output of an `anyhow::Error`
 /// is only its outermost message; a layer that adds context would otherwise hide
 /// the underlying auth failure.
 fn is_auth_required_error(e: &anyhow::Error) -> bool {
    e.chain()
        .any(|cause| cause.to_string().contains("Auth required"))
 }
 async fn spawn_http_mcp_server(
    url: &str,
    headers: Option<&IndexMap<String, String>>,
@@ -433,8 +479,12 @@ async fn spawn_stdio_mcp_server(
        let log_file = OpenOptions::new()
            .create(true)
            .append(true)
-            .open(log_path)?;
+            .open(log_path)
-        let (transport, _) = TokioChildProcess::builder(cmd).stderr(log_file).spawn()?;
+            .with_context(|| format!("Failed to open MCP log file at '{}'", log_path.display()))?;
        let (transport, _) = TokioChildProcess::builder(cmd)
            .stderr(log_file)
            .spawn()
            .with_context(|| format!("Failed to spawn MCP server: {command}"))?;
        transport
    } else {
        TokioChildProcess::new(cmd)?
@@ -461,6 +511,7 @@ mod tests {
            cwd: None,
            url: None,
            headers: None,
            oauth_client_id: None,
        }
    }
@@ -473,6 +524,7 @@ mod tests {
            cwd: None,
            url: Some(url.to_string()),
            headers: None,
            oauth_client_id: None,
        }
    }
@@ -485,6 +537,7 @@ mod tests {
            cwd: None,
            url: Some(url.to_string()),
            headers: None,
            oauth_client_id: None,
        }
    }
@@ -502,6 +555,7 @@ mod tests {
    /// A stdio spec built with a command passes validation.
    #[test]
    fn validate_stdio_with_command_succeeds() {
        let outcome = stdio_server("npx").validate("test");
        assert!(outcome.is_ok());
    }
@@ -515,8 +569,11 @@ mod tests {
            cwd: None,
            url: None,
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("missing a \"command\" field"));
    }
@@ -530,8 +587,11 @@ mod tests {
            cwd: None,
            url: Some("http://localhost".into()),
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("remote fields"));
    }
@@ -547,14 +607,18 @@ mod tests {
            cwd: None,
            url: None,
            headers: Some(headers),
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("remote fields"));
    }
    /// An http spec built with a URL passes validation.
    #[test]
    fn validate_http_with_url_succeeds() {
        let outcome = http_server("http://localhost:8080").validate("test");
        assert!(outcome.is_ok());
    }
@@ -568,8 +632,11 @@ mod tests {
            cwd: None,
            url: None,
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("missing a \"url\" field"));
    }
@@ -583,8 +650,11 @@ mod tests {
            cwd: None,
            url: Some("http://localhost".into()),
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("stdio fields"));
    }
@@ -598,8 +668,11 @@ mod tests {
            cwd: None,
            url: Some("http://localhost".into()),
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("stdio fields"));
    }
@@ -613,14 +686,18 @@ mod tests {
            cwd: Some("/tmp".into()),
            url: Some("http://localhost".into()),
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("stdio fields"));
    }
    /// An sse spec built with a URL passes validation.
    #[test]
    fn validate_sse_with_url_succeeds() {
        let outcome = sse_server("http://sse.example.com").validate("test");
        assert!(outcome.is_ok());
    }
@@ -634,8 +711,11 @@ mod tests {
            cwd: None,
            url: None,
            headers: None,
            oauth_client_id: None,
        };
        let err = spec.validate("test").unwrap_err();
        assert!(err.to_string().contains("missing a \"url\" field"));
    }
@@ -661,9 +741,13 @@ mod tests {
                }
            }
        }"#;
        let config: McpServersConfig = serde_json::from_str(json).unwrap();
        assert!(config.mcp_servers.contains_key("my-server"));
        let spec = &config.mcp_servers["my-server"];
        assert_eq!(spec.transport_type, McpTransportType::Stdio);
        assert_eq!(spec.command.as_deref(), Some("npx"));
        assert_eq!(
@@ -684,7 +768,9 @@ mod tests {
            }
        }"#;
        let config: McpServersConfig = serde_json::from_str(json).unwrap();
        let spec = &config.mcp_servers["remote"];
        assert_eq!(spec.transport_type, McpTransportType::Http);
        assert_eq!(spec.url.as_deref(), Some("http://localhost:8080/mcp"));
        assert_eq!(
@@ -709,7 +795,9 @@ mod tests {
            }
        }"#;
        let config: McpServersConfig = serde_json::from_str(json).unwrap();
        let env = config.mcp_servers["s"].env.as_ref().unwrap();
        assert!(matches!(env["STR_VAR"], JsonField::Str(ref s) if s == "hello"));
        assert!(matches!(env["BOOL_VAR"], JsonField::Bool(true)));
        assert!(matches!(env["INT_VAR"], JsonField::Int(42)));
@@ -723,7 +811,9 @@ mod tests {
                "remote-api": { "type": "http", "url": "http://api.example.com" }
            }
        }"#;
        let config: McpServersConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.mcp_servers.len(), 2);
        assert!(config.mcp_servers.contains_key("github"));
        assert!(config.mcp_servers.contains_key("remote-api"));
@@ -732,7 +822,9 @@ mod tests {
    /// An empty `mcpServers` object deserializes into an empty server map.
    #[test]
    fn deserialize_empty_servers_map() {
        let parsed = serde_json::from_str::<McpServersConfig>(r#"{ "mcpServers": {} }"#).unwrap();
        assert!(parsed.mcp_servers.is_empty());
    }
@@ -747,77 +839,96 @@ mod tests {
                }
            }
        }"#;
        let config: McpServersConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.mcp_servers["s"].cwd.as_deref(), Some("/tmp/work"));
    }
    /// Requesting "all" yields every configured server id.
    #[test]
    fn resolve_all_returns_all_configured_servers() {
        let registry = make_registry_with_config(&["github", "slack", "jira"]);
        let requested = vec![String::from("all")];
        let mut resolved = registry.resolve_server_ids(Some(requested));
        // Sort so the comparison does not depend on the order ids come back in.
        resolved.sort();
        assert_eq!(resolved, vec!["github", "jira", "slack"]);
    }
    /// Requesting two configured names yields exactly those two ids.
    #[test]
    fn resolve_comma_separated_returns_matching_servers() {
        let registry = make_registry_with_config(&["github", "slack", "jira"]);
        let requested = vec![String::from("github"), String::from("jira")];
        let mut resolved = registry.resolve_server_ids(Some(requested));
        resolved.sort();
        assert_eq!(resolved, vec!["github", "jira"]);
    }
    /// Requesting one configured name yields only that id.
    #[test]
    fn resolve_single_server_name() {
        let registry = make_registry_with_config(&["github", "slack"]);
        let requested = Some(vec![String::from("slack")]);
        let resolved = registry.resolve_server_ids(requested);
        assert_eq!(resolved, vec!["slack"]);
    }
    /// Passing `None` resolves to no servers even when some are configured.
    #[test]
    fn resolve_none_returns_empty() {
        let resolved = make_registry_with_config(&["github"]).resolve_server_ids(None);
        assert!(resolved.is_empty());
    }
    /// A default registry resolves "all" to nothing.
    #[test]
    fn resolve_no_config_returns_empty() {
        let requested = Some(vec![String::from("all")]);
        let resolved = McpRegistry::default().resolve_server_ids(requested);
        assert!(resolved.is_empty());
    }
    /// Names that are not configured are dropped from the result.
    #[test]
    fn resolve_nonexistent_server_filtered_out() {
        let registry = make_registry_with_config(&["github"]);
        let requested = vec![String::from("github"), String::from("nonexistent")];
        let resolved = registry.resolve_server_ids(Some(requested));
        assert_eq!(resolved, vec!["github"]);
    }
    /// When none of the requested names is configured the result is empty.
    #[test]
    fn resolve_all_nonexistent_returns_empty() {
        let registry = make_registry_with_config(&["github"]);
        let requested = vec![String::from("foo"), String::from("bar")];
        let resolved = registry.resolve_server_ids(Some(requested));
        assert!(resolved.is_empty());
    }
    /// Requested names padded with spaces still match the configured ids.
    #[test]
    fn resolve_trims_whitespace() {
        let registry = make_registry_with_config(&["github", "slack"]);
        let padded = vec![String::from("  github  "), String::from("  slack  ")];
        let mut resolved = registry.resolve_server_ids(Some(padded));
        resolved.sort();
        assert_eq!(resolved, vec!["github", "slack"]);
    }
    #[test]
    fn registry_default_is_empty() {
        let registry = McpRegistry::default();
        assert!(registry.is_empty());
        assert!(registry.list_started_servers().is_empty());
        assert!(registry.mcp_config().is_none());
@@ -827,6 +938,7 @@ mod tests {
    #[test]
    fn registry_with_config_reports_config() {
        let registry = make_registry_with_config(&["github"]);
        assert!(registry.mcp_config().is_some());
        assert!(
            registry
@@ -843,4 +955,53 @@ mod tests {
        assert_eq!(MCP_SEARCH_META_FUNCTION_NAME_PREFIX, "mcp_search");
        assert_eq!(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, "mcp_describe");
    }
    /// With neither headers nor a token there is nothing to send.
    #[test]
    fn merge_bearer_token_both_none_returns_none() {
        let merged = merge_bearer_token(None, None);
        assert!(merged.is_none());
    }
    /// Without a token the configured headers come back and no `Authorization` is added.
    #[test]
    fn merge_bearer_token_headers_only_passes_through() {
        let mut configured = IndexMap::new();
        configured.insert("X-Key".to_string(), "val".to_string());
        let merged = merge_bearer_token(Some(&configured), None).unwrap();
        assert!(!merged.contains_key("Authorization"));
        assert_eq!(merged["X-Key"], "val");
    }
    /// A token without configured headers produces a bearer `Authorization` header.
    #[test]
    fn merge_bearer_token_token_only_injects_bearer() {
        let token = String::from("tok123");
        let merged = merge_bearer_token(None, Some(token)).unwrap();
        assert_eq!(merged["Authorization"], "Bearer tok123");
    }
    /// The token replaces a configured `Authorization` value; other headers are kept.
    #[test]
    fn merge_bearer_token_both_merges_and_overrides_authorization() {
        let mut configured = IndexMap::new();
        configured.insert("Authorization".to_string(), "old".to_string());
        configured.insert("X-Custom".to_string(), "keep".to_string());
        let token = Some(String::from("newtoken"));
        let merged = merge_bearer_token(Some(&configured), token).unwrap();
        assert_eq!(merged["X-Custom"], "keep");
        assert_eq!(merged["Authorization"], "Bearer newtoken");
    }
    /// An error whose message contains "Auth required" is recognised.
    #[test]
    fn is_auth_required_error_matches_rmcp_message() {
        let auth_failure = anyhow!("Auth required, when send initialize request");
        let detected = is_auth_required_error(&auth_failure);
        assert!(detected);
    }
    /// An unrelated error message is not treated as an auth failure.
    #[test]
    fn is_auth_required_error_does_not_match_unrelated() {
        let unrelated = anyhow!("Connection refused");
        let detected = is_auth_required_error(&unrelated);
        assert!(!detected);
    }
 }
@@ -0,0 +1,329 @@
 use crate::client::oauth::{OAuthProvider, TokenRequestFormat, load_oauth_tokens, run_oauth_flow};
 use crate::config::paths;
 use anyhow::{Context, Result, anyhow};
 use chrono::Utc;
 use inquire::Text;
 use log::warn;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::fs;
 use std::net::TcpListener;
 use url::Url;
 /// Subset of the JSON served at `/.well-known/oauth-protected-resource` (RFC 9728)
 /// that discovery reads.
 #[derive(Debug, Deserialize)]
 struct ProtectedResourceMetadata {
    /// Authorization servers advertised for the resource. Defaults to an empty list
    /// when the field is absent; discovery only looks at the first entry.
    #[serde(default)]
    authorization_servers: Vec<String>,
 }
 /// Subset of the JSON served at `/.well-known/oauth-authorization-server` that the
 /// MCP OAuth flow reads.
 #[derive(Debug, Deserialize)]
 struct OAuthServerMetadata {
    /// Becomes the provider's authorize URL.
    authorization_endpoint: String,
    /// Becomes the provider's token URL.
    token_endpoint: String,
    /// Scopes the server lists; joined with spaces to form the requested scope string.
    /// Defaults to an empty list when the field is absent.
    #[serde(default)]
    scopes_supported: Vec<String>,
    /// Dynamic client registration endpoint, when the server advertises one.
    registration_endpoint: Option<String>,
 }
 /// On-disk record of a dynamically registered client, stored as
 /// `mcp_<server>_registration.json` under the OAuth tokens directory.
 #[derive(Serialize, Deserialize)]
 struct McpRegistration {
    /// Client ID returned by the registration endpoint.
    client_id: String,
 }
 /// Values handed to the shared OAuth flow for one MCP server; built in
 /// `run_mcp_oauth_flow` from the discovered metadata.
 struct McpOAuthProvider {
    /// Configured, cached, dynamically registered, or manually entered client ID.
    client_id: String,
    /// Discovered `authorization_endpoint`.
    authorize_url: String,
    /// Discovered `token_endpoint`.
    token_url: String,
    /// Discovered `scopes_supported`, joined with single spaces.
    scopes: String,
    /// Loopback callback URL of the form `http://127.0.0.1:<port>/callback`.
    fixed_redirect: String,
 }
 // Exposes the discovered MCP endpoints through the shared `OAuthProvider` trait.
 impl OAuthProvider for McpOAuthProvider {
    /// Fixed label used for every MCP server.
    fn provider_name(&self) -> &str {
        "MCP"
    }
    /// Client ID chosen when the provider was built.
    fn client_id(&self) -> &str {
        self.client_id.as_str()
    }
    /// Discovered authorization endpoint.
    fn authorize_url(&self) -> &str {
        self.authorize_url.as_str()
    }
    /// Discovered token endpoint.
    fn token_url(&self) -> &str {
        self.token_url.as_str()
    }
    /// Always empty here; presumably the flow uses `fixed_redirect_uri` instead
    /// (verify against the trait's consumer).
    fn redirect_uri(&self) -> &str {
        ""
    }
    /// Space-separated scope string.
    fn scopes(&self) -> &str {
        self.scopes.as_str()
    }
    /// Token requests are sent form-url-encoded.
    fn token_request_format(&self) -> TokenRequestFormat {
        TokenRequestFormat::FormUrlEncoded
    }
    /// Always `false`; the redirect is provided by `fixed_redirect_uri`.
    fn uses_localhost_redirect(&self) -> bool {
        false
    }
    /// The loopback callback URL stored on the provider.
    fn fixed_redirect_uri(&self) -> Option<String> {
        Some(self.fixed_redirect.to_owned())
    }
 }
 pub async fn run_mcp_oauth_flow(
    server_name: &str,
    server_url: &str,
    configured_client_id: Option<&str>,
 ) -> Result<()> {
    let metadata = discover_oauth_metadata(server_url).await?;
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let port = listener.local_addr()?.port();
    drop(listener);
    let redirect_uri = format!("http://127.0.0.1:{port}/callback");
    let client_id = if let Some(id) = configured_client_id {
        id.to_string()
    } else if let Some(cached) = load_registered_client_id(server_name) {
        cached
    } else if let Some(reg_endpoint) = &metadata.registration_endpoint {
        match register_client(reg_endpoint, &redirect_uri).await {
            Ok(id) => {
                let _ = save_registered_client_id(server_name, &id);
                id
            }
            Err(e) => {
                warn!("Dynamic client registration failed: {e}. Falling back to manual entry.");
                Text::new("Enter the OAuth client ID for this MCP server:")
                    .prompt()
                    .context("Failed to read client ID")?
            }
        }
    } else {
        Text::new("Enter the OAuth client ID for this MCP server:")
            .prompt()
            .context("Failed to read client ID")?
    };
    let provider = McpOAuthProvider {
        client_id,
        authorize_url: metadata.authorization_endpoint,
        token_url: metadata.token_endpoint,
        scopes: metadata.scopes_supported.join(" "),
        fixed_redirect: redirect_uri,
    };
    run_oauth_flow(&provider, &mcp_token_key(server_name)).await
 }
 /// Returns the stored access token for `server_name` if one exists and its
 /// `expires_at` timestamp is still in the future; otherwise `None`.
 pub fn load_valid_mcp_token(server_name: &str) -> Option<String> {
    let key = mcp_token_key(server_name);
    load_oauth_tokens(&key)
        .filter(|stored| Utc::now().timestamp() < stored.expires_at)
        .map(|stored| stored.access_token)
 }
 /// Builds the token-store key for an MCP server: `mcp_` followed by the server name.
 fn mcp_token_key(server_name: &str) -> String {
    const PREFIX: &str = "mcp_";
    let mut key = String::with_capacity(PREFIX.len() + server_name.len());
    key.push_str(PREFIX);
    key.push_str(server_name);
    key
 }
 /// Reads the cached client ID for `server_name` from
 /// `mcp_<server_name>_registration.json` in the OAuth tokens directory.
 ///
 /// Returns `None` when the file cannot be read or does not parse.
 fn load_registered_client_id(server_name: &str) -> Option<String> {
    let file_name = format!("mcp_{server_name}_registration.json");
    let raw = fs::read_to_string(paths::oauth_tokens_path().join(file_name)).ok()?;
    serde_json::from_str::<McpRegistration>(&raw)
        .ok()
        .map(|registration| registration.client_id)
 }
 /// Writes `client_id` to `mcp_<server_name>_registration.json` in the OAuth tokens
 /// directory, creating the directory if needed and overwriting any earlier file.
 ///
 /// # Errors
 /// Propagates directory-creation, serialization, and write failures.
 fn save_registered_client_id(server_name: &str, client_id: &str) -> Result<()> {
    let tokens_dir = paths::oauth_tokens_path();
    fs::create_dir_all(&tokens_dir)?;
    let registration = McpRegistration {
        client_id: client_id.to_owned(),
    };
    let serialized = serde_json::to_string_pretty(&registration)?;
    let target = tokens_dir.join(format!("mcp_{server_name}_registration.json"));
    fs::write(target, serialized)?;
    Ok(())
 }
 /// Performs dynamic client registration against `endpoint` and returns the issued
 /// client ID.
 ///
 /// The request registers a public client ("token_endpoint_auth_method": "none")
 /// named "Coyote" with `redirect_uri` as its only redirect URI.
 ///
 /// # Errors
 /// Fails when the endpoint is unreachable, answers with a non-success HTTP status
 /// (the status and body are included in the error), returns a body that is not
 /// JSON, or omits `client_id`.
 async fn register_client(endpoint: &str, redirect_uri: &str) -> Result<String> {
    let body = serde_json::json!({
        "client_name": "Coyote",
        "redirect_uris": [redirect_uri],
        "grant_types": ["authorization_code", "refresh_token"],
        "response_types": ["code"],
        "token_endpoint_auth_method": "none"
    });
    let response = Client::new()
        .post(endpoint)
        .json(&body)
        .send()
        .await
        .context("Failed to reach registration endpoint")?;
    // Check the status before parsing: an error page that is not JSON would
    // otherwise be reported as a confusing parse failure.
    let status = response.status();
    let text = response
        .text()
        .await
        .context("Failed to read registration response")?;
    if !status.is_success() {
        return Err(anyhow!(
            "Registration endpoint returned HTTP {status}: {text}"
        ));
    }
    let parsed: serde_json::Value =
        serde_json::from_str(&text).context("Failed to parse registration response")?;
    parsed["client_id"]
        .as_str()
        .map(str::to_owned)
        .ok_or_else(|| anyhow!("Missing client_id in registration response: {parsed}"))
 }
 /// Discovers the OAuth endpoints for the MCP server at `server_url`.
 ///
 /// Two strategies are tried against the server's origin (scheme, host, port):
 /// 1. RFC 9728 protected-resource metadata; when it names an authorization
 ///    server, that server's `/.well-known/oauth-authorization-server` is fetched.
 ///    Any failure on this path falls through silently.
 /// 2. `/.well-known/oauth-authorization-server` on the MCP server's own origin.
 ///
 /// # Errors
 /// Fails when `server_url` is invalid, the fallback endpoint is unreachable, its
 /// body does not parse, or it answers with a non-success status.
 async fn discover_oauth_metadata(server_url: &str) -> Result<OAuthServerMetadata> {
    let base = extract_base_url(server_url)?;
    let client = Client::new();
    // RFC 9728: try protected resource metadata first; it points to the auth server
    let pr_url = format!("{base}/.well-known/oauth-protected-resource");
    if let Ok(resp) = client.get(&pr_url).send().await
        && resp.status().is_success()
        && let Ok(pr) = resp.json::<ProtectedResourceMetadata>().await
        && let Some(auth_server) = pr.authorization_servers.first()
    {
        // Advertised issuers may end in '/'; strip it so the well-known path does
        // not start with a double slash.
        let issuer = auth_server.trim_end_matches('/');
        let as_url = format!("{issuer}/.well-known/oauth-authorization-server");
        if let Ok(resp) = client.get(&as_url).send().await
            && resp.status().is_success()
            && let Ok(meta) = resp.json::<OAuthServerMetadata>().await
        {
            return Ok(meta);
        }
    }
    // Fallback: assume the MCP server's own origin hosts the authorization server.
    let as_url = format!("{base}/.well-known/oauth-authorization-server");
    let resp = client
        .get(&as_url)
        .send()
        .await
        .with_context(|| format!("Failed to reach {as_url}"))?;
    if resp.status().is_success() {
        return resp
            .json::<OAuthServerMetadata>()
            .await
            .with_context(|| format!("Failed to parse OAuth metadata from {as_url}"));
    }
    Err(anyhow!(
        "Could not discover OAuth metadata for '{server_url}'.\n\
         Tried:\n  {pr_url}\n  {as_url}\n\
         Ensure the server supports MCP OAuth discovery, or consult its documentation."
    ))
 }
 /// Reduces `url` to `scheme://host[:port]`, dropping path, query, and fragment.
 ///
 /// The port is included only when the parsed URL reports one.
 ///
 /// # Errors
 /// Fails when `url` does not parse or has no host.
 fn extract_base_url(url: &str) -> Result<String> {
    let parsed = Url::parse(url).with_context(|| format!("Invalid URL: {url}"))?;
    let Some(host) = parsed.host_str() else {
        return Err(anyhow!("No host in URL: {url}"));
    };
    let mut base = format!("{}://{host}", parsed.scheme());
    if let Some(port) = parsed.port() {
        base.push(':');
        base.push_str(&port.to_string());
    }
    Ok(base)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::utils::get_env_name;
    use serial_test::serial;
    use std::{
        env, fs,
        time::{self, SystemTime},
    };
    /// Runs the wrapped closure when dropped, so cleanup still happens while a
    /// failed assertion unwinds the test.
    struct RunOnDrop<F: FnOnce()>(Option<F>);
    impl<F: FnOnce()> Drop for RunOnDrop<F> {
        fn drop(&mut self) {
            if let Some(cleanup) = self.0.take() {
                cleanup();
            }
        }
    }
    /// Runs `f` with the cache-dir environment variable pointing at a fresh temp
    /// directory. The previous value is restored and the directory removed
    /// afterwards, even when `f` panics.
    fn with_temp_cache<F: FnOnce()>(f: F) {
        let unique = SystemTime::now()
            .duration_since(time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let root = env::temp_dir().join(format!("coyote-mcp-oauth-test-{unique}"));
        fs::create_dir_all(&root).unwrap();
        let env_key = get_env_name("cache_dir");
        let prev = env::var_os(&env_key);
        // SAFETY: every caller is `#[serial]`; assumes no other thread reads or
        // writes the process environment meanwhile (TODO confirm for the wider
        // test suite).
        unsafe {
            env::set_var(&env_key, &root);
        }
        let _restore = RunOnDrop(Some(move || {
            // SAFETY: same single-threaded-environment assumption as above.
            unsafe {
                match prev {
                    Some(v) => env::set_var(&env_key, v),
                    None => env::remove_var(&env_key),
                }
            }
            let _ = fs::remove_dir_all(&root);
        }));
        f();
    }
    #[test]
    fn extract_base_url_strips_path_and_query() {
        let result = extract_base_url("https://mcp.notion.com/mcp?foo=bar").unwrap();
        assert_eq!(result, "https://mcp.notion.com");
    }
    #[test]
    fn extract_base_url_preserves_explicit_port() {
        let result = extract_base_url("http://localhost:8080/mcp").unwrap();
        assert_eq!(result, "http://localhost:8080");
    }
    #[test]
    fn extract_base_url_standard_port_omitted() {
        let result = extract_base_url("https://example.com/mcp/v1").unwrap();
        assert_eq!(result, "https://example.com");
    }
    #[test]
    fn extract_base_url_rejects_invalid_url() {
        assert!(extract_base_url("not-a-url").is_err());
    }
    #[test]
    #[serial]
    fn registered_client_id_roundtrip() {
        with_temp_cache(|| {
            save_registered_client_id("notion", "client-xyz-123").unwrap();
            let loaded = load_registered_client_id("notion");
            assert_eq!(loaded, Some("client-xyz-123".to_string()));
        });
    }
    #[test]
    #[serial]
    fn load_registered_client_id_returns_none_for_missing() {
        with_temp_cache(|| {
            let loaded = load_registered_client_id("no-such-server");
            assert!(loaded.is_none());
        });
    }
    #[test]
    #[serial]
    fn registered_client_id_second_save_overwrites_first() {
        with_temp_cache(|| {
            save_registered_client_id("github", "first-id").unwrap();
            save_registered_client_id("github", "second-id").unwrap();
            let loaded = load_registered_client_id("github");
            assert_eq!(loaded, Some("second-id".to_string()));
        });
    }
 }
@@ -16,8 +16,8 @@ use parking_lot::RwLock;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use std::{
-    collections::HashMap, env, fmt, fmt::Debug, fs, hash::Hash, path::Path, sync::Arc,
+    cmp::Ordering, collections::HashMap, env, fmt, fmt::Debug, fs, hash::Hash, path::Path,
-    time::Duration,
+    sync::Arc, time::Duration,
 };
 use tokio::time::sleep;
@@ -1196,7 +1196,7 @@ fn reciprocal_rank_fusion(
        }
    }
    let mut sorted_items: Vec<(DocumentId, f32)> = map.into_iter().collect();
-    sorted_items.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
+    sorted_items.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
    sorted_items
        .into_iter()
@@ -6,18 +6,24 @@ use self::completer::ReplCompleter;
 use self::highlighter::ReplHighlighter;
 use self::prompt::ReplPrompt;
-use crate::client::{call_chat_completions, call_chat_completions_streaming, init_client, oauth};
+use crate::client::{
    Message, MessageRole, call_chat_completions, call_chat_completions_streaming, init_client,
    oauth,
 };
 use crate::config::{
    AgentVariables, AppConfig, AssertState, Input, LastMessage, RequestContext, StateFlags,
    macro_execute,
 };
 use crate::config::{AssetCategory, paths};
 use crate::function::supervisor::{GuardrailAction, check_pending_agents_guardrail};
 use crate::render::render_error;
 use crate::utils::{
-    AbortSignal, abortable_run_with_spinner, create_abort_signal, dimmed_text, set_text, temp_file,
+    AbortSignal, SHELL, abortable_run_with_spinner, create_abort_signal, dimmed_text, run_command,
    set_text, temp_file,
 };
-use crate::{config, graph, resolve_oauth_client};
+use crate::sandbox::SANDBOX_ENV_FLAG;
 use crate::{config, graph, mcp, resolve_oauth_client};
 use anyhow::{Context, Result, bail};
 use crossterm::cursor::SetCursorStyle;
 use fancy_regex::Regex;
@@ -26,9 +32,9 @@ use log::warn;
 use parking_lot::RwLock;
 use reedline::CursorConfig;
 use reedline::{
-    ColumnarMenu, EditCommand, EditMode, Emacs, KeyCode, KeyModifiers, Keybindings, Reedline,
+    ColumnarMenu, EditCommand, EditMode, Emacs, FileBackedHistory, KeyCode, KeyModifiers,
-    ReedlineEvent, ReedlineMenu, ValidationResult, Validator, Vi, default_emacs_keybindings,
+    Keybindings, Reedline, ReedlineEvent, ReedlineMenu, ValidationResult, Validator, Vi,
-    default_vi_insert_keybindings, default_vi_normal_keybindings,
+    default_emacs_keybindings, default_vi_insert_keybindings, default_vi_normal_keybindings,
 };
 use reedline::{MenuBuilder, Signal};
 use std::sync::LazyLock;
@@ -46,15 +52,25 @@ pub const DEFAULT_CONTINUATION_PROMPT: &str = indoc! {"
    4. Continue with the next pending item now. Call tools immediately."
 };
-static REPL_COMMANDS: LazyLock<[ReplCommand; 44]> = LazyLock::new(|| {
+static REPL_COMMANDS: LazyLock<[ReplCommand; 50]> = LazyLock::new(|| {
    [
        ReplCommand::new(".help", "Show this help guide", AssertState::pass()),
        ReplCommand::new(".info", "Show system info", AssertState::pass()),
        ReplCommand::new(
            ".info tools",
            "Show the list of enabled tools to be passed to the LLM",
            AssertState::True(StateFlags::FUNCTION_CALLING),
        ),
        ReplCommand::new(
            ".authenticate",
            "Authenticate the current model client via OAuth (if configured)",
            AssertState::pass(),
        ),
        ReplCommand::new(
            ".mcp auth",
            "Authenticate with an MCP server via OAuth",
            AssertState::pass(),
        ),
        ReplCommand::new(
            ".edit config",
            "Modify configuration file",
@@ -160,6 +176,11 @@ static REPL_COMMANDS: LazyLock<[ReplCommand; 44]> = LazyLock::new(|| {
            "Clear the todo list and stop auto-continuation",
            AssertState::pass(),
        ),
        ReplCommand::new(
            ".info todo",
            "Show the current todo list driving auto-continuation",
            AssertState::True(StateFlags::AUTO_CONTINUE),
        ),
        ReplCommand::new(
            ".rag",
            "Initialize or access RAG",
@@ -193,13 +214,28 @@ static REPL_COMMANDS: LazyLock<[ReplCommand; 44]> = LazyLock::new(|| {
        ReplCommand::new(".macro", "Execute a macro", AssertState::pass()),
        ReplCommand::new(
            ".skill",
-            "List, load, unload, or create skills",
+            "Create a new skill",
-            AssertState::pass(),
+            AssertState::True(StateFlags::SKILLS_ENABLED),
        ),
        ReplCommand::new(
            ".skill load",
            "Load a skill into the current context",
            AssertState::True(StateFlags::SKILLS_ENABLED),
        ),
        ReplCommand::new(
            ".skill loaded",
            "List currently-loaded skills",
            AssertState::True(StateFlags::SKILLS_ENABLED),
        ),
        ReplCommand::new(
            ".skill unload",
            "Unload a skill from the current context",
            AssertState::True(StateFlags::SKILLS_ENABLED),
        ),
        ReplCommand::new(
            ".edit skill",
            "Modify an existing skill by name",
-            AssertState::pass(),
+            AssertState::True(StateFlags::SKILLS_ENABLED),
        ),
        ReplCommand::new(
            ".file",
@@ -277,7 +313,64 @@ Type ".help" for additional help.
 "#,
                env!("CARGO_CRATE_NAME"),
                env!("CARGO_PKG_VERSION"),
-            )
+            );
            if env::var_os(SANDBOX_ENV_FLAG).is_some() {
                eprintln!(
                    "Sandbox mode is enabled. All changes made to the Coyote config will not persist to the host machine."
                );
            }
        }
        {
            let (messages_snapshot, compressed_count) = {
                let ctx = self.ctx.read();
                if let Some(session) = &ctx.session {
                    let msgs: Vec<Message> = session
                        .messages()
                        .iter()
                        .filter(|m| !m.role.is_system())
                        .cloned()
                        .collect();
                    let compressed = session.compressed_messages().len();
                    (msgs, compressed)
                } else {
                    (vec![], 0)
                }
            };
            if !messages_snapshot.is_empty() || compressed_count > 0 {
                let app = Arc::clone(&self.ctx.read().app.config);
                if compressed_count > 0 {
                    println!(
                        "{}",
                        dimmed_text(&format!(
                            "({compressed_count} earlier messages not shown; compressed for context)"
                        ))
                    );
                    println!();
                }
                for message in &messages_snapshot {
                    match message.role {
                        MessageRole::User => {
                            if let Some(text) = message.content.as_text() {
                                println!("{}", dimmed_text("You:"));
                                println!("{text}");
                                println!();
                            }
                        }
                        MessageRole::Assistant => {
                            if let Some(text) = message.content.as_text() {
                                app.print_markdown(text)?;
                                println!();
                            }
                        }
                        _ => {}
                    }
                }
                println!("{}", dimmed_text("─── ↑ previous conversation ↑ ───"));
                println!();
            }
        }
        loop {
@@ -306,6 +399,9 @@ Type ".help" for additional help.
                }
                Ok(Signal::CtrlC) => {
                    self.abort_signal.set_ctrlc();
                    if let Some(supervisor) = self.ctx.read().supervisor.clone() {
                        supervisor.read().cancel_recursive();
                    }
                    println!("(To exit, press Ctrl+D or enter \".exit\")\n");
                }
                Ok(Signal::CtrlD) => {
@@ -315,6 +411,11 @@ Type ".help" for additional help.
                _ => {}
            }
        }
        if let Some(supervisor) = self.ctx.read().supervisor.clone() {
            supervisor.read().cancel_recursive();
        }
        self.ctx.write().exit_session()?;
        Ok(())
    }
@@ -347,6 +448,14 @@ Type ".help" for additional help.
            editor = editor.with_buffer_editor(command, temp_file);
        }
        if app.save_shell_history {
            let ctx = ctx.read();
            let history_path = paths::repl_history_file(&ctx.session);
            if let Ok(history) = FileBackedHistory::with_file(1000, history_path) {
                editor = editor.with_history(Box::new(history));
            }
        }
        Ok(editor)
    }
@@ -435,6 +544,7 @@ pub async fn run_repl_command(
    abort_signal: AbortSignal,
    mut line: &str,
 ) -> Result<bool> {
    ctx.pending_agents_guardrail_count = 0;
    if let Ok(Some(captures)) = MULTILINE_RE.captures(line)
        && let Some(text_match) = captures.get(1)
    {
@@ -463,6 +573,14 @@ pub async fn run_repl_command(
                    let info = ctx.agent_info()?;
                    print!("{info}");
                }
                Some("tools") => {
                    let info = ctx.tools_info()?;
                    print!("{info}");
                }
                Some("todo") => {
                    let info = ctx.todo_info()?;
                    print!("{info}");
                }
                Some(_) => unknown_command()?,
                None => {
                    let app = Arc::clone(&ctx.app.config);
@@ -491,6 +609,53 @@ pub async fn run_repl_command(
                let (client_name, provider) = resolve_oauth_client(Some(client.name()), &clients)?;
                oauth::run_oauth_flow(&*provider, &client_name).await?;
            }
            ".mcp" => match args {
                Some(args) => {
                    let mut parts = args.splitn(2, char::is_whitespace);
                    let sub = parts.next().unwrap_or("").trim();
                    let rest = parts.next().map(str::trim).unwrap_or("");
                    match sub {
                        "auth" => {
                            if rest.is_empty() {
                                println!("Usage: .mcp auth <server_name>");
                            } else {
                                let server_name = rest;
                                let server_spec = ctx
                                    .app
                                    .mcp_config
                                    .as_ref()
                                    .and_then(|c| c.mcp_servers.get(server_name))
                                    .cloned();
                                match server_spec {
                                    None => {
                                        bail!("MCP server '{}' not found in mcp.json.", server_name)
                                    }
                                    Some(spec) if !spec.is_remote() => bail!(
                                        "MCP server '{}' uses stdio transport; \
                                         OAuth is only supported for http/sse servers.",
                                        server_name
                                    ),
                                    Some(spec) => {
                                        let url = spec
                                            .url
                                            .as_deref()
                                            .expect("validated: remote spec has url");
                                        let client_id = spec.oauth_client_id.as_deref();
                                        mcp::oauth::run_mcp_oauth_flow(server_name, url, client_id)
                                            .await?;
                                        println!(
                                            "Authentication saved. \
                                             Restart Coyote to connect to '{server_name}'."
                                        );
                                    }
                                }
                            }
                        }
                        _ => unknown_command()?,
                    }
                }
                None => println!("Usage: .mcp auth <server_name>"),
            },
            ".prompt" => match args {
                Some(text) => {
                    let app = Arc::clone(&ctx.app.config);
@@ -582,6 +747,46 @@ pub async fn run_repl_command(
                        session.set_autonaming(false);
                    }
                }
                if let Some(session) = &ctx.session {
                    let messages_snapshot: Vec<Message> = session
                        .messages()
                        .iter()
                        .filter(|m| !m.role.is_system())
                        .cloned()
                        .collect();
                    let compressed_count = session.compressed_messages().len();
                    if !messages_snapshot.is_empty() || compressed_count > 0 {
                        if compressed_count > 0 {
                            println!(
                                "{}",
                                dimmed_text(&format!(
                                    "({compressed_count} earlier messages not shown — compressed for context)"
                                ))
                            );
                            println!();
                        }
                        for message in &messages_snapshot {
                            match message.role {
                                MessageRole::User => {
                                    if let Some(text) = message.content.as_text() {
                                        println!("{}", dimmed_text("You:"));
                                        println!("{text}");
                                        println!();
                                    }
                                }
                                MessageRole::Assistant => {
                                    if let Some(text) = message.content.as_text() {
                                        app.print_markdown(text)?;
                                        println!();
                                    }
                                }
                                _ => {}
                            }
                        }
                        println!("{}", dimmed_text("─── ↑ previous conversation ↑ ───"));
                        println!();
                    }
                }
            }
            ".install" => {
                let trimmed = args.map(str::trim).unwrap_or("");
@@ -945,9 +1150,13 @@ pub async fn run_repl_command(
            _ => unknown_command()?,
        },
        None => {
-            reset_continuation(ctx);
+            if let Some(cmd) = try_extract_shell_command(line) {
-            let input = Input::from_str(ctx, line, None)?;
+                handle_shell_passthrough(cmd)?;
-            ask(ctx, abort_signal.clone(), input, true).await?;
+            } else {
                reset_continuation(ctx);
                let input = Input::from_str(ctx, line, None)?;
                ask(ctx, abort_signal.clone(), input, true).await?;
            }
        }
    }
@@ -1011,6 +1220,20 @@ async fn ask(
        )
        .await
    } else {
        match check_pending_agents_guardrail(ctx) {
            GuardrailAction::Inject(prompt) => {
                let guardrail_input = Input::from_str(ctx, &prompt, None)?;
                return ask(ctx, abort_signal, guardrail_input, false).await;
            }
            GuardrailAction::ForceTerminate(ids) => {
                warn!(
                    "Pending-agent guardrail force-cancelled {} agent(s) after max reminders: {:?}",
                    ids.len(),
                    ids
                );
            }
            GuardrailAction::NoAction => {}
        }
        let do_continue = should_continue(ctx);
        if do_continue {
@@ -1149,10 +1372,12 @@ fn dump_repl_help() {
        .join("\n");
    println!(
        r###"{head}
 {:<24} Run an arbitrary shell command (stdout/stderr stream to your terminal; Ctrl+C interrupts)
 Type ::: to start multi-line editing, type ::: to finish it.
 Press Ctrl+O to open an editor for editing the input buffer.
 Press Ctrl+C to cancel the response, Ctrl+D to exit the REPL."###,
        "!<command>",
    );
 }
@@ -1168,6 +1393,25 @@ fn parse_command(line: &str) -> Option<(&str, Option<&str>)> {
    }
 }
 /// Returns the shell command carried by a `!`-prefixed REPL line.
 ///
 /// Exactly one leading `!` is removed, then whitespace at the start of the
 /// remainder is trimmed. A line that does not begin with `!` (including one
 /// with whitespace before the `!`) yields `None`; a bare `!` yields `Some("")`.
 fn try_extract_shell_command(line: &str) -> Option<&str> {
    line.strip_prefix('!').map(str::trim_start)
 }
 /// Runs `cmd` through the configured shell (`SHELL.cmd` invoked with
 /// `SHELL.arg` followed by the command text).
 ///
 /// An empty command only prints a usage hint to stderr. A non-zero exit
 /// status is reported on stderr as `[exit N]` and is not treated as an error;
 /// only a failure of `run_command` itself is propagated.
 fn handle_shell_passthrough(cmd: &str) -> Result<()> {
    if cmd.is_empty() {
        eprintln!("Usage: !<command>");
    } else {
        let status = run_command(&SHELL.cmd, &[&SHELL.arg, cmd], None)?;
        if status != 0 {
            eprintln!("[exit {status}]");
        }
    }
    Ok(())
 }
 fn split_first_arg(args: Option<&str>) -> Option<(&str, Option<&str>)> {
    args.map(|v| match v.split_once(' ') {
        Some((subcmd, args)) => (subcmd, Some(args.trim())),
@@ -1326,8 +1570,8 @@ mod tests {
    }
    #[test]
-    fn repl_commands_has_44_entries() {
+    fn repl_commands_has_50_entries() {
-        assert_eq!(REPL_COMMANDS.len(), 44);
+        assert_eq!(REPL_COMMANDS.len(), 50);
    }
    #[test]
@@ -1502,6 +1746,57 @@ mod tests {
        assert_eq!(parse_command("."), Some((".", None)));
    }
    /// The leading `!` is removed and the rest of the line is returned.
    #[test]
    fn try_extract_shell_command_strips_bang() {
        for (line, expected) in [("!ls", "ls"), ("!ls -la", "ls -la")] {
            assert_eq!(try_extract_shell_command(line), Some(expected));
        }
    }
    /// Whitespace between the `!` and the command is discarded.
    #[test]
    fn try_extract_shell_command_trims_inner_whitespace() {
        for (line, expected) in [("!   echo hi", "echo hi"), ("! ls", "ls")] {
            assert_eq!(try_extract_shell_command(line), Some(expected));
        }
    }
    /// A `!` followed by nothing (or only whitespace) yields an empty command.
    #[test]
    fn try_extract_shell_command_only_bang_yields_empty() {
        for line in ["!", "!   "] {
            assert_eq!(try_extract_shell_command(line), Some(""));
        }
    }
    /// The `!` must be the very first character; leading whitespace disqualifies the line.
    #[test]
    fn try_extract_shell_command_rejects_leading_whitespace() {
        for line in [" !ls", "\t!ls"] {
            assert!(try_extract_shell_command(line).is_none());
        }
    }
    /// A `!` elsewhere in the line, or no `!` at all, is not a shell command.
    #[test]
    fn try_extract_shell_command_rejects_inline_bang() {
        for line in ["echo !foo", "hello world"] {
            assert!(try_extract_shell_command(line).is_none());
        }
    }
    /// Only the first `!` is consumed; a second one stays part of the command.
    #[test]
    fn try_extract_shell_command_strips_one_leading_bang() {
        let remainder = try_extract_shell_command("!!ls");
        assert_eq!(remainder, Some("!ls"));
    }
    /// Shell metacharacters (pipes, redirects, quotes, `$VAR`) pass through untouched.
    #[test]
    fn try_extract_shell_command_preserves_pipes_and_redirects() {
        let cases = [
            ("!ls -la | grep yaml", "ls -la | grep yaml"),
            ("!cat foo.txt > /tmp/out", "cat foo.txt > /tmp/out"),
            (r#"!echo "$HOME""#, r#"echo "$HOME""#),
        ];
        for (line, expected) in cases {
            assert_eq!(try_extract_shell_command(line), Some(expected));
        }
    }
    #[test]
    fn split_first_arg_none_input() {
        assert!(split_first_arg(None).is_none());
@@ -0,0 +1,442 @@
 use std::env;
 use std::fs;
 use std::fs::{read_dir, read_to_string};
 use std::path::{Path, PathBuf};
 use anyhow::{Context, Result};
 use serde_yaml::Value;
 use sha2::{Digest, Sha256};
 use crate::config::paths;
 /// File name `collect_subdir_mixins` looks for inside each scanned subdirectory.
 const SBX_MIXIN_FILE_NAME: &str = "sbx-mixin.yaml";
 /// File name `wrap_mixin_as_kit` writes the mixin content to inside the kit directory.
 const KIT_SPEC_FILE_NAME: &str = "spec.yaml";
 /// A sandbox mixin found during discovery, together with the summary counts
 /// that `log_discovery` prints.
 #[derive(Debug, Clone)]
 pub struct DiscoveredMixin {
    /// Location of the mixin: either a mixin file, or a directory (which
    /// `kit_path` returns unchanged).
    pub path: PathBuf,
    /// Name shown in the discovery summary; `read_mixin` sets it to the
    /// displayed form of `path`.
    pub label: String,
    /// Number of entries under `commands.install`, as counted by `summarize`.
    pub install_count: usize,
    /// Number of entries under `network.allowedDomains`, as counted by `summarize`.
    pub domain_count: usize,
 }
 impl DiscoveredMixin {
    /// Returns a directory usable as a kit for this mixin.
    ///
    /// If `path` is already a directory it is returned as-is; otherwise the
    /// file is wrapped into a kit directory via `wrap_mixin_as_kit`, whose
    /// errors are propagated.
    pub fn kit_path(&self) -> Result<PathBuf> {
        if self.path.is_dir() {
            Ok(self.path.clone())
        } else {
            wrap_mixin_as_kit(&self.path)
        }
    }
 }
 /// Copies the mixin file at `mixin_path` into a kit directory and returns
 /// that directory.
 ///
 /// The directory lives under `paths::sbx_mixin_kits_dir()` and is named after
 /// the lowercase-hex SHA-256 of the file's bytes; the content is stored there
 /// as `KIT_SPEC_FILE_NAME`. If that file already exists with identical bytes
 /// nothing is written.
 ///
 /// # Errors
 /// Fails if the mixin cannot be read, or if the kit directory or spec file
 /// cannot be created/written.
 pub fn wrap_mixin_as_kit(mixin_path: &Path) -> Result<PathBuf> {
    let contents = fs::read(mixin_path)
        .with_context(|| format!("Failed to read sbx mixin {}", mixin_path.display()))?;

    let digest = {
        let mut hasher = Sha256::new();
        hasher.update(&contents);
        format!("{:x}", hasher.finalize())
    };
    let kit_dir = paths::sbx_mixin_kits_dir().join(&digest);
    let spec_path = kit_dir.join(KIT_SPEC_FILE_NAME);

    // An unreadable or differing spec file falls through to a (re)write.
    let already_wrapped = matches!(fs::read(&spec_path), Ok(existing) if existing == contents);
    if !already_wrapped {
        fs::create_dir_all(&kit_dir)
            .with_context(|| format!("Failed to create mixin kit dir {}", kit_dir.display()))?;
        fs::write(&spec_path, &contents)
            .with_context(|| format!("Failed to write {}", spec_path.display()))?;
        debug!(
            "Wrapped mixin {} as kit at {}",
            mixin_path.display(),
            kit_dir.display()
        );
    }
    Ok(kit_dir)
 }
 pub fn discover() -> Result<Vec<DiscoveredMixin>> {
    let mut out = Vec::new();
    push_if_exists(&mut out, paths::sbx_mixin_file())?;
    push_if_exists(&mut out, paths::global_tools_sbx_mixin_file())?;
    for path in collect_subdir_mixins(&paths::functions_dir()) {
        out.push(read_mixin(path)?);
    }
    for path in collect_subdir_mixins(&paths::agents_data_dir()) {
        out.push(read_mixin(path)?);
    }
    if let Ok(cwd) = env::current_dir()
        && let Some(path) = paths::find_workspace_sbx_mixin(&cwd)
    {
        out.push(read_mixin(path)?);
    }
    Ok(out)
 }
 pub fn summarize(path: &Path) -> Result<(usize, usize)> {
    let content = read_to_string(path)
        .with_context(|| format!("Failed to read sbx mixin {}", path.display()))?;
    let value: Value = serde_yaml::from_str(&content)
        .with_context(|| format!("Failed to parse sbx mixin {}", path.display()))?;
    let installs = value
        .get("commands")
        .and_then(|c| c.get("install"))
        .and_then(|i| i.as_sequence())
        .map(|s| s.len())
        .unwrap_or(0);
    let domains = value
        .get("network")
        .and_then(|n| n.get("allowedDomains"))
        .and_then(|d| d.as_sequence())
        .map(|s| s.len())
        .unwrap_or(0);
    Ok((installs, domains))
 }
 /// Reports which mixins are about to be applied.
 ///
 /// When discovery was `disabled`, or no mixins were found, a single `info!`
 /// record is logged and nothing is printed. Otherwise a header and one line
 /// per mixin (label plus install/domain counts) are both logged via `info!`
 /// and printed to stdout.
 pub fn log_discovery(mixins: &[DiscoveredMixin], disabled: bool) {
    if disabled {
        info!("Mixin discovery disabled via --no-mixins.");
    } else if mixins.is_empty() {
        info!("No sbx mixins discovered.");
    } else {
        // Every summary line goes to the log and to the terminal.
        let emit = |text: String| {
            info!("{text}");
            println!("{text}");
        };
        // "" for exactly one, "s" otherwise.
        let plural = |count: usize| if count == 1 { "" } else { "s" };

        emit(format!("Applying {} sbx mixin(s):", mixins.len()));
        for mixin in mixins {
            emit(format!(
                "  {}  (adds: {} install{}, {} domain{})",
                mixin.label,
                mixin.install_count,
                plural(mixin.install_count),
                mixin.domain_count,
                plural(mixin.domain_count),
            ));
        }
    }
 }
 /// Reads the mixin at `path` and appends it to `out`, but only when `path`
 /// exists; a missing path is silently skipped. Read/parse errors propagate.
 fn push_if_exists(out: &mut Vec<DiscoveredMixin>, path: PathBuf) -> Result<()> {
    if !path.exists() {
        return Ok(());
    }
    let mixin = read_mixin(path)?;
    out.push(mixin);
    Ok(())
 }
 /// Builds a `DiscoveredMixin` for `path`: the label is the displayed path and
 /// the counts come from `summarize`, whose errors are propagated.
 fn read_mixin(path: PathBuf) -> Result<DiscoveredMixin> {
    let (install_count, domain_count) = summarize(&path)?;
    Ok(DiscoveredMixin {
        // Rendered before `path` is moved into the struct on the next line.
        label: path.display().to_string(),
        path,
        install_count,
        domain_count,
    })
 }
 /// Lists `<dir>/<subdir>/sbx-mixin.yaml` for every immediate subdirectory of
 /// `dir` that contains such an entry, ordered by subdirectory name.
 ///
 /// Returns an empty list when `dir` cannot be read. Entries that fail to be
 /// listed, or whose file type cannot be determined, are skipped.
 fn collect_subdir_mixins(dir: &Path) -> Vec<PathBuf> {
    let Ok(listing) = read_dir(dir) else {
        return Vec::new();
    };

    let mut subdirs: Vec<_> = listing
        .flatten()
        .filter(|entry| matches!(entry.file_type(), Ok(kind) if kind.is_dir()))
        .collect();
    subdirs.sort_by_key(|entry| entry.file_name());

    subdirs
        .into_iter()
        .map(|entry| entry.path().join(SBX_MIXIN_FILE_NAME))
        .filter(|candidate| candidate.exists())
        .collect()
 }
 // Unit tests for mixin summarising, subdirectory discovery, and wrapping a
 // mixin file into a content-addressed kit directory.
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::fs;
    use std::time;
    /// Creates and returns a fresh directory under the system temp dir, named
    /// `coyote-{prefix}-{nanoseconds since the Unix epoch}`.
    fn unique_root(prefix: &str) -> PathBuf {
        let nanos = time::SystemTime::now()
            .duration_since(time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let root = env::temp_dir().join(format!("coyote-{prefix}-{nanos}"));
        fs::create_dir_all(&root).unwrap();
        root
    }
    /// Two install commands and three allowed domains are counted as `(2, 3)`.
    #[test]
    fn summarize_counts_installs_and_domains() {
        let root = unique_root("sbx-mixin-counts");
        let path = root.join("sbx-mixin.yaml");
        fs::write(
            &path,
            r#"
 schemaVersion: "1"
 kind: mixin
 commands:
  install:
    - command: "echo hi"
    - command: "echo bye"
 network:
  allowedDomains:
    - "a.example.com:443"
    - "b.example.com:443"
    - "c.example.com:443"
 "#,
        )
        .unwrap();
        assert_eq!(summarize(&path).unwrap(), (2, 3));
        // Best-effort cleanup; a failure here must not fail the test.
        let _ = fs::remove_dir_all(&root);
    }
    /// A mixin without `commands` or `network` blocks summarises to `(0, 0)`.
    #[test]
    fn summarize_treats_missing_blocks_as_zero() {
        let root = unique_root("sbx-mixin-empty");
        let path = root.join("sbx-mixin.yaml");
        fs::write(&path, "schemaVersion: \"1\"\nkind: mixin\n").unwrap();
        assert_eq!(summarize(&path).unwrap(), (0, 0));
        let _ = fs::remove_dir_all(&root);
    }
    /// Unparseable YAML produces an error whose rendered chain names the file.
    #[test]
    fn summarize_returns_err_on_malformed_yaml() {
        let root = unique_root("sbx-mixin-bad");
        let path = root.join("sbx-mixin.yaml");
        fs::write(&path, "this: is: not: yaml: ::").unwrap();
        let err = summarize(&path).unwrap_err();
        // `{:#}` renders the error together with its context chain.
        let msg = format!("{err:#}");
        assert!(
            msg.contains(&path.display().to_string()),
            "expected error to mention path; got: {msg}"
        );
        let _ = fs::remove_dir_all(&root);
    }
    /// Subdirectories are returned in name order, and those without a mixin
    /// file are left out.
    #[test]
    fn collect_subdir_mixins_sorts_and_skips_missing() {
        let root = unique_root("sbx-mixin-subdirs");
        for name in ["zebra", "apple", "no-mixin", "mango"] {
            let dir = root.join(name);
            fs::create_dir_all(&dir).unwrap();
            if name != "no-mixin" {
                fs::write(dir.join("sbx-mixin.yaml"), "kind: mixin\n").unwrap();
            }
        }
        let found = collect_subdir_mixins(&root);
        // Reduce each result to the name of the subdirectory that holds it.
        let names: Vec<String> = found
            .iter()
            .map(|p| {
                p.parent()
                    .unwrap()
                    .file_name()
                    .unwrap()
                    .to_string_lossy()
                    .to_string()
            })
            .collect();
        assert_eq!(names, vec!["apple", "mango", "zebra"]);
        let _ = fs::remove_dir_all(&root);
    }
    /// A directory that does not exist yields an empty list rather than an error.
    #[test]
    fn collect_subdir_mixins_returns_empty_for_missing_dir() {
        let absent = env::temp_dir().join("coyote-definitely-not-here-xyz");
        let found = collect_subdir_mixins(&absent);
        assert!(found.is_empty());
    }
    // Tests for `wrap_mixin_as_kit` / `DiscoveredMixin::kit_path`. Each one
    // redirects the cache directory through the process environment, so they
    // are all marked `#[serial]`.
    mod wrap_as_kit {
        use super::*;
        use serial_test::serial;
        use std::ffi::OsString;
        /// Points the cache-dir environment variable at a fresh temp directory
        /// for the lifetime of the guard; on drop the previous value is
        /// restored (or the variable removed) and the directory is deleted.
        struct TestCacheDirGuard {
            key: String,
            previous: Option<OsString>,
            path: PathBuf,
        }
        impl TestCacheDirGuard {
            fn new() -> Self {
                // NOTE(review): presumably resolves to the env var the cache-path
                // lookup reads — confirm against `get_env_name`.
                let key = crate::utils::get_env_name("cache_dir");
                let previous = env::var_os(&key);
                let nanos = time::SystemTime::now()
                    .duration_since(time::UNIX_EPOCH)
                    .unwrap()
                    .as_nanos();
                let path = env::temp_dir().join(format!("coyote-mixin-wrap-cache-{nanos}"));
                fs::create_dir_all(&path).unwrap();
                // SAFETY: every test in this module that builds the guard is
                // `#[serial]`, so guard users do not overlap one another.
                // NOTE(review): non-serial tests touching the environment
                // concurrently are not ruled out from here — confirm.
                unsafe {
                    env::set_var(&key, &path);
                }
                Self {
                    key,
                    previous,
                    path,
                }
            }
        }
        impl Drop for TestCacheDirGuard {
            fn drop(&mut self) {
                // SAFETY: same reasoning as in `new` — the guard is only held
                // by `#[serial]` tests.
                unsafe {
                    match &self.previous {
                        Some(v) => env::set_var(&self.key, v),
                        None => env::remove_var(&self.key),
                    }
                }
                let _ = fs::remove_dir_all(&self.path);
            }
        }
        /// Writes `content` to `sbx-mixin.yaml` inside a fresh `unique_root`
        /// directory and returns the file's path.
        /// NOTE(review): none of the callers below remove that directory.
        fn write_mixin(name: &str, content: &str) -> PathBuf {
            let root = unique_root(&format!("wrap-src-{name}"));
            let path = root.join("sbx-mixin.yaml");
            fs::write(&path, content).unwrap();
            path
        }
        /// The wrapped kit contains a `spec.yaml` with the mixin's exact content.
        #[test]
        #[serial]
        fn wrap_mixin_as_kit_creates_spec_yaml_with_original_content() {
            let _guard = TestCacheDirGuard::new();
            let content = "schemaVersion: \"1\"\nkind: mixin\nname: probe\n";
            let mixin = write_mixin("content", content);
            let kit_dir = wrap_mixin_as_kit(&mixin).unwrap();
            let spec = kit_dir.join("spec.yaml");
            assert!(spec.exists(), "spec.yaml must exist in wrapped kit dir");
            assert_eq!(fs::read_to_string(&spec).unwrap(), content);
        }
        /// Two different source files with identical bytes map to one kit dir.
        #[test]
        #[serial]
        fn wrap_mixin_as_kit_is_deterministic_for_identical_content() {
            let _guard = TestCacheDirGuard::new();
            let content = "schemaVersion: \"1\"\nkind: mixin\nname: probe\n";
            let mixin_one = write_mixin("dedup-1", content);
            let mixin_two = write_mixin("dedup-2", content);
            let kit_a = wrap_mixin_as_kit(&mixin_one).unwrap();
            let kit_b = wrap_mixin_as_kit(&mixin_two).unwrap();
            assert_eq!(
                kit_a, kit_b,
                "same content should share the same content-addressed kit dir"
            );
        }
        /// Mixins whose bytes differ end up in different kit dirs.
        #[test]
        #[serial]
        fn wrap_mixin_as_kit_different_content_yields_different_dirs() {
            let _guard = TestCacheDirGuard::new();
            let mixin_a = write_mixin("diff-a", "kind: mixin\nname: a\n");
            let mixin_b = write_mixin("diff-b", "kind: mixin\nname: b\n");
            let kit_a = wrap_mixin_as_kit(&mixin_a).unwrap();
            let kit_b = wrap_mixin_as_kit(&mixin_b).unwrap();
            assert_ne!(
                kit_a, kit_b,
                "different content must hash to different kit dirs"
            );
        }
        /// Wrapping the same mixin twice returns the same dir and leaves the
        /// existing `spec.yaml` untouched (its mtime does not change).
        #[test]
        #[serial]
        fn wrap_mixin_as_kit_is_idempotent_on_cache_hit() {
            let _guard = TestCacheDirGuard::new();
            let mixin = write_mixin("idempotent", "kind: mixin\nname: probe\n");
            let kit_first = wrap_mixin_as_kit(&mixin).unwrap();
            let spec = kit_first.join("spec.yaml");
            let mtime_first = fs::metadata(&spec).unwrap().modified().unwrap();
            // Give a hypothetical rewrite a chance to produce a later mtime.
            // NOTE(review): assumes the filesystem's mtime resolution is finer
            // than 10ms; on coarser filesystems this check cannot detect a rewrite.
            std::thread::sleep(std::time::Duration::from_millis(10));
            let kit_second = wrap_mixin_as_kit(&mixin).unwrap();
            let mtime_second = fs::metadata(kit_second.join("spec.yaml"))
                .unwrap()
                .modified()
                .unwrap();
            assert_eq!(kit_first, kit_second);
            assert_eq!(
                mtime_first, mtime_second,
                "cache hit must not rewrite spec.yaml"
            );
        }
        /// A mixin whose path is already a directory is returned unchanged.
        #[test]
        #[serial]
        fn kit_path_passes_through_existing_directory() {
            let _guard = TestCacheDirGuard::new();
            let dir = unique_root("kit-path-dir-passthrough");
            let m = DiscoveredMixin {
                path: dir.clone(),
                label: "vault".into(),
                install_count: 1,
                domain_count: 1,
            };
            assert_eq!(m.kit_path().unwrap(), dir);
        }
        /// A mixin whose path is a file is wrapped into a separate kit
        /// directory containing `spec.yaml`.
        #[test]
        #[serial]
        fn kit_path_wraps_file_into_kit_dir() {
            let _guard = TestCacheDirGuard::new();
            let mixin = write_mixin("kit-path-wrap", "kind: mixin\nname: probe\n");
            let m = DiscoveredMixin {
                path: mixin.clone(),
                label: mixin.display().to_string(),
                install_count: 0,
                domain_count: 0,
            };
            let wrapped = m.kit_path().unwrap();
            assert!(wrapped.is_dir(), "kit_path of a file should be a directory");
            assert!(wrapped.join("spec.yaml").exists());
            assert_ne!(
                wrapped, mixin,
                "kit_path should not return the original file path"
            );
        }
    }
 }
@@ -0,0 +1,964 @@
 use anyhow::{Context, Result, anyhow, bail};
 use rust_embed::RustEmbed;
 use sha2::{Digest, Sha256};
 use std::env;
 use std::fs;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use which::which;
 mod mixins;
 use gman::providers::SupportedProvider;
 use crate::config::paths;
 use crate::sandbox::mixins::DiscoveredMixin;
 use crate::utils::run_command_with_output;
 use crate::vault::Vault;
 /// Name of the executable that is looked up on `PATH` and spawned for every
 /// sandbox operation in this module (`ls`, `create`, `cp`, `exec`, `run`).
 const SBX_BINARY: &str = "sbx";
 /// Environment variable whose mere presence makes `bail_if_nested` refuse to
 /// launch another sandbox.
 pub(crate) const SANDBOX_ENV_FLAG: &str = "IS_SANDBOX";
 /// Agent argument appended to the `sbx create` command line.
 const SANDBOX_AGENT: &str = "coyote";
 /// Handle to the files under `assets/sbx-kit/`, embedded through the
 /// `RustEmbed` derive. `extract_embedded_kit` writes them to the kit cache
 /// directory and `compute_kit_hash` fingerprints them.
 #[derive(RustEmbed)]
 #[folder = "assets/sbx-kit/"]
 struct EmbeddedKit;
 /// Handle to the files under `assets/sbx-vault-mixins/`, embedded through the
 /// `RustEmbed` derive. `extract_vault_mixin` expects one sub-directory per
 /// non-local vault provider, each containing a `spec.yaml`.
 #[derive(RustEmbed)]
 #[folder = "assets/sbx-vault-mixins/"]
 struct EmbeddedVaultMixins;
 /// Creates the sandbox `name` if it does not exist yet, then runs it.
 ///
 /// * `name` - explicit sandbox name; when `None` (or blank) the name is
 ///   derived from the current directory (see `resolve_name`).
 /// * `fresh` - when creating, skip copying host config into the sandbox.
 /// * `no_mixins` - when creating, pass only the base kit to `sbx create`.
 ///
 /// Both flags are ignored (with a debug log) when the sandbox already exists.
 ///
 /// # Errors
 /// Fails when `sbx` is not installed, when already running inside a sandbox,
 /// when the name or kit cannot be resolved, or when any `sbx` invocation
 /// fails.
 pub fn launch(name: Option<String>, fresh: bool, no_mixins: bool) -> Result<()> {
    ensure_sbx_installed()?;
    bail_if_nested()?;
    let name = resolve_name(name)?;
    let kit_path = resolve_kit_path()?;
    if sandbox_exists(&name)? {
        info!("Re-attaching to existing sandbox '{name}'");
        if fresh {
            debug!("--fresh ignored: re-attaching to existing sandbox '{name}'");
        }
        if no_mixins {
            debug!("--no-mixins ignored: re-attaching to existing sandbox '{name}'");
        }
    } else {
        // Mixins only matter for `sbx create`, so discover them here rather
        // than up front: a broken mixin must not block re-attaching to a
        // sandbox that already exists.
        let discovered = if no_mixins {
            Vec::new()
        } else {
            let mut all = mixins::discover()?;
            if let Ok(vault) = Vault::init_bare()
                && let Some(vault_mixin) = extract_vault_mixin(&vault.provider)?
            {
                // The built-in vault mixin goes first, ahead of discovered ones.
                all.insert(0, vault_mixin);
            }
            all
        };
        mixins::log_discovery(&discovered, no_mixins);
        if fresh {
            let msg = format!("Creating fresh sandbox '{name}' (no host config will be copied)");
            info!("{msg}");
            println!("{msg}");
        }
        create_sandbox(&name, &kit_path, &discovered)?;
        if !fresh {
            copy_host_files(&name)?;
        }
    }
    exec_run(&name, &kit_path)
 }
 /// Succeeds when the `sbx` executable can be located on `PATH`; otherwise
 /// returns an error that points at the Docker Sandboxes install guide.
 fn ensure_sbx_installed() -> Result<()> {
    match which(SBX_BINARY) {
        Ok(_) => Ok(()),
        Err(_) => Err(anyhow!(
            "`sbx` binary not found in PATH.\n\n\
             Install Docker Sandboxes:\n  https://docs.docker.com/ai/sandboxes/get-started/"
        )),
    }
 }
 /// Refuses to continue when the sandbox marker variable is present in the
 /// environment. Any value counts, including an empty one.
 fn bail_if_nested() -> Result<()> {
    match env::var_os(SANDBOX_ENV_FLAG) {
        Some(_) => {
            bail!("Refusing to nest sandboxes: ${SANDBOX_ENV_FLAG} is set, already inside one")
        }
        None => Ok(()),
    }
 }
 fn resolve_name(name: Option<String>) -> Result<String> {
    if let Some(n) = name {
        let trimmed = n.trim();
        if !trimmed.is_empty() {
            let sanitized = sanitize_name(trimmed);
            if sanitized.is_empty() {
                bail!("Sandbox name '{trimmed}' sanitizes to an empty string");
            }
            return Ok(sanitized);
        }
    }
    let cwd = env::current_dir().context("Failed to determine current directory")?;
    let basename = cwd
        .file_name()
        .and_then(|s| s.to_str())
        .ok_or_else(|| anyhow!("Could not derive sandbox name from current directory"))?;
    let sanitized = sanitize_name(basename);
    if sanitized.is_empty() {
        bail!("Could not derive a valid sandbox name from '{basename}'; pass --sandbox <NAME>");
    }
    Ok(sanitized)
 }
 /// Normalizes `input` into a sandbox name made of lowercase ASCII letters,
 /// digits and single dashes.
 ///
 /// Every run of characters that are not ASCII alphanumerics becomes one `-`,
 /// and leading or trailing dashes are dropped. The result is empty when
 /// `input` contains no ASCII alphanumeric character.
 fn sanitize_name(input: &str) -> String {
    input
        // Splitting on the separators and dropping empty pieces collapses
        // separator runs and trims both ends in one go.
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<String>>()
        .join("-")
 }
 /// Returns the base kit directory: the `$COYOTE_SANDBOX_KIT` override when
 /// one is configured (it must exist), otherwise the extracted embedded kit.
 fn resolve_kit_path() -> Result<PathBuf> {
    let Some(path) = paths::sandbox_kit_override() else {
        return extract_embedded_kit();
    };
    if !path.exists() {
        bail!(
            "$COYOTE_SANDBOX_KIT is set but path does not exist: {}",
            path.display()
        );
    }
    debug!(
        "Using kit override from $COYOTE_SANDBOX_KIT: {}",
        path.display()
    );
    Ok(path)
 }
 /// Materializes the embedded base kit on disk and returns its directory.
 ///
 /// The extraction is skipped when the stored hash equals the hash of the
 /// embedded files *and* the kit directory is actually present. Otherwise the
 /// directory is wiped, every embedded file is rewritten, and the new hash is
 /// stored last so an interrupted extraction is never mistaken for a valid
 /// cache.
 ///
 /// # Errors
 /// Any filesystem failure, annotated with the path involved.
 fn extract_embedded_kit() -> Result<PathBuf> {
    let cache_root = paths::sbx_kit_dir();
    let new_hash = compute_kit_hash()?;
    let hash_file = paths::sbx_kit_hash_file();
    // A matching hash alone is not enough: if the kit directory was removed
    // while the hash file survived, we must extract again rather than hand
    // back a path that does not exist.
    if let Ok(existing) = fs::read_to_string(&hash_file)
        && existing == new_hash
        && cache_root.is_dir()
    {
        return Ok(cache_root);
    }
    if cache_root.exists() {
        fs::remove_dir_all(&cache_root)
            .with_context(|| format!("Failed to clear stale kit at {}", cache_root.display()))?;
    }
    fs::create_dir_all(&cache_root)
        .with_context(|| format!("Failed to create {}", cache_root.display()))?;
    for entry in EmbeddedKit::iter() {
        let file = EmbeddedKit::get(&entry)
            .ok_or_else(|| anyhow!("Embedded kit file missing during extraction: {entry}"))?;
        let dest = cache_root.join(entry.as_ref());
        // Embedded entries may live in sub-directories of the kit.
        if let Some(parent) = dest.parent() {
            fs::create_dir_all(parent)
                .with_context(|| format!("Failed to create {}", parent.display()))?;
        }
        fs::write(&dest, &file.data)
            .with_context(|| format!("Failed to write {}", dest.display()))?;
    }
    fs::write(&hash_file, &new_hash)
        .with_context(|| format!("Failed to write {}", hash_file.display()))?;
    debug!("Extracted embedded sbx-kit to {}", cache_root.display());
    Ok(cache_root)
 }
 fn compute_kit_hash() -> Result<String> {
    let mut hasher = Sha256::new();
    let mut entries: Vec<_> = EmbeddedKit::iter().collect();
    entries.sort();
    for entry in &entries {
        let file = EmbeddedKit::get(entry)
            .ok_or_else(|| anyhow!("Embedded kit file missing during hash: {entry}"))?;
        hasher.update(entry.as_bytes());
        hasher.update(b"\0");
        hasher.update(&file.data);
    }
    Ok(format!("{:x}", hasher.finalize()))
 }
 /// Resolves the built-in mixin that matches the configured vault provider.
 ///
 /// Returns `Ok(None)` for the local provider. For every other provider the
 /// embedded vault mixins are extracted and the provider's directory is
 /// returned as a `DiscoveredMixin`; it is an error if that directory has no
 /// `spec.yaml`.
 fn extract_vault_mixin(provider: &SupportedProvider) -> Result<Option<DiscoveredMixin>> {
    let provider_dir = match provider {
        SupportedProvider::Local { .. } => return Ok(None),
        SupportedProvider::AwsSecretsManager { .. } => "aws_secrets_manager",
        SupportedProvider::GcpSecretManager { .. } => "gcp_secret_manager",
        SupportedProvider::AzureKeyVault { .. } => "azure_key_vault",
        SupportedProvider::Gopass { .. } => "gopass",
        SupportedProvider::OnePassword { .. } => "one_password",
    };

    let provider_root = extract_vault_mixins_cache()?.join(provider_dir);
    let spec_path = provider_root.join("spec.yaml");
    if spec_path.exists() {
        let label = format!("<built-in: vault-{provider_dir}>");
        let (install_count, domain_count) = mixins::summarize(&spec_path)?;
        let mixin = DiscoveredMixin {
            path: provider_root,
            label,
            install_count,
            domain_count,
        };
        Ok(Some(mixin))
    } else {
        bail!(
            "Embedded vault mixin for '{provider_dir}' is missing spec.yaml at {}",
            spec_path.display()
        )
    }
 }
 /// Materializes the embedded vault mixins on disk and returns their root
 /// directory.
 ///
 /// The extraction is skipped when the stored hash equals the hash of the
 /// embedded files *and* the cache directory is actually present. Otherwise
 /// the directory is wiped, every embedded file is rewritten, and the new
 /// hash is stored last so an interrupted extraction is never mistaken for a
 /// valid cache.
 ///
 /// # Errors
 /// Any filesystem failure, annotated with the path involved.
 fn extract_vault_mixins_cache() -> Result<PathBuf> {
    let cache_root = paths::sbx_vault_mixins_dir();
    let new_hash = compute_vault_mixins_hash()?;
    let hash_file = paths::sbx_vault_mixins_hash_file();
    // A matching hash alone is not enough: if the cache directory was removed
    // while the hash file survived, we must extract again rather than hand
    // back a path that does not exist.
    if let Ok(existing) = fs::read_to_string(&hash_file)
        && existing == new_hash
        && cache_root.is_dir()
    {
        return Ok(cache_root);
    }
    if cache_root.exists() {
        fs::remove_dir_all(&cache_root).with_context(|| {
            format!(
                "Failed to clear stale vault mixins at {}",
                cache_root.display()
            )
        })?;
    }
    fs::create_dir_all(&cache_root)
        .with_context(|| format!("Failed to create {}", cache_root.display()))?;
    for entry in EmbeddedVaultMixins::iter() {
        let file = EmbeddedVaultMixins::get(&entry).ok_or_else(|| {
            anyhow!("Embedded vault mixin file missing during extraction: {entry}")
        })?;
        let dest = cache_root.join(entry.as_ref());
        // Each provider's files live in their own sub-directory.
        if let Some(parent) = dest.parent() {
            fs::create_dir_all(parent)
                .with_context(|| format!("Failed to create {}", parent.display()))?;
        }
        fs::write(&dest, &file.data)
            .with_context(|| format!("Failed to write {}", dest.display()))?;
    }
    fs::write(&hash_file, &new_hash)
        .with_context(|| format!("Failed to write {}", hash_file.display()))?;
    debug!(
        "Extracted embedded sbx-vault-mixins to {}",
        cache_root.display()
    );
    Ok(cache_root)
 }
 fn compute_vault_mixins_hash() -> Result<String> {
    let mut hasher = Sha256::new();
    let mut entries: Vec<_> = EmbeddedVaultMixins::iter().collect();
    entries.sort();
    for entry in &entries {
        let file = EmbeddedVaultMixins::get(entry)
            .ok_or_else(|| anyhow!("Embedded vault mixin file missing during hash: {entry}"))?;
        hasher.update(entry.as_bytes());
        hasher.update(b"\0");
        hasher.update(&file.data);
    }
    Ok(format!("{:x}", hasher.finalize()))
 }
 /// Reports whether `sbx ls` lists a sandbox called `name`.
 ///
 /// The first output line is skipped as a header; on each remaining line only
 /// the first whitespace-separated column is compared against `name`.
 fn sandbox_exists(name: &str) -> Result<bool> {
    let (success, stdout, stderr) =
        run_command_with_output(SBX_BINARY, &["ls"], None).context("Failed to run `sbx ls`")?;
    if !success {
        bail!("`sbx ls` failed: {stderr}");
    }
    for row in stdout.lines().skip(1) {
        let first_column = row.split_whitespace().next();
        if first_column == Some(name) {
            return Ok(true);
        }
    }
    Ok(false)
 }
 fn create_sandbox(name: &str, kit_path: &Path, mixins: &[DiscoveredMixin]) -> Result<()> {
    info!("Creating sandbox '{name}'");
    let args = build_create_args(name, kit_path, mixins)?;
    debug!("sbx {}", args.join(" "));
    let status = Command::new(SBX_BINARY)
        .args(&args)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()
        .context("Failed to spawn `sbx create`")?;
    if !status.success() {
        bail!("`sbx create` exited with {status}");
    }
    Ok(())
 }
 fn build_create_args(
    name: &str,
    kit_path: &Path,
    mixins: &[DiscoveredMixin],
 ) -> Result<Vec<String>> {
    let kit_str = kit_path
        .to_str()
        .ok_or_else(|| anyhow!("Kit path is not valid UTF-8: {}", kit_path.display()))?;
    let mut args = vec![
        "create".to_string(),
        "--name".to_string(),
        name.to_string(),
        "--kit".to_string(),
        kit_str.to_string(),
    ];
    for mixin in mixins {
        let mixin_kit = mixin.kit_path()?;
        let mixin_str = mixin_kit
            .to_str()
            .ok_or_else(|| anyhow!("Mixin kit path is not valid UTF-8: {}", mixin_kit.display()))?
            .to_string();
        args.push("--kit".to_string());
        args.push(mixin_str);
    }
    args.push(SANDBOX_AGENT.to_string());
    args.push(".".to_string());
    Ok(args)
 }
 /// Copies host-side state into the freshly created sandbox `name`.
 ///
 /// 1. Every top-level entry of the host config directory is copied into
 ///    `/home/agent/.config/coyote/` and the destination is chowned to the
 ///    agent user. Skipped when the host config directory does not exist.
 /// 2. The local vault password file, if one is configured and exists, is
 ///    copied to the path produced by `host_to_sandbox_path` and chowned.
 ///
 /// # Errors
 /// Fails when the home directory cannot be determined, when the config
 /// directory cannot be read, or when any `sbx cp` / `sbx exec` step fails.
 fn copy_host_files(name: &str) -> Result<()> {
    let config_dir = paths::config_dir();
    let home_dir = dirs::home_dir().context("Could not determine home directory")?;
    if config_dir.exists() {
        let sandbox_config_dir = "/home/agent/.config/coyote";
        ensure_sandbox_dir(name, sandbox_config_dir)?;
        let dest = format!("{name}:{sandbox_config_dir}/");
        for entry in fs::read_dir(&config_dir)
            .with_context(|| format!("Failed to read {}", config_dir.display()))?
        {
            // Attach the directory to per-entry I/O errors as well, matching
            // the context carried by every other filesystem call here.
            let entry = entry.with_context(|| {
                format!("Failed to read an entry of {}", config_dir.display())
            })?;
            let path = entry.path();
            sbx_cp(&path.display().to_string(), &dest)?;
        }
        chown_agent_recursive(name, sandbox_config_dir)?;
    } else {
        debug!(
            "Skipping config copy: {} does not exist",
            config_dir.display()
        );
    }
    match resolve_vault_password_file() {
        Some(password_file) if password_file.exists() => {
            let dest_path = host_to_sandbox_path(&password_file, &home_dir, cfg!(windows))?;
            // A file directly under `/` has an empty parent; nothing to create.
            if let Some(parent) = sandbox_path_parent(&dest_path)
                && !parent.is_empty()
            {
                ensure_sandbox_dir(name, parent)?;
            }
            let dest = format!("{name}:{dest_path}");
            sbx_cp(&password_file.display().to_string(), &dest)?;
            chown_agent_recursive(name, &dest_path)?;
        }
        Some(password_file) => {
            debug!(
                "Skipping vault password copy: {} does not exist",
                password_file.display()
            );
        }
        None => {
            debug!("Skipping vault password copy: no local vault provider configured");
        }
    }
    Ok(())
 }
 /// Maps a host file path to the path it should have inside the sandbox.
 ///
 /// * A path below `home_dir` is re-rooted under `/home/agent/` with forward
 ///   slashes.
 /// * Any other path is an error when `is_windows_host` is true, and is
 ///   returned unchanged otherwise.
 ///
 /// Fails as well when either path is not valid UTF-8.
 fn host_to_sandbox_path(
    host_path: &Path,
    home_dir: &Path,
    is_windows_host: bool,
 ) -> Result<String> {
    let host_str = host_path.to_str().context("Host path is not valid UTF-8")?;
    let home_str = home_dir
        .to_str()
        .context("Home directory is not valid UTF-8")?;

    match strip_host_home(host_str, home_str) {
        Some(rel) => {
            let unixified = rel.replace('\\', "/");
            Ok(format!("/home/agent/{unixified}"))
        }
        None if is_windows_host => bail!(
            "Path '{host_str}' is outside your Windows user profile ({home_str}). \
             Sandbox mode cannot copy files from outside %USERPROFILE% into a Linux \
             sandbox. Move the file under your user profile and update your config \
             accordingly."
        ),
        None => Ok(host_str.to_string()),
    }
 }
 /// Returns the part of `path` that lies below `home`, using `/` separators.
 ///
 /// Backslashes in both inputs are treated as `/`, and trailing separators on
 /// `home` are ignored. `None` is returned when `home` is empty (or only
 /// separators), when `path` is `home` itself, or when `path` is not located
 /// under `home` on a path-component boundary (`/home/ab` is not a parent of
 /// `/home/abc/x`).
 fn strip_host_home(path: &str, home: &str) -> Option<String> {
    let path_norm = path.replace('\\', "/");
    let home_norm = home.replace('\\', "/");
    let home_norm = home_norm.trim_end_matches('/');
    if home_norm.is_empty() {
        return None;
    }
    // `strip_prefix` compares whole strings, so unlike slicing `path` at
    // `home.len()` it cannot land inside a multi-byte character and panic.
    // Requiring the following `/` enforces the component boundary.
    let below_home = path_norm.strip_prefix(home_norm)?.strip_prefix('/')?;
    Some(below_home.to_string())
 }
 /// Returns everything before the last `/` of `linux_path`, or `None` when
 /// the path contains no `/`. A file directly under the root yields
 /// `Some("")`.
 fn sandbox_path_parent(linux_path: &str) -> Option<&str> {
    let last_slash = linux_path.rfind('/')?;
    Some(&linux_path[..last_slash])
 }
 /// Creates `dir` (and missing parents) inside `sandbox` via `sbx exec` and
 /// chowns that directory to `agent:agent`. The path is shell-quoted before
 /// it is placed in the `sh -c` command line.
 fn ensure_sandbox_dir(sandbox: &str, dir: &str) -> Result<()> {
    let dir_q = shell_words::quote(dir);
    let cmd = format!("sudo mkdir -p {dir_q} && sudo chown agent:agent {dir_q}");
    debug!("sbx exec {sandbox}: {cmd}");

    let mut exec = Command::new(SBX_BINARY);
    exec.args(["exec", sandbox, "sh", "-c", cmd.as_str()])
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit());
    let status = exec
        .status()
        .context("Failed to spawn `sbx exec` to prepare destination directory")?;

    if status.success() {
        Ok(())
    } else {
        bail!("Preparing sandbox directory '{dir}' failed: sbx exec exited with {status}")
    }
 }
 /// Returns the vault's local password file path, or `None` when the vault
 /// cannot be initialised or does not report a local password file.
 fn resolve_vault_password_file() -> Option<PathBuf> {
    let vault = Vault::init_bare().ok()?;
    vault.local_password_file().ok()
 }
 fn sbx_cp(src: &str, dest: &str) -> Result<()> {
    debug!("sbx cp {src} {dest}");
    let status = Command::new(SBX_BINARY)
        .args(["cp", src, dest])
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()
        .context("Failed to spawn `sbx cp`")?;
    if !status.success() {
        bail!("`sbx cp {src} {dest}` exited with {status}");
    }
    Ok(())
 }
 fn exec_run(name: &str, kit_path: &Path) -> Result<()> {
    let kit_str = kit_path
        .to_str()
        .ok_or_else(|| anyhow!("Kit path is not valid UTF-8: {}", kit_path.display()))?;
    debug!("sbx run --name {name} --kit {kit_str}");
    let status = Command::new(SBX_BINARY)
        .args(["run", "--name", name, "--kit", kit_str])
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()
        .context("Failed to spawn `sbx run`")?;
    if !status.success() {
        bail!("`sbx run` exited with {status}");
    }
    Ok(())
 }
 /// Recursively chowns `path` inside `sandbox` to `agent:agent` via
 /// `sbx exec`. The path is shell-quoted before it is placed in the `sh -c`
 /// command line.
 fn chown_agent_recursive(sandbox: &str, path: &str) -> Result<()> {
    let path_q = shell_words::quote(path);
    let cmd = format!("sudo chown -R agent:agent {path_q}");
    debug!("sbx exec {sandbox}: {cmd}");

    let mut exec = Command::new(SBX_BINARY);
    exec.args(["exec", sandbox, "sh", "-c", cmd.as_str()])
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit());
    let status = exec
        .status()
        .context("Failed to spawn `sbx exec` to chown copied files")?;

    if status.success() {
        Ok(())
    } else {
        bail!("Chowning '{path}' in sandbox failed: sbx exec exited with {status}")
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Uppercase ASCII letters come out lowercased.
    #[test]
    fn sanitize_name_lowercases() {
        let sanitized = sanitize_name("Foo");
        assert_eq!(sanitized, "foo");
    }
    /// A space becomes a dash and the trailing `!` leaves no dash behind.
    #[test]
    fn sanitize_name_replaces_non_alphanumeric() {
        let sanitized = sanitize_name("hello world!");
        assert_eq!(sanitized, "hello-world");
    }
    /// Several consecutive separators collapse into a single dash.
    #[test]
    fn sanitize_name_collapses_dash_runs() {
        let sanitized = sanitize_name("a___b");
        assert_eq!(sanitized, "a-b");
    }
    /// Leading and trailing dashes are removed from the result.
    #[test]
    fn sanitize_name_trims_dashes() {
        let sanitized = sanitize_name("---hi---");
        assert_eq!(sanitized, "hi");
    }
    /// Case folding, separator collapsing and trimming all apply together.
    #[test]
    fn sanitize_name_handles_mixed_input() {
        let sanitized = sanitize_name("My Project (v2)");
        assert_eq!(sanitized, "my-project-v2");
    }
    /// Input without any ASCII alphanumeric character sanitizes to "".
    #[test]
    fn sanitize_name_all_invalid_yields_empty() {
        let sanitized = sanitize_name("///");
        assert_eq!(sanitized, "");
    }
    /// An already-valid explicit name is returned unchanged.
    #[test]
    fn resolve_name_uses_explicit_arg() {
        let requested = Some(String::from("explicit-name"));
        let resolved = resolve_name(requested).unwrap();
        assert_eq!(resolved, "explicit-name");
    }
    /// An explicit name is run through `sanitize_name` before being returned.
    #[test]
    fn resolve_name_sanitizes_explicit_arg() {
        let requested = Some(String::from("My Sandbox!"));
        let resolved = resolve_name(requested).unwrap();
        assert_eq!(resolved, "my-sandbox");
    }
    /// An explicit name that sanitizes to nothing is an error rather than a
    /// silent fallback to the current directory.
    #[test]
    fn resolve_name_rejects_empty_after_sanitize() {
        let requested = Some(String::from("///"));
        let err = resolve_name(requested);
        assert!(err.is_err());
    }
    /// Without an explicit name the result comes from the working directory;
    /// only its shape (non-empty, ASCII alphanumerics and dashes) is checked.
    #[test]
    fn resolve_name_falls_back_to_cwd_when_none() {
        let n = resolve_name(None).unwrap();
        assert!(!n.is_empty());
        assert!(n.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'));
    }
    /// Two hash computations agree, and the result has the 64 characters of
    /// a hex-formatted SHA-256 digest.
    #[test]
    fn compute_kit_hash_is_deterministic() {
        let first = compute_kit_hash().unwrap();
        let second = compute_kit_hash().unwrap();
        assert_eq!(first, second);
        assert_eq!(first.len(), 64);
    }
    /// The base kit's `--kit` pair must come first, followed by one `--kit`
    /// pair per mixin in the order given, then the agent name and `.`.
    #[test]
    fn build_create_args_emits_base_kit_before_mixins() {
        let kit = PathBuf::from("/cache/sbx-kit");
        // Nanosecond timestamp keeps the temp directory names distinct
        // between runs.
        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let dir_a = env::temp_dir().join(format!("coyote-mixin-a-{unique}"));
        let dir_b = env::temp_dir().join(format!("coyote-mixin-b-{unique}"));
        fs::create_dir_all(&dir_a).unwrap();
        fs::create_dir_all(&dir_b).unwrap();
        let mixins = vec![
            DiscoveredMixin {
                path: dir_a.clone(),
                label: "user".into(),
                install_count: 0,
                domain_count: 0,
            },
            DiscoveredMixin {
                path: dir_b.clone(),
                label: "sql".into(),
                install_count: 0,
                domain_count: 0,
            },
        ];
        let result = build_create_args("my-box", &kit, &mixins);
        // Clean up before unwrapping or asserting so a failing test does not
        // leave the temp directories behind.
        let _ = fs::remove_dir_all(&dir_a);
        let _ = fs::remove_dir_all(&dir_b);
        let args = result.unwrap();
        assert_eq!(
            args,
            vec![
                "create".to_string(),
                "--name".to_string(),
                "my-box".to_string(),
                "--kit".to_string(),
                "/cache/sbx-kit".to_string(),
                "--kit".to_string(),
                dir_a.display().to_string(),
                "--kit".to_string(),
                dir_b.display().to_string(),
                "coyote".to_string(),
                ".".to_string(),
            ]
        );
    }
    /// With no mixins the only `--kit` pair is the base kit's.
    #[test]
    fn build_create_args_with_no_mixins_omits_mixin_kits() {
        let base_kit = PathBuf::from("/cache/sbx-kit");
        let expected: Vec<String> = [
            "create",
            "--name",
            "box",
            "--kit",
            "/cache/sbx-kit",
            "coyote",
            ".",
        ]
        .iter()
        .map(|part| part.to_string())
        .collect();
        let args = build_create_args("box", &base_kit, &[]).unwrap();
        assert_eq!(args, expected);
    }
    /// Tests for `extract_vault_mixin` and `compute_vault_mixins_hash`.
    ///
    /// Tests that extract to disk redirect the cache directory through
    /// `TestCacheDirGuard` and are marked `#[serial]` because the guard
    /// mutates a process-wide environment variable.
    mod vault_mixins {
        use super::*;
        use crate::utils::get_env_name;
        use gman::providers::aws_secrets_manager::AwsSecretsManagerProvider;
        use gman::providers::azure_key_vault::AzureKeyVaultProvider;
        use gman::providers::gcp_secret_manager::GcpSecretManagerProvider;
        use gman::providers::gopass::GopassProvider;
        use gman::providers::local::LocalProvider;
        use gman::providers::one_password::OnePasswordProvider;
        use serial_test::serial;
        use std::time::{SystemTime, UNIX_EPOCH};
        /// RAII guard that points the cache-dir environment variable at a
        /// fresh temp directory and restores the previous state on drop.
        struct TestCacheDirGuard {
            // Name of the environment variable being overridden.
            key: String,
            // Value the variable had before the override, if it was set.
            previous: Option<std::ffi::OsString>,
            // Temp directory created for this guard; removed on drop.
            path: PathBuf,
        }
        impl TestCacheDirGuard {
            /// Creates the temp directory and installs it as the cache dir.
            fn new() -> Self {
                let key = get_env_name("cache_dir");
                let previous = env::var_os(&key);
                // Nanosecond timestamp keeps directory names distinct
                // between guards.
                let unique = SystemTime::now()
                    .duration_since(UNIX_EPOCH)
                    .unwrap()
                    .as_nanos();
                let path = env::temp_dir().join(format!("coyote-sandbox-vault-tests-{unique}"));
                fs::create_dir_all(&path).unwrap();
                // SAFETY (assumed): every test in this module that builds a
                // guard is `#[serial]`. Nothing visible here rules out other
                // threads reading the environment concurrently — TODO confirm.
                unsafe {
                    env::set_var(&key, &path);
                }
                Self {
                    key,
                    previous,
                    path,
                }
            }
        }
        impl Drop for TestCacheDirGuard {
            /// Restores (or removes) the variable, then deletes the temp
            /// directory on a best-effort basis.
            fn drop(&mut self) {
                // SAFETY (assumed): same reasoning as in `new` — relies on
                // the guarded tests being serialised; TODO confirm.
                unsafe {
                    match &self.previous {
                        Some(v) => env::set_var(&self.key, v),
                        None => env::remove_var(&self.key),
                    }
                }
                let _ = fs::remove_dir_all(&self.path);
            }
        }
        /// The local provider has no built-in mixin. No guard is needed:
        /// `extract_vault_mixin` returns before touching the cache.
        #[test]
        fn returns_none_for_local() {
            let p = SupportedProvider::Local {
                provider_def: LocalProvider::default(),
            };
            assert!(extract_vault_mixin(&p).unwrap().is_none());
        }
        /// AWS Secrets Manager resolves to an extracted mixin with a spec.
        #[test]
        #[serial]
        fn returns_some_for_aws() {
            let _guard = TestCacheDirGuard::new();
            let p = SupportedProvider::AwsSecretsManager {
                provider_def: AwsSecretsManagerProvider {
                    aws_profile: None,
                    aws_region: None,
                },
            };
            let m = extract_vault_mixin(&p)
                .unwrap()
                .expect("expected vault mixin");
            assert!(m.path.join("spec.yaml").exists());
            assert!(m.label.contains("aws_secrets_manager"));
        }
        /// GCP Secret Manager resolves to an extracted mixin with a spec.
        #[test]
        #[serial]
        fn returns_some_for_gcp() {
            let _guard = TestCacheDirGuard::new();
            let p = SupportedProvider::GcpSecretManager {
                provider_def: GcpSecretManagerProvider {
                    gcp_project_id: None,
                },
            };
            let m = extract_vault_mixin(&p)
                .unwrap()
                .expect("expected vault mixin");
            assert!(m.path.join("spec.yaml").exists());
            assert!(m.label.contains("gcp_secret_manager"));
        }
        /// 1Password resolves to an extracted mixin with a spec.
        #[test]
        #[serial]
        fn returns_some_for_one_password() {
            let _guard = TestCacheDirGuard::new();
            let p = SupportedProvider::OnePassword {
                provider_def: OnePasswordProvider {
                    vault: None,
                    account: None,
                },
            };
            let m = extract_vault_mixin(&p)
                .unwrap()
                .expect("expected vault mixin");
            assert!(m.path.join("spec.yaml").exists());
            assert!(m.label.contains("one_password"));
        }
        /// Azure Key Vault resolves to an extracted mixin with a spec.
        #[test]
        #[serial]
        fn returns_some_for_azure() {
            let _guard = TestCacheDirGuard::new();
            let p = SupportedProvider::AzureKeyVault {
                provider_def: AzureKeyVaultProvider { vault_name: None },
            };
            let m = extract_vault_mixin(&p)
                .unwrap()
                .expect("expected vault mixin");
            assert!(m.path.join("spec.yaml").exists());
            assert!(m.label.contains("azure_key_vault"));
        }
        /// gopass resolves to an extracted mixin with a spec.
        #[test]
        #[serial]
        fn returns_some_for_gopass() {
            let _guard = TestCacheDirGuard::new();
            let p = SupportedProvider::Gopass {
                provider_def: GopassProvider { store: None },
            };
            let m = extract_vault_mixin(&p)
                .unwrap()
                .expect("expected vault mixin");
            assert!(m.path.join("spec.yaml").exists());
            assert!(m.label.contains("gopass"));
        }
        /// Two hash computations agree, and the result has the 64 characters
        /// of a hex-formatted SHA-256 digest.
        #[test]
        fn hash_is_deterministic() {
            let h1 = compute_vault_mixins_hash().unwrap();
            let h2 = compute_vault_mixins_hash().unwrap();
            assert_eq!(h1, h2);
            assert_eq!(h1.len(), 64);
        }
    }
    /// Tests for `host_to_sandbox_path` and `sandbox_path_parent`. The host
    /// OS is simulated through the `is_windows_host` argument, so every case
    /// runs on any platform.
    mod host_to_sandbox_path_tests {
        use super::*;
        /// A file directly under the home directory is re-rooted under
        /// `/home/agent`.
        #[test]
        fn linux_under_home() {
            let dest = host_to_sandbox_path(
                Path::new("/home/atusa/.coyote_password"),
                Path::new("/home/atusa"),
                false,
            )
            .unwrap();
            assert_eq!(dest, "/home/agent/.coyote_password");
        }
        /// Intermediate directories below home are preserved.
        #[test]
        fn linux_nested_under_home() {
            let dest = host_to_sandbox_path(
                Path::new("/home/atusa/.config/coyote/.password"),
                Path::new("/home/atusa"),
                false,
            )
            .unwrap();
            assert_eq!(dest, "/home/agent/.config/coyote/.password");
        }
        /// On a non-Windows host a path outside home is returned unchanged.
        #[test]
        fn linux_outside_home_returns_verbatim() {
            let dest = host_to_sandbox_path(
                Path::new("/etc/coyote/.password"),
                Path::new("/home/atusa"),
                false,
            )
            .unwrap();
            assert_eq!(dest, "/etc/coyote/.password");
        }
        /// Spaces in path components survive the mapping untouched.
        #[test]
        fn macos_under_home_with_spaces() {
            let dest = host_to_sandbox_path(
                Path::new("/Users/atusa/Library/Application Support/coyote/.password"),
                Path::new("/Users/atusa"),
                false,
            )
            .unwrap();
            assert_eq!(
                dest,
                "/home/agent/Library/Application Support/coyote/.password"
            );
        }
        /// A backslash-separated path under the profile maps below
        /// `/home/agent`.
        #[test]
        fn windows_under_home_converts_backslashes() {
            let dest = host_to_sandbox_path(
                Path::new(r"C:\Users\atusa\.coyote_password"),
                Path::new(r"C:\Users\atusa"),
                true,
            )
            .unwrap();
            assert_eq!(dest, "/home/agent/.coyote_password");
        }
        /// Every backslash in the relative part becomes a forward slash.
        #[test]
        fn windows_nested_under_home() {
            let dest = host_to_sandbox_path(
                Path::new(r"C:\Users\atusa\Documents\my\vault.txt"),
                Path::new(r"C:\Users\atusa"),
                true,
            )
            .unwrap();
            assert_eq!(dest, "/home/agent/Documents/my/vault.txt");
        }
        /// On a Windows host a path outside the profile is rejected, and the
        /// error names both the path and the limitation.
        #[test]
        fn windows_outside_home_bails_with_clear_error() {
            let err = host_to_sandbox_path(
                Path::new(r"C:\Program Files\Coyote\vault.txt"),
                Path::new(r"C:\Users\atusa"),
                true,
            )
            .unwrap_err();
            let msg = err.to_string();
            assert!(
                msg.contains("Program Files"),
                "error should name the offending path: {msg}"
            );
            assert!(
                msg.contains("user profile"),
                "error should explain the limitation: {msg}"
            );
        }
        /// A trailing separator on the home directory does not prevent the
        /// prefix match.
        #[test]
        fn windows_tolerates_trailing_slash_in_home() {
            let dest = host_to_sandbox_path(
                Path::new(r"C:\Users\atusa\foo"),
                Path::new(r"C:\Users\atusa\"),
                true,
            )
            .unwrap();
            assert_eq!(dest, "/home/agent/foo");
        }
        /// The parent is everything before the final `/`.
        #[test]
        fn sandbox_path_parent_extracts_parent_for_nested() {
            assert_eq!(
                sandbox_path_parent("/home/agent/.coyote_password"),
                Some("/home/agent")
            );
            assert_eq!(
                sandbox_path_parent("/etc/coyote/.password"),
                Some("/etc/coyote")
            );
        }
        /// A root-level file has an empty parent; a path without any `/` has
        /// none.
        #[test]
        fn sandbox_path_parent_handles_edge_cases() {
            assert_eq!(sandbox_path_parent("/file"), Some(""));
            assert_eq!(sandbox_path_parent("noparent"), None);
        }
    }
 }
@@ -5,6 +5,7 @@ pub mod taskqueue;
 use crate::utils::AbortSignal;
 use fmt::{Debug, Formatter};
 use mailbox::Inbox;
 use parking_lot::RwLock;
 use taskqueue::TaskQueue;
 use anyhow::{Result, bail};
@@ -33,6 +34,7 @@ pub struct AgentHandle {
    pub inbox: Arc<Inbox>,
    pub abort_signal: AbortSignal,
    pub join_handle: JoinHandle<Result<AgentResult>>,
    pub child_supervisor: Option<Arc<RwLock<Supervisor>>>,
 }
 pub struct Supervisor {
@@ -103,6 +105,10 @@ impl Supervisor {
        self.handles.get(id).map(|h| &h.inbox)
    }
    /// Returns a clone of the abort signal of the agent registered under
    /// `id`, or `None` when no such agent is tracked.
    pub fn abort_signal_for(&self, id: &str) -> Option<AbortSignal> {
        let handle = self.handles.get(id)?;
        Some(handle.abort_signal.clone())
    }
    pub fn list_agents(&self) -> Vec<(&str, &str)> {
        self.handles
            .values()
@@ -115,6 +121,15 @@ impl Supervisor {
            handle.abort_signal.set_ctrlc();
        }
    }
    /// Signals ctrl-c on every tracked agent and, for each agent that has a
    /// child supervisor, takes a read lock on it and cancels its agents the
    /// same way.
    pub fn cancel_recursive(&self) {
        self.handles.values().for_each(|handle| {
            handle.abort_signal.set_ctrlc();
            if let Some(child_sup) = &handle.child_supervisor {
                child_sup.read().cancel_recursive();
            }
        });
    }
 }
 impl Debug for Supervisor {
@@ -152,6 +167,7 @@ mod tests {
            inbox: Arc::new(Inbox::new()),
            abort_signal: create_abort_signal(),
            join_handle,
            child_supervisor: None,
        }
    }
@@ -1,9 +1,11 @@
 use crate::config::paths;
 use colored::Colorize;
 use fancy_regex::Regex;
-use std::fs::File;
+use std::fs::{self, File};
 use std::io::{BufRead, BufReader, Seek, SeekFrom};
 use std::process;
 use std::time::Duration;
 use tokio::time::sleep;
 pub async fn tail_logs(no_color: bool) {
    let re = Regex::new(r"^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\s+<(?P<opid>[^\s>]+)>\s+\[(?P<level>[A-Z]+)\]\s+(?P<logger>[^:]+):(?P<line>\d+)\s+-\s+(?P<message>.*)$").unwrap();
@@ -16,20 +18,43 @@ pub async fn tail_logs(no_color: bool) {
        process::exit(1);
    };
-    let mut lines = reader.lines();
+    let mut line_buf = String::new();
    loop {
-        if let Some(Ok(line)) = lines.next() {
+        match reader.read_line(&mut line_buf) {
-            if no_color {
+            Ok(0) => {
-                println!("{line}");
+                if file_was_rotated(&file_path, &mut reader) {
-            } else {
+                    let file = File::open(&file_path).expect("Cannot open file");
-                let colored_line = colorize_log_line(&line, &re);
+                    reader = BufReader::new(file);
-                println!("{colored_line}");
+                }
                sleep(Duration::from_millis(100)).await;
            }
            Ok(_) => {
                let line = line_buf.trim_end();
                if no_color {
                    println!("{line}");
                } else {
                    let colored_line = colorize_log_line(line, &re);
                    println!("{colored_line}");
                }
                line_buf.clear();
            }
            Err(_) => {
                line_buf.clear();
                sleep(Duration::from_millis(100)).await;
            }
        }
    }
 }
 /// Reports whether the file at `path` looks like it was replaced or
 /// truncated since `reader` started consuming it.
 ///
 /// The check compares the reader's current stream position (treated as `0`
 /// if it cannot be queried) with the on-disk length of `path`: a file that
 /// is now shorter than the position already reached counts as rotated.
 /// Failing to read the file's metadata is also reported as a rotation.
 fn file_was_rotated(path: &std::path::Path, reader: &mut BufReader<File>) -> bool {
    let offset = reader.stream_position().unwrap_or(0);
    fs::metadata(path).map_or(true, |meta| meta.len() < offset)
 }
 fn colorize_log_line(line: &str, re: &Regex) -> String {
    if let Some(caps) = re.captures(line).expect("Failed to capture log line") {
        let level = &caps["level"];
@@ -17,7 +17,7 @@ use gman::providers::SecretProvider;
 use gman::providers::SupportedProvider;
 use gman::providers::local::LocalProvider;
 use inquire::{Password, PasswordDisplayMode, required};
-use log::warn;
+use log::{info, warn};
 use serde_yaml::Value;
 use std::sync::{Arc, LazyLock};
 use tokio::runtime::Handle;
@@ -25,6 +25,31 @@ use uuid::Uuid;
 pub static SECRET_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{([^{}]+)}}").unwrap());
 fn apply_sandboxed_home_translation(provider_def: &mut LocalProvider) {
    let Some(ref pf) = provider_def.password_file else {
        return;
    };
    if pf.exists() {
        return;
    }
    let Some(translated) = paths::translate_sandboxed_home_path(pf) else {
        return;
    };
    if !translated.exists() {
        return;
    }
    info!(
        "vault password file '{}' not found; resolved to sandboxed path '{}'",
        pf.display(),
        translated.display()
    );
    provider_def.password_file = Some(translated);
 }
 #[derive(Debug, Default, Clone)]
 pub struct Vault {
    pub(crate) provider: SupportedProvider,
@@ -92,6 +117,7 @@ impl Vault {
        };
        if let SupportedProvider::Local { provider_def } = &mut provider {
            apply_sandboxed_home_translation(provider_def);
            ensure_password_file_initialized(provider_def)?;
        }