From 9d2e936e7f305184615b38002ce3bc18f0d6f233 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Sat, 4 Jul 2026 12:50:37 -0600 Subject: [PATCH] feat: Created the step-runner graph agent for more deterministic coding workflows to produce even more reliable and higher-quality results --- assets/agents/sisyphus/config.yaml | 5 + assets/agents/step-runner/README.md | 93 +++ assets/agents/step-runner/graph.yaml | 599 ++++++++++++++++++ .../step-runner/scripts/check_handoff.sh | 54 ++ .../step-runner/scripts/fix_loop_gate.sh | 60 ++ .../step-runner/scripts/resolve_step.sh | 152 +++++ .../step-runner/scripts/revise_from_choice.sh | 27 + .../step-runner/scripts/route_coder_result.sh | 27 + .../step-runner/scripts/route_review.sh | 38 ++ .../step-runner/scripts/route_staleness.sh | 23 + .../agents/step-runner/scripts/route_sweep.sh | 23 + .../step-runner/scripts/verify_build.sh | 57 ++ .../step-runner/scripts/verify_format_lint.sh | 79 +++ .../step-runner/scripts/verify_tests.sh | 57 ++ src/config/request_context.rs | 39 ++ 15 files changed, 1333 insertions(+) create mode 100644 assets/agents/step-runner/README.md create mode 100644 assets/agents/step-runner/graph.yaml create mode 100755 assets/agents/step-runner/scripts/check_handoff.sh create mode 100755 assets/agents/step-runner/scripts/fix_loop_gate.sh create mode 100755 assets/agents/step-runner/scripts/resolve_step.sh create mode 100755 assets/agents/step-runner/scripts/revise_from_choice.sh create mode 100755 assets/agents/step-runner/scripts/route_coder_result.sh create mode 100755 assets/agents/step-runner/scripts/route_review.sh create mode 100755 assets/agents/step-runner/scripts/route_staleness.sh create mode 100755 assets/agents/step-runner/scripts/route_sweep.sh create mode 100755 assets/agents/step-runner/scripts/verify_build.sh create mode 100755 assets/agents/step-runner/scripts/verify_format_lint.sh create mode 100755 assets/agents/step-runner/scripts/verify_tests.sh diff --git 
a/assets/agents/sisyphus/config.yaml b/assets/agents/sisyphus/config.yaml index 7a61eb9..10060b9 100644 --- a/assets/agents/sisyphus/config.yaml +++ b/assets/agents/sisyphus/config.yaml @@ -132,6 +132,7 @@ instructions: | | `librarian` | Find official docs, OSS examples, web best practices for EXTERNAL libraries | Read-only, returns citation-backed findings, fan out 1-3 in parallel | | `coder` | Write/edit files, implement features | Graph agent: plan → approval → implement → verify build+tests → self_review → bounded fix-loop | | `oracle` | Architecture, complex debugging, review, plan review | Advisory, blocking — never answer the user before collecting Oracle results | + | `step-runner` | Execute ONE step of a phased plan repo (Phase 8) | Graph agent: orient → staleness check → coder → verify → handoff → user approval gate | ### When to fire `librarian` (external grep) vs `explore` (internal grep) @@ -333,6 +334,10 @@ instructions: | ### Execution lifecycle (one step at a time) + **Default: delegate the whole step to `step-runner`** — a graph agent that enforces the step protocol as graph edges (orient → staleness check → coder → verify → edge-case sweep → optional independent review → validated handoff → user approval gate): `agent__spawn --agent step-runner --prompt "Execute step <N> of the plan at <plans_dir>"`. It returns `STEP_COMPLETE` / `STEP_BLOCKED` / `STEP_REJECTED` / `STEP_FAILED`. Relay its escalations (deviation gate, approval gate) promptly. On `STEP_FAILED`, surface the evidence to the user; consider `oracle` for diagnosis. + + Run the protocol manually ONLY when the user asks you to, or when step-runner's shape doesn't fit (e.g. a docs-only step with nothing to build). Then: + 1. Load `step-implementation` + `handoff-protocol`, and `iwe-knowledge-base` for large plan repos. 2.
Follow the step protocol phase by phase: orient (previous handoff + `NOTES.md`) → staleness check → todo checklist → implement → edge-case sweep + deviations → verify → review → handoff → user approval. 3. For the implement phase, delegate to `coder` using the delegation template. Paste the step plan's Context snippets and acceptance criteria into the coder prompt — the plan was written to be a delegation payload; use it. diff --git a/assets/agents/step-runner/README.md b/assets/agents/step-runner/README.md new file mode 100644 index 0000000..81ae5e2 --- /dev/null +++ b/assets/agents/step-runner/README.md @@ -0,0 +1,93 @@ +# Step-Runner + +A graph-based agent that executes **one step** of a phased implementation +plan, with the step protocol from the `step-implementation` skill enforced +as graph edges rather than prose. Designed to be delegated to by +**[Sisyphus](../sisyphus/README.md)**; delegates implementation to +**[Coder](../coder/README.md)** and independent review to +**[code-reviewer](../code-reviewer/README.md)**. 
+ +It expects a plan repo authored per the `plan-authoring` skill: + +``` +plans/ + steps/NN-<slug>.md # step plans with frontmatter (step/title/depends_on/status) + handoffs/NN-<slug>.md # written by this agent, validated by a deterministic gate + NOTES.md # rolling durable facts +``` + +## Workflow + +``` +resolve_step (script) locate plan + previous handoff, check depends_on, + ↓ mark plan in-progress [→ gate_blocked if deps unsatisfied] +orient (llm, read-only) merge handoff directives + staleness-check the plan + ↓ +route_staleness (script) major deviation → gate_deviation (approval) + ↓ +implement (agent → coder) coder runs its own build/test/self-review fix-loop + ↓ +route_coder_result (script) COMPLETE → verify | REJECTED / FAILED → end + ↓ +verify_format_lint (script) format BEFORE evidence, then lint +verify_build (script) step-level build/typecheck +verify_tests (script) FULL test suite + ↓ [failures → fix_loop_gate, back-edge to implement] +edge_case_sweep (llm) missed edge cases; annotate downstream plans + ↓ (Edge cases sections ONLY - scope changes become proposals) +route_sweep (script) 5+ files or architectural boundary → independent_review +independent_review (agent) code-reviewer; 🔴 findings loop back to implement (bounded) + ↓ +write_handoff (llm) evidence-backed handoff per handoff-protocol + NOTES.md +check_handoff (script) deterministic schema gate; marks plan status complete + ↓ +gate_user_review (approval) HARD STOP - approve, or send revision comments + ↓ (revisions loop through implement → verify → handoff again) +end_success / end_blocked / end_rejected / end_failure +``` + +End nodes emit sentinel outcomes for the caller: + +- `STEP_COMPLETE` — step implemented, verified, handoff written, user approved. +- `STEP_BLOCKED` — `depends_on` unsatisfied and the user declined to proceed. +- `STEP_REJECTED` — user aborted at the deviation gate, or the coder's plan + was rejected at its approval gate.
+- `STEP_FAILED` — coder failed, the step-level fix budget was exhausted, or + the handoff failed validation twice. + +## Usage + +```sh +# From the project root: run the next in-progress/pending step +coyote -a step-runner "Execute the next step" + +# A specific step (also parsed from the prompt: "execute step 3") +coyote -a step-runner --agent-variable step 3 "Execute step 3" + +# Plan repo somewhere else +coyote -a step-runner --agent-variable plans_dir docs/plans "Execute the next step" +``` + +**Invoke from the project root.** The coder sub-agent resolves its own +`project_dir` from the invocation directory; overriding `project_dir` here +does not propagate to the spawned coder. + +## Tuning + +`graph.yaml` `initial_state` exposes: + +- `max_fix_attempts` (default `2`) — step-level fix budget (the coder has + its own internal budget of 3). +- `max_review_attempts` (default `1`) — bounded 🔴-finding fix loops after + independent review. + +Environment overrides honored by the script nodes: + +- `FORMAT_CMD` / `LINT_CMD` — formatting and linting (otherwise a per-type + heuristic formats, and linting defers to the build/check command). +- `BUILD_CMD` / `TEST_CMD` — skip project-type detection (same as coder). +- `STEP_AUTOAPPROVE=1` — bypass the deviation gate (non-interactive runs). +- `STEP_SKIP_REVIEW=1` — never spawn the independent reviewer. + +The final user approval gate is never bypassed by an environment variable - +it is the point of the workflow. 
diff --git a/assets/agents/step-runner/graph.yaml b/assets/agents/step-runner/graph.yaml new file mode 100644 index 0000000..3a6aa14 --- /dev/null +++ b/assets/agents/step-runner/graph.yaml @@ -0,0 +1,599 @@ +name: step-runner +description: | + Executes ONE step of a phased implementation plan (plans/ repo) with the + step protocol enforced as graph edges: orient -> staleness check -> + implement (coder) -> verify -> edge-case sweep -> optional independent + review -> evidence-backed handoff -> user approval gate. Designed to be + delegated to by sisyphus. +version: "1.0" + +global_tools: + - fs_cat.sh + - fs_ls.sh + - fs_write.sh + - fs_patch.sh + - execute_command.sh + +skills_enabled: true +enabled_skills: + - step-implementation + - handoff-protocol + - code-review + - ai-slop-remover + +variables: + - name: project_dir + description: | + Absolute path to the project directory. Defaults to "." (the directory + coyote was invoked from). The coder sub-agent resolves its own + project_dir the same way, so invoke step-runner FROM the project root + unless you override this for both. + default: "." + - name: plans_dir + description: | + Path to the plan repo. Relative paths resolve against project_dir. + Expected layout: <plans_dir>/steps/NN-<slug>.md, + <plans_dir>/handoffs/, <plans_dir>/NOTES.md. + default: "plans" + - name: step + description: | + Which step to execute: a step number, or "next" to pick the first + in-progress (resume) or pending step plan.
+ default: "next" + +settings: + max_loop_iterations: 20 + log_state_snapshots: true + validate_before_run: true + timeout: 7200 + +initial_state: + project_dir: "" + plans_dir: "" + step_number: 0 + step_slug: "" + step_title: "" + step_plan_path: "" + step_plan: "" + prev_handoff_path: "(none)" + prev_handoff: "(none - this is the first step)" + notes_path: "" + notes: "(none)" + handoff_path: "" + blocking_reason: "" + plan_summary: "" + implementation_brief: "" + staleness_report: "" + has_major_deviation: false + deviation_summary: "" + user_feedback: "" + fix_instructions: "" + fix_attempts: 0 + max_fix_attempts: 2 + coder_result: "" + format_output: "" + lint_ok: true + lint_output: "" + build_ok: true + build_output: "" + tests_ok: true + tests_output: "" + edge_case_report: "" + downstream_updates: "" + needs_independent_review: false + review_report: "" + review_attempts: 0 + max_review_attempts: 1 + handoff_attempts: 0 + handoff_fix: "" + step_summary: "" + +start: resolve_step + +nodes: + resolve_step: + id: resolve_step + type: script + description: | + Locate the step plan, previous handoff, and NOTES.md; parse frontmatter; + check depends_on satisfaction against existing handoffs; mark the plan + in-progress. Routes to gate_blocked when dependencies are unsatisfied. + script: scripts/resolve_step.sh + timeout: 30 + fallback: end_failure + next: orient + + gate_blocked: + id: gate_blocked + type: approval + description: Escalate unsatisfied dependencies instead of building on missing ground. + question: | + Step {{step_number}} ({{step_title}}) is BLOCKED: + + {{blocking_reason}} + + Proceed anyway? + options: + - "yes" + - "no" + routes: + "yes": orient + "no": end_blocked + on_other: end_blocked + + orient: + id: orient + type: llm + description: | + Read-only orientation and staleness check: merge the previous handoff's + directives with the step plan, then verify the plan's assumptions + against the CURRENT codebase before any edit. 
+ skills_enabled: true + enabled_skills: + - step-implementation + instructions: | + You are orienting for one step of a phased implementation plan. Load + `step-implementation` and apply its Orient and Staleness-check phases. + You are READ-ONLY in this node: no edits, no fixes. + + 1. Read the previous handoff (below). Note directives aimed at this + step, deviations that changed the codebase, and bare assertions + that need re-verification. + 2. Staleness-check the step plan against the code at {{project_dir}}: + grep the symbols it references (via execute_command), read its + Context snippets at their claimed locations with fs_cat, confirm + its Test commands exist. + 3. Classify discrepancies per the skill's deviation table: minor + (mechanics differ; correct silently in the brief) vs major (scope, + approach, interfaces, or a later step's assumptions affected). + + Produce `implementation_brief`: the corrected, self-contained marching + orders for the implementer - plan tasks in order, handoff directives + applied, minor staleness corrections folded in, acceptance criteria + restated. The implementer sees ONLY the step plan plus your brief. 
+ prompt: | + ## Step plan ({{step_plan_path}}) + {{step_plan}} + + ## Previous handoff ({{prev_handoff_path}}) + {{prev_handoff}} + + ## Rolling project notes + {{notes}} + tools: + - fs_cat + - fs_ls + - execute_command + max_iterations: 20 + output_schema: + type: object + properties: + plan_summary: + type: string + description: 1-3 sentences summarizing what this step delivers + implementation_brief: + type: string + description: Corrected, self-contained instructions for the implementer + staleness_report: + type: string + description: Findings from checking plan assumptions against current code; "clean" if none + has_major_deviation: + type: boolean + description: True when a discrepancy changes scope, approach, or interfaces + deviation_summary: + type: string + description: Major deviations only, with the plan claim vs current reality. Empty when none + required: [plan_summary, implementation_brief, staleness_report, has_major_deviation, deviation_summary] + fallback: end_failure + next: route_staleness + + route_staleness: + id: route_staleness + type: script + description: Major deviation -> user gate; otherwise straight to implement. + script: scripts/route_staleness.sh + timeout: 5 + fallback: implement + + gate_deviation: + id: gate_deviation + type: approval + description: Major deviations are never silently absorbed - the user decides. + question: | + Step {{step_number}} ({{step_title}}): the plan no longer matches the + codebase in a way that changes scope or approach. + + {{deviation_summary}} + + Staleness report: + {{staleness_report}} + + Proceed with the corrected brief? (Answer with anything else to give + your own guidance to the implementer.) 
+ options: + - "proceed" + - "abort" + routes: + "proceed": implement + "abort": end_rejected + on_other: implement + state_updates: + user_feedback: "{{choice}}" + + implement: + id: implement + type: agent + description: | + Delegate implementation to the coder graph agent, which runs its own + plan -> implement -> build -> tests -> self-review fix-loop internally. + agent: coder + prompt: | + ## TASK + Execute step {{step_number}} ({{step_title}}) of a phased implementation + plan for the project at {{project_dir}}. + + ## EXPECTED OUTCOME + Every task in the step plan below is implemented and its acceptance + criteria are met. Tests are derived from the Acceptance criteria + section (not from the implementation). Build and full test suite pass. + + ## MUST DO + - Follow the Orientation brief below - it supersedes the raw plan where + they disagree (it folds in corrections from the staleness check). + - Match the patterns pasted in the step plan's Context section. + - Derive tests from the plan's Acceptance criteria. + + ## MUST NOT DO + - Do not touch anything listed in the plan's Out of scope section. + - Do not modify files under {{plans_dir}}. + - Do not implement work belonging to other steps. + + ## CONTEXT + ### Step plan + {{step_plan}} + + ### Orientation brief (handoff directives + staleness corrections applied) + {{implementation_brief}} + + ### User guidance (if any) + {{user_feedback}} + + ### Fix loop status (empty on first attempt) + {{fix_instructions}} + timeout: 3600 + state_updates: + coder_result: "{{output}}" + next: route_coder_result + + route_coder_result: + id: route_coder_result + type: script + description: Route on the coder sentinel - COMPLETE verifies, REJECTED/FAILED terminate. 
+ script: scripts/route_coder_result.sh + timeout: 5 + fallback: end_failure + + verify_format_lint: + id: verify_format_lint + type: script + description: | + Format BEFORE evidence collection (FORMAT_CMD override or per-type + heuristic), then lint (LINT_CMD, when configured). Lint failure routes + to the fix loop. + script: scripts/verify_format_lint.sh + timeout: 300 + fallback: fix_loop_gate + + verify_build: + id: verify_build + type: script + description: Step-level build/typecheck evidence, collected AFTER formatting. + script: scripts/verify_build.sh + timeout: 600 + fallback: fix_loop_gate + + verify_tests: + id: verify_tests + type: script + description: FULL test suite - regressions in untouched code fail the step too. + script: scripts/verify_tests.sh + timeout: 1200 + fallback: fix_loop_gate + + fix_loop_gate: + id: fix_loop_gate + type: script + description: | + Step-level fix budget (the coder already ran its own internal fix + loop). Loops to implement with fix_instructions, or ends as failure. + script: scripts/fix_loop_gate.sh + timeout: 5 + fallback: end_failure + + edge_case_sweep: + id: edge_case_sweep + type: llm + description: | + Post-implementation sweep: missed spots, edge cases, downstream plan + implications. May annotate downstream plans' Edge cases sections + (annotate vs propose per handoff-protocol). Also judges whether the + change warrants an independent review pass. + skills_enabled: true + enabled_skills: + - step-implementation + - handoff-protocol + instructions: | + The implementation for this step just passed build and tests. Load + `step-implementation` (edge-case sweep phase) and `handoff-protocol` + (annotate-vs-propose rules), then: + + 1. Read the changed code (the coder result below names the files). + Look for edge cases the plan missed: empty inputs, error paths, + concurrency, partial failure, compat. + 2. For each edge case belonging to a LATER step: check that step's + plan under {{plans_dir}}/steps/. 
If its Edge cases section already + covers it, done. If not, append an entry to that section via + fs_patch - touch NOTHING else in the file. + 3. NEVER edit a later plan's Objective, Tasks, Acceptance criteria, + or Out of scope. Scope-affecting changes become proposed diffs in + `downstream_updates` instead. + 4. Set needs_independent_review=true when the change touched 5+ files + or crosses architectural boundaries (auth, public APIs, schema, + security-sensitive paths). + + Be terse. Findings, not prose. + prompt: | + ## Coder result + {{coder_result}} + + ## Step plan + {{step_plan}} + + ## Staleness report from orientation + {{staleness_report}} + tools: + - fs_cat + - fs_ls + - fs_patch + - execute_command + max_iterations: 20 + output_schema: + type: object + properties: + edge_case_report: + type: string + description: Edge cases discovered - both handled and punted, one per line. "none" if empty + downstream_updates: + type: string + description: Annotations made (plan file + section) and proposed diffs for scope-affecting changes. "none" if empty + needs_independent_review: + type: boolean + required: [edge_case_report, downstream_updates, needs_independent_review] + fallback: write_handoff + next: route_sweep + + route_sweep: + id: route_sweep + type: script + description: Broad or boundary-crossing changes get an independent reviewer. + script: scripts/route_sweep.sh + timeout: 5 + fallback: write_handoff + + independent_review: + id: independent_review + type: agent + description: Independent review pass - the author's self-review cannot catch its own rationalizations. + agent: code-reviewer + prompt: | + Review the changes produced for step {{step_number}} ({{step_title}}) + of a phased implementation plan in {{project_dir}}. + + What the step was supposed to do: + {{plan_summary}} + + Coder summary (names the modified/created files): + {{coder_result}} + + Review the changed files against the step plan's acceptance criteria. 
+ Preserve severity tags in your findings. + timeout: 1200 + state_updates: + review_report: "{{output}}" + next: route_review + + route_review: + id: route_review + type: script + description: Critical findings loop back to implement (bounded); otherwise proceed to handoff. + script: scripts/route_review.sh + timeout: 5 + fallback: write_handoff + + write_handoff: + id: write_handoff + type: llm + description: | + Write the evidence-backed handoff per handoff-protocol and append + durable facts to NOTES.md. The completion gate (check_handoff) + verifies the document afterward. + skills_enabled: true + enabled_skills: + - handoff-protocol + - ai-slop-remover + instructions: | + Load `handoff-protocol` and follow its writer schema EXACTLY: the + frontmatter (step, title, result) and all eight sections, writing + "None" rather than omitting a section. + + Write the handoff to {{handoff_path}} with fs_write. Paste the + verification evidence below verbatim into the Evidence section - + commands, exit codes, decisive output lines. Deviations come from the + staleness report, gate decisions, and fix loop history. Downstream + plan updates come from the sweep results. + + Then append durable, step-independent facts (if any) to {{notes_path}} + - create the file if missing, never rewrite existing entries. + + If "Gate feedback" below is non-empty, a previous handoff attempt + failed validation - fix exactly what it lists. 
+ prompt: | + ## Step + {{step_number}} ({{step_title}}) - plan at {{step_plan_path}} + + ## Plan summary + {{plan_summary}} + + ## Coder result + {{coder_result}} + + ## Staleness report / deviations + {{staleness_report}} + + Major deviation summary (if any): {{deviation_summary}} + User guidance given (if any): {{user_feedback}} + Fix loop attempts used: {{fix_attempts}} of {{max_fix_attempts}} + + ## Edge cases discovered + {{edge_case_report}} + + ## Downstream plan updates + {{downstream_updates}} + + ## Independent review report (if any) + {{review_report}} + + ## Verification evidence (paste verbatim) + ### Format + {{format_output}} + ### Lint + {{lint_output}} + ### Build + {{build_output}} + ### Tests + {{tests_output}} + + ## Gate feedback + {{handoff_fix}} + tools: + - fs_cat + - fs_ls + - fs_write + - fs_patch + max_iterations: 15 + output_schema: + type: object + properties: + step_summary: + type: string + description: 3-6 sentence summary of the step for the user's approval decision - what was done, deviations, anything needing their attention + required: [step_summary] + fallback: end_failure + next: check_handoff + + check_handoff: + id: check_handoff + type: script + description: | + Deterministic completion gate - handoff exists with frontmatter and all + required sections. On success, marks the step plan status complete. + One retry back to write_handoff, then failure. + script: scripts/check_handoff.sh + timeout: 10 + fallback: end_failure + + gate_user_review: + id: gate_user_review + type: approval + description: The hard stop - the next step never starts without explicit approval. + question: | + ## Step {{step_number}} ({{step_title}}) - ready for review + + {{step_summary}} + + Handoff: {{handoff_path}} + Build: {{build_ok}} | Tests: {{tests_ok}} | Fix attempts: {{fix_attempts}}/{{max_fix_attempts}} + + Approve this step? (Answer with anything else to send revision + instructions straight to the implementer.) 
+ options: + - "approve" + - "revise" + routes: + "approve": end_success + "revise": get_revision + on_other: revise_from_choice + state_updates: + user_feedback: "{{choice}}" + + get_revision: + id: get_revision + type: input + description: Collect revision instructions, then loop back through implement -> verify -> handoff. + question: "What should change? Your comments go to the implementer verbatim." + validation: "len(input) > 0" + state_updates: + fix_instructions: "{{input}}" + next: implement + + revise_from_choice: + id: revise_from_choice + type: script + description: Free-form approval answers are treated as revision instructions. + script: scripts/revise_from_choice.sh + timeout: 5 + fallback: get_revision + + end_success: + id: end_success + type: end + output: | + STEP_COMPLETE + Step: {{step_number}} ({{step_title}}) + Plan: {{step_plan_path}} + Handoff: {{handoff_path}} + Build: passed | Tests: passed | Fix attempts: {{fix_attempts}}/{{max_fix_attempts}} + + {{step_summary}} + + Downstream plan updates: + {{downstream_updates}} + + end_blocked: + id: end_blocked + type: end + output: | + STEP_BLOCKED + Step: {{step_number}} ({{step_title}}) + Reason: + {{blocking_reason}} + + end_rejected: + id: end_rejected + type: end + output: | + STEP_REJECTED + Step: {{step_number}} ({{step_title}}) + Rejected at: deviation gate or coder approval gate. 
+ Deviation summary: + {{deviation_summary}} + Coder result (if it ran): + {{coder_result}} + + end_failure: + id: end_failure + type: end + output: | + STEP_FAILED + Step: {{step_number}} ({{step_title}}) + Fix attempts: {{fix_attempts}}/{{max_fix_attempts}} + Blocking reason (if resolution failed): {{blocking_reason}} + + Coder result: + {{coder_result}} + + Last build output: + {{build_output}} + + Last tests output: + {{tests_output}} diff --git a/assets/agents/step-runner/scripts/check_handoff.sh b/assets/agents/step-runner/scripts/check_handoff.sh new file mode 100755 index 0000000..ae75ba1 --- /dev/null +++ b/assets/agents/step-runner/scripts/check_handoff.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +set -uo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +handoff_path=$(echo "$state" | jq -r '.handoff_path // ""') +step_plan_path=$(echo "$state" | jq -r '.step_plan_path // ""') +handoff_attempts=$(echo "$state" | jq -r '.handoff_attempts // 0') + +problems="" + +if [[ ! 
-f "$handoff_path" ]]; then + problems="- handoff file does not exist at $handoff_path"$'\n' +else + content=$(cat "$handoff_path") + grep -qE '^result:[[:space:]]*(complete|partial|blocked)' <<< "$content" \ + || problems+="- frontmatter is missing 'result: complete|partial|blocked'"$'\n' + for section in "Summary" "Completed" "Not completed" "Deviations" "Downstream plan updates" "Edge cases discovered" "Evidence" "Notes for next step"; do + grep -qE "^##[[:space:]]+${section}" <<< "$content" \ + || problems+="- missing required section: ## ${section}"$'\n' + done +fi + +if [[ -z "$problems" ]]; then + if [[ -f "$step_plan_path" ]]; then + tmp=$(mktemp) + awk 'BEGIN{n=0} /^---[[:space:]]*$/{n++; print; next} n==1 && /^status:/{print "status: complete"; next} {print}' "$step_plan_path" > "$tmp" && mv "$tmp" "$step_plan_path" + fi + jq -nc '{"handoff_fix": "", "_next": "gate_user_review"}' + exit 0 +fi + +if (( handoff_attempts >= 1 )); then + jq -nc \ + --arg br "Handoff failed validation twice. Problems: +$problems" \ + '{"blocking_reason": $br, "_next": "end_failure"}' + exit 0 +fi + +jq -nc \ + --arg hf "The previous handoff attempt failed validation. Fix exactly these problems: +$problems" \ + '{ + "handoff_attempts": 1, + "handoff_fix": $hf, + "_next": "write_handoff" + }' diff --git a/assets/agents/step-runner/scripts/fix_loop_gate.sh b/assets/agents/step-runner/scripts/fix_loop_gate.sh new file mode 100755 index 0000000..7267811 --- /dev/null +++ b/assets/agents/step-runner/scripts/fix_loop_gate.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +fix_attempts=$(echo "$state" | jq -r '.fix_attempts // 0') +max_fix_attempts=$(echo "$state" | jq -r '.max_fix_attempts // 2') +lint_ok=$(echo "$state" | jq -r '.lint_ok | if . == null then "true" else (. 
| tostring) end') +build_ok=$(echo "$state" | jq -r '.build_ok | if . == null then "true" else (. | tostring) end') +tests_ok=$(echo "$state" | jq -r '.tests_ok | if . == null then "true" else (. | tostring) end') +lint_output=$(echo "$state" | jq -r '.lint_output // ""') +build_output=$(echo "$state" | jq -r '.build_output // ""') +tests_output=$(echo "$state" | jq -r '.tests_output // ""') + +if (( fix_attempts >= max_fix_attempts )); then + jq -nc \ + --argjson n "$fix_attempts" \ + '{ + "fix_attempts": $n, + "_next": "end_failure" + }' + exit 0 +fi + +next_attempts=$((fix_attempts + 1)) + +if [[ "$lint_ok" != "true" ]]; then + stage="lint" + output="$lint_output" +elif [[ "$build_ok" != "true" ]]; then + stage="build" + output="$build_output" +elif [[ "$tests_ok" != "true" ]]; then + stage="full test suite" + output="$tests_output" +else + stage="verification" + output="fix_loop_gate was reached but no failing stage was recorded. Re-run verification." +fi + +fix_instructions=$(printf '## Fix loop status (step-level attempt %d of %d)\n\nThe implementation passed the coder'"'"'s internal checks but failed step-level verification at the %s stage.\n\nOutput:\n```\n%s\n```\n\nIdentify the minimal fix and apply it. Do not refactor. Regressions in untouched code caused by this change are in scope.' 
\ + "$next_attempts" "$max_fix_attempts" "$stage" "$output") + +jq -nc \ + --argjson n "$next_attempts" \ + --arg 'fi' "$fix_instructions" \ + '{ + "fix_attempts": $n, + "fix_instructions": $fi, + "lint_ok": true, + "build_ok": true, + "tests_ok": true, + "_next": "implement" + }' diff --git a/assets/agents/step-runner/scripts/resolve_step.sh b/assets/agents/step-runner/scripts/resolve_step.sh new file mode 100755 index 0000000..7551880 --- /dev/null +++ b/assets/agents/step-runner/scripts/resolve_step.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +set -uo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +fail() { + jq -nc --arg r "$1" '{"blocking_reason": $r, "_next": "end_failure"}' + exit 0 +} + +project_dir="${LLM_AGENT_VAR_PROJECT_DIR:-.}" +project_dir=$(cd "$project_dir" 2>/dev/null && pwd) || fail "project_dir does not exist: $project_dir" + +plans_dir="${LLM_AGENT_VAR_PLANS_DIR:-plans}" +[[ "$plans_dir" != /* ]] && plans_dir="$project_dir/$plans_dir" +steps_dir="$plans_dir/steps" +handoffs_dir="$plans_dir/handoffs" +notes_path="$plans_dir/NOTES.md" + +[[ -d "$steps_dir" ]] || fail "No step plans directory at $steps_dir (expected /steps/NN-.md)" + +frontmatter() { + awk '/^---[[:space:]]*$/{n++; next} n==1{print} n>=2{exit}' "$1" +} + +fm_value() { + echo "$1" | grep -E "^$2:" | head -1 | sed -E "s/^$2:[[:space:]]*//" | sed -E 's/^["'"'"']|["'"'"']$//g' +} + +step="${LLM_AGENT_VAR_STEP:-next}" +if [[ "$step" == "next" ]]; then + prompt_step=$(echo "$state" | jq -r '.initial_prompt // ""' | grep -oiE 'step[[:space:]#:]*[0-9]+' | head -1 | grep -oE '[0-9]+' || true) + [[ -n "$prompt_step" ]] && step="$prompt_step" +fi + +plan_file="" +if [[ "$step" == "next" ]]; then + first_pending="" + while IFS= read -r f; do + st=$(fm_value "$(frontmatter "$f")" "status") + if [[ "$st" == "in-progress" ]]; then + plan_file="$f" + break + fi + [[ -z 
"$first_pending" && ( "$st" == "pending" || -z "$st" ) ]] && first_pending="$f" + done < <(find "$steps_dir" -maxdepth 1 -name '*.md' | sort) + [[ -z "$plan_file" ]] && plan_file="$first_pending" + [[ -z "$plan_file" ]] && fail "No in-progress or pending step plans in $steps_dir" +else + [[ "$step" =~ ^[0-9]+$ ]] || fail "step must be a number or 'next'; got: $step" + padded=$(printf '%02d' "$((10#$step))") + plan_file=$(find "$steps_dir" -maxdepth 1 \( -name "${padded}-*.md" -o -name "${step}-*.md" \) | sort | head -1) + [[ -n "$plan_file" ]] || fail "No step plan matching step $step in $steps_dir" +fi + +bn=$(basename "$plan_file" .md) +num_part="${bn%%-*}" +[[ "$num_part" =~ ^[0-9]+$ ]] || fail "Step plan filename must start with a number: $bn" +step_number=$((10#$num_part)) +step_slug="${bn#*-}" + +fm=$(frontmatter "$plan_file") +step_title=$(fm_value "$fm" "title") +[[ -z "$step_title" ]] && step_title="$step_slug" + +deps=$(echo "$fm" | awk '/^depends_on:/{f=1; print; next} f && /^[[:space:]]*-/{print; next} f{exit}' | grep -oE '[0-9]+' || true) +unsatisfied="" +for dep in $deps; do + dep_padded=$(printf '%02d' "$((10#$dep))") + dep_handoff=$(find "$handoffs_dir" -maxdepth 1 \( -name "${dep_padded}-*.md" -o -name "${dep}-*.md" \) 2>/dev/null | sort | head -1) + if [[ -z "$dep_handoff" ]]; then + unsatisfied+="- step $dep: no handoff found (step not executed?)"$'\n' + continue + fi + dep_result=$(fm_value "$(frontmatter "$dep_handoff")" "result") + if [[ "$dep_result" != "complete" ]]; then + unsatisfied+="- step $dep: handoff result is '$dep_result' (not complete): $dep_handoff"$'\n' + fi +done + +prev_handoff_path="(none)" +prev_handoff="(none - this is the first step)" +prev_file="" +prev_num=0 +while IFS= read -r h; do + hn="${h##*/}" + hn="${hn%%-*}" + [[ "$hn" =~ ^[0-9]+$ ]] || continue + n=$((10#$hn)) + if (( n < step_number && n >= prev_num )); then + prev_num=$n + prev_file="$h" + fi +done < <(find "$handoffs_dir" -maxdepth 1 -name '*.md' 2>/dev/null 
| sort) +if [[ -n "$prev_file" ]]; then + prev_handoff_path="$prev_file" + prev_handoff=$(head -c 16000 "$prev_file") +fi + +notes="(none)" +[[ -f "$notes_path" ]] && notes=$(head -c 8000 "$notes_path") + +step_plan=$(head -c 24000 "$plan_file") +handoff_path="$handoffs_dir/$(basename "$plan_file")" + +tmp=$(mktemp) +awk 'BEGIN{n=0} /^---[[:space:]]*$/{n++; print; next} n==1 && /^status:/{print "status: in-progress"; next} {print}' "$plan_file" > "$tmp" && mv "$tmp" "$plan_file" + +next_node="orient" +blocking_reason="" +if [[ -n "$unsatisfied" ]]; then + next_node="gate_blocked" + blocking_reason="Unsatisfied dependencies:"$'\n'"$unsatisfied" +fi + +jq -nc \ + --arg pd "$project_dir" \ + --arg pl "$plans_dir" \ + --argjson sn "$step_number" \ + --arg ss "$step_slug" \ + --arg st "$step_title" \ + --arg spp "$plan_file" \ + --arg sp "$step_plan" \ + --arg php "$prev_handoff_path" \ + --arg ph "$prev_handoff" \ + --arg np "$notes_path" \ + --arg no "$notes" \ + --arg hp "$handoff_path" \ + --arg br "$blocking_reason" \ + --arg nx "$next_node" \ + '{ + "project_dir": $pd, + "plans_dir": $pl, + "step_number": $sn, + "step_slug": $ss, + "step_title": $st, + "step_plan_path": $spp, + "step_plan": $sp, + "prev_handoff_path": $php, + "prev_handoff": $ph, + "notes_path": $np, + "notes": $no, + "handoff_path": $hp, + "blocking_reason": $br, + "_next": $nx + }' diff --git a/assets/agents/step-runner/scripts/revise_from_choice.sh b/assets/agents/step-runner/scripts/revise_from_choice.sh new file mode 100755 index 0000000..9083a6a --- /dev/null +++ b/assets/agents/step-runner/scripts/revise_from_choice.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +feedback=$(echo "$state" | jq -r '.user_feedback // ""') + +if [[ -z "$feedback" ]]; then + jq -nc '{"_next": "get_revision"}' + exit 0 +fi + 
+fix_instructions=$(printf '## Revision requested by the user at the step approval gate\n\nAddress these comments with minimal edits, then the step re-verifies and the handoff is rewritten:\n\n%s' \ + "$feedback") + +jq -nc \ + --arg 'fi' "$fix_instructions" \ + '{ + "fix_instructions": $fi, + "_next": "implement" + }' diff --git a/assets/agents/step-runner/scripts/route_coder_result.sh b/assets/agents/step-runner/scripts/route_coder_result.sh new file mode 100755 index 0000000..0d57225 --- /dev/null +++ b/assets/agents/step-runner/scripts/route_coder_result.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +coder_result=$(echo "$state" | jq -r '.coder_result // ""') + +case "$coder_result" in + *CODER_COMPLETE*) + jq -nc '{"_next": "verify_format_lint"}' + ;; + *CODER_REJECTED*) + jq -nc '{"_next": "end_rejected"}' + ;; + *CODER_FAILED*) + jq -nc '{"blocking_reason": "coder fix-loop exhausted; see coder result", "_next": "end_failure"}' + ;; + *) + jq -nc '{"blocking_reason": "coder returned no recognizable sentinel (expected CODER_COMPLETE / CODER_REJECTED / CODER_FAILED)", "_next": "end_failure"}' + ;; +esac diff --git a/assets/agents/step-runner/scripts/route_review.sh b/assets/agents/step-runner/scripts/route_review.sh new file mode 100755 index 0000000..eb6b2b9 --- /dev/null +++ b/assets/agents/step-runner/scripts/route_review.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +review_report=$(echo "$state" | jq -r '.review_report // ""') +review_attempts=$(echo "$state" | jq -r '.review_attempts // 0') +max_review_attempts=$(echo "$state" | jq -r '.max_review_attempts // 1') + +if ! 
grep -qF "🔴" <<< "$review_report"; then + jq -nc '{"_next": "write_handoff"}' + exit 0 +fi + +if (( review_attempts >= max_review_attempts )); then + jq -nc '{"_next": "write_handoff"}' + exit 0 +fi + +next_review=$((review_attempts + 1)) +fix_instructions=$(printf '## Independent review findings (attempt %d of %d)\n\nAn independent reviewer flagged CRITICAL (🔴) findings. Address ONLY the 🔴 findings with minimal edits. Do not refactor unrelated code.\n\n%s' \ + "$next_review" "$max_review_attempts" "$review_report") + +jq -nc \ + --argjson n "$next_review" \ + --arg 'fi' "$fix_instructions" \ + '{ + "review_attempts": $n, + "fix_instructions": $fi, + "needs_independent_review": false, + "_next": "implement" + }' diff --git a/assets/agents/step-runner/scripts/route_staleness.sh b/assets/agents/step-runner/scripts/route_staleness.sh new file mode 100755 index 0000000..33bcdb4 --- /dev/null +++ b/assets/agents/step-runner/scripts/route_staleness.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +has_major=$(echo "$state" | jq -r '.has_major_deviation // false') + +if [[ "${STEP_AUTOAPPROVE:-0}" == "1" ]]; then + jq -nc '{"_next": "implement"}' + exit 0 +fi + +if [[ "$has_major" == "true" ]]; then + jq -nc '{"_next": "gate_deviation"}' +else + jq -nc '{"_next": "implement"}' +fi diff --git a/assets/agents/step-runner/scripts/route_sweep.sh b/assets/agents/step-runner/scripts/route_sweep.sh new file mode 100755 index 0000000..b7b5d84 --- /dev/null +++ b/assets/agents/step-runner/scripts/route_sweep.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +needs_review=$(echo "$state" | jq -r '.needs_independent_review // 
false') + +if [[ "${STEP_SKIP_REVIEW:-0}" == "1" ]]; then + jq -nc '{"_next": "write_handoff"}' + exit 0 +fi + +if [[ "$needs_review" == "true" ]]; then + jq -nc '{"_next": "independent_review"}' +else + jq -nc '{"_next": "write_handoff"}' +fi diff --git a/assets/agents/step-runner/scripts/verify_build.sh b/assets/agents/step-runner/scripts/verify_build.sh new file mode 100755 index 0000000..23704f3 --- /dev/null +++ b/assets/agents/step-runner/scripts/verify_build.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -uo pipefail + +# shellcheck disable=SC1091 +source "$(dirname "$0")/../../.shared/utils.sh" + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +project_dir=$(echo "$state" | jq -r '.project_dir // "."') + +if [[ -n "${BUILD_CMD:-}" ]]; then + cmd="$BUILD_CMD" +else + project_info=$(detect_project "$project_dir") + cmd=$(echo "$project_info" | jq -r '.check // .build // ""') +fi + +if [[ -z "$cmd" || "$cmd" == "null" ]]; then + jq -nc '{ + "build_ok": true, + "build_output": "(no build/check command available for this project type)", + "_next": "verify_tests" + }' + exit 0 +fi + +exit_code=0 +output=$(cd "$project_dir" && eval "$cmd" 2>&1) || exit_code=$? 
+ +if (( exit_code == 0 )); then + jq -nc \ + --arg out "Ran: $cmd + +$output" \ + '{ + "build_ok": true, + "build_output": $out, + "_next": "verify_tests" + }' +else + jq -nc \ + --arg out "Ran: $cmd +Exit code: $exit_code + +$output" \ + '{ + "build_ok": false, + "build_output": $out, + "_next": "fix_loop_gate" + }' +fi diff --git a/assets/agents/step-runner/scripts/verify_format_lint.sh b/assets/agents/step-runner/scripts/verify_format_lint.sh new file mode 100755 index 0000000..c20e2f3 --- /dev/null +++ b/assets/agents/step-runner/scripts/verify_format_lint.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +set -uo pipefail + +# shellcheck disable=SC1091 +source "$(dirname "$0")/../../.shared/utils.sh" + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +project_dir=$(echo "$state" | jq -r '.project_dir // "."') +project_type=$(detect_project "$project_dir" | jq -r '.type // "unknown"') + +format_cmd="${FORMAT_CMD:-}" +if [[ -z "$format_cmd" ]]; then + case "$project_type" in + rust) format_cmd="cargo fmt" ;; + go) format_cmd="gofmt -w ." ;; + python) command -v ruff &>/dev/null && format_cmd="ruff format ." ;; + esac +fi + +if [[ -z "$format_cmd" ]]; then + format_output="(no format command configured for project type '$project_type'; skipped. Set FORMAT_CMD to enable.)" +else + fmt_rc=0 + fmt_out=$(cd "$project_dir" && eval "$format_cmd" 2>&1) || fmt_rc=$? + format_output="Ran: $format_cmd +Exit code: $fmt_rc + +$fmt_out" +fi + +lint_cmd="${LINT_CMD:-}" +if [[ -z "$lint_cmd" ]]; then + jq -nc \ + --arg fo "$format_output" \ + '{ + "format_output": $fo, + "lint_ok": true, + "lint_output": "(no LINT_CMD configured; linting is covered by the build/check command)", + "_next": "verify_build" + }' + exit 0 +fi + +lint_rc=0 +lint_out=$(cd "$project_dir" && eval "$lint_cmd" 2>&1) || lint_rc=$? 
+ +if (( lint_rc == 0 )); then + jq -nc \ + --arg fo "$format_output" \ + --arg lo "Ran: $lint_cmd + +$lint_out" \ + '{ + "format_output": $fo, + "lint_ok": true, + "lint_output": $lo, + "_next": "verify_build" + }' +else + jq -nc \ + --arg fo "$format_output" \ + --arg lo "Ran: $lint_cmd +Exit code: $lint_rc + +$lint_out" \ + '{ + "format_output": $fo, + "lint_ok": false, + "lint_output": $lo, + "_next": "fix_loop_gate" + }' +fi diff --git a/assets/agents/step-runner/scripts/verify_tests.sh b/assets/agents/step-runner/scripts/verify_tests.sh new file mode 100755 index 0000000..481e126 --- /dev/null +++ b/assets/agents/step-runner/scripts/verify_tests.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -uo pipefail + +# shellcheck disable=SC1091 +source "$(dirname "$0")/../../.shared/utils.sh" + +if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then + state=$(cat "$GRAPH_STATE_FILE") +elif [[ -n "${GRAPH_STATE:-}" ]]; then + state="$GRAPH_STATE" +else + state='{}' +fi + +project_dir=$(echo "$state" | jq -r '.project_dir // "."') + +if [[ -n "${TEST_CMD:-}" ]]; then + cmd="$TEST_CMD" +else + project_info=$(detect_project "$project_dir") + cmd=$(echo "$project_info" | jq -r '.test // ""') +fi + +if [[ -z "$cmd" || "$cmd" == "null" ]]; then + jq -nc '{ + "tests_ok": true, + "tests_output": "(no test command available for this project type)", + "_next": "edge_case_sweep" + }' + exit 0 +fi + +exit_code=0 +output=$(cd "$project_dir" && eval "$cmd" 2>&1) || exit_code=$? 
+ +if (( exit_code == 0 )); then + jq -nc \ + --arg out "Ran: $cmd + +$output" \ + '{ + "tests_ok": true, + "tests_output": $out, + "_next": "edge_case_sweep" + }' +else + jq -nc \ + --arg out "Ran: $cmd +Exit code: $exit_code + +$output" \ + '{ + "tests_ok": false, + "tests_output": $out, + "_next": "fix_loop_gate" + }' +fi diff --git a/src/config/request_context.rs b/src/config/request_context.rs index 7410a15..320e644 100644 --- a/src/config/request_context.rs +++ b/src/config/request_context.rs @@ -5116,6 +5116,45 @@ mod tests { assert!(paths::skill_file("frontend-ui-ux").exists()); } + #[test] + #[serial] + fn bundled_graph_agents_parse_and_validate() { + use crate::graph::GraphParser; + use crate::graph::validator::GraphValidator; + + let _guard = TestConfigDirGuard::new(); + + Agent::install_builtin_agents(false).unwrap(); + Skill::install_builtin_skills(false).unwrap(); + + let mut checked = Vec::new(); + for entry in std::fs::read_dir(paths::agents_data_dir()).unwrap() { + let dir = entry.unwrap().path(); + let graph_path = dir.join("graph.yaml"); + if !graph_path.exists() { + continue; + } + let name = dir.file_name().unwrap().to_string_lossy().to_string(); + let graph = GraphParser::new(&dir) + .load_from_file(&graph_path) + .unwrap_or_else(|e| panic!("graph.yaml for '{name}' failed to parse: {e}")); + let result = GraphValidator::new(&dir).validate(&graph); + assert!( + result.errors.is_empty(), + "graph.yaml for '{name}' failed validation: {:#?}", + result.errors + ); + checked.push(name); + } + checked.sort(); + for expected in ["coder", "librarian", "step-runner"] { + assert!( + checked.iter().any(|n| n == expected), + "expected bundled graph agent '{expected}' to be checked; found {checked:?}" + ); + } + } + #[test] #[serial] fn install_functions_force_preserves_user_mcp_json() {