# Graph-based agent definition (full-featured reference)
# Location: <coyote-config-dir>/agents/<agent-name>/graph.yaml
#
# A graph agent is defined by this file alone. An agent directory contains
# either a config.yaml (a normal LLM-loop agent) or a graph.yaml (a graph
# agent), never both. The presence of graph.yaml is what makes the agent
# a graph agent.
#
# This file is a reference: it documents every available field, themed
# around a deep web research workflow with parallel retrieval. It is not
# a runnable agent as-is. The `agent:`, `script:`, and `documents:` values
# point at things that would need to exist for a real agent. For a real,
# runnable deep-research graph agent, see assets/agents/deep-research/.
#
# Full documentation:
#   https://github.com/Dark-Alex-17/coyote/wiki/Graph-Agents

# ---------------------------------------------------------------------------
# Identity
# ---------------------------------------------------------------------------
# Agent name (should match the directory name).
name: deep-research-example
# Free-form prose describing the workflow.
description: |
  A reference workflow: triage a research request, retrieve local context,
  branch on a script decision, run either a sub-agent or an LLM research
  step, then gate the result behind human approval.
# Graph schema version. Only "1.0" is accepted. Quoted so it stays a string
# rather than being parsed as the float 1.0.
version: "1.0"

# ---------------------------------------------------------------------------
# Agent-level config (all optional)
# The same knobs a normal agent's config.yaml carries. In a graph agent they
# live here instead of in a config.yaml.
# ---------------------------------------------------------------------------
# Default model for `llm` nodes that don't override it.
model: claude:claude-sonnet-4-6
# Default sampling temperature for `llm` nodes.
temperature: 0.0
# Default sampling top-p for `llm` nodes.
top_p: null
# Tool universe an `llm` node's `tools:` whitelist draws from.
global_tools:
  - web_search_coyote.sh
  - fetch_url_via_curl.sh
# MCP servers an `llm` node may reference via `mcp:<server>`.
mcp_servers:
  - ddg-search
# Suggested prompts surfaced in the UI.
conversation_starters:
  - "Research the current state of WebAssembly outside the browser"

# ---------------------------------------------------------------------------
# Agent variables (optional)
# Declared the same way as a normal agent's config.yaml `variables:` block.
# Each variable becomes available to:
#   - LLM nodes via the template form `{{name}}` once seeded into state
#     (see initial_state below).
#   - Script nodes via the env var `LLM_AGENT_VAR_<NAME>`.
# Values may be overridden at runtime with
#   `coyote -a <agent> --agent-variable <name> "..."`.
# ---------------------------------------------------------------------------
variables:
  - name: project_dir
    description: |
      Absolute path to the project directory.
    default: "."

# ---------------------------------------------------------------------------
# Execution settings (all optional)
# ---------------------------------------------------------------------------
settings:
  # Per-node visit cap. If one node id is entered more than this many times,
  # execution aborts. Default 100.
  max_loop_iterations: 100
  # Optional wall-clock cap (seconds) on the whole run, checked between node
  # transitions.
  timeout: 600
  # Log state before each node (debug/trace). Default true.
  log_state_snapshots: true
  # Run the graph validator at startup. Default true.
  validate_before_run: true
  # Cap on simultaneously running branches in any super-step (static fan-out
  # OR a `map` node). Default 4. A per-`map` `max_concurrency` overrides
  # this. See Parallel Execution below.
  max_concurrency: 4

# ---------------------------------------------------------------------------
# Reducers (optional in general; required whenever two parallel branches
# write the same state key in the same super-step, otherwise the validator
# errors at load).
#
# A reducer says how two values for the same key get merged. Built-ins:
#   append     list += [value]        (single value appended to a list)
#   extend     list += value (a list) (list-of-lists flattened by one level)
#   concat     "a\nb"                 (string join with newline separator)
#   sum        a + b                  (numeric add; ints stay ints)
#   max        max(a, b)
#   min        min(a, b)
#   merge      {**a, **b}             (dict union, RHS wins on key collision)
#   overwrite  last-write-wins        (explicit opt-in; B's value replaces A's)
#
# Keys not listed here have an implicit "single writer per super-step" rule:
# the validator rejects any graph where two parallel branches both write a
# key with no reducer.
# ---------------------------------------------------------------------------
reducers:
  # The diamond below writes `sources` from both branches, and each branch
  # writes a *list* of sources. `extend` flattens them into one combined
  # list; `append` would instead nest them as a list of lists.
  sources: extend
  # Each branch contributes prose; concat joins them.
  context: concat

# ---------------------------------------------------------------------------
# Seed state (optional)
# Values placed into graph state before any node runs; reference anywhere via
# {{key}}.
#
# Note: `initial_prompt` is seeded automatically by Coyote with the
# caller's prompt, so there's no need to set it here.
# ---------------------------------------------------------------------------
initial_state:
  audience: "general reader"
  # Seed an empty default for any key that a strict field (a node prompt /
  # instructions / question / End output) references but that is only set on
  # some paths. `refinement` is set only if the `refine` input node runs;
  # seeding it "" keeps `finalize`'s strict prompt from failing on the
  # approve-directly path.
  refinement: ""
  # `research` is normally written by `deep_dive`, `aggregate_subjects`, or
  # `summarize`. But `summarize` routes to `review` via `fallback:` when all
  # of its attempts fail, in which case `research` may never have been
  # written. Seeding it "" keeps `review`'s strict question from failing on
  # that path.
  research: ""

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
# ID of the first node to run (must exist in `nodes`).
start: triage

# ---------------------------------------------------------------------------
# Nodes
# Each node is keyed by its id. The `id:` inside a node must match its key
# (it may also be omitted, in which case Coyote fills it in from the key).
#
# Node types: agent | script | approval | input | llm | rag | map | end
# ---------------------------------------------------------------------------
nodes:
  # --- llm node -----------------------------------------------------------
  # A one-shot LLM call (with an optional bounded tool-call loop). Runs in a
  # fresh isolated context. Tools are strictly opt-in (see `tools`).
  triage:
    id: triage
    type: llm
    description: Classify the research request and extract its topic.
    # Optional system prompt (templated against state).
    instructions: |
      You triage research requests for a {{audience}} audience.
    # Required user prompt (templated against state).
    prompt: |
      Classify this request and extract the core research topic:
      {{initial_prompt}}
    # Tool whitelist. Omitted or [] = no tools at all.
    # A list narrows to exactly those entries.
    tools: []
    # Optional JSON Schema. The output is parsed to JSON and its top-level
    # object keys auto-merge into state (so `topic` / `needs_deep_dive`
    # become {{topic}} etc).
    output_schema:
      type: object
      properties:
        topic: { type: string }
        needs_deep_dive: { type: boolean }
      required: [topic, needs_deep_dive]
    # {{output}} = this node's result (here, the parsed object).
    state_updates:
      triage_result: "{{output}}"
    # --- Polymorphic `next` -----------------------------------------------
    # A single string runs the next node sequentially (e.g. `next: retrieve`).
    # A list runs all listed nodes in parallel as one BSP super-step
    # (for more info on BSP, see
    # https://en.wikipedia.org/wiki/Bulk_synchronous_parallel).
    # Their writes are merged via `reducers:` at the join. Branches converge
    # implicitly when they all route to the same downstream node (here,
    # `synthesize`). See the diamond:
    #
    #            triage
    #           /      \
    #     retrieve    web_search     (run concurrently)
    #           \      /
    #          synthesize            (join; fires once after both finish)
    next: [retrieve, web_search]

  # --- rag node (parallel branch 1 of the diamond) ------------------------
  # Hybrid (vector + keyword) retrieval against a per-node knowledge base.
  # The knowledge base is built once, at agent load time, into
  # <kb-dir>/retrieve.yaml (named after this node's id).
  retrieve:
    id: retrieve
    type: rag
    # Required. Files, directories, URLs, loader paths.
    documents:
      - ./knowledge/  # relative paths resolve against the agent directory
      - https://example.com/reference
    # Retrieval query (templated). Default: {{initial_prompt}}.
    query: "{{topic}}"
    # Chunks to retrieve. Default = the KB's own top_k.
    top_k: 5
    # Retrieval timeout in seconds. Default 120.
    timeout: 120
    # Knowledge-base build config (optional; used only when the KB is first
    # built). When embedding_model + chunk_size + chunk_overlap are all set,
    # the KB builds with no interactive prompts (works in non-interactive
    # runs).
    embedding_model: openai:text-embedding-3-small
    chunk_size: 1000
    chunk_overlap: 100
    # Optional reranker for hybrid-search results.
    reranker_model: null
    # Optional embedding-request batch size.
    batch_size: 100
    # {{output}} = { context: <text>, sources: [<source>, ...] }
    state_updates:
      # Writes `context` -> merged via `reducers.context` (concat).
      context: "{{output.context}}"
      # Writes `sources` (a list) -> merged via `reducers.sources` (extend).
      sources: "{{output.sources}}"
    # Joins with web_search at `synthesize`.
    next: synthesize

  # --- llm node (parallel branch 2 of the diamond) ------------------------
  # Runs concurrently with `retrieve`. Both branches write `context` and
  # `sources`; the validator confirms both keys have a reducer declared, and
  # the BSP scheduler merges them at the join.
  web_search:
    id: web_search
    type: llm
    instructions: "You are a web researcher. Cite every claim."
    prompt: "Web research: {{topic}}. Return findings and sources."
    tools:
      - web_search_coyote
      - mcp:ddg-search
    # When `output_schema` is set, top-level keys auto-merge into state, so
    # `context` and `sources` are produced without needing `state_updates`.
    output_schema:
      type: object
      properties:
        context: { type: string }
        sources:
          type: array
          items: { type: string }
      required: [context, sources]
    # Joins with retrieve at `synthesize`.
    next: synthesize

  # --- script node (the diamond's join; also dispatches) -----------------
  # Runs a .sh / .py / .ts script. The script receives state via the
  # GRAPH_STATE env var (inline JSON) or GRAPH_STATE_FILE (path to a JSON
  # file, used when state exceeds 32 KiB). Exactly one is set. It must print
  # a single JSON object on stdout: keys merge into state, and the reserved
  # `_next` key (if present) overrides routing.
  #
  # The script also receives these env vars (parity with bash tools called
  # from normal agents):
  #   GRAPH_STATE / GRAPH_STATE_FILE  state payload (one of the two is set)
  #   LLM_ROOT_DIR                    coyote config dir
  #   LLM_PROMPT_UTILS_FILE           path to .shared/prompt-utils.sh
  #   LLM_AGENT_DATA_DIR              this agent's data directory
  #   LLM_AGENT_VAR_<NAME>            one per declared `variables:` entry
  #   PATH                            with coyote's functions bin dir prepended
  #   CLICOLOR_FORCE / FORCE_COLOR    so child tools emit ANSI colors
  # The script's working directory is coyote's invocation CWD (not the agent
  # directory), matching the behavior of bash tools.
  #
  # This node fires once: after both `retrieve` and `web_search` finish.
  # The BSP scheduler dedups the two incoming edges into a single frontier
  # entry, applies the staged branch writes through the reducers, then runs
  # this node against the merged state. Inside the script, `context` is the
  # concatenated text of both branches and `sources` is the combined list.
  synthesize:
    id: synthesize
    type: script
    # Path relative to the agent directory.
    script: scripts/synthesize.py
    # Seconds. Default 30.
    timeout: 30
    # Applied after the stdout JSON is merged.
    state_updates:
      decided_for: "{{topic}}"
    # Default route if the script emits no `_next`.
    next: summarize
    # Route taken if the script fails (crash / bad JSON).
    fallback: summarize
    # This script is expected to emit `_next: deep_dive` (or
    # `_next: subjects_map` to demonstrate the map node below), or no `_next`
    # (then `next` is used). Targets reached only via the script's dynamic
    # `_next` get an "unreachable" warning from the validator. This is
    # expected for `_next`-routed targets.

  # --- agent node ---------------------------------------------------------
  # Spawns a full Coyote sub-agent and waits for it. The child uses its own
  # tool stack. Agent nodes have no `tools:` field. No schema hint is
  # injected even when `output_schema` is set (unlike llm nodes).
  deep_dive:
    id: deep_dive
    type: agent
    # Name of an existing Coyote agent to spawn.
    agent: deep-research
    # User message sent to the child (templated).
    prompt: |
      Research {{topic}} in depth. Existing context:
      {{context}}
    # Optional wall-clock cap, seconds. Default 300.
    timeout: 600
    # Optional. Same extraction as llm nodes.
    output_schema:
      type: object
      properties:
        summary: { type: string }
        findings:
          type: array
          items: { type: string }
      required: [summary, findings]
    state_updates:
      research: "{{output}}"
    # Required for agent nodes.
    next: review

  # --- map node (dynamic fan-out; think LangGraph's `Send` API) -----------
  # Spawns one parallel sub-branch per item in `over`. Each sub-branch runs
  # the node referenced by `branch:` with the item bound to `as:`. Outputs
  # collect into the array named by `collect_into:`, preserving input order.
  #
  # Reached via `synthesize`'s `_next: subjects_map`. The producer is
  # expected to have written a list at `subjects` (e.g. an upstream LLM node
  # with an `output_schema` returning {"subjects": ["a", "b", "c"]}).
  subjects_map:
    id: subjects_map
    type: map
    # Required. List expression resolved from state. An empty list is
    # allowed: no branches spawn, and `collect_into` is written as [].
    over: "{{subjects}}"
    # Required. Per-branch state key holding the current item. Read with
    # {{subject}} inside the branch node's prompt.
    as: subject
    # Required. Node id to invoke per item. Must point to an
    # llm | agent | rag | script node satisfying the map branch contract:
    #   - no `next:` (atomic, joined at map exit)
    #   - no `state_updates:` other than via the map's `collect_into` channel
    #   - no `output_schema:` (top-level merge would clash with collect_into)
    # The validator enforces all three.
    branch: research_subject
    # Required. State key for the array of per-branch outputs, in input
    # order (not spawn-finish order).
    collect_into: subject_findings
    # Optional per-map cap. Defaults to settings.max_concurrency above.
    max_concurrency: 3
    # Optional. State key the branch's output appears under. Default
    # "output". Useful only if the branch reads its own bound name back
    # (rare).
    output_key: output
    # Where to go after all sub-branches finish.
    next: aggregate_subjects

  # Branch node for subjects_map. Each invocation receives a different
  # `subject` in state. The branch is "atomic", meaning it cannot route on
  # its own; the surrounding `map` joins after all invocations finish.
  research_subject:
    id: research_subject
    type: llm
    instructions: "Research one subject deeply for a {{audience}} audience."
    prompt: "Research {{subject}}: pull the key facts and one citation."
    tools:
      - web_search_coyote
    # No `next:`, `state_updates:`, or `output_schema:` here. Map branches
    # have a strict contract (see the `subjects_map.branch` comment).

  # Aggregator that runs after the map joins. Reads the collected list.
  aggregate_subjects:
    id: aggregate_subjects
    type: llm
    instructions: "Combine N per-subject reports into one cohesive summary."
    prompt: |
      Per-subject reports (in original input order):
      {{subject_findings}}
    state_updates:
      research: "{{output}}"
    next: review

  # --- llm node with a narrowed tool whitelist ----------------------------
  summarize:
    id: summarize
    type: llm
    instructions: "You write concise research summaries for a {{audience}} audience."
    prompt: "Summarize the topic {{topic}}, using your tools as needed."
    # Narrow whitelist: exactly these entries, nothing else.
    tools:
      - web_search_coyote  # an exact global-tool / custom-tool name
      - mcp:ddg-search     # `mcp:<server>` includes that server's functions
    # Optional per-node model override.
    model: claude:claude-haiku-4-5
    # Optional per-node sampling override.
    temperature: 0.3
    # Retry count on transient errors only. Default 1.
    max_attempts: 2
    # Tool-call-loop turn cap. Default 10.
    max_iterations: 10
    # Route here if all attempts fail.
    fallback: review
    # Optional node wall-clock cap, seconds (unset = no timeout).
    timeout: 300
    state_updates:
      research: "{{output}}"
    # Required for llm nodes: the success route.
    next: review

  # --- approval node ------------------------------------------------------
  # Human-in-the-loop checkpoint. `user__ask` always offers a free-form
  # "type your own answer" option, so `on_other` is required.
  review:
    id: review
    type: approval
    question: |
      Proposed research result for {{topic}}:
      {{research}}
      Approve?
    # The listed choices shown to the user. Quoted so YAML keeps them as the
    # strings "yes" / "no" instead of parsing them as booleans.
    options:
      - "yes"
      - "no"
    # Map each listed option to its next node.
    routes:
      "yes": finalize
      "no": rejected_end
    # Required: route for ANY answer not in `routes`.
    on_other: refine
    # {{choice}} = the chosen option or the free-form text.
    state_updates:
      decision: "{{choice}}"

  # --- input node ---------------------------------------------------------
  # Collects a free-form string from the user.
  refine:
    id: refine
    type: input
    question: "What should be changed about the research result?"
    # Optional: used if the user submits empty input. Note: a substituted
    # default is not re-validated, so make sure it would satisfy
    # `validation`.
    default: "tighten the summary"
    # Optional length predicate of the form `len(input) <op> N`, with <op>
    # one of: > >= < <= == . Length only -- no regex.
    validation: "len(input) > 0"
    # {{input}} = the user's text.
    state_updates:
      refinement: "{{input}}"
    # Required for input nodes: the success route.
    next: finalize

  # --- llm node (final synthesis) -----------------------------------------
  finalize:
    id: finalize
    type: llm
    prompt: |
      Produce the final research report for {{topic}}.
      Result so far: {{research}}
      Requested refinement (if any): {{refinement}}
    state_updates:
      final_report: "{{output}}"
    next: done

  # --- end nodes ----------------------------------------------------------
  # Terminate the graph. `output` (templated, lenient interpolation) becomes
  # the graph's final result. A graph needs at least one `end` node.
  done:
    id: done
    type: end
    # Optional: applied before `output` is rendered.
    state_updates:
      status: "completed"
    output: |
      [{{status}}] {{final_report}}
      Sources: {{sources}}

  rejected_end:
    id: rejected_end
    type: end
    output: "Research on {{topic}} was not approved."