461 lines
23 KiB
YAML
461 lines
23 KiB
YAML
# Graph-based agent definition (full-featured reference)
|
|
# Location: <coyote-config-dir>/agents/<agent-name>/graph.yaml
|
|
#
|
|
# A graph agent is defined by this file alone. An agent directory contains
|
|
# either a config.yaml (a normal LLM-loop agent) or a graph.yaml (a graph
|
|
# agent), never both. The presence of graph.yaml is what makes the agent
|
|
# a graph agent.
|
|
#
|
|
# This file is a reference: it documents every available field, themed
|
|
# around a deep web research workflow with parallel retrieval. It is not
|
|
# a runnable agent as-is. The `agent:`, `script:`, and `documents:` values
|
|
# point at things that would need to exist for a real agent. For a real,
|
|
# runnable deep-research graph agent, see assets/agents/deep-research/.
|
|
#
|
|
# Full documentation:
|
|
# https://github.com/Dark-Alex-17/coyote/wiki/Graph-Agents
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Identity
|
|
# ---------------------------------------------------------------------------
|
|
name: deep-research-example # Agent name (should match the directory name)
|
|
description: | # Free-form prose describing the workflow
|
|
A reference workflow: triage a research request, retrieve local
|
|
context, branch on a script decision, run either a sub-agent or an
|
|
LLM research step, then gate the result behind human approval.
|
|
version: "1.0" # Graph schema version. Only "1.0" is accepted.
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Agent-level config (all optional)
|
|
# The same knobs a normal agent's config.yaml carries. In a graph agent they
|
|
# live here instead of in a config.yaml.
|
|
# ---------------------------------------------------------------------------
|
|
model: claude:claude-sonnet-4-6 # Default model for `llm` nodes that don't override it
|
|
temperature: 0.0 # Default sampling temperature for `llm` nodes
|
|
top_p: null # Default sampling top-p for `llm` nodes
|
|
|
|
global_tools: # Tool universe an `llm` node's `tools:` whitelist draws from
|
|
- web_search_coyote.sh
|
|
- fetch_url_via_curl.sh
|
|
|
|
mcp_servers: # MCP servers an `llm` node may reference via `mcp:<server>`
|
|
- ddg-search
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Skills policy (optional)
|
|
# Skills only attach to `llm` nodes inside a graph. Both fields are optional.
|
|
#
|
|
# skills_enabled: master switch for skills across every `llm` node in the
|
|
# graph. false here turns skills off entirely, regardless of
|
|
# per-node settings. Omitting it inherits the agent / global
|
|
# cascade (default true).
|
|
# enabled_skills: the *universe* of skill names any `llm` node in this graph
|
|
# may reference in its own `enabled_skills`. The validator
|
|
# rejects per-node entries outside this list at load time.
|
|
# Omit to inherit the agent / global cascade.
|
|
#
|
|
# Per-node usage is documented on the `triage` llm node below. There is no
|
|
# auto-load: the model uses `skill__list` / `skill__load` / `skill__unload` to
|
|
# bring skills in as it needs them, exactly like in normal-agent contexts.
|
|
# ---------------------------------------------------------------------------
|
|
skills_enabled: true
|
|
enabled_skills:
|
|
- code-review
|
|
- git-master
|
|
- ai-slop-remover
|
|
|
|
conversation_starters: # Suggested prompts surfaced in the UI
|
|
- "Research the current state of WebAssembly outside the browser"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Agent variables (optional)
|
|
# Declared the same way as a normal agent's config.yaml `variables:` block.
|
|
# Each variable becomes available to:
|
|
# - LLM nodes via the template form `{{name}}` once seeded into state
|
|
# (see initial_state below).
|
|
# - Script nodes via the env var `LLM_AGENT_VAR_<UPPER_NAME>`.
|
|
# Values may be overridden at runtime with
|
|
# `coyote -a <agent> --agent-variable <name> <value> "..."`.
|
|
# ---------------------------------------------------------------------------
|
|
variables:
|
|
- name: project_dir
|
|
description: |
|
|
Absolute path to the project directory.
|
|
default: "."
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Execution settings (all optional)
|
|
# ---------------------------------------------------------------------------
|
|
settings:
|
|
max_loop_iterations: 100 # Per-node visit cap. If one node id is entered more
|
|
# than this many times, execution aborts. Default 100.
|
|
timeout: 600 # Optional wall-clock cap (seconds) on the whole run,
|
|
# checked between node transitions.
|
|
log_state_snapshots: true # Log state before each node (debug/trace). Default true.
|
|
validate_before_run: true # Run the graph validator at startup. Default true.
|
|
max_concurrency: 4 # Cap on simultaneously running branches in any
|
|
# super-step (static fan-out OR a `map` node).
|
|
# Default 4. Per-`map` overrides this. See Parallel
|
|
# Execution below.
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Reducers (optional, required whenever two parallel branches write the same
|
|
# state key in the same super-step; otherwise the validator errors at load).
|
|
#
|
|
# A reducer says how two values for the same key get merged. Built-ins:
|
|
# append list += [value] (single value appended to a list)
|
|
# extend list += value (a list) (list-of-lists flattened by one level)
|
|
# concat "a\nb" (string join with newline separator)
|
|
# sum a + b (numeric add; ints stay ints)
|
|
# max max(a, b)
|
|
# min min(a, b)
|
|
# merge {**a, **b} (dict union, RHS wins on key collision)
|
|
# overwrite last-write-wins (explicit opt-in; B's value replaces A's)
|
|
#
|
|
# Keys not listed here have an implicit "single writer per super-step" rule:
|
|
# the validator rejects any graph where two parallel branches both write a
|
|
# key with no reducer.
|
|
# ---------------------------------------------------------------------------
|
|
reducers:
|
|
sources: append # The diamond below writes `sources` from both
|
|
# branches; append accumulates them into a list.
|
|
context: concat # Each branch contributes prose; concat joins them.
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Seed state (optional)
|
|
# Values placed into graph state before any node runs; reference anywhere via
|
|
# {{key}}.
|
|
#
|
|
# Note: `initial_prompt` is seeded automatically by Coyote with the
|
|
# caller's prompt. So there's no need to set it here.
|
|
# ---------------------------------------------------------------------------
|
|
initial_state:
|
|
audience: "general reader"
|
|
# Seed an empty default for any key that a strict field (a node prompt /
|
|
# instructions / question / End output) references but that is only set on
|
|
# some paths. `refinement` is set only if the `refine` input node runs;
|
|
# seeding it "" keeps `finalize`'s strict prompt from failing on the
|
|
# approve-directly path.
|
|
refinement: ""
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
start: triage # ID of the first node to run (must exist in `nodes`)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Nodes
|
|
# Each node is keyed by its id. The `id:` inside a node must match its key
|
|
# (it may also be omitted and thus Coyote fills it in from the key).
|
|
#
|
|
# Node types: agent | script | approval | input | llm | rag | map | end
|
|
# ---------------------------------------------------------------------------
|
|
nodes:
|
|
|
|
# --- llm node -----------------------------------------------------------
|
|
# A one-shot LLM call (with an optional bounded tool-call loop). Runs in a
|
|
# fresh isolated context. Tools are strictly opt-in (see `tools`).
|
|
triage:
|
|
id: triage
|
|
type: llm
|
|
description: Classify the research request and extract its topic.
|
|
instructions: | # Optional system prompt (templated against state)
|
|
You triage research requests for a {{audience}} audience.
|
|
prompt: | # Required user prompt (templated against state)
|
|
Classify this request and extract the core research topic:
|
|
{{initial_prompt}}
|
|
tools: [] # Tool whitelist. Omitted or [] = no tools at all.
|
|
# A list narrows to exactly those entries.
|
|
# --- Skills on llm nodes (optional) ------------------------------------
|
|
# `enabled_skills` narrows what this node's model can see / load via the
|
|
# built-in `skill__list` / `skill__load` / `skill__unload` meta-tools.
|
|
# Must be a subset of the graph-level `enabled_skills` (the validator
|
|
# catches violations at load time). `skills_enabled: false` would
|
|
# disable skills entirely for this node (no meta-tools exposed).
|
|
# Nothing is auto-loaded: the model decides when to load a skill.
|
|
skills_enabled: true # Whether skills are enabled on this llm node; defaults to 'true'
|
|
enabled_skills:
|
|
- ai-slop-remover
|
|
output_schema: # Optional JSON Schema. The output is parsed to JSON
|
|
type: object # and its top-level object keys auto-merge into state
|
|
properties: # (so `topic` / `needs_deep_dive` become {{topic}} etc).
|
|
topic: { type: string }
|
|
needs_deep_dive: { type: boolean }
|
|
required: [topic, needs_deep_dive]
|
|
state_updates: # {{output}} = this node's result (here, the parsed object)
|
|
triage_result: "{{output}}"
|
|
# --- Polymorphic `next` -----------------------------------------------
|
|
# A single string runs the next node sequentially (e.g. `next: retrieve`).
|
|
# A list runs all listed nodes in parallel as one BSP super-step
|
|
# (for more info on BSP, see https://en.wikipedia.org/wiki/Bulk_synchronous_parallel).
|
|
# Their writes are merged via `reducers:` at the join. Branches converge
|
|
# implicitly when they all route to the same downstream node (here,
|
|
# `synthesize`). See the diamond:
|
|
#
|
|
# triage
|
|
# / \
|
|
# retrieve web_search (run concurrently)
|
|
# \ /
|
|
# synthesize (join; fires once after both finish)
|
|
next: [retrieve, web_search]
|
|
|
|
# --- rag node (parallel branch 1 of the diamond) ------------------------
|
|
# Hybrid (vector + keyword) retrieval against a per-node knowledge base.
|
|
# The knowledge base is built once, at agent load time, into
|
|
# <agent-dir>/retrieve.yaml (named after this node's id).
|
|
retrieve:
|
|
id: retrieve
|
|
type: rag
|
|
documents: # Required. Files, directories, URLs, loader paths.
|
|
- ./knowledge/ # relative paths resolve against the agent directory
|
|
- https://example.com/reference
|
|
query: "{{topic}}" # Retrieval query (templated). Default: {{initial_prompt}}.
|
|
top_k: 5 # Chunks to retrieve. Default = the KB's own top_k.
|
|
timeout: 120 # Retrieval timeout in seconds. Default 120.
|
|
# Knowledge-base build config (optional; used only when the KB is first
|
|
# built). When embedding_model + chunk_size + chunk_overlap are all set,
|
|
# the KB builds with no interactive prompts (works in non-interactive runs).
|
|
embedding_model: openai:text-embedding-3-small
|
|
chunk_size: 1000
|
|
chunk_overlap: 100
|
|
reranker_model: null # Optional reranker for hybrid-search results
|
|
batch_size: 100 # Optional embedding-request batch size
|
|
state_updates: # {{output}} = { context: <str>, sources: [<path>, ...] }
|
|
context: "{{output.context}}" # writes `context` -> `reducers.context = concat`
|
|
sources: "{{output.sources}}" # writes `sources` -> `reducers.sources = append`
|
|
next: synthesize # Joins with web_search at `synthesize`.
|
|
|
|
# --- llm node (parallel branch 2 of the diamond) ------------------------
|
|
# Runs concurrently with `retrieve`. Both branches write `context` and
|
|
# `sources`; the validator confirms both keys have a reducer declared, and
|
|
# the BSP scheduler merges them at the join.
|
|
web_search:
|
|
id: web_search
|
|
type: llm
|
|
instructions: "You are a web researcher. Cite every claim."
|
|
prompt: "Web research: {{topic}}. Return findings and sources."
|
|
tools:
|
|
- web_search_coyote
|
|
- mcp:ddg-search
|
|
output_schema:
|
|
type: object
|
|
properties:
|
|
context: { type: string }
|
|
sources:
|
|
type: array
|
|
items: { type: string }
|
|
required: [context, sources]
|
|
# When `output_schema` is set, top-level keys auto-merge into state, so
|
|
# `context` and `sources` are produced without needing `state_updates`.
|
|
next: synthesize # Joins with retrieve at `synthesize`.
|
|
|
|
# --- script node (the diamond's join; also dispatches) -----------------
|
|
# Runs a .sh / .py / .ts script. The script receives state via the
|
|
# GRAPH_STATE env var (inline JSON) or GRAPH_STATE_FILE (path to a JSON
|
|
# file, used when state exceeds 32 KiB). Exactly one is set. It must print
|
|
# a single JSON object on stdout: keys merge into state, and the reserved
|
|
# `_next` key (if present) overrides routing.
|
|
#
|
|
# The script also receives these env vars (parity with bash tools called
|
|
# from normal agents):
|
|
# GRAPH_STATE / GRAPH_STATE_FILE state payload (one of the two is set)
|
|
# LLM_ROOT_DIR coyote config dir
|
|
# LLM_PROMPT_UTILS_FILE path to .shared/prompt-utils.sh
|
|
# LLM_AGENT_DATA_DIR this agent's data directory
|
|
# LLM_AGENT_VAR_<NAME> one per declared `variables:` entry
|
|
# PATH with coyote's functions bin dir prepended
|
|
# CLICOLOR_FORCE / FORCE_COLOR so child tools emit ANSI colors
|
|
# The script's working directory is coyote's invocation CWD (not the agent
|
|
# directory), matching the behavior of bash tools.
|
|
#
|
|
# This node fires once: after both `retrieve` and `web_search` finish.
|
|
# The BSP scheduler dedups the two incoming edges into a single frontier
|
|
# entry, applies the staged branch writes through the reducers, then runs
|
|
# this node against the merged state. Inside the script, `context` is the
|
|
# concatenated text of both branches and `sources` is the combined list.
|
|
synthesize:
|
|
id: synthesize
|
|
type: script
|
|
script: scripts/synthesize.py # Path relative to the agent directory
|
|
timeout: 30 # Seconds. Default 30.
|
|
state_updates: # Applied after the stdout JSON is merged
|
|
decided_for: "{{topic}}"
|
|
next: summarize # Default route if the script emits no `_next`
|
|
fallback: summarize # Route taken if the script fails (crash / bad JSON)
|
|
# This script is expected to emit `_next: deep_dive` (or `_next: subjects_map`
|
|
# to demonstrate the map node below), or no `_next` (then `next` is used).
|
|
# Targets reached only via the script's dynamic `_next` get an
|
|
# "unreachable" warning from the validator. This is expected for `_next`-routed
|
|
# targets.
|
|
|
|
# --- agent node ---------------------------------------------------------
|
|
# Spawns a full Coyote sub-agent and waits for it. The child uses its own
|
|
# tool stack. Agent nodes have no `tools:` field. No schema hint is
|
|
# injected even when `output_schema` is set (unlike llm nodes).
|
|
deep_dive:
|
|
id: deep_dive
|
|
type: agent
|
|
agent: deep-research # Name of an existing Coyote agent to spawn
|
|
prompt: | # User message sent to the child (templated)
|
|
Research {{topic}} in depth. Existing context:
|
|
{{context}}
|
|
timeout: 600 # Optional wall-clock cap, seconds. Default 300.
|
|
output_schema: # Optional. Same extraction as llm nodes
|
|
type: object
|
|
properties:
|
|
summary: { type: string }
|
|
findings:
|
|
type: array
|
|
items: { type: string }
|
|
required: [summary, findings]
|
|
state_updates:
|
|
research: "{{output}}"
|
|
next: review # Required for agent nodes
|
|
|
|
# --- map node (Dynamic fan-out. Think: LangGraph's `Send` API) ----------------
|
|
# Spawns one parallel sub-branch per item in `over`. Each sub-branch runs
|
|
# the node referenced by `branch:` with the item bound to `as:`. Outputs
|
|
# collect into the array named by `collect_into:`, preserving input order.
|
|
#
|
|
# Reach via `synthesize`'s `_next: subjects_map`. The producer is expected
|
|
# to have written a list at `subjects` (e.g. an upstream LLM node with an
|
|
# `output_schema` returning {"subjects": ["a", "b", "c"]}).
|
|
subjects_map:
|
|
id: subjects_map
|
|
type: map
|
|
over: "{{subjects}}" # Required. List expression resolved from state.
|
|
# Empty list is allowed. It means no branches spawn,
|
|
# and thus `collect_into` is written as [].
|
|
as: subject # Required. Per-branch state key holding the
|
|
# current item. Read with {{subject}} inside
|
|
# the branch node's prompt.
|
|
branch: research_subject # Required. Node id to invoke per item.
|
|
# Must point to an llm | agent | rag | script
|
|
# node satisfying the map branch contract:
|
|
# - no `next:` (atomic, joined at map exit)
|
|
# - no `state_updates:` other than via the
|
|
# map's `collect_into` channel
|
|
# - no `output_schema:` (top-level merge
|
|
# would clash with collect_into)
|
|
# Validator enforces all three.
|
|
collect_into: subject_findings # Required. State key for the array of
|
|
# per-branch outputs, in input order
|
|
# (not spawn-finish order).
|
|
max_concurrency: 3 # Optional per-map cap. Defaults to
|
|
# settings.max_concurrency above.
|
|
output_key: output # Optional. State key the branch's output
|
|
# appears under. Default "output". Useful
|
|
# only if the branch reads its own bound
|
|
# name back (rare).
|
|
next: aggregate_subjects # Where to go after all sub-branches finish.
|
|
|
|
# Branch node for subjects_map. Each invocation receives a different
|
|
# `subject` in state. The branch is "atomic", meaning it cannot route on
|
|
# its own; the surrounding `map` joins after all invocations finish.
|
|
research_subject:
|
|
id: research_subject
|
|
type: llm
|
|
instructions: "Research one subject deeply for a {{audience}} audience."
|
|
prompt: "Research {{subject}}: pull the key facts and one citation."
|
|
tools:
|
|
- web_search_coyote
|
|
# No `next:`, `state_updates:`, or `output_schema:` here. Map branches
|
|
# have a strict contract (see `subjects_map.branch` comment).
|
|
|
|
# Aggregator that runs after the map joins. Reads the collected list.
|
|
aggregate_subjects:
|
|
id: aggregate_subjects
|
|
type: llm
|
|
instructions: "Combine N per-subject reports into one cohesive summary."
|
|
prompt: |
|
|
Per-subject reports (in original input order):
|
|
{{subject_findings}}
|
|
state_updates:
|
|
research: "{{output}}"
|
|
next: review
|
|
|
|
# --- llm node with a narrowed tool whitelist ----------------------------
|
|
summarize:
|
|
id: summarize
|
|
type: llm
|
|
instructions: "You write concise research summaries for a {{audience}} audience."
|
|
prompt: "Summarize the topic {{topic}}, using your tools as needed."
|
|
tools: # Narrow whitelist: exactly these entries, nothing else
|
|
- web_search_coyote # an exact global-tool / custom-tool name
|
|
- mcp:ddg-search # `mcp:<server>` includes that server's functions
|
|
model: claude:claude-haiku-4-5 # Optional per-node model override
|
|
temperature: 0.3 # Optional per-node sampling override
|
|
max_attempts: 2 # Retry count on transient errors only. Default 1.
|
|
max_iterations: 10 # Tool-call-loop turn cap. Default 10.
|
|
fallback: review # Route here if all attempts fail
|
|
timeout: 300 # Optional node wall-clock cap, seconds (unset = no timeout)
|
|
state_updates:
|
|
research: "{{output}}"
|
|
next: review # Required for llm nodes: the success route
|
|
|
|
# --- approval node ------------------------------------------------------
|
|
# Human-in-the-loop checkpoint. `user__ask` always offers a free-form
|
|
# "type your own answer" option, so `on_other` is required.
|
|
review:
|
|
id: review
|
|
type: approval
|
|
question: |
|
|
Proposed research result for {{topic}}:
|
|
{{research}}
|
|
|
|
Approve?
|
|
options: # The listed choices shown to the user
|
|
- "yes"
|
|
- "no"
|
|
routes: # Map each listed option to its next node
|
|
"yes": finalize
|
|
"no": rejected_end
|
|
on_other: refine # Required: route for ANY answer not in `routes`
|
|
state_updates:
|
|
decision: "{{choice}}" # {{choice}} = the chosen option or the free-form text
|
|
|
|
# --- input node ---------------------------------------------------------
|
|
# Collects a free-form string from the user.
|
|
refine:
|
|
id: refine
|
|
type: input
|
|
question: "What should be changed about the research result?"
|
|
default: "tighten the summary" # Optional: used if the user submits empty input.
|
|
# Note: a substituted default is not re-validated,
|
|
# so make sure it would satisfy `validation`.
|
|
validation: "len(input) > 0" # Optional length predicate: len(input) <op> N,
|
|
# <op> in > >= < <= == . Length only -- no regex.
|
|
state_updates:
|
|
refinement: "{{input}}" # {{input}} = the user's text
|
|
next: finalize # Required for input nodes: the success route
|
|
|
|
# --- llm node (final synthesis) -----------------------------------------
|
|
finalize:
|
|
id: finalize
|
|
type: llm
|
|
prompt: |
|
|
Produce the final research report for {{topic}}.
|
|
Result so far: {{research}}
|
|
Requested refinement (if any): {{refinement}}
|
|
state_updates:
|
|
final_report: "{{output}}"
|
|
next: done
|
|
|
|
# --- end nodes ----------------------------------------------------------
|
|
# Terminate the graph. `output` (templated, lenient interpolation) becomes
|
|
# the graph's final result. A graph needs at least one `end` node.
|
|
done:
|
|
id: done
|
|
type: end
|
|
state_updates: # Optional: applied before `output` is rendered
|
|
status: "completed"
|
|
output: |
|
|
[{{status}}] {{final_report}}
|
|
|
|
Sources: {{sources}}
|
|
|
|
rejected_end:
|
|
id: rejected_end
|
|
type: end
|
|
output: "Research on {{topic}} was not approved."
|