# Graph-based agent definition (full-featured reference)
# Location: <coyote-config-dir>/agents/<agent-name>/graph.yaml
#
# A graph agent is defined by this file alone. An agent directory contains
# either a config.yaml (a normal LLM-loop agent) or a graph.yaml (a graph
# agent), never both. The presence of graph.yaml is what makes the agent
# a graph agent.
#
# This file is a reference: it documents every available field, themed
# around a deep web research workflow with parallel retrieval. It is not
# a runnable agent as-is. The `agent:`, `script:`, and `documents:` values
# point at things that would need to exist for a real agent. For a real,
# runnable deep-research graph agent, see assets/agents/deep-research/.
#
# Full documentation:
#   https://github.com/Dark-Alex-17/coyote/wiki/Graph-Agents

# ---------------------------------------------------------------------------
# Identity
# ---------------------------------------------------------------------------
# Agent name (should match the directory name).
name: deep-research-example
# Free-form prose describing the workflow.
description: |
  A reference workflow: triage a research request, retrieve local
  context, branch on a script decision, run either a sub-agent or an
  LLM research step, then gate the result behind human approval.
# Graph schema version. Only "1.0" is accepted.
version: '1.0'

# ---------------------------------------------------------------------------
# Agent-level config (all optional)
# The same knobs a normal agent's config.yaml carries. In a graph agent they
# live here instead of in a config.yaml.
# ---------------------------------------------------------------------------
# Default model for `llm` nodes that don't override it.
model: "claude:claude-sonnet-4-6"
# Default sampling temperature for `llm` nodes.
temperature: 0.0
# Default sampling top-p for `llm` nodes.
top_p: null

# Tool universe an `llm` node's `tools:` whitelist draws from.
global_tools:
  - web_search_coyote.sh
  - fetch_url_via_curl.sh

# MCP servers an `llm` node may reference via `mcp:<server>`.
mcp_servers:
  - ddg-search

# Suggested prompts surfaced in the UI.
conversation_starters:
  - 'Research the current state of WebAssembly outside the browser'

# ---------------------------------------------------------------------------
# Agent variables (optional)
# Declared the same way as a normal agent's config.yaml `variables:` block.
# Each variable becomes available to:
#   - LLM nodes via the template form `{{name}}` once seeded into state
#     (see initial_state below).
#   - Script nodes via the env var `LLM_AGENT_VAR_<UPPER_NAME>`.
# Values may be overridden at runtime with
#   `coyote -a <agent> --agent-variable <name> <value> "..."`.
# ---------------------------------------------------------------------------
variables:
  # One list entry per declared variable.
  - name: project_dir
    default: '.'
    description: |
      Absolute path to the project directory.

# ---------------------------------------------------------------------------
# Execution settings (all optional)
# ---------------------------------------------------------------------------
settings:
  # Per-node visit cap: if one node id is entered more than this many times,
  # execution aborts. Default 100.
  max_loop_iterations: 100
  # Optional wall-clock cap (seconds) on the whole run, checked between node
  # transitions.
  timeout: 600
  # Log state before each node (debug/trace). Default true.
  log_state_snapshots: true
  # Run the graph validator at startup. Default true.
  validate_before_run: true
  # Cap on simultaneously running branches in any super-step (static fan-out
  # OR a `map` node). Default 4. A `map` node's own `max_concurrency`
  # overrides this value. See Parallel Execution below.
  max_concurrency: 4

# ---------------------------------------------------------------------------
# Reducers (optional in general, but required whenever two parallel branches
# write the same state key in the same super-step; without one the validator
# errors at load).
#
# A reducer says how two values for the same key get merged. Built-ins:
#   append    list += [value]              (single value appended to a list)
#   extend    list += value (a list)       (list-of-lists flattened by one level)
#   concat    "a\nb"                       (string join with newline separator)
#   sum       a + b                        (numeric add; ints stay ints)
#   max       max(a, b)
#   min       min(a, b)
#   merge     {**a, **b}                   (dict union, RHS wins on key collision)
#   overwrite last-write-wins              (explicit opt-in; B's value replaces A's)
#
# Keys not listed here have an implicit "single writer per super-step" rule:
# the validator rejects any graph where two parallel branches both write a
# key with no reducer.
# ---------------------------------------------------------------------------
reducers:
  # Both diamond branches write `sources` as a *list* (the rag node's
  # `output.sources` and the `sources` array of web_search's output_schema).
  # `extend` flattens them into one combined list; `append` would instead
  # add each branch's whole list as a single element (a list of lists).
  sources: extend
  # Each branch contributes prose; concat joins the strings with a newline.
  context: concat

# ---------------------------------------------------------------------------
# Seed state (optional)
# Values placed into graph state before any node runs; reference anywhere via
# {{key}}.
#
# Note: `initial_prompt` is seeded automatically by Coyote with the
# caller's prompt, so there is no need to set it here.
# ---------------------------------------------------------------------------
initial_state:
  audience: "general reader"
  # Seed an empty default for any key that a strict field (a node prompt /
  # instructions / question / End output) references but that is only set on
  # some paths.
  #
  # `refinement` is set only if the `refine` input node runs; seeding it ""
  # keeps `finalize`'s strict prompt from failing on the approve-directly
  # path.
  refinement: ""
  # `research` is written by `deep_dive`, `aggregate_subjects`, or
  # `summarize` on success, but `summarize` routes to `review` through its
  # `fallback:` when all attempts fail, leaving the key unset. Seeding it ""
  # keeps `review`'s strict question (and `finalize`'s prompt) from failing
  # on that path.
  research: ""

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
# ID of the first node to run (must exist in `nodes`).
start: triage

# ---------------------------------------------------------------------------
# Nodes
# Each node is keyed by its id. The `id:` inside a node must match its key
# (it may also be omitted, in which case Coyote fills it in from the key).
#
# Node types: agent | script | approval | input | llm | rag | map | end
# ---------------------------------------------------------------------------
nodes:

  # --- llm node -----------------------------------------------------------
  # A one-shot LLM call (with an optional bounded tool-call loop). Runs in a
  # fresh isolated context. Tools are strictly opt-in (see `tools`).
  triage:
    id: triage
    type: llm
    description: Classify the research request and extract its topic.
    # Optional system prompt (templated against state).
    instructions: |
      You triage research requests for a {{audience}} audience.
    # Required user prompt (templated against state).
    prompt: |
      Classify this request and extract the core research topic:
      {{initial_prompt}}
    # Tool whitelist. Omitted or [] = no tools at all. A non-empty list
    # narrows to exactly those entries.
    tools: []
    # Optional JSON Schema. The output is parsed to JSON and its top-level
    # object keys auto-merge into state (so `topic` / `needs_deep_dive`
    # become {{topic}} etc).
    output_schema:
      type: object
      properties:
        topic:
          type: string
        needs_deep_dive:
          type: boolean
      required:
        - topic
        - needs_deep_dive
    # {{output}} = this node's result (here, the parsed object).
    state_updates:
      triage_result: '{{output}}'
    # --- Polymorphic `next` -----------------------------------------------
    # Given a single string, the named node runs next, sequentially
    # (e.g. `next: retrieve`). Given a list, all listed nodes run in parallel
    # as one BSP super-step (for more info on BSP, see
    # https://en.wikipedia.org/wiki/Bulk_synchronous_parallel), and their
    # writes are merged via `reducers:` at the join. Branches converge
    # implicitly when they all route to the same downstream node (here,
    # `synthesize`). The resulting diamond:
    #
    #          triage
    #          /     \
    #     retrieve  web_search   (run concurrently)
    #          \     /
    #         synthesize         (join; fires once after both finish)
    next:
      - retrieve
      - web_search

  # --- rag node (parallel branch 1 of the diamond) ------------------------
  # Hybrid (vector + keyword) retrieval against a per-node knowledge base.
  # The knowledge base is built once, at agent load time, into
  # <agent-dir>/retrieve.yaml (named after this node's id).
  retrieve:
    id: retrieve
    type: rag
    # Required. Files, directories, URLs, loader paths. Relative paths
    # resolve against the agent directory.
    documents:
      - ./knowledge/
      - https://example.com/reference
    # Retrieval query (templated). Default: {{initial_prompt}}.
    query: '{{topic}}'
    # Chunks to retrieve. Default = the KB's own top_k.
    top_k: 5
    # Retrieval timeout in seconds. Default 120.
    timeout: 120
    # Knowledge-base build config (optional; used only when the KB is first
    # built). When embedding_model + chunk_size + chunk_overlap are all set,
    # the KB builds with no interactive prompts (works in non-interactive
    # runs).
    embedding_model: openai:text-embedding-3-small
    chunk_size: 1000
    chunk_overlap: 100
    # Optional reranker for hybrid-search results.
    reranker_model: null
    # Optional embedding-request batch size.
    batch_size: 100
    # {{output}} = { context: <str>, sources: [<path>, ...] }
    # Both keys are also written by the parallel `web_search` branch, so each
    # is merged at the join by the reducer declared for it under `reducers:`.
    state_updates:
      context: '{{output.context}}'
      sources: '{{output.sources}}'
    # Joins with web_search at `synthesize`.
    next: synthesize

  # --- llm node (parallel branch 2 of the diamond) ------------------------
  # Runs concurrently with `retrieve`. Both branches write `context` and
  # `sources`; the validator confirms both keys have a reducer declared, and
  # the BSP scheduler merges them at the join.
  web_search:
    id: web_search
    type: llm
    instructions: 'You are a web researcher. Cite every claim.'
    prompt: 'Web research: {{topic}}. Return findings and sources.'
    tools:
      - web_search_coyote
      - mcp:ddg-search
    # Because `output_schema` is set, the top-level keys of the parsed output
    # auto-merge into state: `context` and `sources` are produced without
    # needing `state_updates`.
    output_schema:
      type: object
      properties:
        context:
          type: string
        sources:
          type: array
          items:
            type: string
      required:
        - context
        - sources
    # Joins with retrieve at `synthesize`.
    next: synthesize

  # --- script node (the diamond's join; also dispatches) -----------------
  # Runs a .sh / .py / .ts script. The script receives state via the
  # GRAPH_STATE env var (inline JSON) or GRAPH_STATE_FILE (path to a JSON
  # file, used when state exceeds 32 KiB). Exactly one is set. It must print
  # a single JSON object on stdout: keys merge into state, and the reserved
  # `_next` key (if present) overrides routing.
  #
  # The script also receives these env vars (parity with bash tools called
  # from normal agents):
  #   GRAPH_STATE / GRAPH_STATE_FILE  state payload (one of the two is set)
  #   LLM_ROOT_DIR                    coyote config dir
  #   LLM_PROMPT_UTILS_FILE           path to .shared/prompt-utils.sh
  #   LLM_AGENT_DATA_DIR              this agent's data directory
  #   LLM_AGENT_VAR_<NAME>            one per declared `variables:` entry
  #   PATH                            with coyote's functions bin dir prepended
  #   CLICOLOR_FORCE / FORCE_COLOR    so child tools emit ANSI colors
  # The script's working directory is coyote's invocation CWD (not the agent
  # directory), matching the behavior of bash tools.
  #
  # This node fires once: after both `retrieve` and `web_search` finish.
  # The BSP scheduler dedups the two incoming edges into a single frontier
  # entry, applies the staged branch writes through the reducers, then runs
  # this node against the merged state. Inside the script, `context` is the
  # concatenated text of both branches and `sources` is the combined list.
  synthesize:
    id: synthesize
    type: script
    # Path relative to the agent directory.
    script: scripts/synthesize.py
    # Seconds. Default 30.
    timeout: 30
    # Applied after the stdout JSON is merged.
    state_updates:
      decided_for: '{{topic}}'
    # Default route if the script emits no `_next`.
    next: summarize
    # Route taken if the script fails (crash / bad JSON).
    fallback: summarize
    # This script is expected to emit `_next: deep_dive` (or
    # `_next: subjects_map` to demonstrate the map node below), or no `_next`
    # at all (then `next` is used). Targets reached only via the script's
    # dynamic `_next` get an "unreachable" warning from the validator; for
    # `_next`-routed targets that warning is expected.

  # --- agent node ---------------------------------------------------------
  # Spawns a full Coyote sub-agent and waits for it. The child uses its own
  # tool stack. Agent nodes have no `tools:` field. No schema hint is
  # injected even when `output_schema` is set (unlike llm nodes).
  deep_dive:
    id: deep_dive
    type: agent
    # Name of an existing Coyote agent to spawn.
    agent: deep-research
    # User message sent to the child (templated).
    prompt: |
      Research {{topic}} in depth. Existing context:
      {{context}}
    # Optional wall-clock cap, seconds. Default 300.
    timeout: 600
    # Optional. Same extraction as llm nodes.
    output_schema:
      type: object
      properties:
        summary:
          type: string
        findings:
          type: array
          items:
            type: string
      required:
        - summary
        - findings
    state_updates:
      research: '{{output}}'
    # Required for agent nodes.
    next: review

  # --- map node (dynamic fan-out; think LangGraph's `Send` API) ------------
  # Spawns one parallel sub-branch per item in `over`. Each sub-branch runs
  # the node referenced by `branch:` with the item bound to `as:`. Outputs
  # collect into the array named by `collect_into:`, preserving input order.
  #
  # Reach via `synthesize`'s `_next: subjects_map`. The producer is expected
  # to have written a list at `subjects` (e.g. an upstream LLM node with an
  # `output_schema` returning {"subjects": ["a", "b", "c"]}).
  subjects_map:
    id: subjects_map
    type: map
    # Required. List expression resolved from state. An empty list is
    # allowed: no branches spawn, and `collect_into` is written as [].
    over: '{{subjects}}'
    # Required. Per-branch state key holding the current item. Read it with
    # {{subject}} inside the branch node's prompt.
    as: subject
    # Required. Node id to invoke per item. Must point to an
    # llm | agent | rag | script node satisfying the map branch contract:
    #   - no `next:` (atomic, joined at map exit)
    #   - no `state_updates:` other than via the map's `collect_into` channel
    #   - no `output_schema:` (top-level merge would clash with collect_into)
    # The validator enforces all three.
    branch: research_subject
    # Required. State key for the array of per-branch outputs, in input
    # order (not spawn-finish order).
    collect_into: subject_findings
    # Optional per-map cap. Defaults to settings.max_concurrency above.
    max_concurrency: 3
    # Optional. State key the branch's output appears under. Default
    # "output". Useful only if the branch reads its own bound name back
    # (rare).
    output_key: output
    # Where to go after all sub-branches finish.
    next: aggregate_subjects

  # Branch node for subjects_map. Each invocation receives a different
  # `subject` in state. The branch is "atomic", meaning it cannot route on
  # its own; the surrounding `map` joins after all invocations finish.
  research_subject:
    id: research_subject
    type: llm
    instructions: 'Research one subject deeply for a {{audience}} audience.'
    prompt: 'Research {{subject}}: pull the key facts and one citation.'
    tools:
      - web_search_coyote
    # Deliberately no `next:`, `state_updates:`, or `output_schema:` on this
    # node: map branches have a strict contract (see the comment on
    # `subjects_map.branch`).

  # Aggregator that runs after the map joins. Reads the collected list.
  aggregate_subjects:
    id: aggregate_subjects
    type: llm
    instructions: 'Combine N per-subject reports into one cohesive summary.'
    # `subject_findings` is the array collected by `subjects_map`.
    prompt: |
      Per-subject reports (in original input order):
      {{subject_findings}}
    state_updates:
      research: '{{output}}'
    next: review

  # --- llm node with a narrowed tool whitelist ----------------------------
  summarize:
    id: summarize
    type: llm
    instructions: 'You write concise research summaries for a {{audience}} audience.'
    prompt: 'Summarize the topic {{topic}}, using your tools as needed.'
    # Narrow whitelist: exactly these entries, nothing else.
    #   - a bare name is an exact global-tool / custom-tool name
    #   - `mcp:<server>` includes that server's functions
    tools:
      - web_search_coyote
      - mcp:ddg-search
    # Optional per-node model override.
    model: "claude:claude-haiku-4-5"
    # Optional per-node sampling override.
    temperature: 0.3
    # Retry count on transient errors only. Default 1.
    max_attempts: 2
    # Tool-call-loop turn cap. Default 10.
    max_iterations: 10
    # Route here if all attempts fail.
    fallback: review
    # Optional node wall-clock cap, seconds (unset = no timeout).
    timeout: 300
    state_updates:
      research: '{{output}}'
    # Required for llm nodes: the success route.
    next: review

  # --- approval node ------------------------------------------------------
  # Human-in-the-loop checkpoint. `user__ask` always offers a free-form
  # "type your own answer" option, so `on_other` is required.
  review:
    id: review
    type: approval
    question: |
      Proposed research result for {{topic}}:
      {{research}}

      Approve?
    # The listed choices shown to the user. Quoted so they stay strings
    # rather than being read as booleans.
    options:
      - 'yes'
      - 'no'
    # Map each listed option to its next node.
    routes:
      'yes': finalize
      'no': rejected_end
    # Required: route for ANY answer not in `routes`.
    on_other: refine
    # {{choice}} = the chosen option or the free-form text.
    state_updates:
      decision: '{{choice}}'

  # --- input node ---------------------------------------------------------
  # Collects a free-form string from the user.
  refine:
    id: refine
    type: input
    question: 'What should be changed about the research result?'
    # Optional: used if the user submits empty input. Note: a substituted
    # default is not re-validated, so make sure it would satisfy
    # `validation`.
    default: 'tighten the summary'
    # Optional length predicate of the form len(input) <op> N, where <op> is
    # one of > >= < <= == . Length only -- no regex.
    validation: 'len(input) > 0'
    # {{input}} = the user's text.
    state_updates:
      refinement: '{{input}}'
    # Required for input nodes: the success route.
    next: finalize

  # --- llm node (final synthesis) -----------------------------------------
  finalize:
    id: finalize
    type: llm
    # `refinement` is non-empty only when the `refine` input node ran.
    prompt: |
      Produce the final research report for {{topic}}.
      Result so far: {{research}}
      Requested refinement (if any): {{refinement}}
    # The report text is stored for the `done` end node to render.
    state_updates:
      final_report: '{{output}}'
    next: done

  # --- end nodes ----------------------------------------------------------
  # Terminate the graph. `output` (templated, lenient interpolation) becomes
  # the graph's final result. A graph needs at least one `end` node.
  done:
    id: done
    type: end
    # Optional: applied before `output` is rendered.
    state_updates:
      status: 'completed'
    # The rendered template is the graph's final result.
    output: |
      [{{status}}] {{final_report}}

      Sources: {{sources}}

  # Terminal node that `review` routes to when the answer is "no".
  rejected_end:
    id: rejected_end
    type: end
    output: 'Research on {{topic}} was not approved.'