# File: loki/examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py
# (228 lines, 9.0 KiB, Python)
"""
Sisyphus supervisor node — the orchestrator that classifies intent and routes.
Loki equivalent: assets/agents/sisyphus/config.yaml
This is the brain of the system. In Loki, Sisyphus is the top-level agent that:
1. Classifies every incoming request (trivial / exploration / implementation /
architecture / ambiguous)
2. Routes to the appropriate sub-agent (explore, coder, oracle)
3. Manages the todo list for multi-step tasks
4. Verifies results and decides when the task is complete
In LangGraph, the supervisor is a node that returns `Command(goto="agent_name")`
to route control. This replaces Loki's `agent__spawn` + `agent__collect` pattern
with a declarative graph edge.
Key Loki→LangGraph mapping:
- agent__spawn --agent explore → Command(goto="explore")
- agent__spawn --agent coder → Command(goto="coder")
- agent__spawn --agent oracle → Command(goto="oracle")
- agent__check / agent__collect → (implicit: graph edges return to supervisor)
- todo__init / todo__add → state["todos"] updates
- user__ask / user__confirm → interrupt() for human-in-the-loop
Parallel execution note:
Loki can spawn multiple explore agents in parallel. In LangGraph, you'd use
the Send() API for dynamic fan-out. For simplicity, this implementation uses
sequential routing. See the README for how to add parallel fan-out.
"""
from __future__ import annotations
from typing import Literal
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.types import Command
from pydantic import BaseModel, Field
from sisyphus_langchain.state import SisyphusState
# ---------------------------------------------------------------------------
# Maximum iterations before forcing completion (safety valve)
# Inspired by Loki's max_auto_continues: 25 — a stricter cap of 15 is used here
# ---------------------------------------------------------------------------
MAX_ITERATIONS = 15
# ---------------------------------------------------------------------------
# Structured output schema for the supervisor's routing decision.
#
# In Loki, the supervisor is an LLM that produces free-text and calls tools
# like agent__spawn. In LangGraph, we use structured output to force the
# LLM into a typed routing decision — more reliable than parsing free text.
# ---------------------------------------------------------------------------
class RoutingDecision(BaseModel):
    # NOTE: the class docstring and every Field description below are emitted
    # as part of the JSON schema sent to the LLM via with_structured_output —
    # editing this text changes model routing behavior, not just docs.
    """The supervisor's decision about what to do next."""
    # One of the five intent categories from the supervisor prompt's
    # classification table.
    intent: Literal["trivial", "exploration", "implementation", "architecture", "ambiguous"] = Field(
        description="Classified intent of the user's request."
    )
    # Routing target; "FINISH" is translated to goto="__end__" by the
    # supervisor node rather than being a real graph node.
    next_agent: Literal["explore", "oracle", "coder", "FINISH"] = Field(
        description=(
            "Which agent to route to. 'explore' for research/discovery, "
            "'oracle' for architecture/design/debugging advice, "
            "'coder' for implementation, 'FINISH' if the task is complete."
        )
    )
    # On FINISH this doubles as the final answer: the supervisor copies it
    # into state["final_output"].
    delegation_notes: str = Field(
        description=(
            "Brief instructions for the target agent: what to look for (explore), "
            "what to analyze (oracle), or what to implement (coder). "
            "For FINISH, summarize what was accomplished."
        )
    )
# ---------------------------------------------------------------------------
# Supervisor system prompt — faithfully mirrors sisyphus/config.yaml
# ---------------------------------------------------------------------------
# NOTE: rendered with str.format() in supervisor_node, supplying
# {iteration_count}, {max_iterations}, and {agent_outputs}. Any literal
# braces added to this template in the future must be escaped as {{ }}.
SUPERVISOR_SYSTEM_PROMPT = """\
You are Sisyphus — an orchestrator that drives coding tasks to completion.
Your job: Classify → Delegate → Verify → Complete.
## Intent Classification (BEFORE every action)
| Type | Signal | Action |
|-----------------|-----------------------------------------------------|----------------------|
| trivial | Single file, known location, typo fix | Route to FINISH |
| exploration | "Find X", "Where is Y", "List all Z" | Route to explore |
| implementation | "Add feature", "Fix bug", "Write code" | Route to coder |
| architecture | See oracle triggers below | Route to oracle |
| ambiguous | Unclear scope, multiple interpretations | Route to FINISH with a clarifying question |
## Oracle Triggers (MUST route to oracle when you see these)
Route to oracle ANY time the user asks about:
- "How should I..." / "What's the best way to..." — design/approach questions
- "Why does X keep..." / "What's wrong with..." — complex debugging
- "Should I use X or Y?" — technology or pattern choices
- "How should this be structured?" — architecture
- "Review this" / "What do you think of..." — code/design review
- Tradeoff questions, multi-component questions, vague/open-ended questions
## Agent Specializations
| Agent | Use For |
|---------|-----------------------------------------------|
| explore | Find patterns, understand code, search |
| coder | Write/edit files, implement features |
| oracle | Architecture decisions, complex debugging |
## Workflow Patterns
### Implementation task: explore → coder
1. Route to explore to find existing patterns and conventions.
2. Review explore findings.
3. Route to coder with a structured prompt including the explore findings.
4. Verify the coder's output (check for CODER_COMPLETE or CODER_FAILED).
### Architecture question: explore + oracle
1. Route to explore to find relevant code.
2. Route to oracle with the explore findings for analysis.
### Simple question: oracle directly
For pure design/architecture questions, route to oracle directly.
## Rules
1. Always classify before acting.
2. You are a coordinator, not an implementer.
3. Route to oracle for ANY design/architecture question.
4. When routing to coder, include code patterns from explore findings.
5. Route to FINISH when the task is fully addressed.
## Current State
Iteration: {iteration_count}/{max_iterations}
Previous agent outputs: {agent_outputs}
"""
def create_supervisor_node(model_name: str = "gpt-4o", temperature: float = 0.1):
    """
    Build the Sisyphus supervisor node.

    Returns a LangGraph node function that classifies the user's intent with
    a structured-output LLM call and routes execution via Command(goto=...).

    Args:
        model_name: Model identifier (a capable model gives reliable routing).
        temperature: Sampling temperature; kept low for consistent decisions.
    """
    router = ChatOpenAI(model=model_name, temperature=temperature).with_structured_output(
        RoutingDecision
    )

    def supervisor_node(
        state: SisyphusState,
    ) -> Command[Literal["explore", "oracle", "coder", "__end__"]]:
        """
        LangGraph node: the Sisyphus supervisor.

        Classifies the user's intent, decides which agent to route to,
        and returns a Command that directs graph execution.
        """
        step = state.get("iteration_count", 0)
        collected = state.get("agent_outputs", {})

        # Safety valve — hard cap on supervisor loops so the graph cannot
        # ping-pong between agents forever.
        if step >= MAX_ITERATIONS:
            accomplished = "\n".join(
                f"- {k}: {v[:200]}" for k, v in collected.items()
            )
            return Command(
                goto="__end__",
                update={
                    "final_output": "Reached maximum iterations. Here's what was accomplished:\n"
                    + accomplished,
                },
            )

        # Render the routing prompt with the live loop state, then ask the
        # LLM for a typed RoutingDecision (no free-text parsing needed).
        system_text = SUPERVISOR_SYSTEM_PROMPT.format(
            iteration_count=step,
            max_iterations=MAX_ITERATIONS,
            agent_outputs=_summarize_outputs(collected),
        )
        verdict: RoutingDecision = router.invoke(
            [SystemMessage(content=system_text)] + state["messages"]
        )

        # Terminal decision — the delegation notes double as the final answer.
        if verdict.next_agent == "FINISH":
            return Command(
                goto="__end__",
                update={
                    "intent": verdict.intent,
                    "next_agent": "FINISH",
                    "final_output": verdict.delegation_notes,
                },
            )

        # Otherwise hand off to the chosen worker agent and count the hop.
        return Command(
            goto=verdict.next_agent,
            update={
                "intent": verdict.intent,
                "next_agent": verdict.next_agent,
                "iteration_count": step + 1,
            },
        )

    return supervisor_node
def _summarize_outputs(outputs: dict[str, str]) -> str:
"""Summarize agent outputs for the supervisor's context window."""
if not outputs:
return "(none yet)"
parts = []
for agent, output in outputs.items():
# Truncate long outputs to keep supervisor context manageable
# This mirrors Loki's summarization_threshold behavior
if len(output) > 2000:
output = output[:2000] + "... (truncated)"
parts.append(f"[{agent}]: {output}")
return "\n\n".join(parts)