# File: loki/examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py
# (228 lines, 9.0 KiB, Python)
"""
Sisyphus supervisor node — the orchestrator that classifies intent and routes.
Loki equivalent: assets/agents/sisyphus/config.yaml
This is the brain of the system. In Loki, Sisyphus is the top-level agent that:
1. Classifies every incoming request (trivial / exploration / implementation /
architecture / ambiguous)
2. Routes to the appropriate sub-agent (explore, coder, oracle)
3. Manages the todo list for multi-step tasks
4. Verifies results and decides when the task is complete
In LangGraph, the supervisor is a node that returns `Command(goto="agent_name")`
to route control. This replaces Loki's `agent__spawn` + `agent__collect` pattern
with a declarative graph edge.
Key Loki→LangGraph mapping:
- agent__spawn --agent explore → Command(goto="explore")
- agent__spawn --agent coder → Command(goto="coder")
- agent__spawn --agent oracle → Command(goto="oracle")
- agent__check / agent__collect → (implicit: graph edges return to supervisor)
- todo__init / todo__add → state["todos"] updates
- user__ask / user__confirm → interrupt() for human-in-the-loop
Parallel execution note:
Loki can spawn multiple explore agents in parallel. In LangGraph, you'd use
the Send() API for dynamic fan-out. For simplicity, this implementation uses
sequential routing. See the README for how to add parallel fan-out.
"""
from __future__ import annotations
from typing import Literal
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.types import Command
from pydantic import BaseModel, Field
from sisyphus_langchain.state import SisyphusState
# ---------------------------------------------------------------------------
# Maximum iterations before forcing completion (safety valve)
# Inspired by Loki's max_auto_continues: 25 — a stricter cap of 15 is used here
# ---------------------------------------------------------------------------
MAX_ITERATIONS = 15
# ---------------------------------------------------------------------------
# Structured output schema for the supervisor's routing decision.
#
# In Loki, the supervisor is an LLM that produces free-text and calls tools
# like agent__spawn. In LangGraph, we use structured output to force the
# LLM into a typed routing decision — more reliable than parsing free text.
# ---------------------------------------------------------------------------
class RoutingDecision(BaseModel):
    # NOTE: the class docstring and every Field description below are emitted
    # as part of the JSON schema sent to the LLM via with_structured_output —
    # editing this text changes model routing behavior, not just docs.
    """The supervisor's decision about what to do next."""
    # One of the five intent categories from the supervisor prompt's
    # classification table.
    intent: Literal["trivial", "exploration", "implementation", "architecture", "ambiguous"] = Field(
        description="Classified intent of the user's request."
    )
    # Routing target; "FINISH" is translated to goto="__end__" by the
    # supervisor node rather than being a real graph node.
    next_agent: Literal["explore", "oracle", "coder", "FINISH"] = Field(
        description=(
            "Which agent to route to. 'explore' for research/discovery, "
            "'oracle' for architecture/design/debugging advice, "
            "'coder' for implementation, 'FINISH' if the task is complete."
        )
    )
    # On FINISH this doubles as the final answer: the supervisor copies it
    # into state["final_output"].
    delegation_notes: str = Field(
        description=(
            "Brief instructions for the target agent: what to look for (explore), "
            "what to analyze (oracle), or what to implement (coder). "
            "For FINISH, summarize what was accomplished."
        )
    )
# ---------------------------------------------------------------------------
# Supervisor system prompt — faithfully mirrors sisyphus/config.yaml
# ---------------------------------------------------------------------------
# NOTE: rendered with str.format() in supervisor_node, supplying
# {iteration_count}, {max_iterations}, and {agent_outputs}. Any literal
# braces added to this template in the future must be escaped as {{ }}.
SUPERVISOR_SYSTEM_PROMPT = """\
You are Sisyphus — an orchestrator that drives coding tasks to completion.
Your job: Classify → Delegate → Verify → Complete.
## Intent Classification (BEFORE every action)
| Type | Signal | Action |
|-----------------|-----------------------------------------------------|----------------------|
| trivial | Single file, known location, typo fix | Route to FINISH |
| exploration | "Find X", "Where is Y", "List all Z" | Route to explore |
| implementation | "Add feature", "Fix bug", "Write code" | Route to coder |
| architecture | See oracle triggers below | Route to oracle |
| ambiguous | Unclear scope, multiple interpretations | Route to FINISH with a clarifying question |
## Oracle Triggers (MUST route to oracle when you see these)
Route to oracle ANY time the user asks about:
- "How should I..." / "What's the best way to..." — design/approach questions
- "Why does X keep..." / "What's wrong with..." — complex debugging
- "Should I use X or Y?" — technology or pattern choices
- "How should this be structured?" — architecture
- "Review this" / "What do you think of..." — code/design review
- Tradeoff questions, multi-component questions, vague/open-ended questions
## Agent Specializations
| Agent | Use For |
|---------|-----------------------------------------------|
| explore | Find patterns, understand code, search |
| coder | Write/edit files, implement features |
| oracle | Architecture decisions, complex debugging |
## Workflow Patterns
### Implementation task: explore → coder
1. Route to explore to find existing patterns and conventions.
2. Review explore findings.
3. Route to coder with a structured prompt including the explore findings.
4. Verify the coder's output (check for CODER_COMPLETE or CODER_FAILED).
### Architecture question: explore + oracle
1. Route to explore to find relevant code.
2. Route to oracle with the explore findings for analysis.
### Simple question: oracle directly
For pure design/architecture questions, route to oracle directly.
## Rules
1. Always classify before acting.
2. You are a coordinator, not an implementer.
3. Route to oracle for ANY design/architecture question.
4. When routing to coder, include code patterns from explore findings.
5. Route to FINISH when the task is fully addressed.
## Current State
Iteration: {iteration_count}/{max_iterations}
Previous agent outputs: {agent_outputs}
"""
def create_supervisor_node(model_name: str = "gpt-4o", temperature: float = 0.1):
    """
    Build the Sisyphus supervisor node.

    Returns a LangGraph node function that classifies the user's intent with
    a structured-output LLM call and routes execution via Command(goto=...).

    Args:
        model_name: Model identifier (a capable model gives reliable routing).
        temperature: Sampling temperature; kept low for consistent decisions.
    """
    router = ChatOpenAI(model=model_name, temperature=temperature).with_structured_output(
        RoutingDecision
    )

    def supervisor_node(
        state: SisyphusState,
    ) -> Command[Literal["explore", "oracle", "coder", "__end__"]]:
        """
        LangGraph node: the Sisyphus supervisor.

        Classifies the user's intent, decides which agent to route to,
        and returns a Command that directs graph execution.
        """
        step = state.get("iteration_count", 0)
        collected = state.get("agent_outputs", {})

        # Safety valve — hard cap on supervisor loops so the graph cannot
        # ping-pong between agents forever.
        if step >= MAX_ITERATIONS:
            accomplished = "\n".join(
                f"- {k}: {v[:200]}" for k, v in collected.items()
            )
            return Command(
                goto="__end__",
                update={
                    "final_output": "Reached maximum iterations. Here's what was accomplished:\n"
                    + accomplished,
                },
            )

        # Render the routing prompt with the live loop state, then ask the
        # LLM for a typed RoutingDecision (no free-text parsing needed).
        system_text = SUPERVISOR_SYSTEM_PROMPT.format(
            iteration_count=step,
            max_iterations=MAX_ITERATIONS,
            agent_outputs=_summarize_outputs(collected),
        )
        verdict: RoutingDecision = router.invoke(
            [SystemMessage(content=system_text)] + state["messages"]
        )

        # Terminal decision — the delegation notes double as the final answer.
        if verdict.next_agent == "FINISH":
            return Command(
                goto="__end__",
                update={
                    "intent": verdict.intent,
                    "next_agent": "FINISH",
                    "final_output": verdict.delegation_notes,
                },
            )

        # Otherwise hand off to the chosen worker agent and count the hop.
        return Command(
            goto=verdict.next_agent,
            update={
                "intent": verdict.intent,
                "next_agent": verdict.next_agent,
                "iteration_count": step + 1,
            },
        )

    return supervisor_node
def _summarize_outputs(outputs: dict[str, str]) -> str:
"""Summarize agent outputs for the supervisor's context window."""
if not outputs:
return "(none yet)"
parts = []
for agent, output in outputs.items():
# Truncate long outputs to keep supervisor context manageable
# This mirrors Loki's summarization_threshold behavior
if len(output) > 2000:
output = output[:2000] + "... (truncated)"
parts.append(f"[{agent}]: {output}")
return "\n\n".join(parts)