Sisyphus agent recreated in LangChain to figure out how it works and how to use it

2026-04-15 12:47:38 -06:00
parent ff3419a714
commit 9bab6a0c2d
14 changed files with 1745 additions and 0 deletions
@@ -0,0 +1 @@
"""Agent node definitions for the Sisyphus orchestrator."""
@@ -0,0 +1,145 @@
"""
Coder agent node — the implementation worker.
Loki equivalent: assets/agents/coder/config.yaml + tools.sh
In Loki, the coder is the ONLY agent that modifies files. It:
- Receives a structured prompt from sisyphus with code patterns to follow
- Writes files via the write_file tool (never pastes code in chat)
- Verifies builds after every change
- Signals CODER_COMPLETE or CODER_FAILED
In LangGraph, coder is a node with write-capable tools (read_file, write_file,
search_content, execute_command, verify_build). The supervisor formats a
structured delegation prompt (Goal / Reference Files / Code Patterns /
Conventions / Constraints) and routes to this node.
Key Loki→LangGraph mapping:
- Loki's "Coder Delegation Format" → the supervisor builds this as a
HumanMessage before routing to the coder node.
- Loki's auto_continue (up to 15) → the supervisor can re-route to coder
if verification fails, up to iteration_count limits.
- Loki's todo system for multi-file changes → the coder updates
state["todos"] as it completes each file.
"""
from __future__ import annotations
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from sisyphus_langchain.state import SisyphusState
from sisyphus_langchain.tools.filesystem import (
read_file,
search_content,
search_files,
write_file,
)
from sisyphus_langchain.tools.project import (
execute_command,
run_tests,
verify_build,
)
# ---------------------------------------------------------------------------
# System prompt — faithfully mirrors coder/config.yaml
# ---------------------------------------------------------------------------
CODER_SYSTEM_PROMPT = """\
You are a senior engineer. You write code that works on the first try.
## Your Mission
Given an implementation task:
1. Check for context provided in the conversation (patterns, conventions, reference files).
2. Fill gaps only — read files NOT already covered in context.
3. Write the code using the write_file tool (NEVER output code in chat).
4. Verify it compiles/builds using verify_build.
5. Provide a summary of what you implemented.
## Using Provided Context (IMPORTANT)
Your prompt often contains prior findings from the explore agent: file paths,
code patterns, and conventions.
**If context is provided:**
1. Use it as your primary reference. Don't re-read files already summarized.
2. Follow the code patterns shown — snippets in context ARE the style guide.
3. Read referenced files ONLY IF you need more detail (full signatures, imports).
4. If context includes a "Conventions" section, follow it exactly.
**If context is NOT provided or is too vague:**
Fall back to self-exploration: search for similar files, read 1-2 examples,
match their style.
## Writing Code
CRITICAL: Write code using the write_file tool. NEVER paste code in chat.
## Pattern Matching
Before writing ANY file:
1. Find a similar existing file.
2. Match its style: imports, naming, structure.
3. Follow the same patterns exactly.
## Verification
After writing files:
1. Run verify_build to check compilation.
2. If it fails, fix the error (minimal change).
3. Don't move on until build passes.
## Rules
1. Write code via tools — never output code to chat.
2. Follow patterns — read existing files first.
3. Verify builds — don't finish without checking.
4. Minimal fixes — if build fails, fix precisely.
5. No refactoring — only implement what's asked.
"""
# Full tool set — coder gets write access and command execution
CODER_TOOLS = [
read_file,
write_file,
search_content,
search_files,
execute_command,
verify_build,
run_tests,
]
def create_coder_node(model_name: str = "gpt-4o", temperature: float = 0.1):
"""
Factory that returns a coder node function.
Coder needs a capable model because it writes production code. In Loki,
coder uses the same model as the parent by default.
Args:
model_name: Model identifier.
temperature: LLM temperature (Loki coder uses 0.1 for consistency).
"""
llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(CODER_TOOLS)
def coder_node(state: SisyphusState) -> dict:
"""
LangGraph node: run the coder agent.
Reads conversation history (including the supervisor's structured
delegation prompt), invokes the LLM with write-capable tools,
and returns the result.
"""
response = llm.invoke(
[SystemMessage(content=CODER_SYSTEM_PROMPT)] + state["messages"]
)
return {
"messages": [response],
"agent_outputs": {
**state.get("agent_outputs", {}),
"coder": response.content,
},
}
return coder_node
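The coder's docstring references the supervisor's structured delegation format (Goal / Reference Files / Code Patterns / Conventions / Constraints). A minimal sketch of a helper that assembles such a prompt — hypothetical, not part of this commit; in the graph the resulting string would be wrapped in a `HumanMessage` before routing to the coder node:

```python
def build_coder_delegation(
    goal: str,
    reference_files: list[str],
    code_patterns: str,
    conventions: str,
    constraints: str,
) -> str:
    """Assemble a delegation prompt in the Goal / Reference Files /
    Code Patterns / Conventions / Constraints shape the coder expects.
    Hypothetical helper for illustration only."""
    files = "\n".join(f"- {path}" for path in reference_files)
    return (
        f"## Goal\n{goal}\n\n"
        f"## Reference Files\n{files}\n\n"
        f"## Code Patterns\n{code_patterns}\n\n"
        f"## Conventions\n{conventions}\n\n"
        f"## Constraints\n{constraints}"
    )

# Example (all values are made up for illustration):
prompt = build_coder_delegation(
    goal="Add a /health endpoint",
    reference_files=["src/api/routes/status.py"],
    code_patterns="@router.get(...) handlers returning pydantic models",
    conventions="snake_case modules, one router per file",
    constraints="No refactoring; touch only the routes package",
)
```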
@@ -0,0 +1,110 @@
"""
Explore agent node — the read-only codebase researcher.
Loki equivalent: assets/agents/explore/config.yaml + tools.sh
In Loki, the explore agent is spawned via `agent__spawn --agent explore --prompt "..."`
and runs as an isolated subprocess with its own session. It ends with
"EXPLORE_COMPLETE" so the parent knows it's finished.
In LangGraph, the explore agent is a *node* in the graph. The supervisor routes
to it via `Command(goto="explore")`. It reads the latest message (the supervisor's
delegation prompt), calls the LLM with read-only tools, and writes its findings
back to the shared message list. The graph edge then returns control to the
supervisor.
Key differences from Loki:
- No isolated session — shares the graph's message list (but has its own
system prompt and tool set, just like Loki's per-agent config).
- No "EXPLORE_COMPLETE" sentinel — the graph edge handles control flow.
- No output summarization — LangGraph's state handles context management.
"""
from __future__ import annotations
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from sisyphus_langchain.state import SisyphusState
from sisyphus_langchain.tools.filesystem import (
list_directory,
read_file,
search_content,
search_files,
)
# ---------------------------------------------------------------------------
# System prompt — faithfully mirrors explore/config.yaml
# ---------------------------------------------------------------------------
EXPLORE_SYSTEM_PROMPT = """\
You are a codebase explorer. Your job: Search, find, report. Nothing else.
## Your Mission
Given a search task, you:
1. Search for relevant files and patterns
2. Read key files to understand structure
3. Report findings concisely
## Strategy
1. **Find first, read second** — Never read a file without knowing why.
2. **Use search_content to locate** — find exactly where things are defined.
3. **Use search_files to discover** — find files by name pattern.
4. **Read targeted sections** — use offset and limit to read only relevant lines.
5. **Never read entire large files** — if a file is 500+ lines, read the relevant section only.
## Output Format
Always end your response with a structured findings summary:
FINDINGS:
- [Key finding 1]
- [Key finding 2]
- Relevant files: [list of paths]
## Rules
1. Be fast — don't read every file, read representative ones.
2. Be focused — answer the specific question asked.
3. Be concise — report findings, not your process.
4. Never modify files — you are read-only.
5. Limit reads — max 5 file reads per exploration.
"""
# Read-only tools — mirrors explore's tool set (no write_file, no execute_command)
EXPLORE_TOOLS = [read_file, search_content, search_files, list_directory]
def create_explore_node(model_name: str = "gpt-4o-mini", temperature: float = 0.1):
"""
Factory that returns an explore node function bound to a specific model.
In Loki, the model is set per-agent in config.yaml. Here we parameterize it
so you can use a cheap model for exploration (cost optimization).
Args:
model_name: OpenAI model identifier.
temperature: LLM temperature (Loki explore uses 0.1).
"""
llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(EXPLORE_TOOLS)
def explore_node(state: SisyphusState) -> dict:
"""
LangGraph node: run the explore agent.
Reads the conversation history, applies the explore system prompt,
invokes the LLM with read-only tools, and returns the response.
"""
response = llm.invoke(
[SystemMessage(content=EXPLORE_SYSTEM_PROMPT)] + state["messages"]
)
return {
"messages": [response],
"agent_outputs": {
**state.get("agent_outputs", {}),
"explore": response.content,
},
}
return explore_node
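The `FINDINGS:` block at the end of every explore response gives the supervisor a machine-scannable summary. A sketch of how a consumer might parse it — a hypothetical helper, not part of this commit (the supervisor here actually reads the raw text via `agent_outputs`):

```python
def parse_findings(text: str) -> dict[str, list[str]]:
    """Extract bullet findings and the 'Relevant files' list from an
    explore agent's FINDINGS block. Hypothetical helper for illustration."""
    findings: list[str] = []
    files: list[str] = []
    in_block = False
    for line in text.splitlines():
        stripped = line.strip()
        if stripped == "FINDINGS:":
            in_block = True  # everything after this marker is the summary
            continue
        if in_block and stripped.startswith("- "):
            item = stripped[2:]
            if item.lower().startswith("relevant files:"):
                # "- Relevant files: a.py, b.py" -> ["a.py", "b.py"]
                files = [p.strip() for p in item.split(":", 1)[1].split(",") if p.strip()]
            else:
                findings.append(item)
    return {"findings": findings, "relevant_files": files}
```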
@@ -0,0 +1,124 @@
"""
Oracle agent node — the high-IQ architecture and debugging advisor.
Loki equivalent: assets/agents/oracle/config.yaml + tools.sh
In Loki, the oracle is a READ-ONLY advisor spawned for:
- Architecture decisions and multi-system tradeoffs
- Complex debugging (after 2+ failed fix attempts)
- Code/design review
- Risk assessment
It uses temperature 0.2 (slightly higher than explore/coder for more creative
reasoning) and ends with "ORACLE_COMPLETE".
In LangGraph, oracle is a node that receives the full message history, reasons
about the problem, and writes structured advice back. It has read-only tools
only — it never modifies files.
Key Loki→LangGraph mapping:
- Loki oracle triggers (the "MUST spawn oracle when..." rules in sisyphus)
become routing conditions in the supervisor node.
- Oracle's structured output format (Analysis/Recommendation/Reasoning/Risks)
is enforced via the system prompt, same as in Loki.
"""
from __future__ import annotations
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from sisyphus_langchain.state import SisyphusState
from sisyphus_langchain.tools.filesystem import (
list_directory,
read_file,
search_content,
search_files,
)
# ---------------------------------------------------------------------------
# System prompt — faithfully mirrors oracle/config.yaml
# ---------------------------------------------------------------------------
ORACLE_SYSTEM_PROMPT = """\
You are Oracle — a senior architect and debugger consulted for complex decisions.
## Your Role
You are READ-ONLY. You analyze, advise, and recommend. You do NOT implement.
## When You're Consulted
1. **Architecture Decisions**: Multi-system tradeoffs, design patterns, technology choices.
2. **Complex Debugging**: After 2+ failed fix attempts, deep analysis needed.
3. **Code Review**: Evaluating proposed designs or implementations.
4. **Risk Assessment**: Security, performance, or reliability concerns.
## Your Process
1. **Understand**: Read relevant code, understand the full context.
2. **Analyze**: Consider multiple angles and tradeoffs.
3. **Recommend**: Provide clear, actionable advice.
4. **Justify**: Explain your reasoning.
## Output Format
Structure your response as:
## Analysis
[Your understanding of the situation]
## Recommendation
[Clear, specific advice]
## Reasoning
[Why this is the right approach]
## Risks/Considerations
[What to watch out for]
## Rules
1. Never modify files — you advise, others implement.
2. Be thorough — read all relevant context before advising.
3. Be specific — general advice isn't helpful.
4. Consider tradeoffs — there are rarely perfect solutions.
5. Stay focused — answer the specific question asked.
"""
# Read-only tools — same set as explore (oracle never writes)
ORACLE_TOOLS = [read_file, search_content, search_files, list_directory]
def create_oracle_node(model_name: str = "gpt-4o", temperature: float = 0.2):
"""
Factory that returns an oracle node function.
Oracle uses a more expensive model than explore because it needs deeper
reasoning. In Loki, the model is inherited from the global config unless
overridden in oracle/config.yaml.
Args:
model_name: Model identifier (use a strong reasoning model).
temperature: LLM temperature (Loki oracle uses 0.2).
"""
llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(ORACLE_TOOLS)
def oracle_node(state: SisyphusState) -> dict:
"""
LangGraph node: run the oracle agent.
Reads conversation history, applies the oracle system prompt,
invokes the LLM, and returns structured advice.
"""
response = llm.invoke(
[SystemMessage(content=ORACLE_SYSTEM_PROMPT)] + state["messages"]
)
return {
"messages": [response],
"agent_outputs": {
**state.get("agent_outputs", {}),
"oracle": response.content,
},
}
return oracle_node
@@ -0,0 +1,227 @@
"""
Sisyphus supervisor node — the orchestrator that classifies intent and routes.
Loki equivalent: assets/agents/sisyphus/config.yaml
This is the brain of the system. In Loki, Sisyphus is the top-level agent that:
1. Classifies every incoming request (trivial / exploration / implementation /
architecture / ambiguous)
2. Routes to the appropriate sub-agent (explore, coder, oracle)
3. Manages the todo list for multi-step tasks
4. Verifies results and decides when the task is complete
In LangGraph, the supervisor is a node that returns `Command(goto="agent_name")`
to route control. This replaces Loki's `agent__spawn` + `agent__collect` pattern
with a declarative graph edge.
Key Loki→LangGraph mapping:
- agent__spawn --agent explore → Command(goto="explore")
- agent__spawn --agent coder → Command(goto="coder")
- agent__spawn --agent oracle → Command(goto="oracle")
- agent__check / agent__collect → (implicit: graph edges return to supervisor)
- todo__init / todo__add → state["todos"] updates
- user__ask / user__confirm → interrupt() for human-in-the-loop
Parallel execution note:
Loki can spawn multiple explore agents in parallel. In LangGraph, you'd use
the Send() API for dynamic fan-out. For simplicity, this implementation uses
sequential routing. See the README for how to add parallel fan-out.
"""
from __future__ import annotations
from typing import Literal
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.types import Command
from pydantic import BaseModel, Field
from sisyphus_langchain.state import SisyphusState
# ---------------------------------------------------------------------------
# Maximum iterations before forcing completion (safety valve).
# Loki's sisyphus config allows max_auto_continues: 25; a tighter cap of 15
# is used here to keep runs bounded.
# ---------------------------------------------------------------------------
MAX_ITERATIONS = 15
# ---------------------------------------------------------------------------
# Structured output schema for the supervisor's routing decision.
#
# In Loki, the supervisor is an LLM that produces free-text and calls tools
# like agent__spawn. In LangGraph, we use structured output to force the
# LLM into a typed routing decision — more reliable than parsing free text.
# ---------------------------------------------------------------------------
class RoutingDecision(BaseModel):
"""The supervisor's decision about what to do next."""
intent: Literal["trivial", "exploration", "implementation", "architecture", "ambiguous"] = Field(
description="Classified intent of the user's request."
)
next_agent: Literal["explore", "oracle", "coder", "FINISH"] = Field(
description=(
"Which agent to route to. 'explore' for research/discovery, "
"'oracle' for architecture/design/debugging advice, "
"'coder' for implementation, 'FINISH' if the task is complete."
)
)
delegation_notes: str = Field(
description=(
"Brief instructions for the target agent: what to look for (explore), "
"what to analyze (oracle), or what to implement (coder). "
"For FINISH, summarize what was accomplished."
)
)
# ---------------------------------------------------------------------------
# Supervisor system prompt — faithfully mirrors sisyphus/config.yaml
# ---------------------------------------------------------------------------
SUPERVISOR_SYSTEM_PROMPT = """\
You are Sisyphus — an orchestrator that drives coding tasks to completion.
Your job: Classify → Delegate → Verify → Complete.
## Intent Classification (BEFORE every action)
| Type | Signal | Action |
|-----------------|-----------------------------------------------------|----------------------|
| trivial         | Single file, known location, typo fix               | Route to coder (single step) |
| exploration | "Find X", "Where is Y", "List all Z" | Route to explore |
| implementation | "Add feature", "Fix bug", "Write code" | Route to coder |
| architecture | See oracle triggers below | Route to oracle |
| ambiguous | Unclear scope, multiple interpretations | Route to FINISH with a clarifying question |
## Oracle Triggers (MUST route to oracle when you see these)
Route to oracle ANY time the user asks about:
- "How should I..." / "What's the best way to..." — design/approach questions
- "Why does X keep..." / "What's wrong with..." — complex debugging
- "Should I use X or Y?" — technology or pattern choices
- "How should this be structured?" — architecture
- "Review this" / "What do you think of..." — code/design review
- Tradeoff questions, multi-component questions, vague/open-ended questions
## Agent Specializations
| Agent | Use For |
|---------|-----------------------------------------------|
| explore | Find patterns, understand code, search |
| coder | Write/edit files, implement features |
| oracle | Architecture decisions, complex debugging |
## Workflow Patterns
### Implementation task: explore → coder
1. Route to explore to find existing patterns and conventions.
2. Review explore findings.
3. Route to coder with a structured prompt including the explore findings.
4. Verify the coder's output (check for CODER_COMPLETE or CODER_FAILED).
### Architecture question: explore + oracle
1. Route to explore to find relevant code.
2. Route to oracle with the explore findings for analysis.
### Simple question: oracle directly
For pure design/architecture questions, route to oracle directly.
## Rules
1. Always classify before acting.
2. You are a coordinator, not an implementer.
3. Route to oracle for ANY design/architecture question.
4. When routing to coder, include code patterns from explore findings.
5. Route to FINISH when the task is fully addressed.
## Current State
Iteration: {iteration_count}/{max_iterations}
Previous agent outputs: {agent_outputs}
"""
def create_supervisor_node(model_name: str = "gpt-4o", temperature: float = 0.1):
"""
Factory that returns a supervisor node function.
The supervisor uses a capable model for accurate routing.
Args:
model_name: Model identifier.
temperature: LLM temperature (low for consistent routing).
"""
llm = ChatOpenAI(model=model_name, temperature=temperature).with_structured_output(
RoutingDecision
)
def supervisor_node(
state: SisyphusState,
) -> Command[Literal["explore", "oracle", "coder", "__end__"]]:
"""
LangGraph node: the Sisyphus supervisor.
Classifies the user's intent, decides which agent to route to,
and returns a Command that directs graph execution.
"""
iteration = state.get("iteration_count", 0)
# Safety valve — prevent infinite loops
if iteration >= MAX_ITERATIONS:
return Command(
goto="__end__",
update={
"final_output": "Reached maximum iterations. Here's what was accomplished:\n"
+ "\n".join(
f"- {k}: {v[:200]}" for k, v in state.get("agent_outputs", {}).items()
),
},
)
# Format the system prompt with current state
prompt = SUPERVISOR_SYSTEM_PROMPT.format(
iteration_count=iteration,
max_iterations=MAX_ITERATIONS,
agent_outputs=_summarize_outputs(state.get("agent_outputs", {})),
)
# Invoke the LLM to get a structured routing decision
decision: RoutingDecision = llm.invoke(
[SystemMessage(content=prompt)] + state["messages"]
)
# Route to FINISH
if decision.next_agent == "FINISH":
return Command(
goto="__end__",
update={
"intent": decision.intent,
"next_agent": "FINISH",
"final_output": decision.delegation_notes,
},
)
# Route to a worker agent
return Command(
goto=decision.next_agent,
update={
"intent": decision.intent,
"next_agent": decision.next_agent,
"iteration_count": iteration + 1,
},
)
return supervisor_node
def _summarize_outputs(outputs: dict[str, str]) -> str:
"""Summarize agent outputs for the supervisor's context window."""
if not outputs:
return "(none yet)"
parts = []
for agent, output in outputs.items():
# Truncate long outputs to keep supervisor context manageable
# This mirrors Loki's summarization_threshold behavior
if len(output) > 2000:
output = output[:2000] + "... (truncated)"
parts.append(f"[{agent}]: {output}")
return "\n\n".join(parts)