From 9bab6a0c2d0e18550f6a8894b6ebdbf22e1a226b Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Wed, 15 Apr 2026 12:47:38 -0600 Subject: [PATCH] Sisyphus agent recreated in LangChain to figure out how it works and how to use it --- examples/langchain-sisyphus/README.md | 416 ++++++++++++++++++ examples/langchain-sisyphus/pyproject.toml | 29 ++ .../sisyphus_langchain/__init__.py | 5 + .../sisyphus_langchain/agents/__init__.py | 1 + .../sisyphus_langchain/agents/coder.py | 145 ++++++ .../sisyphus_langchain/agents/explore.py | 110 +++++ .../sisyphus_langchain/agents/oracle.py | 124 ++++++ .../sisyphus_langchain/agents/supervisor.py | 227 ++++++++++ .../sisyphus_langchain/cli.py | 155 +++++++ .../sisyphus_langchain/graph.py | 115 +++++ .../sisyphus_langchain/state.py | 100 +++++ .../sisyphus_langchain/tools/__init__.py | 1 + .../sisyphus_langchain/tools/filesystem.py | 175 ++++++++ .../sisyphus_langchain/tools/project.py | 142 ++++++ 14 files changed, 1745 insertions(+) create mode 100644 examples/langchain-sisyphus/README.md create mode 100644 examples/langchain-sisyphus/pyproject.toml create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/__init__.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/agents/__init__.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/agents/coder.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/agents/explore.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/agents/oracle.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/cli.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/graph.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/state.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/tools/__init__.py create mode 100644 examples/langchain-sisyphus/sisyphus_langchain/tools/filesystem.py create 
mode 100644 examples/langchain-sisyphus/sisyphus_langchain/tools/project.py diff --git a/examples/langchain-sisyphus/README.md b/examples/langchain-sisyphus/README.md new file mode 100644 index 0000000..120dfc3 --- /dev/null +++ b/examples/langchain-sisyphus/README.md @@ -0,0 +1,416 @@ +# Sisyphus in LangChain/LangGraph + +A faithful recreation of [Loki's Sisyphus agent](../../assets/agents/sisyphus/) using [LangGraph](https://docs.langchain.com/langgraph/) — LangChain's framework for stateful, multi-agent workflows. + +This project exists to help you understand LangChain/LangGraph by mapping every concept to its Loki equivalent. + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ SUPERVISOR NODE │ +│ Intent classification → Routing decision → Command(goto=) │ +│ │ +│ Loki equivalent: sisyphus/config.yaml │ +│ (agent__spawn → Command, agent__collect → graph edge) │ +└──────────┬──────────────┬──────────────┬────────────────────┘ + │ │ │ + ▼ ▼ ▼ + ┌────────────┐ ┌────────────┐ ┌────────────┐ + │ EXPLORE │ │ ORACLE │ │ CODER │ + │ (research) │ │ (advise) │ │ (build) │ + │ │ │ │ │ │ + │ read-only │ │ read-only │ │ read+write │ + │ tools │ │ tools │ │ tools │ + └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ + │ │ │ + └──────────────┼──────────────┘ + │ + back to supervisor +``` + +## Concept Map: Loki → LangGraph + +This is the key reference. Every row maps a Loki concept to its LangGraph equivalent. + +### Core Architecture + +| Loki Concept | LangGraph Equivalent | Where in Code | +|---|---|---| +| Agent config (config.yaml) | Node function + system prompt | `agents/explore.py`, etc. | +| Agent instructions | System prompt string | `EXPLORE_SYSTEM_PROMPT`, etc. 
| +| Agent tools (tools.sh) | `@tool`-decorated Python functions | `tools/filesystem.py`, `tools/project.py` | +| Agent session (chat loop) | Graph state + message list | `state.py` → `SisyphusState.messages` | +| `agent__spawn --agent X` | `Command(goto="X")` | `agents/supervisor.py` | +| `agent__collect --id` | Graph edge (implicit — workers return to supervisor) | `graph.py` → `add_edge("explore", "supervisor")` | +| `agent__check` (non-blocking) | Not needed (graph handles scheduling) | — | +| `agent__cancel` | Not needed (graph handles lifecycle) | — | +| `can_spawn_agents: true` | Node has routing logic (supervisor) | `agents/supervisor.py` | +| `max_concurrent_agents: 4` | `Send()` API for parallel fan-out | See [Parallel Execution](#parallel-execution) | +| `max_agent_depth: 3` | `recursion_limit` in config | `cli.py` → `recursion_limit: 50` | +| `summarization_threshold` | Manual truncation in supervisor | `supervisor.py` → `_summarize_outputs()` | + +### Tool System + +| Loki Concept | LangGraph Equivalent | Notes | +|---|---|---| +| `tools.sh` with `@cmd` annotations | `@tool` decorator | Loki compiles bash annotations to JSON schema; LangChain generates schema from the Python function signature + docstring | +| `@option --pattern!` (required arg) | Function parameter without default | `def search_content(pattern: str)` | +| `@option --lines` (optional arg) | Parameter with default | `def read_file(path: str, limit: int = 200)` | +| `@env LLM_OUTPUT=/dev/stdout` | Return value | LangChain tools return strings; Loki tools write to `$LLM_OUTPUT` | +| `@describe` | Docstring | The tool's docstring becomes the description the LLM sees | +| Global tools (`fs_read.sh`, etc.) 
| Shared tool imports | Both agents import from `tools/filesystem.py` | +| Agent-specific tools | Per-node tool binding | `llm.bind_tools(EXPLORE_TOOLS)` vs `llm.bind_tools(CODER_TOOLS)` | +| `.shared/utils.sh` | `tools/project.py` | Shared project detection utilities | +| `detect_project()` heuristic | `detect_project()` in Python | Same logic: check Cargo.toml → go.mod → package.json → etc. | +| LLM fallback for unknown projects | (omitted) | The agents themselves can reason about unknown project types | + +### State & Memory + +| Loki Concept | LangGraph Equivalent | Notes | +|---|---|---| +| Agent session (conversation history) | `SisyphusState.messages` | `Annotated[list, add_messages]` — the reducer appends instead of replacing | +| `agent_session: temp` | `MemorySaver` checkpointer | Loki's temp sessions are ephemeral; MemorySaver is in-memory (lost on restart) | +| Per-agent isolation | Per-node system prompt + tools | In Loki agents have separate sessions; in LangGraph they share messages but have different system prompts | +| `{{project_dir}}` variable | `SisyphusState.project_dir` | Loki interpolates variables into prompts; LangGraph stores them in state | +| `{{__tools__}}` injection | `llm.bind_tools()` | Loki injects tool descriptions into the prompt; LangChain attaches them to the API call | + +### Orchestration + +| Loki Concept | LangGraph Equivalent | Notes | +|---|---|---| +| Intent classification table | `RoutingDecision` structured output | Loki does this in free text; LangGraph forces typed JSON | +| Oracle triggers ("How should I...") | Supervisor prompt + structured output | Same trigger phrases, enforced via system prompt | +| Coder delegation format | Supervisor builds HumanMessage | The structured prompt (Goal/Reference Files/Conventions/Constraints) | +| `agent__spawn` (parallel) | `Send()` API | Dynamic fan-out to multiple nodes | +| Todo system (`todo__init`, etc.) 
| `SisyphusState.todos` | State field with a merge reducer | +| `auto_continue: true` | Supervisor loop (iteration counter) | Supervisor re-routes until FINISH or max iterations | +| `max_auto_continues: 25` | `MAX_ITERATIONS = 15` | Safety valve to prevent infinite loops | +| `user__ask` / `user__confirm` | `interrupt()` API | Pauses graph, surfaces question to caller, resumes with answer | +| Escalation (child → parent → user) | `interrupt()` in any node | Any node can pause; the caller handles the interaction | + +### Execution Model + +| Loki Concept | LangGraph Equivalent | Notes | +|---|---|---| +| `loki --agent sisyphus` | `python -m sisyphus_langchain.cli` | CLI entry point | +| REPL mode | `cli.py` → `repl()` | Interactive loop with thread persistence | +| One-shot mode | `cli.py` → `run_query()` | Single query, print result, exit | +| Streaming output | `graph.stream()` | LangGraph supports per-node streaming | +| `inject_spawn_instructions` | (always on) | System prompts are always included | +| `inject_todo_instructions` | (always on) | Todo instructions could be added to prompts | + +## How the Execution Flow Works + +### 1. User sends a message + +```python +graph.invoke({"messages": [HumanMessage("Add a health check endpoint")]}) +``` + +### 2. Supervisor classifies intent + +The supervisor LLM reads the message and produces a `RoutingDecision`: +```json +{ + "intent": "implementation", + "next_agent": "explore", + "delegation_notes": "Find existing API endpoint patterns, route structure, and health check conventions" +} +``` + +### 3. Supervisor routes via Command + +```python +return Command(goto="explore", update={"intent": "implementation", "iteration_count": 1}) +``` + +### 4. Explore agent runs + +- Receives the full message history (including the user's request) +- Calls read-only tools (search_content, search_files, read_file) +- Returns findings in messages + +### 5. 
Control returns to supervisor + +The graph edge `explore → supervisor` fires automatically. + +### 6. Supervisor reviews and routes again + +Now it has explore's findings. It routes to coder with context: +```json +{ + "intent": "implementation", + "next_agent": "coder", + "delegation_notes": "Implement health check endpoint following patterns found in src/routes/" +} +``` + +### 7. Coder implements + +- Reads explore's findings from the message history +- Writes files via `write_file` tool +- Runs `verify_build` to check compilation + +### 8. Supervisor verifies and finishes + +```json +{ + "intent": "implementation", + "next_agent": "FINISH", + "delegation_notes": "Added /health endpoint in src/routes/health.py. Build passes." +} +``` + +## Key Differences from Loki + +### What LangGraph does better + +1. **Declarative graph** — The topology is visible and debuggable. Loki's orchestration is emergent from the LLM's tool calls. +2. **Typed state** — `SisyphusState` is a TypedDict with reducers. Loki's state is implicit in the conversation. +3. **Checkpointing** — Built-in persistence. Loki manages sessions manually. +4. **Time-travel debugging** — Inspect any checkpoint. Loki has no equivalent. +5. **Structured routing** — `RoutingDecision` forces valid JSON. Loki relies on the LLM calling the right tool. + +### What Loki does better + +1. **True parallelism** — `agent__spawn` runs multiple agents concurrently in separate threads. This LangGraph implementation is sequential (see [Parallel Execution](#parallel-execution) for how to add it). +2. **Agent isolation** — Each Loki agent has its own session, tools, and config. LangGraph nodes share state. +3. **Teammate messaging** — Loki agents can send messages to siblings. LangGraph nodes communicate only through shared state. +4. **Dynamic tool compilation** — Loki compiles bash/python/typescript tools at startup. LangChain tools are statically defined. +5. 
**Escalation protocol** — Loki's child-to-parent escalation is sophisticated. LangGraph's `interrupt()` is simpler but less structured. +6. **Task queues with dependencies** — Loki's `agent__task_create` supports dependency DAGs. LangGraph's routing is simpler (hub-and-spoke). + +## Running It + +### Prerequisites + +```bash +# Python 3.11+ +python --version + +# Set your API key +export OPENAI_API_KEY="sk-..." +``` + +### Install + +```bash +cd examples/langchain-sisyphus + +# With pip +pip install -e . + +# Or with uv (recommended) +uv pip install -e . +``` + +### Usage + +```bash +# Interactive REPL (like `loki --agent sisyphus`) +sisyphus + +# One-shot query +sisyphus "Find all TODO comments in the codebase" + +# With custom models (cost optimization) +sisyphus --explore-model gpt-4o-mini --coder-model gpt-4o "Add input validation to the API" + +# Programmatic usage +python -c " +from sisyphus_langchain import build_graph +from langchain_core.messages import HumanMessage + +graph = build_graph() +result = graph.invoke({ + 'messages': [HumanMessage('What patterns does this codebase use?')], + 'intent': 'ambiguous', + 'next_agent': '', + 'iteration_count': 0, + 'todos': [], + 'agent_outputs': {}, + 'final_output': '', + 'project_dir': '.', +}, config={'configurable': {'thread_id': 'demo'}, 'recursion_limit': 50}) +print(result['final_output']) +" +``` + +### Using Anthropic Models + +Replace `ChatOpenAI` with `ChatAnthropic` in the agent factories: + +```python +from langchain_anthropic import ChatAnthropic + +# In agents/oracle.py: +llm = ChatAnthropic(model="claude-sonnet-4-20250514", temperature=0.2).bind_tools(ORACLE_TOOLS) +``` + +## Deployment + +### Option 1: Standalone Script (Simplest) + +Just run the CLI directly. No infrastructure needed. 
+ +```bash +sisyphus "Add a health check endpoint" +``` + +### Option 2: FastAPI Server + +```python +# server.py +from fastapi import FastAPI +from langserve import add_routes +from sisyphus_langchain import build_graph + +app = FastAPI(title="Sisyphus API") +graph = build_graph() +add_routes(app, graph, path="/agent") + +# Run: uvicorn server:app --host 0.0.0.0 --port 8000 +# Call: POST http://localhost:8000/agent/invoke +``` + +### Option 3: LangGraph Platform (Production) + +Create a `langgraph.json` at the project root: + +```json +{ + "graphs": { + "sisyphus": "./sisyphus_langchain/graph.py:build_graph" + }, + "dependencies": ["./sisyphus_langchain"], + "env": ".env" +} +``` + +Then deploy: +```bash +pip install langgraph-cli +langgraph deploy +``` + +This gives you: +- Durable checkpointing (PostgreSQL) +- Background runs +- Streaming API +- Zero-downtime deployments +- Built-in observability + +### Option 4: Docker + +```dockerfile +FROM python:3.12-slim +WORKDIR /app +COPY . . +RUN pip install -e . +CMD ["sisyphus"] +``` + +```bash +docker build -t sisyphus . +docker run -it -e OPENAI_API_KEY=$OPENAI_API_KEY sisyphus +``` + +## Parallel Execution + +This implementation routes sequentially for simplicity. 
To add Loki-style parallel agent execution, use LangGraph's `Send()` API: + +```python +from langgraph.types import Send + +def supervisor_node(state): + # Fan out to multiple explore agents in parallel + # (like Loki's agent__spawn called multiple times) + return [ + Send("explore", { + **state, + "messages": state["messages"] + [ + HumanMessage("Find existing API endpoint patterns") + ], + }), + Send("explore", { + **state, + "messages": state["messages"] + [ + HumanMessage("Find data models and database patterns") + ], + }), + ] +``` + +This is equivalent to Loki's pattern of spawning multiple explore agents: +``` +agent__spawn --agent explore --prompt "Find API patterns" +agent__spawn --agent explore --prompt "Find database patterns" +agent__collect --id +agent__collect --id +``` + +## Adding Human-in-the-Loop + +To replicate Loki's `user__ask` / `user__confirm` tools, use LangGraph's `interrupt()`: + +```python +from langgraph.types import interrupt + +def supervisor_node(state): + # Pause and ask the user (like Loki's user__ask) + answer = interrupt({ + "question": "How should we structure the authentication?", + "options": [ + "JWT with httpOnly cookies (Recommended)", + "Session-based with Redis", + "OAuth2 with external provider", + ], + }) + # `answer` contains the user's selection when the graph resumes +``` + +## Project Structure + +``` +examples/langchain-sisyphus/ +├── pyproject.toml # Dependencies & build config +├── README.md # This file +└── sisyphus_langchain/ + ├── __init__.py # Package entry point + ├── cli.py # CLI (REPL + one-shot mode) + ├── graph.py # Graph assembly (wires nodes + edges) + ├── state.py # Shared state schema (TypedDict) + ├── agents/ + │ ├── __init__.py + │ ├── supervisor.py # Sisyphus orchestrator (intent → routing) + │ ├── explore.py # Read-only codebase researcher + │ ├── oracle.py # Architecture/debugging advisor + │ └── coder.py # Implementation worker + └── tools/ + ├── __init__.py + ├── filesystem.py # File 
read/write/search/glob tools + └── project.py # Project detection, build, test tools +``` + +### File-to-Loki Mapping + +| This Project | Loki Equivalent | +|---|---| +| `state.py` | Session context + todo state (implicit in Loki) | +| `graph.py` | `src/supervisor/mod.rs` (runtime orchestration) | +| `cli.py` | `src/main.rs` (CLI entry point) | +| `agents/supervisor.py` | `assets/agents/sisyphus/config.yaml` | +| `agents/explore.py` | `assets/agents/explore/config.yaml` + `tools.sh` | +| `agents/oracle.py` | `assets/agents/oracle/config.yaml` + `tools.sh` | +| `agents/coder.py` | `assets/agents/coder/config.yaml` + `tools.sh` | +| `tools/filesystem.py` | `assets/functions/tools/fs_*.sh` | +| `tools/project.py` | `assets/agents/.shared/utils.sh` + `sisyphus/tools.sh` | + +## Further Reading + +- [LangGraph Documentation](https://docs.langchain.com/langgraph/) +- [LangGraph Multi-Agent Tutorial](https://docs.langchain.com/langgraph/how-tos/multi-agent-systems) +- [Loki Agents Documentation](../../docs/AGENTS.md) +- [Loki Sisyphus README](../../assets/agents/sisyphus/README.md) +- [LangGraph Supervisor Library](https://github.com/langchain-ai/langgraph-supervisor-py) diff --git a/examples/langchain-sisyphus/pyproject.toml b/examples/langchain-sisyphus/pyproject.toml new file mode 100644 index 0000000..8012272 --- /dev/null +++ b/examples/langchain-sisyphus/pyproject.toml @@ -0,0 +1,29 @@ +[project] +name = "sisyphus-langchain" +version = "0.1.0" +description = "Loki's Sisyphus multi-agent orchestrator recreated in LangChain/LangGraph" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "langgraph>=0.3.0", + "langchain>=0.3.0", + "langchain-openai>=0.3.0", + "langchain-anthropic>=0.3.0", + "langchain-core>=0.3.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "ruff>=0.8.0", +] +server = [ + "langgraph-api>=0.1.0", +] + +[project.scripts] +sisyphus = "sisyphus_langchain.cli:main" + +[build-system] +requires = ["hatchling"] 
+build-backend = "hatchling.build" diff --git a/examples/langchain-sisyphus/sisyphus_langchain/__init__.py b/examples/langchain-sisyphus/sisyphus_langchain/__init__.py new file mode 100644 index 0000000..9653e84 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/__init__.py @@ -0,0 +1,5 @@ +"""Sisyphus multi-agent orchestrator — a LangGraph recreation of Loki's Sisyphus agent.""" + +from sisyphus_langchain.graph import build_graph + +__all__ = ["build_graph"] diff --git a/examples/langchain-sisyphus/sisyphus_langchain/agents/__init__.py b/examples/langchain-sisyphus/sisyphus_langchain/agents/__init__.py new file mode 100644 index 0000000..b1dba46 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/agents/__init__.py @@ -0,0 +1 @@ +"""Agent node definitions for the Sisyphus orchestrator.""" diff --git a/examples/langchain-sisyphus/sisyphus_langchain/agents/coder.py b/examples/langchain-sisyphus/sisyphus_langchain/agents/coder.py new file mode 100644 index 0000000..56faf64 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/agents/coder.py @@ -0,0 +1,145 @@ +""" +Coder agent node — the implementation worker. + +Loki equivalent: assets/agents/coder/config.yaml + tools.sh + +In Loki, the coder is the ONLY agent that modifies files. It: + - Receives a structured prompt from sisyphus with code patterns to follow + - Writes files via the write_file tool (never pastes code in chat) + - Verifies builds after every change + - Signals CODER_COMPLETE or CODER_FAILED + +In LangGraph, coder is a node with write-capable tools (read_file, write_file, +search_content, execute_command, verify_build). The supervisor formats a +structured delegation prompt (Goal / Reference Files / Code Patterns / +Conventions / Constraints) and routes to this node. + +Key Loki→LangGraph mapping: + - Loki's "Coder Delegation Format" → the supervisor builds this as a + HumanMessage before routing to the coder node. 
+ - Loki's auto_continue (up to 15) → the supervisor can re-route to coder + if verification fails, up to iteration_count limits. + - Loki's todo system for multi-file changes → the coder updates + state["todos"] as it completes each file. +""" + +from __future__ import annotations + +from langchain_core.messages import SystemMessage +from langchain_openai import ChatOpenAI + +from sisyphus_langchain.state import SisyphusState +from sisyphus_langchain.tools.filesystem import ( + read_file, + search_content, + search_files, + write_file, +) +from sisyphus_langchain.tools.project import ( + execute_command, + run_tests, + verify_build, +) + +# --------------------------------------------------------------------------- +# System prompt — faithfully mirrors coder/config.yaml +# --------------------------------------------------------------------------- +CODER_SYSTEM_PROMPT = """\ +You are a senior engineer. You write code that works on the first try. + +## Your Mission + +Given an implementation task: +1. Check for context provided in the conversation (patterns, conventions, reference files). +2. Fill gaps only — read files NOT already covered in context. +3. Write the code using the write_file tool (NEVER output code in chat). +4. Verify it compiles/builds using verify_build. +5. Provide a summary of what you implemented. + +## Using Provided Context (IMPORTANT) + +Your prompt often contains prior findings from the explore agent: file paths, +code patterns, and conventions. + +**If context is provided:** +1. Use it as your primary reference. Don't re-read files already summarized. +2. Follow the code patterns shown — snippets in context ARE the style guide. +3. Read referenced files ONLY IF you need more detail (full signatures, imports). +4. If context includes a "Conventions" section, follow it exactly. + +**If context is NOT provided or is too vague:** +Fall back to self-exploration: search for similar files, read 1-2 examples, +match their style. 
+ +## Writing Code + +CRITICAL: Write code using the write_file tool. NEVER paste code in chat. + +## Pattern Matching + +Before writing ANY file: +1. Find a similar existing file. +2. Match its style: imports, naming, structure. +3. Follow the same patterns exactly. + +## Verification + +After writing files: +1. Run verify_build to check compilation. +2. If it fails, fix the error (minimal change). +3. Don't move on until build passes. + +## Rules + +1. Write code via tools — never output code to chat. +2. Follow patterns — read existing files first. +3. Verify builds — don't finish without checking. +4. Minimal fixes — if build fails, fix precisely. +5. No refactoring — only implement what's asked. +""" + +# Full tool set — coder gets write access and command execution +CODER_TOOLS = [ + read_file, + write_file, + search_content, + search_files, + execute_command, + verify_build, + run_tests, +] + + +def create_coder_node(model_name: str = "gpt-4o", temperature: float = 0.1): + """ + Factory that returns a coder node function. + + Coder needs a capable model because it writes production code. In Loki, + coder uses the same model as the parent by default. + + Args: + model_name: Model identifier. + temperature: LLM temperature (Loki coder uses 0.1 for consistency). + """ + llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(CODER_TOOLS) + + def coder_node(state: SisyphusState) -> dict: + """ + LangGraph node: run the coder agent. + + Reads conversation history (including the supervisor's structured + delegation prompt), invokes the LLM with write-capable tools, + and returns the result. 
+ """ + response = llm.invoke( + [SystemMessage(content=CODER_SYSTEM_PROMPT)] + state["messages"] + ) + return { + "messages": [response], + "agent_outputs": { + **state.get("agent_outputs", {}), + "coder": response.content, + }, + } + + return coder_node diff --git a/examples/langchain-sisyphus/sisyphus_langchain/agents/explore.py b/examples/langchain-sisyphus/sisyphus_langchain/agents/explore.py new file mode 100644 index 0000000..7859396 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/agents/explore.py @@ -0,0 +1,110 @@ +""" +Explore agent node — the read-only codebase researcher. + +Loki equivalent: assets/agents/explore/config.yaml + tools.sh + +In Loki, the explore agent is spawned via `agent__spawn --agent explore --prompt "..."` +and runs as an isolated subprocess with its own session. It ends with +"EXPLORE_COMPLETE" so the parent knows it's finished. + +In LangGraph, the explore agent is a *node* in the graph. The supervisor routes +to it via `Command(goto="explore")`. It reads the latest message (the supervisor's +delegation prompt), calls the LLM with read-only tools, and writes its findings +back to the shared message list. The graph edge then returns control to the +supervisor. + +Key differences from Loki: + - No isolated session — shares the graph's message list (but has its own + system prompt and tool set, just like Loki's per-agent config). + - No "EXPLORE_COMPLETE" sentinel — the graph edge handles control flow. + - No output summarization — LangGraph's state handles context management. 
+""" + +from __future__ import annotations + +from langchain_core.messages import SystemMessage +from langchain_openai import ChatOpenAI + +from sisyphus_langchain.state import SisyphusState +from sisyphus_langchain.tools.filesystem import ( + list_directory, + read_file, + search_content, + search_files, +) + +# --------------------------------------------------------------------------- +# System prompt — faithfully mirrors explore/config.yaml +# --------------------------------------------------------------------------- +EXPLORE_SYSTEM_PROMPT = """\ +You are a codebase explorer. Your job: Search, find, report. Nothing else. + +## Your Mission + +Given a search task, you: +1. Search for relevant files and patterns +2. Read key files to understand structure +3. Report findings concisely + +## Strategy + +1. **Find first, read second** — Never read a file without knowing why. +2. **Use search_content to locate** — find exactly where things are defined. +3. **Use search_files to discover** — find files by name pattern. +4. **Read targeted sections** — use offset and limit to read only relevant lines. +5. **Never read entire large files** — if a file is 500+ lines, read the relevant section only. + +## Output Format + +Always end your response with a structured findings summary: + +FINDINGS: +- [Key finding 1] +- [Key finding 2] +- Relevant files: [list of paths] + +## Rules + +1. Be fast — don't read every file, read representative ones. +2. Be focused — answer the specific question asked. +3. Be concise — report findings, not your process. +4. Never modify files — you are read-only. +5. Limit reads — max 5 file reads per exploration. +""" + +# Read-only tools — mirrors explore's tool set (no write_file, no execute_command) +EXPLORE_TOOLS = [read_file, search_content, search_files, list_directory] + + +def create_explore_node(model_name: str = "gpt-4o-mini", temperature: float = 0.1): + """ + Factory that returns an explore node function bound to a specific model. 
+ + In Loki, the model is set per-agent in config.yaml. Here we parameterize it + so you can use a cheap model for exploration (cost optimization). + + Args: + model_name: OpenAI model identifier. + temperature: LLM temperature (Loki explore uses 0.1). + """ + llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(EXPLORE_TOOLS) + + def explore_node(state: SisyphusState) -> dict: + """ + LangGraph node: run the explore agent. + + Reads the conversation history, applies the explore system prompt, + invokes the LLM with read-only tools, and returns the response. + """ + response = llm.invoke( + [SystemMessage(content=EXPLORE_SYSTEM_PROMPT)] + state["messages"] + ) + return { + "messages": [response], + "agent_outputs": { + **state.get("agent_outputs", {}), + "explore": response.content, + }, + } + + return explore_node diff --git a/examples/langchain-sisyphus/sisyphus_langchain/agents/oracle.py b/examples/langchain-sisyphus/sisyphus_langchain/agents/oracle.py new file mode 100644 index 0000000..aa1dfd0 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/agents/oracle.py @@ -0,0 +1,124 @@ +""" +Oracle agent node — the high-IQ architecture and debugging advisor. + +Loki equivalent: assets/agents/oracle/config.yaml + tools.sh + +In Loki, the oracle is a READ-ONLY advisor spawned for: + - Architecture decisions and multi-system tradeoffs + - Complex debugging (after 2+ failed fix attempts) + - Code/design review + - Risk assessment + +It uses temperature 0.2 (slightly higher than explore/coder for more creative +reasoning) and ends with "ORACLE_COMPLETE". + +In LangGraph, oracle is a node that receives the full message history, reasons +about the problem, and writes structured advice back. It has read-only tools +only — it never modifies files. + +Key Loki→LangGraph mapping: + - Loki oracle triggers (the "MUST spawn oracle when..." rules in sisyphus) + become routing conditions in the supervisor node. 
+ - Oracle's structured output format (Analysis/Recommendation/Reasoning/Risks) + is enforced via the system prompt, same as in Loki. +""" + +from __future__ import annotations + +from langchain_core.messages import SystemMessage +from langchain_openai import ChatOpenAI + +from sisyphus_langchain.state import SisyphusState +from sisyphus_langchain.tools.filesystem import ( + list_directory, + read_file, + search_content, + search_files, +) + +# --------------------------------------------------------------------------- +# System prompt — faithfully mirrors oracle/config.yaml +# --------------------------------------------------------------------------- +ORACLE_SYSTEM_PROMPT = """\ +You are Oracle — a senior architect and debugger consulted for complex decisions. + +## Your Role + +You are READ-ONLY. You analyze, advise, and recommend. You do NOT implement. + +## When You're Consulted + +1. **Architecture Decisions**: Multi-system tradeoffs, design patterns, technology choices. +2. **Complex Debugging**: After 2+ failed fix attempts, deep analysis needed. +3. **Code Review**: Evaluating proposed designs or implementations. +4. **Risk Assessment**: Security, performance, or reliability concerns. + +## Your Process + +1. **Understand**: Read relevant code, understand the full context. +2. **Analyze**: Consider multiple angles and tradeoffs. +3. **Recommend**: Provide clear, actionable advice. +4. **Justify**: Explain your reasoning. + +## Output Format + +Structure your response as: + +## Analysis +[Your understanding of the situation] + +## Recommendation +[Clear, specific advice] + +## Reasoning +[Why this is the right approach] + +## Risks/Considerations +[What to watch out for] + +## Rules + +1. Never modify files — you advise, others implement. +2. Be thorough — read all relevant context before advising. +3. Be specific — general advice isn't helpful. +4. Consider tradeoffs — there are rarely perfect solutions. +5. 
Stay focused — answer the specific question asked. +""" + +# Read-only tools — same set as explore (oracle never writes) +ORACLE_TOOLS = [read_file, search_content, search_files, list_directory] + + +def create_oracle_node(model_name: str = "gpt-4o", temperature: float = 0.2): + """ + Factory that returns an oracle node function. + + Oracle uses a more expensive model than explore because it needs deeper + reasoning. In Loki, the model is inherited from the global config unless + overridden in oracle/config.yaml. + + Args: + model_name: Model identifier (use a strong reasoning model). + temperature: LLM temperature (Loki oracle uses 0.2). + """ + llm = ChatOpenAI(model=model_name, temperature=temperature).bind_tools(ORACLE_TOOLS) + + def oracle_node(state: SisyphusState) -> dict: + """ + LangGraph node: run the oracle agent. + + Reads conversation history, applies the oracle system prompt, + invokes the LLM, and returns structured advice. + """ + response = llm.invoke( + [SystemMessage(content=ORACLE_SYSTEM_PROMPT)] + state["messages"] + ) + return { + "messages": [response], + "agent_outputs": { + **state.get("agent_outputs", {}), + "oracle": response.content, + }, + } + + return oracle_node diff --git a/examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py b/examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py new file mode 100644 index 0000000..7fb4771 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/agents/supervisor.py @@ -0,0 +1,227 @@ +""" +Sisyphus supervisor node — the orchestrator that classifies intent and routes. + +Loki equivalent: assets/agents/sisyphus/config.yaml + +This is the brain of the system. In Loki, Sisyphus is the top-level agent that: + 1. Classifies every incoming request (trivial / exploration / implementation / + architecture / ambiguous) + 2. Routes to the appropriate sub-agent (explore, coder, oracle) + 3. Manages the todo list for multi-step tasks + 4. 
Verifies results and decides when the task is complete + +In LangGraph, the supervisor is a node that returns `Command(goto="agent_name")` +to route control. This replaces Loki's `agent__spawn` + `agent__collect` pattern +with a declarative graph edge. + +Key Loki→LangGraph mapping: + - agent__spawn --agent explore → Command(goto="explore") + - agent__spawn --agent coder → Command(goto="coder") + - agent__spawn --agent oracle → Command(goto="oracle") + - agent__check / agent__collect → (implicit: graph edges return to supervisor) + - todo__init / todo__add → state["todos"] updates + - user__ask / user__confirm → interrupt() for human-in-the-loop + +Parallel execution note: + Loki can spawn multiple explore agents in parallel. In LangGraph, you'd use + the Send() API for dynamic fan-out. For simplicity, this implementation uses + sequential routing. See the README for how to add parallel fan-out. +""" + +from __future__ import annotations + +from typing import Literal + +from langchain_core.messages import SystemMessage +from langchain_openai import ChatOpenAI +from langgraph.types import Command +from pydantic import BaseModel, Field + +from sisyphus_langchain.state import SisyphusState + +# --------------------------------------------------------------------------- +# Maximum iterations before forcing completion (safety valve) +# Mirrors Loki's max_auto_continues: 25 +# --------------------------------------------------------------------------- +MAX_ITERATIONS = 15 + +# --------------------------------------------------------------------------- +# Structured output schema for the supervisor's routing decision. +# +# In Loki, the supervisor is an LLM that produces free-text and calls tools +# like agent__spawn. In LangGraph, we use structured output to force the +# LLM into a typed routing decision — more reliable than parsing free text. 
+# --------------------------------------------------------------------------- +class RoutingDecision(BaseModel): + """The supervisor's decision about what to do next.""" + + intent: Literal["trivial", "exploration", "implementation", "architecture", "ambiguous"] = Field( + description="Classified intent of the user's request." + ) + next_agent: Literal["explore", "oracle", "coder", "FINISH"] = Field( + description=( + "Which agent to route to. 'explore' for research/discovery, " + "'oracle' for architecture/design/debugging advice, " + "'coder' for implementation, 'FINISH' if the task is complete." + ) + ) + delegation_notes: str = Field( + description=( + "Brief instructions for the target agent: what to look for (explore), " + "what to analyze (oracle), or what to implement (coder). " + "For FINISH, summarize what was accomplished." + ) + ) + + +# --------------------------------------------------------------------------- +# Supervisor system prompt — faithfully mirrors sisyphus/config.yaml +# --------------------------------------------------------------------------- +SUPERVISOR_SYSTEM_PROMPT = """\ +You are Sisyphus — an orchestrator that drives coding tasks to completion. + +Your job: Classify → Delegate → Verify → Complete. + +## Intent Classification (BEFORE every action) + +| Type | Signal | Action | +|-----------------|-----------------------------------------------------|----------------------| +| trivial | Single file, known location, typo fix | Route to FINISH | +| exploration | "Find X", "Where is Y", "List all Z" | Route to explore | +| implementation | "Add feature", "Fix bug", "Write code" | Route to coder | +| architecture | See oracle triggers below | Route to oracle | +| ambiguous | Unclear scope, multiple interpretations | Route to FINISH with a clarifying question | + +## Oracle Triggers (MUST route to oracle when you see these) + +Route to oracle ANY time the user asks about: +- "How should I..." / "What's the best way to..." 
— design/approach questions +- "Why does X keep..." / "What's wrong with..." — complex debugging +- "Should I use X or Y?" — technology or pattern choices +- "How should this be structured?" — architecture +- "Review this" / "What do you think of..." — code/design review +- Tradeoff questions, multi-component questions, vague/open-ended questions + +## Agent Specializations + +| Agent | Use For | +|---------|-----------------------------------------------| +| explore | Find patterns, understand code, search | +| coder | Write/edit files, implement features | +| oracle | Architecture decisions, complex debugging | + +## Workflow Patterns + +### Implementation task: explore → coder +1. Route to explore to find existing patterns and conventions. +2. Review explore findings. +3. Route to coder with a structured prompt including the explore findings. +4. Verify the coder's output (check for CODER_COMPLETE or CODER_FAILED). + +### Architecture question: explore + oracle +1. Route to explore to find relevant code. +2. Route to oracle with the explore findings for analysis. + +### Simple question: oracle directly +For pure design/architecture questions, route to oracle directly. + +## Rules + +1. Always classify before acting. +2. You are a coordinator, not an implementer. +3. Route to oracle for ANY design/architecture question. +4. When routing to coder, include code patterns from explore findings. +5. Route to FINISH when the task is fully addressed. + +## Current State + +Iteration: {iteration_count}/{max_iterations} +Previous agent outputs: {agent_outputs} +""" + + +def create_supervisor_node(model_name: str = "gpt-4o", temperature: float = 0.1): + """ + Factory that returns a supervisor node function. + + The supervisor uses a capable model for accurate routing. + + Args: + model_name: Model identifier. + temperature: LLM temperature (low for consistent routing). 
+ """ + llm = ChatOpenAI(model=model_name, temperature=temperature).with_structured_output( + RoutingDecision + ) + + def supervisor_node( + state: SisyphusState, + ) -> Command[Literal["explore", "oracle", "coder", "__end__"]]: + """ + LangGraph node: the Sisyphus supervisor. + + Classifies the user's intent, decides which agent to route to, + and returns a Command that directs graph execution. + """ + iteration = state.get("iteration_count", 0) + + # Safety valve — prevent infinite loops + if iteration >= MAX_ITERATIONS: + return Command( + goto="__end__", + update={ + "final_output": "Reached maximum iterations. Here's what was accomplished:\n" + + "\n".join( + f"- {k}: {v[:200]}" for k, v in state.get("agent_outputs", {}).items() + ), + }, + ) + + # Format the system prompt with current state + prompt = SUPERVISOR_SYSTEM_PROMPT.format( + iteration_count=iteration, + max_iterations=MAX_ITERATIONS, + agent_outputs=_summarize_outputs(state.get("agent_outputs", {})), + ) + + # Invoke the LLM to get a structured routing decision + decision: RoutingDecision = llm.invoke( + [SystemMessage(content=prompt)] + state["messages"] + ) + + # Route to FINISH + if decision.next_agent == "FINISH": + return Command( + goto="__end__", + update={ + "intent": decision.intent, + "next_agent": "FINISH", + "final_output": decision.delegation_notes, + }, + ) + + # Route to a worker agent + return Command( + goto=decision.next_agent, + update={ + "intent": decision.intent, + "next_agent": decision.next_agent, + "iteration_count": iteration + 1, + }, + ) + + return supervisor_node + + +def _summarize_outputs(outputs: dict[str, str]) -> str: + """Summarize agent outputs for the supervisor's context window.""" + if not outputs: + return "(none yet)" + parts = [] + for agent, output in outputs.items(): + # Truncate long outputs to keep supervisor context manageable + # This mirrors Loki's summarization_threshold behavior + if len(output) > 2000: + output = output[:2000] + "... 
(truncated)"
        parts.append(f"[{agent}]: {output}")
    return "\n\n".join(parts)
diff --git a/examples/langchain-sisyphus/sisyphus_langchain/cli.py b/examples/langchain-sisyphus/sisyphus_langchain/cli.py new file mode 100644 index 0000000..3c1387f --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/cli.py @@ -0,0 +1,155 @@
"""
CLI entry point for the Sisyphus LangChain agent.

This mirrors Loki's `loki --agent sisyphus` entry point.

In Loki:
    loki --agent sisyphus
    # Starts a REPL with the sisyphus agent loaded

In this LangChain version:
    python -m sisyphus_langchain.cli
    # or: sisyphus (if installed via pip)

Usage:
    # Interactive REPL mode
    sisyphus

    # One-shot query
    sisyphus "Add a health check endpoint to the API"

    # With custom models
    sisyphus --supervisor-model gpt-4o --explore-model gpt-4o-mini "Find auth patterns"

Environment variables:
    OPENAI_API_KEY — Required for OpenAI models
    ANTHROPIC_API_KEY — Required if using Anthropic models
"""

from __future__ import annotations

import argparse
import sys  # NOTE(review): appears unused in this module — confirm before removing
import uuid

from langchain_core.messages import HumanMessage

from sisyphus_langchain.graph import build_graph


def run_query(graph, query: str, thread_id: str) -> str:
    """
    Run a single query through the Sisyphus graph.

    Args:
        graph: Compiled LangGraph.
        query: User's natural language request.
        thread_id: Session identifier for checkpointing.

    Returns:
        The final output string.
    """
    # Seed every state field explicitly; the graph's reducers take over
    # from there.
    result = graph.invoke(
        {
            "messages": [HumanMessage(content=query)],
            "intent": "ambiguous",
            "next_agent": "",
            "iteration_count": 0,
            "todos": [],
            "agent_outputs": {},
            "final_output": "",
            "project_dir": ".",
        },
        config={
            # thread_id keys the checkpointer, so repeated calls with the
            # same id share conversation history (multi-turn REPL).
            "configurable": {"thread_id": thread_id},
            # Graph-level step cap — independent of the supervisor's own
            # iteration safety valve.
            "recursion_limit": 50,
        },
    )
    return result.get("final_output", "(no output)")


def repl(graph, thread_id: str) -> None:
    """
    Interactive REPL loop — mirrors Loki's REPL mode.

    Maintains conversation across turns via the thread_id (checkpointer).
    """
    print("Sisyphus (LangChain) — type 'quit' to exit")
    print("=" * 50)

    while True:
        try:
            query = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C exits cleanly instead of tracebacking.
            print("\nBye.")
            break

        if not query:
            continue
        if query.lower() in ("quit", "exit", "q"):
            print("Bye.")
            break

        try:
            output = run_query(graph, query, thread_id)
            print(f"\n{output}")
        except Exception as e:
            # Keep the REPL alive on per-query failures.
            print(f"\nError: {e}")


def main() -> None:
    """CLI entry point: parse args, build the graph, run one-shot or REPL."""
    parser = argparse.ArgumentParser(
        description="Sisyphus — multi-agent coding orchestrator (LangChain edition)"
    )
    parser.add_argument(
        "query",
        nargs="?",
        help="One-shot query (omit for REPL mode)",
    )
    parser.add_argument(
        "--supervisor-model",
        default="gpt-4o",
        help="Model for the supervisor (default: gpt-4o)",
    )
    parser.add_argument(
        "--explore-model",
        default="gpt-4o-mini",
        help="Model for the explore agent (default: gpt-4o-mini)",
    )
    parser.add_argument(
        "--oracle-model",
        default="gpt-4o",
        help="Model for the oracle agent (default: gpt-4o)",
    )
    parser.add_argument(
        "--coder-model",
        default="gpt-4o",
        help="Model for the coder agent (default: gpt-4o)",
    )
    parser.add_argument(
        "--thread-id",
        default=None,
        help="Session thread ID for persistence (auto-generated if omitted)",
    )

    args = parser.parse_args()

    graph = build_graph(
        supervisor_model=args.supervisor_model,
        explore_model=args.explore_model,
        oracle_model=args.oracle_model,
        coder_model=args.coder_model,
    )

    # Fresh random thread id per process unless the user pins one.
    thread_id = args.thread_id or f"sisyphus-{uuid.uuid4().hex[:8]}"

    if args.query:
        output = run_query(graph, args.query, thread_id)
        print(output)
    else:
        repl(graph, thread_id)


if __name__ == "__main__":
    main()
diff --git a/examples/langchain-sisyphus/sisyphus_langchain/graph.py b/examples/langchain-sisyphus/sisyphus_langchain/graph.py new file mode 100644 index
0000000..a267e8a --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/graph.py @@ -0,0 +1,115 @@ +""" +Graph assembly — wires together the supervisor and worker nodes. + +This is the LangGraph equivalent of Loki's runtime agent execution engine +(src/supervisor/mod.rs + src/config/request_context.rs). + +In Loki, the runtime: + 1. Loads the agent config (config.yaml) + 2. Compiles tools (tools.sh → binary) + 3. Starts a chat loop: user → LLM → tool calls → LLM → ... + 4. For orchestrators with can_spawn_agents: true, the supervisor module + manages child agent lifecycle (spawn, check, collect, cancel). + +In LangGraph, all of this is declarative: + 1. Define nodes (supervisor, explore, oracle, coder) + 2. Define edges (workers always return to supervisor) + 3. Compile the graph (with optional checkpointer for persistence) + 4. Invoke with initial state + +The graph topology: + + ┌─────────────────────────────────────────────┐ + │ SUPERVISOR │ + │ (classifies intent, routes to workers) │ + └─────┬──────────┬──────────┬─────────────────┘ + │ │ │ + ▼ ▼ ▼ + ┌────────┐ ┌────────┐ ┌────────┐ + │EXPLORE │ │ ORACLE │ │ CODER │ + │(search)│ │(advise)│ │(build) │ + └───┬────┘ └───┬────┘ └───┬────┘ + │ │ │ + └──────────┼──────────┘ + │ + (back to supervisor) + +Every worker returns to the supervisor. The supervisor decides what to do next: +route to another worker, or end the graph. 
+""" + +from __future__ import annotations + +from langgraph.checkpoint.memory import MemorySaver +from langgraph.graph import END, START, StateGraph + +from sisyphus_langchain.agents.coder import create_coder_node +from sisyphus_langchain.agents.explore import create_explore_node +from sisyphus_langchain.agents.oracle import create_oracle_node +from sisyphus_langchain.agents.supervisor import create_supervisor_node +from sisyphus_langchain.state import SisyphusState + + +def build_graph( + *, + supervisor_model: str = "gpt-4o", + explore_model: str = "gpt-4o-mini", + oracle_model: str = "gpt-4o", + coder_model: str = "gpt-4o", + use_checkpointer: bool = True, +): + """ + Build and compile the Sisyphus LangGraph. + + This is the main entry point for creating the agent system. It wires + together all nodes and edges, optionally adds a checkpointer for + persistence, and returns a compiled graph ready to invoke. + + Args: + supervisor_model: Model for the routing supervisor. + explore_model: Model for the explore agent (can be cheaper). + oracle_model: Model for the oracle agent (should be strong). + coder_model: Model for the coder agent. + use_checkpointer: Whether to add MemorySaver for session persistence. + + Returns: + A compiled LangGraph ready to .invoke() or .stream(). + + Model cost optimization (mirrors Loki's per-agent model config): + - supervisor: expensive (accurate routing is critical) + - explore: cheap (just searching, not reasoning deeply) + - oracle: expensive (deep reasoning, architecture advice) + - coder: expensive (writing correct code matters) + """ + # Create the graph builder with our typed state + builder = StateGraph(SisyphusState) + + # ── Register nodes ───────────────────────────────────────────────── + # Each node is a function that takes state and returns state updates. + # This mirrors Loki's agent registration (agents are discovered by + # their config.yaml in the agents/ directory). 
+ builder.add_node("supervisor", create_supervisor_node(supervisor_model)) + builder.add_node("explore", create_explore_node(explore_model)) + builder.add_node("oracle", create_oracle_node(oracle_model)) + builder.add_node("coder", create_coder_node(coder_model)) + + # ── Define edges ─────────────────────────────────────────────────── + # Entry point: every invocation starts at the supervisor + builder.add_edge(START, "supervisor") + + # Workers always return to supervisor (the hub-and-spoke pattern). + # In Loki, this is implicit: agent__collect returns output to the parent, + # and the parent (sisyphus) decides what to do next. + builder.add_edge("explore", "supervisor") + builder.add_edge("oracle", "supervisor") + builder.add_edge("coder", "supervisor") + + # The supervisor node itself uses Command(goto=...) to route, + # so we don't need add_conditional_edges — the Command API + # handles dynamic routing internally. + + # ── Compile ──────────────────────────────────────────────────────── + checkpointer = MemorySaver() if use_checkpointer else None + graph = builder.compile(checkpointer=checkpointer) + + return graph diff --git a/examples/langchain-sisyphus/sisyphus_langchain/state.py b/examples/langchain-sisyphus/sisyphus_langchain/state.py new file mode 100644 index 0000000..52f788a --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/state.py @@ -0,0 +1,100 @@ +""" +Shared state schema for the Sisyphus orchestrator graph. + +In LangGraph, state is the single source of truth that flows through every node. +This is analogous to Loki's per-agent session context, but unified into one typed +dictionary that the entire graph shares. 
+ +Loki Concept Mapping: + - Loki session context → SisyphusState (TypedDict) + - Loki todo__init / todo__add → SisyphusState.todos list + - Loki agent__spawn outputs → SisyphusState.agent_outputs dict + - Loki intent classification → SisyphusState.intent field +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Annotated, Literal + +from langchain_core.messages import BaseMessage +from langgraph.graph.message import add_messages +from typing_extensions import TypedDict + +# --------------------------------------------------------------------------- +# Intent types — mirrors Loki's Sisyphus classification table +# --------------------------------------------------------------------------- +IntentType = Literal[ + "trivial", # Single file, known location, typo fix → handle yourself + "exploration", # "Find X", "Where is Y" → spawn explore + "implementation", # "Add feature", "Fix bug" → spawn coder + "architecture", # Design questions, oracle triggers → spawn oracle + "ambiguous", # Unclear scope → ask user +] + +# --------------------------------------------------------------------------- +# Todo item — mirrors Loki's built-in todo system +# --------------------------------------------------------------------------- +@dataclass +class TodoItem: + """A single task in the orchestrator's todo list.""" + id: int + task: str + done: bool = False + + +def _merge_todos(existing: list[TodoItem], new: list[TodoItem]) -> list[TodoItem]: + """ + Reducer for the todos field. + + LangGraph requires a reducer for any state field that can be written by + multiple nodes. This merges by id: if a todo with the same id already + exists, the incoming version wins (allows marking done). 
+ """ + by_id = {t.id: t for t in existing} + for t in new: + by_id[t.id] = t + return list(by_id.values()) + + +# --------------------------------------------------------------------------- +# Core graph state +# --------------------------------------------------------------------------- +class SisyphusState(TypedDict): + """ + The shared state that flows through every node in the Sisyphus graph. + + Annotated fields use *reducers* — functions that merge concurrent writes. + Without reducers, parallel node outputs would overwrite each other. + """ + + # Conversation history — the `add_messages` reducer appends new messages + # instead of replacing the list. This is critical: every node adds its + # response here, and downstream nodes see the full history. + # + # Loki equivalent: each agent's chat session accumulates messages the same + # way, but messages are scoped per-agent. In LangGraph the shared message + # list IS the inter-agent communication channel. + messages: Annotated[list[BaseMessage], add_messages] + + # Classified intent for the current request + intent: IntentType + + # Which agent the supervisor routed to last + next_agent: str + + # Iteration counter — safety valve analogous to Loki's max_auto_continues + iteration_count: int + + # Todo list for multi-step tracking (mirrors Loki's todo__* tools) + todos: Annotated[list[TodoItem], _merge_todos] + + # Accumulated outputs from sub-agent nodes, keyed by agent name. + # The supervisor reads these to decide what to do next. 
+ agent_outputs: dict[str, str] + + # Final synthesized answer to return to the user + final_output: str + + # The working directory / project path (mirrors Loki's project_dir variable) + project_dir: str diff --git a/examples/langchain-sisyphus/sisyphus_langchain/tools/__init__.py b/examples/langchain-sisyphus/sisyphus_langchain/tools/__init__.py new file mode 100644 index 0000000..9c1ffa6 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/tools/__init__.py @@ -0,0 +1 @@ +"""Tool definitions for Sisyphus agents.""" diff --git a/examples/langchain-sisyphus/sisyphus_langchain/tools/filesystem.py b/examples/langchain-sisyphus/sisyphus_langchain/tools/filesystem.py new file mode 100644 index 0000000..9a50cd4 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/tools/filesystem.py @@ -0,0 +1,175 @@ +""" +Filesystem tools for Sisyphus agents. + +These are the LangChain equivalents of Loki's global tools: + - fs_read.sh → read_file + - fs_grep.sh → search_content + - fs_glob.sh → search_files + - fs_ls.sh → list_directory + - fs_write.sh → write_file + - fs_patch.sh → (omitted — write_file covers full rewrites) + +Loki Concept Mapping: + Loki tools are bash scripts with @cmd annotations that Loki's compiler + turns into function-calling declarations. In LangChain, we use the @tool + decorator which serves the same purpose: it generates the JSON schema + that the LLM sees, and wraps the Python function for execution. +""" + +from __future__ import annotations + +import fnmatch +import os +import re +import subprocess + +from langchain_core.tools import tool + + +@tool +def read_file(path: str, offset: int = 1, limit: int = 200) -> str: + """Read a file's contents with optional line range. + + Args: + path: Path to the file (absolute or relative to cwd). + offset: 1-based line number to start from. + limit: Maximum number of lines to return. 
+ """ + path = os.path.expanduser(path) + if not os.path.isfile(path): + return f"Error: file not found: {path}" + + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + lines = f.readlines() + except Exception as e: + return f"Error reading {path}: {e}" + + total = len(lines) + start = max(0, offset - 1) + end = min(total, start + limit) + selected = lines[start:end] + + result = f"File: {path} (lines {start + 1}-{end} of {total})\n\n" + for i, line in enumerate(selected, start=start + 1): + result += f"{i}: {line}" + + if end < total: + result += f"\n... truncated ({total} total lines)" + + return result + + +@tool +def write_file(path: str, content: str) -> str: + """Write complete contents to a file, creating parent directories as needed. + + Args: + path: Path for the file. + content: Complete file contents to write. + """ + path = os.path.expanduser(path) + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + try: + with open(path, "w", encoding="utf-8") as f: + f.write(content) + return f"Wrote: {path}" + except Exception as e: + return f"Error writing {path}: {e}" + + +@tool +def search_content(pattern: str, directory: str = ".", file_type: str = "") -> str: + """Search for a text/regex pattern in files under a directory. + + Args: + pattern: Text or regex pattern to search for. + directory: Root directory to search in. + file_type: Optional file extension filter (e.g. "py", "rs"). + """ + directory = os.path.expanduser(directory) + cmd = ["grep", "-rn"] + if file_type: + cmd += [f"--include=*.{file_type}"] + cmd += [pattern, directory] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + lines = result.stdout.strip().splitlines() + except Exception as e: + return f"Error: {e}" + + # Filter noise + noise = {"/.git/", "/node_modules/", "/target/", "/dist/", "/__pycache__/"} + filtered = [l for l in lines if not any(n in l for n in noise)][:30] + + if not filtered: + return "No matches found." 
+ return "\n".join(filtered) + + +@tool +def search_files(pattern: str, directory: str = ".") -> str: + """Find files matching a glob pattern. + + Args: + pattern: Glob pattern (e.g. '*.py', 'config*', '*test*'). + directory: Directory to search in. + """ + directory = os.path.expanduser(directory) + noise = {".git", "node_modules", "target", "dist", "__pycache__"} + matches: list[str] = [] + + for root, dirs, files in os.walk(directory): + dirs[:] = [d for d in dirs if d not in noise] + for name in files: + if fnmatch.fnmatch(name, pattern): + matches.append(os.path.join(root, name)) + if len(matches) >= 25: + break + if len(matches) >= 25: + break + + if not matches: + return "No files found." + return "\n".join(matches) + + +@tool +def list_directory(path: str = ".", max_depth: int = 3) -> str: + """List directory tree structure. + + Args: + path: Directory to list. + max_depth: Maximum depth to recurse. + """ + path = os.path.expanduser(path) + if not os.path.isdir(path): + return f"Error: not a directory: {path}" + + noise = {".git", "node_modules", "target", "dist", "__pycache__", ".venv", "venv"} + lines: list[str] = [] + + def _walk(dir_path: str, prefix: str, depth: int) -> None: + if depth > max_depth: + return + try: + entries = sorted(os.listdir(dir_path)) + except PermissionError: + return + + dirs = [e for e in entries if os.path.isdir(os.path.join(dir_path, e)) and e not in noise] + files = [e for e in entries if os.path.isfile(os.path.join(dir_path, e))] + + for f in files[:20]: + lines.append(f"{prefix}{f}") + if len(files) > 20: + lines.append(f"{prefix}... 
({len(files) - 20} more files)") + + for d in dirs: + lines.append(f"{prefix}{d}/") + _walk(os.path.join(dir_path, d), prefix + " ", depth + 1) + + lines.append(f"{os.path.basename(path) or path}/") + _walk(path, " ", 1) + return "\n".join(lines[:200]) diff --git a/examples/langchain-sisyphus/sisyphus_langchain/tools/project.py b/examples/langchain-sisyphus/sisyphus_langchain/tools/project.py new file mode 100644 index 0000000..763ef32 --- /dev/null +++ b/examples/langchain-sisyphus/sisyphus_langchain/tools/project.py @@ -0,0 +1,142 @@ +""" +Project detection and build/test tools. + +These mirror Loki's .shared/utils.sh detect_project() heuristic and the +sisyphus/coder tools.sh run_build / run_tests / verify_build commands. + +Loki Concept Mapping: + Loki uses a heuristic cascade: check for Cargo.toml → go.mod → package.json + etc., then falls back to an LLM call for unknown projects. We replicate the + heuristic portion here. The LLM fallback is omitted since the agents + themselves can reason about unknown project types. 
+""" + +from __future__ import annotations + +import json +import os +import subprocess + +from langchain_core.tools import tool + +# --------------------------------------------------------------------------- +# Project detection (mirrors _detect_heuristic in utils.sh) +# --------------------------------------------------------------------------- +_HEURISTICS: list[tuple[str, dict[str, str]]] = [ + ("Cargo.toml", {"type": "rust", "build": "cargo build", "test": "cargo test", "check": "cargo check"}), + ("go.mod", {"type": "go", "build": "go build ./...", "test": "go test ./...", "check": "go vet ./..."}), + ("package.json", {"type": "nodejs", "build": "npm run build", "test": "npm test", "check": "npm run lint"}), + ("pyproject.toml", {"type": "python", "build": "", "test": "pytest", "check": "ruff check ."}), + ("pom.xml", {"type": "java", "build": "mvn compile", "test": "mvn test", "check": "mvn verify"}), + ("Makefile", {"type": "make", "build": "make build", "test": "make test", "check": "make lint"}), +] + + +def detect_project(directory: str) -> dict[str, str]: + """Detect project type and return build/test commands.""" + for marker, info in _HEURISTICS: + if os.path.exists(os.path.join(directory, marker)): + return info + return {"type": "unknown", "build": "", "test": "", "check": ""} + + +@tool +def get_project_info(directory: str = ".") -> str: + """Detect the project type and show structure overview. + + Args: + directory: Project root directory. 
+ """ + directory = os.path.expanduser(directory) + info = detect_project(directory) + result = f"Project: {os.path.abspath(directory)}\n" + result += f"Type: {info['type']}\n" + result += f"Build: {info['build'] or '(none)'}\n" + result += f"Test: {info['test'] or '(none)'}\n" + result += f"Check: {info['check'] or '(none)'}\n" + return result + + +def _run_project_command(directory: str, command_key: str) -> str: + """Run a detected project command (build/test/check).""" + directory = os.path.expanduser(directory) + info = detect_project(directory) + cmd = info.get(command_key, "") + + if not cmd: + return f"No {command_key} command detected for this project." + + try: + result = subprocess.run( + cmd, + shell=True, + capture_output=True, + text=True, + cwd=directory, + timeout=300, + ) + output = result.stdout + result.stderr + status = "SUCCESS" if result.returncode == 0 else f"FAILED (exit {result.returncode})" + return f"Running: {cmd}\n\n{output}\n\n{command_key.upper()}: {status}" + except subprocess.TimeoutExpired: + return f"{command_key.upper()}: TIMEOUT after 300s" + except Exception as e: + return f"{command_key.upper()}: ERROR — {e}" + + +@tool +def run_build(directory: str = ".") -> str: + """Run the project's build command. + + Args: + directory: Project root directory. + """ + return _run_project_command(directory, "build") + + +@tool +def run_tests(directory: str = ".") -> str: + """Run the project's test suite. + + Args: + directory: Project root directory. + """ + return _run_project_command(directory, "test") + + +@tool +def verify_build(directory: str = ".") -> str: + """Run the project's check/lint command to verify correctness. + + Args: + directory: Project root directory. + """ + return _run_project_command(directory, "check") + + +@tool +def execute_command(command: str, directory: str = ".") -> str: + """Execute a shell command and return its output. + + Args: + command: Shell command to execute. + directory: Working directory. 
+ """ + directory = os.path.expanduser(directory) + try: + result = subprocess.run( + command, + shell=True, + capture_output=True, + text=True, + cwd=directory, + timeout=120, + ) + output = (result.stdout + result.stderr).strip() + if result.returncode != 0: + return f"Command failed (exit {result.returncode}):\n{output}" + return output or "(no output)" + except subprocess.TimeoutExpired: + return "Command timed out after 120s." + except Exception as e: + return f"Error: {e}"