# Source file: loki/examples/langchain-sisyphus/sisyphus_langchain/agents/explore.py

"""
Explore agent node — the read-only codebase researcher.
Loki equivalent: assets/agents/explore/config.yaml + tools.sh
In Loki, the explore agent is spawned via `agent__spawn --agent explore --prompt "..."`
and runs as an isolated subprocess with its own session. It ends with
"EXPLORE_COMPLETE" so the parent knows it's finished.
In LangGraph, the explore agent is a *node* in the graph. The supervisor routes
to it via `Command(goto="explore")`. It reads the latest message (the supervisor's
delegation prompt), calls the LLM with read-only tools, and writes its findings
back to the shared message list. The graph edge then returns control to the
supervisor.
Key differences from Loki:
- No isolated session — shares the graph's message list (but has its own
system prompt and tool set, just like Loki's per-agent config).
- No "EXPLORE_COMPLETE" sentinel — the graph edge handles control flow.
- No output summarization — LangGraph's state handles context management.
"""
from __future__ import annotations
from langchain_core.messages import SystemMessage
from langchain_openai import ChatOpenAI
from sisyphus_langchain.state import SisyphusState
from sisyphus_langchain.tools.filesystem import (
list_directory,
read_file,
search_content,
search_files,
)
# ---------------------------------------------------------------------------
# System prompt — faithfully mirrors explore/config.yaml
# ---------------------------------------------------------------------------
# NOTE: this string is sent verbatim as the SystemMessage on every explore
# invocation (see create_explore_node below) — editing it changes agent
# behavior directly. The "FINDINGS:" block it mandates is plain convention
# for the supervisor to read; nothing in this module parses it.
EXPLORE_SYSTEM_PROMPT = """\
You are a codebase explorer. Your job: Search, find, report. Nothing else.
## Your Mission
Given a search task, you:
1. Search for relevant files and patterns
2. Read key files to understand structure
3. Report findings concisely
## Strategy
1. **Find first, read second** — Never read a file without knowing why.
2. **Use search_content to locate** — find exactly where things are defined.
3. **Use search_files to discover** — find files by name pattern.
4. **Read targeted sections** — use offset and limit to read only relevant lines.
5. **Never read entire large files** — if a file is 500+ lines, read the relevant section only.
## Output Format
Always end your response with a structured findings summary:
FINDINGS:
- [Key finding 1]
- [Key finding 2]
- Relevant files: [list of paths]
## Rules
1. Be fast — don't read every file, read representative ones.
2. Be focused — answer the specific question asked.
3. Be concise — report findings, not your process.
4. Never modify files — you are read-only.
5. Limit reads — max 5 file reads per exploration.
"""
# Read-only tools — mirrors explore's tool set (no write_file, no execute_command)
# All four come from sisyphus_langchain.tools.filesystem and are inspection-only;
# the "read-only" guarantee rests on this list, not on the prompt above.
EXPLORE_TOOLS = [read_file, search_content, search_files, list_directory]
def create_explore_node(model_name: str = "gpt-4o-mini", temperature: float = 0.1):
    """
    Build an explore node bound to a specific chat model.

    In Loki, each agent's model is fixed in its config.yaml. Here the model is
    a parameter so exploration can run on a cheaper model than the supervisor.

    Args:
        model_name: OpenAI model identifier.
        temperature: LLM temperature (Loki explore uses 0.1).

    Returns:
        A LangGraph node function ``explore_node(state) -> dict``.
    """
    # Bind the read-only tool set once at factory time; every invocation of
    # the returned node reuses this configured client.
    base_model = ChatOpenAI(model=model_name, temperature=temperature)
    explorer_llm = base_model.bind_tools(EXPLORE_TOOLS)

    def explore_node(state: SisyphusState) -> dict:
        """
        LangGraph node: run the explore agent.

        Prepends the explore system prompt to the shared conversation history,
        invokes the tool-bound LLM, and returns a partial state update with the
        response appended and the agent's output recorded under "explore".
        """
        prompt_stack = [SystemMessage(content=EXPLORE_SYSTEM_PROMPT), *state["messages"]]
        reply = explorer_llm.invoke(prompt_stack)

        # Merge into any prior agent_outputs rather than replacing the dict,
        # so other agents' recorded outputs survive this update.
        prior_outputs = state.get("agent_outputs", {})
        return {
            "messages": [reply],
            "agent_outputs": {**prior_outputs, "explore": reply.content},
        }

    return explore_node