Compare commits
92 Commits
1ee6b4d7c7
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
| e687d78931 | |||
| 0c2e4df647 | |||
| 6221875f64 | |||
| 895b9c27db | |||
| e661ca2eda | |||
| 7066edd904 | |||
| 61bdf29bea | |||
| ef39c7d9ff | |||
| e9e46158e7 | |||
| 34dc4b0dce | |||
| cd226577e7 | |||
| b5fc633454 | |||
| 484b18ef16 | |||
| 7333046cfe | |||
| 815f0e5c39 | |||
| dacccbfcf7 | |||
| 5370637274 | |||
| e6da252a5a | |||
| 4aaff21f45 | |||
| 2678afe02b | |||
| 558b764db8 | |||
| 0bb312a85c | |||
| d81d233527 | |||
| 597f823bdf | |||
| 81c037515e | |||
| 3c7d19da07 | |||
| 4536d00067 | |||
| 98d16d9a56 | |||
| 26de81e84e | |||
| 20c28b55d5 | |||
| 7d6f1dda26 | |||
| 9a061944ae | |||
| 1f50af0974 | |||
| bdacf9fc78 | |||
| a9f2a5edc2 | |||
| 2df8b1a541 | |||
| de055bf8a4 | |||
| 8fb0eece4b | |||
| ba03c3037d | |||
| afa0e4af67 | |||
| 5a9a00bc6f | |||
| e7bb668ac7 | |||
| 04498b96ec | |||
| eb2843d38a | |||
| 696ce03ee4 | |||
| a3d67bfbf7 | |||
| 5bd0766a60 | |||
| 35e1b14843 | |||
| 503c9b4699 | |||
| 7a8b09542d | |||
| da5cd21c1c | |||
| 27fcb1fc15 | |||
| e292c414c5 | |||
| 8a2f18204f | |||
| c70ac98223 | |||
| 249d1fc881 | |||
| 3f4fd91b3f | |||
| 48c52b5829 | |||
| f58f751c59 | |||
| fc7fdc98b4 | |||
| f4d7d0fb73 | |||
| 4b38f53488 | |||
| 186422ff58 | |||
| 9bc4f8b621 | |||
| 84497d3d65 | |||
| 3ea9116a23 | |||
| bfcd73c32a | |||
| 3cd3ba55ff | |||
| 3535edba79 | |||
| bf0343e245 | |||
| b001ae4c18 | |||
| 9ce088a530 | |||
| 16f3f71188 | |||
| 0af5fa02f9 | |||
| d6a0676264 | |||
| b582bab17c | |||
| a8732c63d6 | |||
| 389d0b768f | |||
| 70a251a7e2 | |||
| 462f136596 | |||
| bf9d7d750e | |||
| 540ec648c9 | |||
| e69352ee2d | |||
| ee4e3bc13f | |||
| a576961bd6 | |||
| 59c7fc1276 | |||
| bcf512fcfc | |||
| 195401c496 | |||
| 34d8d20ec6 | |||
| 08ba6f0446 | |||
| 26984892af | |||
| 526a426073 |
@@ -10,7 +10,7 @@ repository = "https://github.com/Dark-Alex-17/loki"
|
||||
categories = ["command-line-utilities"]
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
rust-version = "1.89.0"
|
||||
rust-version = "1.95.0"
|
||||
exclude = [".github", "CONTRIBUTING.md"]
|
||||
|
||||
[dependencies]
|
||||
@@ -22,7 +22,7 @@ dunce = "1.0.5"
|
||||
futures-util = "0.3.29"
|
||||
inquire = "0.9.4"
|
||||
is-terminal = "0.4.9"
|
||||
reedline = "0.46.0"
|
||||
reedline = "0.47.0"
|
||||
serde = { version = "1.0.152", features = ["derive"] }
|
||||
serde_json = { version = "1.0.93", features = ["preserve_order"] }
|
||||
serde_yaml = "0.9.17"
|
||||
@@ -34,10 +34,6 @@ tokio = { version = "1.34.0", features = [
|
||||
"rt-multi-thread",
|
||||
"full",
|
||||
] }
|
||||
tokio-graceful = "0.2.2"
|
||||
tokio-stream = { version = "0.1.15", default-features = false, features = [
|
||||
"sync",
|
||||
] }
|
||||
crossterm = "0.29.0"
|
||||
chrono = "0.4.23"
|
||||
bincode = { version = "2.0.0", features = [
|
||||
@@ -51,7 +47,7 @@ nu-ansi-term = "0.50.0"
|
||||
async-trait = "0.1.74"
|
||||
textwrap = "0.16.0"
|
||||
ansi_colours = "1.2.2"
|
||||
reqwest-eventsource = "0.6.0"
|
||||
eventsource-stream = "0.2.3"
|
||||
log = "0.4.28"
|
||||
log4rs = { version = "1.4.0", features = ["file_appender"] }
|
||||
shell-words = "1.1.0"
|
||||
@@ -59,20 +55,14 @@ sha2 = "0.10.8"
|
||||
unicode-width = "0.2.0"
|
||||
async-recursion = "1.1.1"
|
||||
http = "1.1.0"
|
||||
http-body-util = "0.1"
|
||||
hyper = { version = "1.0", features = ["full"] }
|
||||
hyper-util = { version = "0.1", features = ["server-auto", "client-legacy"] }
|
||||
time = { version = "0.3.36", features = ["macros"] }
|
||||
indexmap = { version = "2.2.6", features = ["serde"] }
|
||||
hmac = "0.12.1"
|
||||
aws-smithy-eventstream = "0.60.4"
|
||||
urlencoding = "2.1.3"
|
||||
unicode-segmentation = "1.11.0"
|
||||
json-patch = { version = "4.0.0", default-features = false }
|
||||
bitflags = "2.5.0"
|
||||
path-absolutize = "3.1.1"
|
||||
hnsw_rs = "0.3.0"
|
||||
rayon = "1.10.0"
|
||||
uuid = { version = "1.9.1", features = ["v4"] }
|
||||
scraper = { version = "0.23.1", default-features = false, features = [
|
||||
"deterministic",
|
||||
@@ -97,7 +87,6 @@ rmcp = { version = "1.5.0", features = [
|
||||
] }
|
||||
num_cpus = "1.17.0"
|
||||
tree-sitter = "0.26.8"
|
||||
tree-sitter-language = "0.1"
|
||||
tree-sitter-python = "0.25.0"
|
||||
tree-sitter-typescript = "0.23"
|
||||
colored = "3.0.0"
|
||||
@@ -107,15 +96,24 @@ clap_complete_nushell = "4.5.9"
|
||||
open = "5"
|
||||
rand = { version = "0.10.0", features = ["default"] }
|
||||
url = "2.5.8"
|
||||
self_update = { version = "0.44", default-features = false, features = [
|
||||
"reqwest",
|
||||
"rustls",
|
||||
"archive-tar",
|
||||
"compression-flate2",
|
||||
"archive-zip",
|
||||
"compression-zip-deflate",
|
||||
] }
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.12.0"
|
||||
version = "0.13.3"
|
||||
features = [
|
||||
"json",
|
||||
"multipart",
|
||||
"stream",
|
||||
"form",
|
||||
"socks",
|
||||
"rustls-tls",
|
||||
"rustls-tls-native-roots",
|
||||
"rustls",
|
||||
]
|
||||
default-features = false
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# Loki: All-in-one, batteries-included LLM CLI Tool
|
||||
|
||||

|
||||

|
||||
[](https://crates.io/crates/loki-ai)
|
||||

|
||||

|
||||
@@ -11,54 +10,47 @@ Loki is an all-in-one, batteries-included, LLM CLI tool featuring Shell Assistan
|
||||
Agents, and More.
|
||||
|
||||
It is designed to include a number of useful agents, roles, macros, and more so users can get up and running with Loki
|
||||
in as little time as possible.
|
||||
in as little time as possible. You can also install entire bundles of agents, roles, macros, tools, and MCP servers from
|
||||
any git repository — see [Sharing Configurations](#sharing-configurations).
|
||||
|
||||

|
||||

|
||||
|
||||
Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration guide](./docs/AICHAT-MIGRATION.md) to get started.
|
||||
Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration guide](https://github.com/Dark-Alex-17/loki/wiki/AIChat-Migration) to get started.
|
||||
|
||||
## Quick Links
|
||||
* [AIChat Migration Guide](./docs/AICHAT-MIGRATION.md): Coming from AIChat? Follow the migration guide to get started.
|
||||
* [AIChat Migration Guide](https://github.com/Dark-Alex-17/loki/wiki/AIChat-Migration): Coming from AIChat? Follow the migration guide to get started.
|
||||
* [Installation](#install): Install Loki
|
||||
* [Getting Started](#getting-started): Get started with Loki by doing first-run setup steps.
|
||||
* [REPL](./docs/REPL.md): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki.
|
||||
* [Custom REPL Prompt](./docs/REPL-PROMPT.md): Customize the REPL prompt to provide useful contextual information.
|
||||
* [Vault](./docs/VAULT.md): Securely store and manage sensitive information such as API keys and credentials.
|
||||
* [Shell Integrations](./docs/SHELL-INTEGRATIONS.md): Seamlessly integrate Loki with your shell environment for enhanced command-line assistance.
|
||||
* [Function Calling](./docs/function-calling/TOOLS.md#Tools): Leverage function calling capabilities to extend Loki's functionality with custom tools
|
||||
* [Creating Custom Tools](./docs/function-calling/CUSTOM-TOOLS.md): You can create your own custom tools to enhance Loki's capabilities.
|
||||
* [Create Custom Python Tools](./docs/function-calling/CUSTOM-TOOLS.md#custom-python-based-tools)
|
||||
* [Create Custom TypeScript Tools](./docs/function-calling/CUSTOM-TOOLS.md#custom-typescript-based-tools)
|
||||
* [Create Custom Bash Tools](./docs/function-calling/CUSTOM-BASH-TOOLS.md)
|
||||
* [Bash Prompt Utilities](./docs/function-calling/BASH-PROMPT-HELPERS.md)
|
||||
* [First-Class MCP Server Support](./docs/function-calling/MCP-SERVERS.md): Easily connect and interact with MCP servers for advanced functionality.
|
||||
* [Macros](./docs/MACROS.md): Automate repetitive tasks and workflows with Loki "scripts" (macros).
|
||||
* [RAG](./docs/RAG.md): Retrieval-Augmented Generation for enhanced information retrieval and generation.
|
||||
* [Sessions](/docs/SESSIONS.md): Manage and persist conversational contexts and settings across multiple interactions.
|
||||
* [Roles](./docs/ROLES.md): Customize model behavior for specific tasks or domains.
|
||||
* [Agents](/docs/AGENTS.md): Leverage AI agents to perform complex tasks and workflows, including sub-agent spawning, teammate messaging, and user interaction tools.
|
||||
* [Todo System](./docs/TODO-SYSTEM.md): Built-in task tracking for improved agent reliability with smaller models.
|
||||
* [Environment Variables](./docs/ENVIRONMENT-VARIABLES.md): Override and customize your Loki configuration at runtime with environment variables.
|
||||
* [Client Configurations](./docs/clients/CLIENTS.md): Configuration instructions for various LLM providers.
|
||||
* [Authentication (API Key & OAuth)](./docs/clients/CLIENTS.md#authentication): Authenticate with API keys or OAuth for subscription-based access.
|
||||
* [Patching API Requests](./docs/clients/PATCHES.md): Learn how to patch API requests for advanced customization.
|
||||
* [Custom Themes](./docs/THEMES.md): Change the look and feel of Loki to your preferences with custom themes.
|
||||
* [Sharing Configurations](https://github.com/Dark-Alex-17/loki/wiki/Sharing-Configurations): Install bundles of agents, roles, macros, tools, and MCP servers from any git repo, and share your own.
|
||||
* [REPL](https://github.com/Dark-Alex-17/loki/wiki/REPL): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki.
|
||||
* [Custom REPL Prompt](https://github.com/Dark-Alex-17/loki/wiki/REPL-Prompt): Customize the REPL prompt to provide useful contextual information.
|
||||
* [Vault](https://github.com/Dark-Alex-17/loki/wiki/Vault): Securely store and manage sensitive information such as API keys and credentials.
|
||||
* [Shell Integrations](https://github.com/Dark-Alex-17/loki/wiki/Shell-Integrations): Seamlessly integrate Loki with your shell environment for enhanced command-line assistance.
|
||||
* [Function Calling](https://github.com/Dark-Alex-17/loki/wiki/Tools): Leverage function calling capabilities to extend Loki's functionality with custom tools.
|
||||
* [Creating Custom Tools](https://github.com/Dark-Alex-17/loki/wiki/Custom-Tools): You can create your own custom tools to enhance Loki's capabilities.
|
||||
* [Create Custom Python Tools](https://github.com/Dark-Alex-17/loki/wiki/Custom-Tools#custom-python-based-tools)
|
||||
* [Create Custom TypeScript Tools](https://github.com/Dark-Alex-17/loki/wiki/Custom-Tools#custom-typescript-based-tools)
|
||||
* [Create Custom Bash Tools](https://github.com/Dark-Alex-17/loki/wiki/Custom-Bash-Tools)
|
||||
* [Bash Prompt Utilities](https://github.com/Dark-Alex-17/loki/wiki/Bash-Prompt-Helpers)
|
||||
* [First-Class MCP Server Support](https://github.com/Dark-Alex-17/loki/wiki/MCP-Servers): Easily connect and interact with MCP servers for advanced functionality.
|
||||
* [Macros](https://github.com/Dark-Alex-17/loki/wiki/Macros): Automate repetitive tasks and workflows with Loki "scripts" (macros).
|
||||
* [RAG](https://github.com/Dark-Alex-17/loki/wiki/RAG): Retrieval-Augmented Generation for enhanced information retrieval and generation.
|
||||
* [Sessions](https://github.com/Dark-Alex-17/loki/wiki/Sessions): Manage and persist conversational contexts and settings across multiple interactions.
|
||||
* [Roles](https://github.com/Dark-Alex-17/loki/wiki/Roles): Customize model behavior for specific tasks or domains.
|
||||
* [Agents](https://github.com/Dark-Alex-17/loki/wiki/Agents): Leverage AI agents to perform complex tasks and workflows, including sub-agent spawning, teammate messaging, and user interaction tools.
|
||||
* [Graph Agents](https://github.com/Dark-Alex-17/loki/wiki/Graph-Agents): Define an agent as a declarative, YAML-driven workflow — a directed graph of typed nodes (LLM calls, scripts, approvals, user input, RAG retrieval, sub-agent spawns).
|
||||
* [Todo System](https://github.com/Dark-Alex-17/loki/wiki/TODO-System): Built-in task tracking for improved LLM reliability with smaller models.
|
||||
* [Environment Variables](https://github.com/Dark-Alex-17/loki/wiki/Environment-Variables): Override and customize your Loki configuration at runtime with environment variables.
|
||||
* [Client Configurations](https://github.com/Dark-Alex-17/loki/wiki/Clients): Configuration instructions for various LLM providers.
|
||||
* [Authentication (API Key & OAuth)](https://github.com/Dark-Alex-17/loki/wiki/Clients#authentication): Authenticate with API keys or OAuth for subscription-based access.
|
||||
* [Patching API Requests](https://github.com/Dark-Alex-17/loki/wiki/Patches): Learn how to patch API requests for advanced customization.
|
||||
* [Custom Themes](https://github.com/Dark-Alex-17/loki/wiki/Themes): Change the look and feel of Loki to your preferences with custom themes.
|
||||
* [History](#history): A history of how Loki came to be.
|
||||
|
||||
## Prerequisites
|
||||
Loki requires the following tools to be installed on your system:
|
||||
* [jq](https://github.com/jqlang/jq)
|
||||
* `brew install jq`
|
||||
* [jira (optional)](https://github.com/ankitpokhrel/jira-cli/wiki/Installation) (For the `query_jira_issues` tool)
|
||||
* `brew tap ankitpokhrel/jira-cli && brew install jira-cli`
|
||||
* You'll need to [create a JIRA API token](https://id.atlassian.com/manage-profile/security/api-tokens) for authentication
|
||||
* Then, save it as an environment variable to your shell profile:
|
||||
```sh
|
||||
# ~/.bashrc or ~/.zshrc
|
||||
export JIRA_API_TOKEN="your_jira_api_token_here"
|
||||
```
|
||||
* Then run `jira init`, select installation type as `cloud`, and provide the required details to generate a config
|
||||
file for the Jira CLI.
|
||||
* [usql](https://github.com/xo/usql) (For the `sql` agent)
|
||||
* `brew install xo/xo/usql`
|
||||
* [docker](https://docs.docker.com/engine/install/)
|
||||
@@ -66,7 +58,7 @@ Loki requires the following tools to be installed on your system:
|
||||
* `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
||||
|
||||
These tools are used to provide various functionalities within Loki, such as document processing, JSON manipulation,
|
||||
interaction with Jira, and they are used within agents and tools.
|
||||
etc., and they are used within agents and tools.
|
||||
|
||||
## Install
|
||||
|
||||
@@ -138,6 +130,29 @@ To use a binary from the releases page on Linux/MacOS, do the following:
|
||||
3. Extract the binary with `tar -C /usr/local/bin -xzf loki-<arch>.tar.gz` (Note: This may require `sudo`)
|
||||
4. Now you can run `loki`!
|
||||
|
||||
## Updating
|
||||
Loki can update itself in place to the latest GitHub release. Run `loki --update`
|
||||
for the newest release, or `loki --update v0.4.0` for a specific version:
|
||||
|
||||
```shell
|
||||
loki --update
|
||||
loki --update v0.4.0
|
||||
```
|
||||
|
||||
The same is available from within the REPL via `.update` and `.update v0.4.0`.
|
||||
|
||||
If Loki was installed with a package manager, prefer that package manager so its
|
||||
records stay in sync with the binary on disk; e.g. `brew upgrade loki` for Homebrew,
|
||||
or `cargo install --locked loki-ai` for Cargo.
|
||||
|
||||
When Loki detects a package-manager install it prints a warning and asks for
|
||||
confirmation. In a non-interactive shell (no TTY), pass `--force` to update
|
||||
anyway:
|
||||
|
||||
```shell
|
||||
loki --update --force
|
||||
```
|
||||
|
||||
## Getting Started
|
||||
After installation, you can generate the configuration files and directories by simply running:
|
||||
|
||||
@@ -154,23 +169,22 @@ loki --list-secrets
|
||||
|
||||
### Authentication
|
||||
Each client in your configuration needs authentication (with a few exceptions; e.g. ollama). Most clients use an API key
|
||||
(set via `api_key` in the config or through the [vault](./docs/VAULT.md)). For providers that support OAuth (e.g. Claude Pro/Max
|
||||
(set via `api_key` in the config or through the [vault](https://github.com/Dark-Alex-17/loki/wiki/Vault)). For providers that support OAuth (e.g. Claude Pro/Max
|
||||
subscribers, Google Gemini), you can authenticate with your existing subscription instead:
|
||||
|
||||
```yaml
|
||||
# In your config.yaml
|
||||
clients:
|
||||
- type: claude
|
||||
name: my-claude-oauth
|
||||
auth: oauth # Indicate you want to authenticate with OAuth instead of an API key
|
||||
```
|
||||
|
||||
```sh
|
||||
loki --authenticate my-claude-oauth
|
||||
loki --authenticate claude
|
||||
# Or via the REPL: .authenticate
|
||||
```
|
||||
|
||||
For full details, see the [authentication documentation](./docs/clients/CLIENTS.md#authentication).
|
||||
For full details, see the [authentication documentation](https://github.com/Dark-Alex-17/loki/wiki/Clients#authentication).
|
||||
|
||||
### Tab-Completions
|
||||
You can also enable tab completions to make using Loki easier. To do so, add the following to your shell profile:
|
||||
@@ -247,7 +261,7 @@ shown below:
|
||||
|
||||
| Setting | Description |
|
||||
|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `repl_prelude` | This setting lets you specify a default `session` or `role` to use when starting Loki in [REPL](./docs/REPL.md) mode. <br>Values can be <ul><li>`role:<name>` to define a role</li><li>`session:<name>` to define a session</li><li>`<session>:<role>` to define both a session and a role to use</li></ul> |
|
||||
| `repl_prelude` | This setting lets you specify a default `session` or `role` to use when starting Loki in [REPL](https://github.com/Dark-Alex-17/loki/wiki/REPL) mode. <br>Values can be <ul><li>`role:<name>` to define a role</li><li>`session:<name>` to define a session</li><li>`<session>:<role>` to define both a session and a role to use</li></ul> |
|
||||
| `cmd_prelude` | This setting lets you specify a default `session` or `role` to use when running one-off queries in Loki via the CLI. <br>Values can be <ul><li>`role:<name>` to define a role</li><li>`session:<name>` to define a session</li><li>`<session>:<role>` to define both a session and a role to use</li></ul> |
|
||||
| `agent_session` | This setting is used to specify a default session that all agents should start into, unless otherwise specified in the agent configuration. (e.g. `temp`, `default`) |
|
||||
|
||||
|
||||
@@ -1,40 +1,82 @@
|
||||
# Coder
|
||||
|
||||
An AI agent that assists you with your coding tasks.
|
||||
A graph-based implementation agent. Plans, implements, and runs build +
|
||||
tests in a bounded fix-loop until verified. Designed to be delegated to by
|
||||
the **[Sisyphus](../sisyphus/README.md)** agent.
|
||||
|
||||
This agent is designed to be delegated to by the **[Sisyphus](../sisyphus/README.md)** agent to implement code specifications. Sisyphus
|
||||
acts as the coordinator/architect, while Coder handles the implementation details.
|
||||
Coder is a [graph agent](https://github.com/Dark-Alex-17/loki/wiki/Graph-Agents): its workflow is
|
||||
defined declaratively in `graph.yaml`, with verification and the
|
||||
implement-fix loop enforced as graph edges rather than prose.
|
||||
|
||||
## Features
|
||||
## Workflow
|
||||
|
||||
- 🏗️ Intelligent project structure creation and management
|
||||
- 🖼️ Convert screenshots into clean, functional code
|
||||
- 📁 Comprehensive file system operations (create folders, files, read/write files)
|
||||
- 🧐 Advanced code analysis and improvement suggestions
|
||||
- 📊 Precise diff-based file editing for controlled code modifications
|
||||
```
|
||||
analyze_request (llm + output_schema) plan + complexity extraction
|
||||
↓
|
||||
route_complexity (script) opt-out approval gate (complexity ≥ 7)
|
||||
↓
|
||||
gate_approval (approval, optional)
|
||||
↓
|
||||
implement (llm + fs tools) actual file edits
|
||||
↓
|
||||
verify_build (script)
|
||||
↓
|
||||
verify_tests (script)
|
||||
↓
|
||||
fix_loop_gate (script) back-edge to implement (bounded)
|
||||
↓
|
||||
end_success / end_rejected / end_failure
|
||||
```
|
||||
|
||||
It can also be used as a standalone tool for direct coding assistance.
|
||||
End nodes emit one of three sentinel outcomes for the caller:
|
||||
|
||||
## Pro-Tip: Use an IDE MCP Server for Improved Performance
|
||||
Many modern IDEs now include MCP servers that let LLMs perform operations within the IDE itself and use IDE tools. Using
|
||||
an IDE's MCP server dramatically improves the performance of coding agents. So if you have an IDE, try adding that MCP
|
||||
server to your config (see the [MCP Server docs](../../../docs/function-calling/MCP-SERVERS.md) to see how to configure
|
||||
them), and modify the agent definition to look like this:
|
||||
- `CODER_COMPLETE` — build and tests passed.
|
||||
- `CODER_REJECTED` — user rejected the plan at the approval gate.
|
||||
- `CODER_FAILED` — fix-loop exhausted; build/tests still failing.
|
||||
|
||||
## Tuning
|
||||
|
||||
The agent's `project_dir` is exposed via the standard `variables:` block,
|
||||
so it accepts the runtime override flag:
|
||||
|
||||
```sh
|
||||
# Invoke from inside the project (project_dir defaults to ".")
|
||||
cd /path/to/your/project
|
||||
loki -a coder "Add a foo() function..."
|
||||
|
||||
# Or invoke from anywhere with an explicit override
|
||||
loki -a coder --agent-variable project_dir /path/to/your/project "Add..."
|
||||
```
|
||||
|
||||
`graph.yaml` `initial_state` exposes:
|
||||
|
||||
- `max_fix_attempts` (default `3`) — fix-loop budget before `end_failure`.
|
||||
|
||||
Environment overrides honored by the script nodes:
|
||||
|
||||
- `BUILD_CMD` — skip project-type detection for the build/check command.
|
||||
- `TEST_CMD` — skip detection for tests.
|
||||
- `CODER_AUTOAPPROVE=1` — bypass the approval gate (for non-interactive runs
|
||||
where complexity might trip the gate).
|
||||
|
||||
## Pro-Tip: IDE MCP Server
|
||||
|
||||
Modern IDEs (JetBrains, VS Code, Cursor, Zed, etc.) expose MCP servers
|
||||
that let LLMs use IDE tools directly. To wire one in, edit `graph.yaml`:
|
||||
|
||||
```yaml
|
||||
# ...
|
||||
|
||||
mcp_servers:
|
||||
- jetbrains # The name of your configured IDE MCP server
|
||||
- your-ide-mcp-server
|
||||
|
||||
global_tools:
|
||||
# Keep useful read-only tools for reading files in other non-project directories
|
||||
# Keep read-only fs tools for files outside the IDE project
|
||||
- fs_read.sh
|
||||
- fs_grep.sh
|
||||
- fs_glob.sh
|
||||
# - fs_write.sh
|
||||
# - fs_patch.sh
|
||||
- execute_command.sh
|
||||
```
|
||||
|
||||
# ...
|
||||
```
|
||||
Then add the MCP server's write/patch tools to the `implement` node's
|
||||
`tools:` whitelist.
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
name: coder
|
||||
description: Implementation agent - writes code, follows patterns, verifies with builds
|
||||
version: 1.0.0
|
||||
temperature: 0.1
|
||||
|
||||
auto_continue: true
|
||||
max_auto_continues: 15
|
||||
inject_todo_instructions: true
|
||||
|
||||
variables:
|
||||
- name: project_dir
|
||||
description: Project directory to work in
|
||||
default: '.'
|
||||
- name: auto_confirm
|
||||
description: Auto-confirm command execution
|
||||
default: '1'
|
||||
|
||||
global_tools:
|
||||
- fs_read.sh
|
||||
- fs_grep.sh
|
||||
- fs_glob.sh
|
||||
- fs_write.sh
|
||||
- fs_patch.sh
|
||||
- execute_command.sh
|
||||
|
||||
instructions: |
|
||||
You are a senior engineer. You write code that works on the first try.
|
||||
|
||||
## Your Mission
|
||||
|
||||
Given an implementation task:
|
||||
1. Check for orchestrator context first (see below)
|
||||
2. Fill gaps only. Read files NOT already covered in context
|
||||
3. Write the code (using tools, NOT chat output)
|
||||
4. Verify it compiles/builds
|
||||
5. Signal completion with a summary
|
||||
|
||||
## Using Orchestrator Context (IMPORTANT)
|
||||
|
||||
When spawned by sisyphus, your prompt will often contain a `<context>` block
|
||||
with prior findings: file paths, code patterns, and conventions discovered by
|
||||
explore agents.
|
||||
|
||||
**If context is provided:**
|
||||
1. Use it as your primary reference. Don't re-read files already summarized
|
||||
2. Follow the code patterns shown. Snippets in context ARE the style guide
|
||||
3. Read the referenced files ONLY IF you need more detail (e.g. full function
|
||||
signature, import list, or adjacent code not included in the snippet)
|
||||
4. If context includes a "Conventions" section, follow it exactly
|
||||
|
||||
**If context is NOT provided or is too vague to act on:**
|
||||
Fall back to self-exploration: grep for similar files, read 1-2 examples,
|
||||
match their style.
|
||||
|
||||
**Never ignore provided context.** It represents work already done upstream.
|
||||
|
||||
## Todo System
|
||||
|
||||
For multi-file changes:
|
||||
1. `todo__init` with the implementation goal
|
||||
2. `todo__add` for each file to create/modify
|
||||
3. Implement each, calling `todo__done` immediately after
|
||||
|
||||
## Writing Code
|
||||
|
||||
**CRITICAL**: Write code using `write_file` tool, NEVER paste code in chat.
|
||||
|
||||
Correct:
|
||||
```
|
||||
write_file --path "src/user.rs" --content "pub struct User { ... }"
|
||||
```
|
||||
|
||||
Wrong:
|
||||
```
|
||||
Here's the implementation:
|
||||
\`\`\`rust
|
||||
pub struct User { ... }
|
||||
\`\`\`
|
||||
```
|
||||
|
||||
## File Reading Strategy (IMPORTANT - minimize token usage)
|
||||
|
||||
1. **Use grep to find relevant code** - `fs_grep --pattern "fn handle_request" --include "*.rs"` finds where things are
|
||||
2. **Read only what you need** - `fs_read --path "src/main.rs" --offset 50 --limit 30` reads lines 50-79
|
||||
3. **Never cat entire large files** - If 500+ lines, read the relevant section after grepping for it
|
||||
4. **Use glob to find files** - `fs_glob --pattern "*.rs" --path src/` discovers files by name
|
||||
|
||||
## Pattern Matching
|
||||
|
||||
Before writing ANY file:
|
||||
1. Find a similar existing file (use `fs_grep` to locate, then `fs_read` to examine)
|
||||
2. Match its style: imports, naming, structure
|
||||
3. Follow the same patterns exactly
|
||||
|
||||
## Verification
|
||||
|
||||
After writing files:
|
||||
1. Run `verify_build` to check compilation
|
||||
2. If it fails, fix the error (minimal change)
|
||||
3. Don't move on until build passes
|
||||
|
||||
## Completion Signal
|
||||
|
||||
When done, end your response with a summary so the parent agent knows what happened:
|
||||
|
||||
```
|
||||
CODER_COMPLETE: [summary of what was implemented, which files were created/modified, and build status]
|
||||
```
|
||||
|
||||
Or if something went wrong:
|
||||
```
|
||||
CODER_FAILED: [what went wrong]
|
||||
```
|
||||
|
||||
## Rules
|
||||
|
||||
1. **Write code via tools** - Never output code to chat
|
||||
2. **Follow patterns** - Read existing files first
|
||||
3. **Verify builds** - Don't finish without checking
|
||||
4. **Minimal fixes** - If build fails, fix precisely
|
||||
5. **No refactoring** - Only implement what's asked
|
||||
|
||||
## Context
|
||||
- Project: {{project_dir}}
|
||||
- CWD: {{__cwd__}}
|
||||
- Shell: {{__shell__}}
|
||||
|
||||
## Available tools:
|
||||
{{__tools__}}
|
||||
@@ -0,0 +1,278 @@
|
||||
name: coder
|
||||
description: |
|
||||
Implementation agent. Plans, implements, and runs build + tests in a
|
||||
bounded fix-loop until verified. Designed to be delegated to by sisyphus.
|
||||
version: "1.0"
|
||||
|
||||
temperature: 0.1
|
||||
|
||||
global_tools:
|
||||
- fs_cat.sh
|
||||
- fs_ls.sh
|
||||
- fs_write.sh
|
||||
- fs_patch.sh
|
||||
- execute_command.sh
|
||||
|
||||
variables:
|
||||
- name: project_dir
|
||||
description: |
|
||||
Absolute path to the project directory. Defaults to "." which is the
|
||||
directory you invoked `loki` from. Override at runtime with
|
||||
`loki -a coder --agent-variable project_dir /abs/path "..."`.
|
||||
default: "."
|
||||
|
||||
settings:
|
||||
max_loop_iterations: 20
|
||||
log_state_snapshots: true
|
||||
validate_before_run: true
|
||||
timeout: 1800
|
||||
|
||||
initial_state:
|
||||
project_dir: ""
|
||||
fix_attempts: 0
|
||||
max_fix_attempts: 3
|
||||
fix_instructions: ""
|
||||
build_output: ""
|
||||
tests_output: ""
|
||||
last_node_output: ""
|
||||
plan_summary: ""
|
||||
files_to_modify: []
|
||||
files_to_create: []
|
||||
risks: []
|
||||
complexity_score: 0
|
||||
|
||||
start: resolve_paths
|
||||
|
||||
nodes:
|
||||
resolve_paths:
|
||||
id: resolve_paths
|
||||
type: script
|
||||
description: Resolve project_dir to an absolute path from the agent variable
|
||||
script: scripts/resolve_paths.sh
|
||||
timeout: 5
|
||||
fallback: end_failure
|
||||
|
||||
analyze_request:
|
||||
id: analyze_request
|
||||
type: llm
|
||||
description: Extract a structured plan and complexity score from the orchestrator's prompt
|
||||
instructions: |
|
||||
You are a senior engineer's planning assistant. Read the orchestrator's
|
||||
request and emit a structured plan. You only plan. You never edit files.
|
||||
|
||||
Score complexity from 1 to 10:
|
||||
1-3: trivial - single file, <=20 lines changed, obvious approach
|
||||
4-6: moderate - 2-5 files, clear approach, some pattern matching
|
||||
7-10: complex - multi-component, ambiguous tradeoffs, refactoring,
|
||||
or wide blast radius
|
||||
|
||||
Be specific in `files_to_modify` and `files_to_create`. All paths
|
||||
MUST be absolute. The project root is {{project_dir}}. Prefer paths
|
||||
like "{{project_dir}}/src/foo.rs" over "src/foo.rs". The implementer
|
||||
uses these paths directly with fs_write and fs_patch tools, which
|
||||
resolve relative paths against the loki invocation directory (NOT
|
||||
the project dir). Empty arrays are fine if no files in that category.
|
||||
|
||||
`risks` is a list of short strings. Anything that could derail the
|
||||
implementation: unknown dependencies, brittle tests, blast radius,
|
||||
etc. Empty list is fine.
|
||||
|
||||
Project directory: {{project_dir}}
|
||||
prompt: "{{initial_prompt}}"
|
||||
tools: []
|
||||
output_schema:
|
||||
type: object
|
||||
properties:
|
||||
plan_summary:
|
||||
type: string
|
||||
description: 1-3 sentences summarizing what will be done
|
||||
files_to_modify:
|
||||
type: array
|
||||
items: {type: string}
|
||||
files_to_create:
|
||||
type: array
|
||||
items: {type: string}
|
||||
complexity_score:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 10
|
||||
risks:
|
||||
type: array
|
||||
items: {type: string}
|
||||
required: [plan_summary, files_to_modify, files_to_create, complexity_score, risks]
|
||||
state_updates:
|
||||
last_node_output: "{{output}}"
|
||||
fallback: end_failure
|
||||
next: route_complexity
|
||||
|
||||
route_complexity:
|
||||
id: route_complexity
|
||||
type: script
|
||||
description: Route to approval gate for complex plans; skip otherwise
|
||||
script: scripts/route_complexity.sh
|
||||
timeout: 5
|
||||
fallback: implement
|
||||
|
||||
gate_approval:
|
||||
id: gate_approval
|
||||
type: approval
|
||||
description: Optional human checkpoint for high-complexity plans
|
||||
question: |
|
||||
## Plan
|
||||
{{plan_summary}}
|
||||
|
||||
## Files to modify
|
||||
{{files_to_modify}}
|
||||
|
||||
## Files to create
|
||||
{{files_to_create}}
|
||||
|
||||
## Risks
|
||||
{{risks}}
|
||||
|
||||
Complexity: {{complexity_score}}/10
|
||||
|
||||
Approve this plan?
|
||||
options:
|
||||
- "yes"
|
||||
- "no"
|
||||
routes:
|
||||
"yes": implement
|
||||
"no": end_rejected
|
||||
on_other: end_rejected
|
||||
|
||||
implement:
|
||||
id: implement
|
||||
type: llm
|
||||
description: Write code via fs tools. Bounded tool-call loop.
|
||||
instructions: |
|
||||
You are a senior engineer. Implement the plan by writing code via
|
||||
tools. Follow existing patterns in the codebase.
|
||||
|
||||
## Writing code
|
||||
|
||||
1. Use `fs_patch` for surgical edits to existing files.
|
||||
2. Use `fs_write` for new files or full rewrites.
|
||||
3. NEVER output code to chat. Always use tools.
|
||||
4. ALWAYS pass ABSOLUTE paths to fs_write and fs_patch. Relative
|
||||
paths resolve against the loki invocation directory (not the
|
||||
project dir), which is rarely what you want. The project root
|
||||
is {{project_dir}}.
|
||||
|
||||
## File reading
|
||||
|
||||
1. Use `execute_command` to grep/find:
|
||||
`execute_command --command "grep -rn 'fn handle_request' --include='*.rs' ."`
|
||||
`execute_command --command "find . -name '*.rs' -not -path '*/target/*'"`
|
||||
2. Read only what you need:
|
||||
`fs_cat --path "{{project_dir}}/src/main.rs" --offset 50 --limit 30`
|
||||
3. Never read entire large files. Use offset/limit.
|
||||
4. Use `fs_ls` to list directory contents.
|
||||
|
||||
## Pattern matching
|
||||
|
||||
Before writing ANY file:
|
||||
1. Find a similar existing file (grep, then read).
|
||||
2. Match its style: imports, naming, structure, error handling.
|
||||
3. Follow the same patterns exactly. Do not invent new ones.
|
||||
|
||||
## Fix loop
|
||||
|
||||
If the "Fix loop status" section in your user prompt is non-empty,
|
||||
the previous attempt failed verification. Read the error, identify
|
||||
the minimal fix, apply it. Do not refactor while fixing.
|
||||
|
||||
## Rules
|
||||
|
||||
1. Match existing patterns - read examples first.
|
||||
2. Minimal changes - implement only what's asked.
|
||||
3. Never suppress errors (`as any`, `@ts-ignore`, `#[allow(...)]`
|
||||
on unfamiliar lints, etc.).
|
||||
4. No dead code, no commented-out blocks, no premature abstractions.
|
||||
5. End your turn when editing is done. The graph runs verification next.
|
||||
|
||||
Project directory: {{project_dir}}
|
||||
prompt: |
|
||||
## Plan summary
|
||||
{{plan_summary}}
|
||||
|
||||
## Files involved
|
||||
- Modify: {{files_to_modify}}
|
||||
- Create: {{files_to_create}}
|
||||
|
||||
## Original request from the orchestrator
|
||||
{{initial_prompt}}
|
||||
|
||||
## Fix loop status
|
||||
{{fix_instructions}}
|
||||
tools:
|
||||
- fs_cat
|
||||
- fs_ls
|
||||
- fs_write
|
||||
- fs_patch
|
||||
- execute_command
|
||||
max_iterations: 30
|
||||
state_updates:
|
||||
last_node_output: "{{output}}"
|
||||
fallback: end_failure
|
||||
next: verify_build
|
||||
|
||||
verify_build:
|
||||
id: verify_build
|
||||
type: script
|
||||
description: Run the project's check/build command. Routes to verify_tests on success, fix_loop_gate on failure.
|
||||
script: scripts/verify_build.sh
|
||||
timeout: 300
|
||||
fallback: fix_loop_gate
|
||||
|
||||
verify_tests:
|
||||
id: verify_tests
|
||||
type: script
|
||||
description: Run the project's test command. Routes to end_success on pass, fix_loop_gate on failure.
|
||||
script: scripts/verify_tests.sh
|
||||
timeout: 600
|
||||
fallback: fix_loop_gate
|
||||
|
||||
fix_loop_gate:
|
||||
id: fix_loop_gate
|
||||
type: script
|
||||
description: Budget gate. Loops back to implement with fix_instructions populated, or terminates as end_failure.
|
||||
script: scripts/fix_loop_gate.sh
|
||||
timeout: 5
|
||||
fallback: end_failure
|
||||
|
||||
end_success:
|
||||
id: end_success
|
||||
type: end
|
||||
output: |
|
||||
CODER_COMPLETE
|
||||
Plan: {{plan_summary}}
|
||||
Files modified: {{files_to_modify}}
|
||||
Files created: {{files_to_create}}
|
||||
Build: passed
|
||||
Tests: passed
|
||||
|
||||
end_rejected:
|
||||
id: end_rejected
|
||||
type: end
|
||||
output: |
|
||||
CODER_REJECTED
|
||||
Plan was rejected at the approval gate.
|
||||
Plan: {{plan_summary}}
|
||||
|
||||
end_failure:
|
||||
id: end_failure
|
||||
type: end
|
||||
output: |
|
||||
CODER_FAILED
|
||||
Plan: {{plan_summary}}
|
||||
Attempts: {{fix_attempts}}/{{max_fix_attempts}}
|
||||
|
||||
Last node output:
|
||||
{{last_node_output}}
|
||||
|
||||
Last build output:
|
||||
{{build_output}}
|
||||
|
||||
Last tests output:
|
||||
{{tests_output}}
|
||||
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
# Budget gate for the implement/verify fix loop.
#
# Loads the graph state JSON ($GRAPH_STATE_FILE, else $GRAPH_STATE, else an
# empty object) and prints one compact JSON object on stdout:
#   - fix_attempts >= max_fix_attempts: echoes fix_attempts back and routes to
#     end_failure;
#   - otherwise: increments fix_attempts, fills fix_instructions from the
#     failing step's captured output, and routes back to implement.
set -euo pipefail

state='{}'
if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
fi

# Print the raw (-r) result of the jq filter in $1 applied to the state.
state_get() {
  jq -r "$1" <<<"$state"
}

attempts_used=$(state_get '.fix_attempts // 0')
attempt_limit=$(state_get '.max_fix_attempts // 3')
# A missing flag is mapped to "true" explicitly; jq's `//` would also replace
# an explicit false, which must survive here.
build_passed=$(state_get '.build_ok | if . == null then "true" else (. | tostring) end')
tests_passed=$(state_get '.tests_ok | if . == null then "true" else (. | tostring) end')
build_log=$(state_get '.build_output // ""')
tests_log=$(state_get '.tests_output // ""')

# Budget spent: stop looping.
if (( attempts_used >= attempt_limit )); then
  jq -nc --argjson n "$attempts_used" '{"fix_attempts": $n, "_next": "end_failure"}'
  exit 0
fi

upcoming=$(( attempts_used + 1 ))

# Build failures take precedence over test failures; the first branch covers
# the case where neither flag records a failure.
if [[ "$build_passed" == "true" && "$tests_passed" == "true" ]]; then
  printf -v message '## Fix loop status (attempt %d of %d)\n\nfix_loop_gate was reached but no failure was detected in state. Re-run the verification step.' \
    "$upcoming" "$attempt_limit"
elif [[ "$build_passed" != "true" ]]; then
  printf -v message '## Fix loop status (attempt %d of %d)\n\nThe previous attempt failed the build.\n\nBuild output:\n```\n%s\n```\n\nIdentify the minimal fix and apply it. Do not refactor.' \
    "$upcoming" "$attempt_limit" "$build_log"
else
  printf -v message '## Fix loop status (attempt %d of %d)\n\nBuild passed but tests failed.\n\nTest output:\n```\n%s\n```\n\nIdentify the minimal fix and apply it. Do not refactor.' \
    "$upcoming" "$attempt_limit" "$tests_log"
fi

jq -nc \
  --argjson n "$upcoming" \
  --arg fi "$message" \
  '{"fix_attempts": $n, "fix_instructions": $fi, "_next": "implement"}'
|
||||
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
# Resolve the project directory once and seed it into graph state.
#
# Takes $LLM_AGENT_VAR_PROJECT_DIR (default ".") and, when it can be entered,
# replaces it with the absolute path reported by pwd; otherwise the value is
# passed through unchanged. Prints {"project_dir": ..., "_next":
# "analyze_request"} as compact JSON on stdout.
set -euo pipefail

requested_dir="${LLM_AGENT_VAR_PROJECT_DIR:-.}"

if ! absolute_dir=$(cd "$requested_dir" 2>/dev/null && pwd); then
  # Directory cannot be entered: keep whatever the caller supplied.
  absolute_dir="$requested_dir"
fi

jq -nc --arg pd "$absolute_dir" '{"project_dir": $pd, "_next": "analyze_request"}'
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash
# Route a plan either to the human approval gate or straight to implementation.
#
# Loads the graph state JSON ($GRAPH_STATE_FILE, else $GRAPH_STATE, else an
# empty object) and prints {"_next": ...} as compact JSON on stdout:
#   - CODER_AUTOAPPROVE=1      -> implement (gate skipped)
#   - complexity_score >= 7    -> gate_approval
#   - anything else            -> implement
set -euo pipefail

if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
else
  state='{}'
fi

# The threshold comparison is done inside jq instead of a bash (( ))
# expression: arithmetic evaluation aborts on non-integer text such as "7.5"
# and treats arbitrary strings as expressions, and an aborted script would
# bypass the approval gate. Numbers compare as-is, numeric strings are
# converted, and a missing or non-numeric score counts as 0.
route=$(jq -r '
  (.complexity_score // 0)
  | if type == "number" then .
    elif type == "string" then (tonumber? // 0)
    else 0
    end
  | if . >= 7 then "gate_approval" else "implement" end
' <<<"$state")

# Explicit opt-out of the human checkpoint.
if [[ "${CODER_AUTOAPPROVE:-0}" == "1" ]]; then
  route="implement"
fi

jq -nc --arg next "$route" '{"_next": $next}'
|
||||
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash
# Run the project's check/build command and report the outcome as graph state.
#
# State source: $GRAPH_STATE_FILE if set, else $GRAPH_STATE, else "{}".
#               Only `.project_dir` is read (default ".").
# Command:      $BUILD_CMD if set; otherwise the "check" entry (falling back
#               to "build") of the JSON printed by detect_project.
# Stdout:       one compact JSON object with build_ok, build_output and _next
#               (verify_tests on success or when no command is available,
#               fix_loop_gate on failure).
#
# errexit (-e) is not enabled: the command's exit status is captured
# explicitly below and turned into JSON.
set -uo pipefail

# shellcheck disable=SC1091
source "$(dirname "$0")/../../.shared/utils.sh"

if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
else
  state='{}'
fi

project_dir=$(echo "$state" | jq -r '.project_dir // "."')

if [[ -n "${BUILD_CMD:-}" ]]; then
  cmd="$BUILD_CMD"
else
  project_info=$(detect_project "$project_dir")
  cmd=$(echo "$project_info" | jq -r '.check // .build // ""')
fi

# Nothing to run counts as a pass so the graph can continue to the tests.
if [[ -z "$cmd" || "$cmd" == "null" ]]; then
  jq -nc '{
    "build_ok": true,
    "build_output": "(no build/check command available for this project type)",
    "_next": "verify_tests"
  }'
  exit 0
fi

exit_code=0
# The redirect wraps the whole group so that a failing `cd` (missing or
# unreadable project_dir) lands in the captured output instead of producing a
# failure report with no explanation.
output=$( { cd "$project_dir" && eval "$cmd"; } 2>&1 ) || exit_code=$?

if (( exit_code == 0 )); then
  jq -nc \
    --arg out "$output" \
    --arg cmd "$cmd" \
    '{
      "build_ok": true,
      "build_output": ("Ran: " + $cmd + "\n\n" + $out),
      "_next": "verify_tests"
    }'
else
  jq -nc \
    --arg out "$output" \
    --arg cmd "$cmd" \
    --argjson rc "$exit_code" \
    '{
      "build_ok": false,
      "build_output": ("Ran: " + $cmd + "\nExit code: " + ($rc | tostring) + "\n\n" + $out),
      "_next": "fix_loop_gate"
    }'
fi
|
||||
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash
# Run the project's test command and report the outcome as graph state.
#
# State source: $GRAPH_STATE_FILE if set, else $GRAPH_STATE, else "{}".
#               Only `.project_dir` is read (default ".").
# Command:      $TEST_CMD if set; otherwise the "test" entry of the JSON
#               printed by detect_project.
# Stdout:       one compact JSON object with tests_ok, tests_output and _next
#               (end_success on pass or when no command is available,
#               fix_loop_gate on failure).
#
# errexit (-e) is not enabled: the command's exit status is captured
# explicitly below and turned into JSON.
set -uo pipefail

# shellcheck disable=SC1091
source "$(dirname "$0")/../../.shared/utils.sh"

if [[ -n "${GRAPH_STATE_FILE:-}" ]]; then
  state=$(cat "$GRAPH_STATE_FILE")
elif [[ -n "${GRAPH_STATE:-}" ]]; then
  state="$GRAPH_STATE"
else
  state='{}'
fi

project_dir=$(echo "$state" | jq -r '.project_dir // "."')

if [[ -n "${TEST_CMD:-}" ]]; then
  cmd="$TEST_CMD"
else
  project_info=$(detect_project "$project_dir")
  cmd=$(echo "$project_info" | jq -r '.test // ""')
fi

# Nothing to run counts as a pass so the graph can finish successfully.
if [[ -z "$cmd" || "$cmd" == "null" ]]; then
  jq -nc '{
    "tests_ok": true,
    "tests_output": "(no test command available for this project type)",
    "_next": "end_success"
  }'
  exit 0
fi

exit_code=0
# The redirect wraps the whole group so that a failing `cd` (missing or
# unreadable project_dir) lands in the captured output instead of producing a
# failure report with no explanation.
output=$( { cd "$project_dir" && eval "$cmd"; } 2>&1 ) || exit_code=$?

if (( exit_code == 0 )); then
  jq -nc \
    --arg out "$output" \
    --arg cmd "$cmd" \
    '{
      "tests_ok": true,
      "tests_output": ("Ran: " + $cmd + "\n\n" + $out),
      "_next": "end_success"
    }'
else
  jq -nc \
    --arg out "$output" \
    --arg cmd "$cmd" \
    --argjson rc "$exit_code" \
    '{
      "tests_ok": false,
      "tests_output": ("Ran: " + $cmd + "\nExit code: " + ($rc | tostring) + "\n\n" + $out),
      "_next": "fix_loop_gate"
    }'
fi
|
||||
@@ -14,99 +14,6 @@ _project_dir() {
|
||||
(cd "${dir}" 2>/dev/null && pwd) || echo "${dir}"
|
||||
}
|
||||
|
||||
# Turn a tool-supplied path into one relative to the project root.
# An absolute path has a leading "<project root>/" removed when present;
# a leading "./" is then dropped. The result is printed on stdout.
# Usage: local rel_path; rel_path=$(_normalize_path "/abs/or/rel/path")
_normalize_path() {
  local candidate="$1"
  local root
  root=$(_project_dir)

  case "${candidate}" in
    /*) candidate="${candidate#"${root}"/}" ;;
  esac

  echo "${candidate#./}"
}
|
||||
|
||||
# @cmd Read a file's contents before modifying
# @option --path! Path to the file (relative to project root)
read_file() {
  # Appends a "Reading: <path>" header, a blank line and the file contents to
  # $LLM_OUTPUT; a missing file yields a warning there and a zero status.
  local rel_path root
  # shellcheck disable=SC2154
  rel_path=$(_normalize_path "${argc_path}")
  root=$(_project_dir)
  local abs_path="${root}/${rel_path}"

  if [[ -f "${abs_path}" ]]; then
    {
      info "Reading: ${rel_path}"
      echo ""
      cat "${abs_path}"
    } >> "$LLM_OUTPUT"
  else
    warn "File not found: ${rel_path}" >> "$LLM_OUTPUT"
    return 0
  fi
}
|
||||
|
||||
# @cmd Write complete file contents
# @option --path! Path for the file (relative to project root)
# @option --content! Complete file contents to write
write_file() {
  # Creates missing parent directories, overwrites the target with the content
  # exactly as given (no newline is appended), and logs "Wrote: <path>" to
  # $LLM_OUTPUT.
  local rel_path
  rel_path=$(_normalize_path "${argc_path}")
  # shellcheck disable=SC2154
  local body="${argc_content}"
  local root
  root=$(_project_dir)

  local target="${root}/${rel_path}"
  local parent
  parent=$(dirname "${target}")

  mkdir -p "${parent}"
  printf '%s' "${body}" > "${target}"

  green "Wrote: ${rel_path}" >> "$LLM_OUTPUT"
}
|
||||
|
||||
# @cmd Find files similar to a given path (for pattern matching)
# @option --path! Path to find similar files for
find_similar_files() {
  # Lists up to three files sharing the given path's extension: first siblings
  # in the same directory (excluding the file itself), then anything under
  # <project root>/src outside target/. Names containing "test" or "spec" are
  # skipped. Output is appended to $LLM_OUTPUT with the project root stripped.
  local rel_path root
  rel_path=$(_normalize_path "${argc_path}")
  root=$(_project_dir)

  local suffix="${rel_path##*.}"
  local parent self
  parent=$(dirname "${rel_path}")
  self=$(basename "${rel_path}")

  {
    info "Similar files to: ${rel_path}"
    echo ""
  } >> "$LLM_OUTPUT"

  # Exclusions shared by both searches.
  local -a not_tests=( ! -name "*test*" ! -name "*spec*" )

  local matches
  matches=$(find "${root}/${parent}" -maxdepth 1 -type f -name "*.${suffix}" \
    ! -name "${self}" \
    "${not_tests[@]}" \
    2>/dev/null | sed "s|^${root}/||" | head -3)

  # Fallback: widen the search to the whole src tree.
  if [[ -z "${matches}" ]]; then
    matches=$(find "${root}/src" -type f -name "*.${suffix}" \
      "${not_tests[@]}" \
      -not -path '*/target/*' \
      2>/dev/null | sed "s|^${root}/||" | head -3)
  fi

  if [[ -z "${matches}" ]]; then
    warn "No similar files found" >> "$LLM_OUTPUT"
  else
    echo "${matches}" >> "$LLM_OUTPUT"
  fi
}
|
||||
|
||||
# @cmd Verify the project builds successfully
|
||||
verify_build() {
|
||||
local project_dir
|
||||
@@ -189,28 +96,3 @@ get_project_structure() {
|
||||
} >> "$LLM_OUTPUT"
|
||||
}
|
||||
|
||||
# @cmd Search for content in the codebase
# @option --pattern! Pattern to search for
search_code() {
  # Recursively greps the project directory for the pattern and appends up to
  # 20 "path:line:text" hits to $LLM_OUTPUT, with the project root stripped
  # and lines mentioning /target/, /node_modules/ or /.git/ filtered out.
  # A warning is written instead when nothing matches.
  # shellcheck disable=SC2154
  local pattern="${argc_pattern}"
  local project_dir
  project_dir=$(_project_dir)

  info "Searching: ${pattern}" >> "$LLM_OUTPUT"
  echo "" >> "$LLM_OUTPUT"

  # -e marks the next argument as the pattern, so a pattern beginning with "-"
  # (for example "->" or "--flag") is searched for rather than parsed as a
  # grep option; "--" protects the directory argument the same way.
  # "|| true" keeps a no-match exit status from propagating.
  local results
  results=$(grep -rn -e "${pattern}" -- "${project_dir}" 2>/dev/null | \
    grep -v '/target/' | \
    grep -v '/node_modules/' | \
    grep -v '/.git/' | \
    sed "s|^${project_dir}/||" | \
    head -20) || true

  if [[ -n "${results}" ]]; then
    echo "${results}" >> "$LLM_OUTPUT"
  else
    warn "No matches" >> "$LLM_OUTPUT"
  fi
}
|
||||
@@ -0,0 +1,274 @@
|
||||
# deep-research
|
||||
|
||||
A deep web research agent, built as a Loki graph agent. It plans an
|
||||
investigation, decomposes it into sub-questions researched in
|
||||
parallel, grounds the work in a local knowledge corpus, vets the
|
||||
credibility of cited sources, runs a reflexion self-critique loop to
|
||||
revise weak findings, delegates the final write-up to a focused
|
||||
sub-agent, checks that the cited sources are reachable, and gates the
|
||||
result behind human approval.
|
||||
|
||||
Unlike a regular agent (which takes a goal and improvises the steps),
|
||||
this agent runs a fixed graph: every request goes through the same
|
||||
`plan -> parallel research -> vet -> critique -> synthesize -> verify -> approve`
|
||||
pipeline.
|
||||
|
||||
This agent is also the **canonical reference for the Loki graph
|
||||
system**: it exercises every node type (`script`, `llm`, `rag`, `map`,
|
||||
`agent`, `input`, `approval`, `end`) and both static fan-out and
|
||||
dynamic `map` fan-out. If you are learning how to build a graph
|
||||
agent, this is the file to read alongside the
|
||||
[Graph-Agents wiki](https://github.com/Dark-Alex-17/loki/wiki/Graph-Agents).
|
||||
|
||||
## Workflow
|
||||
|
||||
17 nodes. `->` is the static route; a script node can also route
|
||||
dynamically via `_next`. The `▶▶` line is a parallel super-step —
|
||||
those branches run concurrently:
|
||||
|
||||
```
|
||||
parse_request (script) -> bootstrap_research (or -> ask_topic if no topic)
|
||||
ask_topic (input) -> bootstrap_research
|
||||
bootstrap_research (script) -> [plan, knowledge_lookup] ▶▶ parallel
|
||||
plan (llm + output_schema) -> research_each_question
|
||||
knowledge_lookup (rag) -> research_each_question
|
||||
research_each_question (map) -> combine_findings (spawns one branch per question)
|
||||
└─ research_one_question (llm) (atomic; runs N×, joins at map)
|
||||
combine_findings (script) -> vet_sources
|
||||
vet_sources (llm + custom tool) -> critique
|
||||
critique (llm) -> reflexion_gate
|
||||
reflexion_gate (script) -> synthesize (or -> research_each_question: reflexion loop)
|
||||
synthesize (agent: report-writer) -> verify_sources
|
||||
verify_sources (script) -> approve
|
||||
approve (approval) -> end_accepted ("accept")
|
||||
-> end_rejected ("reject")
|
||||
-> incorporate_feedback (any free-form answer)
|
||||
incorporate_feedback (script) -> research_each_question (the human-feedback loop)
|
||||
```
|
||||
|
||||
### Node-type breakdown
|
||||
|
||||
| Type | Nodes |
|
||||
|---|---|
|
||||
| `script` (Python) | `parse_request`, `bootstrap_research`, `combine_findings`, `reflexion_gate`, `verify_sources`, `incorporate_feedback` |
|
||||
| `llm` (tools: `[]`) | `plan`, `critique` |
|
||||
| `llm` (with tool whitelist) | `research_one_question`, `vet_sources` |
|
||||
| `rag` | `knowledge_lookup` — local corpus retrieval |
|
||||
| `map` | `research_each_question` — dynamic fan-out per sub-question |
|
||||
| `agent` | `synthesize` — spawns the `report-writer` sub-agent |
|
||||
| `input` | `ask_topic` |
|
||||
| `approval` | `approve` |
|
||||
| `end` | `end_accepted`, `end_rejected` |
|
||||
|
||||
## Parallel execution
|
||||
|
||||
The graph has two parallel super-steps where Loki's BSP scheduler runs
|
||||
branches concurrently.
|
||||
|
||||
**1. Context loading (`plan` ‖ `knowledge_lookup`)** — after
|
||||
`bootstrap_research`, the LLM planner (which decomposes the topic into
|
||||
sub-questions) and the RAG retrieval over the local `knowledge/`
|
||||
corpus run side by side. They write disjoint state keys (`plan` writes
|
||||
`research_plan` and `questions`; `knowledge_lookup` writes
|
||||
`local_context` and `local_sources`) so no reducer is needed.
|
||||
|
||||
**2. Per-question research (`research_each_question` map)** — the
|
||||
plan emits a `questions` array (3-5 entries, enforced by its
|
||||
`output_schema`). The `map` node spawns one parallel branch per
|
||||
question (`max_concurrency: 3`). Each branch is an isolated
|
||||
`research_one_question` LLM invocation with web tools, instructed to
|
||||
investigate exactly its assigned question. Outputs collect into
|
||||
`question_findings` in input order, then `combine_findings` joins
|
||||
them into a single `findings` Markdown document for downstream nodes.
|
||||
|
||||
`settings.max_concurrency: 4` is the graph-wide cap; the per-`map`
|
||||
override (`max_concurrency: 3` on `research_each_question`) is
|
||||
deliberately lower so the web-research branches stay under that cap
|
||||
and remain gentle on rate-limited providers.
|
||||
|
||||
## Local knowledge corpus
|
||||
|
||||
`knowledge_lookup` is a `rag` node — it runs hybrid (vector + keyword)
|
||||
retrieval over every file in `knowledge/`. The directory ships with a
|
||||
small `research-style-notes.md` so the RAG node has something to
|
||||
retrieve against on a clean install; drop your own Markdown notes,
|
||||
PDFs, or text files into `knowledge/` to bias the research toward
|
||||
your local context.
|
||||
|
||||
The knowledge base is built once, at agent-load time, into
|
||||
`~/.config/loki/agents/deep-research/knowledge_lookup.yaml`. Because
|
||||
the node fully specifies its build config (`embedding_model`,
|
||||
`chunk_size`, `chunk_overlap`), the build is non-interactive. Delete
|
||||
that cached file after adding or changing knowledge to force a
|
||||
rebuild.
|
||||
|
||||
## Sub-agent: report-writer
|
||||
|
||||
The `synthesize` node is an `agent` node that spawns the
|
||||
`report-writer` sub-agent (`assets/agents/report-writer/`). This is
|
||||
the agent-as-tool pattern: the orchestrating graph delegates the
|
||||
writing phase to a focused sub-agent dedicated to coherent prose,
|
||||
while the research phase uses different (typically cheaper) LLM nodes
|
||||
for fast-and-many-question investigation.
|
||||
|
||||
The `report-writer` sub-agent has no tools — it cannot access the
|
||||
web, cannot search, and cannot invent facts. It reads only the
|
||||
findings it is given and produces a final Markdown report preserving
|
||||
every inline citation. See `assets/agents/report-writer/README.md`
|
||||
for details.
|
||||
|
||||
## Tools and tool scoping
|
||||
|
||||
This agent demonstrates Loki's three tool sources and how an `llm`
|
||||
node's `tools:` whitelist scopes them per node.
|
||||
|
||||
The agent's full tool universe, declared in `graph.yaml`:
|
||||
|
||||
- **Global tools** (`global_tools`): `web_search_loki`,
|
||||
`fetch_url_via_curl`, `search_arxiv` - Loki's built-in tool scripts.
|
||||
- **MCP server** (`mcp_servers`): `ddg-search` - a DuckDuckGo web
|
||||
search MCP server. Referenced in a whitelist as `mcp:ddg-search`.
|
||||
- **Custom agent tool** (`tools.sh`): `classify_source` - a
|
||||
deterministic source-credibility classifier shipped with this agent.
|
||||
|
||||
No node receives all of these. Each `llm` node's `tools:` whitelist
|
||||
narrows the universe to exactly what that step needs:
|
||||
|
||||
| Node | `tools:` whitelist | Draws from |
|
||||
|---|---|---|
|
||||
| `plan`, `critique` | `[]` | nothing - pure reasoning |
|
||||
| `research_one_question` | `web_search_loki`, `fetch_url_via_curl`, `search_arxiv`, `mcp:ddg-search` | global tools + MCP |
|
||||
| `vet_sources` | `classify_source` | the custom tool only |
|
||||
|
||||
`research_one_question` (each parallel branch of the map) can search
|
||||
and fetch but cannot classify sources; `vet_sources` can classify
|
||||
sources but cannot touch the web. That separation is the point of the
|
||||
`tools:` whitelist: a node gets only the tools its job calls for,
|
||||
never the agent's full set.
|
||||
|
||||
The `classify_source` custom tool (`tools.sh`) takes a URL and returns
|
||||
a credibility tier (government, academic, preprint, organization,
|
||||
unverified) derived from the host and top-level domain. It is
|
||||
deterministic - exactly the kind of logic a tool should own rather than
|
||||
the LLM guessing.
|
||||
|
||||
Web search may require API-key configuration; see the
|
||||
[Tools](https://github.com/Dark-Alex-17/loki/wiki/Tools) docs.
|
||||
`fetch_url_via_curl`, `search_arxiv`, and `classify_source` work
|
||||
without a key.
|
||||
|
||||
## Setup
|
||||
|
||||
`research_one_question` (each parallel branch of the `map`) uses the
|
||||
`ddg-search` MCP server via `mcp:ddg-search`. It is one of Loki's
|
||||
default MCP servers; make sure it is registered in
|
||||
`~/.config/loki/mcp.json` (run `loki --install mcp_config` to restore
|
||||
the default template if it is missing). If `ddg-search` is unavailable,
|
||||
the branches still have their global web-search tools to fall back on.
|
||||
|
||||
The `synthesize` node spawns the `report-writer` sub-agent. Both
|
||||
agents ship with `loki agents install`; if you install one manually,
|
||||
install both so the agent reference resolves.
|
||||
|
||||
## Reflexion
|
||||
|
||||
The agent has two loops, both built with script nodes that route via
|
||||
`_next`. The engine allows back-edges at runtime; the validator only
|
||||
rejects cycles built from static `next` / `routes` edges, so script
|
||||
`_next` loops are always allowed.
|
||||
|
||||
**Automated reflexion loop.** After the parallel research map and
|
||||
`vet_sources`, the `critique` node reviews the merged findings
|
||||
against the research plan and the source credibility assessment, and
|
||||
emits `VERDICT: PASS` or `VERDICT: REVISE` with specific feedback.
|
||||
`reflexion_gate.py` then:
|
||||
|
||||
- `PASS` -> continue to `synthesize`.
|
||||
- `REVISE`, budget remaining -> loop back to `research_each_question`,
|
||||
with the critique injected as `research_feedback` so every parallel
|
||||
branch sees it on the retry.
|
||||
- `REVISE`, budget spent -> continue to `synthesize` anyway (the human
|
||||
approval step is the final backstop).
|
||||
|
||||
The budget is `MAX_REFLEXION_REVISIONS` in `reflexion_gate.py`
|
||||
(default 2, so the research map runs at most 3 times per pass).
|
||||
|
||||
**Human-feedback loop.** At `approve` the user answers `accept`,
|
||||
`reject`, or types their own feedback. A free-form answer routes via
|
||||
the approval node's `on_other` to `incorporate_feedback.py`, which
|
||||
folds that text into `research_feedback` and loops back to
|
||||
`research_each_question` for another parallel pass.
|
||||
|
||||
`settings.max_loop_iterations` (40) is the engine's infinite-loop
|
||||
backstop: it caps the total visits to any single node.
|
||||
|
||||
## Running
|
||||
|
||||
```sh
|
||||
loki agents install # ships deep-research
|
||||
loki -a deep-research "How does HTTP/3 differ from HTTP/2?"
|
||||
loki -a deep-research "Recent advances in solid-state batteries"
|
||||
loki -a deep-research # no prompt -> triggers ask_topic
|
||||
```
|
||||
|
||||
## Anti-hallucination
|
||||
|
||||
- `research_one_question` (each map branch) is instructed to back
|
||||
every claim with a real retrieved source and never to fabricate
|
||||
URLs, titles, or DOIs.
|
||||
- `vet_sources` classifies every cited source so weak sources are
|
||||
visible to the critique step.
|
||||
- `critique` independently reviews the merged findings and sends weak
|
||||
or uncited work back for another parallel research pass.
|
||||
- `synthesize` (the `report-writer` sub-agent) is grounded: it may use
|
||||
only the gathered findings and must keep each claim's inline source.
|
||||
It has no tools and cannot browse the web.
|
||||
- `verify_sources` probes every cited URL / DOI with an HTTP HEAD
|
||||
request and reports which are unreachable, so the human reviewer
|
||||
sees broken citations before approving.
|
||||
|
||||
## Customizing
|
||||
|
||||
- **Loop budget.** `MAX_REFLEXION_REVISIONS` in `reflexion_gate.py`.
|
||||
- **Map concurrency.** The `research_each_question` node's
|
||||
`max_concurrency: 3` caps simultaneous web-research branches.
|
||||
Raise to investigate more questions in parallel; lower to be gentle
|
||||
on rate-limited providers.
|
||||
- **Per-node model.** Add `model: anthropic:...` to any `llm` node.
|
||||
Cheap models work well for `plan` / `critique` / `vet_sources`; the
|
||||
heavy intelligence is needed in `research_one_question` and the
|
||||
`report-writer` sub-agent.
|
||||
- **Tool scope.** Narrow the `research_one_question` node's `tools:`
|
||||
list to constrain where each branch looks (for example, drop
|
||||
`web_search_loki` and `mcp:ddg-search` to force arXiv-only
|
||||
research).
|
||||
- **Local knowledge.** Drop files into `knowledge/` to bias every
|
||||
research branch toward your local context (see the *Local
|
||||
knowledge corpus* section above).
|
||||
- **Different writer.** Replace `agent: report-writer` on the
|
||||
`synthesize` node with the name of any other agent. The
|
||||
orchestrator does not care what kind of agent the writer is.
|
||||
- **Skip approval.** Point every `approve` outcome (both `routes` entries and `on_other`) at `end_accepted`,
|
||||
or wire `verify_sources` straight to an `end` node.
|
||||
|
||||
## Files
|
||||
|
||||
```
|
||||
assets/agents/deep-research/
|
||||
graph.yaml - agent config + 17-node workflow
|
||||
tools.sh - classify_source custom tool
|
||||
README.md - this file
|
||||
knowledge/
|
||||
README.md - corpus-format notes
|
||||
research-style-notes.md - starter knowledge file (replace with your notes)
|
||||
scripts/
|
||||
parse_request.py - _next: bootstrap_research, or ask_topic if no topic
|
||||
bootstrap_research.py - fan-out source: next [plan, knowledge_lookup]
|
||||
combine_findings.py - joins map output (question_findings) into findings
|
||||
reflexion_gate.py - _next: research_each_question (revise) or synthesize
|
||||
verify_sources.py - HTTP HEAD on cited URLs / DOIs
|
||||
incorporate_feedback.py - _next: research_each_question, with user feedback
|
||||
```
|
||||
|
||||
See also `assets/agents/report-writer/` — the sub-agent the
|
||||
`synthesize` node spawns.
|
||||
@@ -0,0 +1,293 @@
|
||||
name: deep-research
|
||||
description: |
|
||||
Deep web research workflow. Plans an investigation, decomposes it
|
||||
into sub-questions researched in parallel, grounds the work in a
|
||||
local knowledge corpus, vets the credibility of cited sources, runs
|
||||
a reflexion self-critique loop to revise weak or incomplete findings,
|
||||
delegates the final write-up to a focused sub-agent, checks that the
|
||||
cited sources are reachable, and gates the result behind human
|
||||
approval. A reviewer's free-form feedback at the approval step feeds
|
||||
back into another research pass.
|
||||
|
||||
This is the canonical Loki graph-agent reference: it exercises every
|
||||
node type (script, llm, rag, map, agent, input, approval, end) and
|
||||
both static fan-out and dynamic map fan-out.
|
||||
|
||||
version: "1.0"
|
||||
|
||||
temperature: 0.0
|
||||
|
||||
global_tools:
|
||||
- web_search_loki.sh
|
||||
- fetch_url_via_curl.sh
|
||||
- search_arxiv.sh
|
||||
|
||||
mcp_servers:
|
||||
- ddg-search
|
||||
|
||||
conversation_starters:
|
||||
- "How does HTTP/3 differ from HTTP/2?"
|
||||
- "Summarize recent advances in solid-state battery chemistry"
|
||||
|
||||
settings:
|
||||
max_loop_iterations: 40
|
||||
log_state_snapshots: false
|
||||
validate_before_run: true
|
||||
max_concurrency: 4
|
||||
|
||||
initial_state:
|
||||
research_feedback: ""
|
||||
research_attempts: 0
|
||||
local_context: ""
|
||||
local_sources: ""
|
||||
|
||||
start: parse_request
|
||||
|
||||
nodes:
|
||||
|
||||
parse_request:
|
||||
id: parse_request
|
||||
type: script
|
||||
script: scripts/parse_request.py
|
||||
next: bootstrap_research
|
||||
|
||||
ask_topic:
|
||||
id: ask_topic
|
||||
type: input
|
||||
question: "What would you like me to research?"
|
||||
validation: "len(input) > 0"
|
||||
state_updates:
|
||||
topic: "{{input}}"
|
||||
next: bootstrap_research
|
||||
|
||||
bootstrap_research:
|
||||
id: bootstrap_research
|
||||
type: script
|
||||
script: scripts/bootstrap_research.py
|
||||
next: [plan, knowledge_lookup]
|
||||
|
||||
plan:
|
||||
id: plan
|
||||
type: llm
|
||||
instructions: |
|
||||
You are a research planner. Given a topic, produce a focused
|
||||
research plan and decompose it into 3-5 specific sub-questions
|
||||
that can each be researched independently in parallel.
|
||||
|
||||
The plan is a short narrative naming the key questions and the
|
||||
kinds of sources that would be authoritative. The sub-questions
|
||||
are precise, self-contained queries (each one is sent on its own
|
||||
to a separate research worker, so they must be answerable
|
||||
without each other's context).
|
||||
prompt: "Research topic: {{topic}}"
|
||||
tools: []
|
||||
output_schema:
|
||||
type: object
|
||||
properties:
|
||||
research_plan:
|
||||
type: string
|
||||
description: A short plan narrative.
|
||||
questions:
|
||||
type: array
|
||||
items: { type: string }
|
||||
minItems: 1
|
||||
maxItems: 6
|
||||
description: 3-5 specific, self-contained sub-questions.
|
||||
required: [research_plan, questions]
|
||||
next: research_each_question
|
||||
|
||||
knowledge_lookup:
|
||||
id: knowledge_lookup
|
||||
type: rag
|
||||
documents:
|
||||
- ./knowledge/
|
||||
query: "{{topic}}"
|
||||
top_k: 6
|
||||
chunk_size: 1000
|
||||
chunk_overlap: 100
|
||||
state_updates:
|
||||
local_context: "{{output.context}}"
|
||||
local_sources: "{{output.sources}}"
|
||||
next: research_each_question
|
||||
|
||||
research_each_question:
|
||||
id: research_each_question
|
||||
type: map
|
||||
over: "{{questions}}"
|
||||
as: question
|
||||
branch: research_one_question
|
||||
collect_into: question_findings
|
||||
max_concurrency: 3
|
||||
next: combine_findings
|
||||
|
||||
research_one_question:
|
||||
id: research_one_question
|
||||
type: llm
|
||||
instructions: |
|
||||
You are a web research assistant. Investigate the SINGLE question
|
||||
given to you using your tools: search the web, fetch and read
|
||||
pages, and search arXiv for academic sources.
|
||||
|
||||
Rules:
|
||||
- Every factual claim must be backed by a real source you
|
||||
actually retrieved. Never fabricate URLs, page titles,
|
||||
authors, or DOIs.
|
||||
- Prefer primary and authoritative sources over aggregators.
|
||||
- Where sources disagree, report the disagreement rather than
|
||||
papering over it.
|
||||
- Put the URL (or DOI) inline next to each claim it supports.
|
||||
|
||||
Return organized findings in plain text. Do not include
|
||||
meta-commentary about the process.
|
||||
prompt: |
|
||||
Research question: {{question}}
|
||||
|
||||
Local context that may help:
|
||||
{{local_context}}
|
||||
|
||||
{{research_feedback}}
|
||||
tools:
|
||||
- web_search_loki
|
||||
- fetch_url_via_curl
|
||||
- search_arxiv
|
||||
- mcp:ddg-search
|
||||
max_iterations: 10
|
||||
max_attempts: 2
|
||||
temperature: 0.1
|
||||
|
||||
combine_findings:
|
||||
id: combine_findings
|
||||
type: script
|
||||
script: scripts/combine_findings.py
|
||||
next: vet_sources
|
||||
|
||||
vet_sources:
|
||||
id: vet_sources
|
||||
type: llm
|
||||
instructions: |
|
||||
You assess the credibility of the sources cited in a set of
|
||||
research findings. For every distinct source URL in the findings,
|
||||
call the `classify_source` tool to get its credibility tier. Then
|
||||
summarize: which claims rest on HIGH-credibility sources, and
|
||||
which rest on PREPRINT or UNVERIFIED sources and so need
|
||||
corroboration. Do NOT do any new research -- assess only what is
|
||||
already cited.
|
||||
prompt: |
|
||||
Findings to assess:
|
||||
{{findings}}
|
||||
tools:
|
||||
- classify_source
|
||||
max_iterations: 15
|
||||
state_updates:
|
||||
source_assessment: "{{output}}"
|
||||
next: critique
|
||||
|
||||
critique:
|
||||
id: critique
|
||||
type: llm
|
||||
instructions: |
|
||||
You are a meticulous research reviewer. Judge whether the
|
||||
findings below are good enough to synthesize a complete,
|
||||
well-supported report that answers the research plan.
|
||||
|
||||
Mark the findings REVISE if ANY of these hold:
|
||||
- A research-plan question is unanswered or only weakly
|
||||
addressed.
|
||||
- A factual claim has no source, or cites a source that looks
|
||||
fabricated.
|
||||
- The findings lean on a single source where corroboration is
|
||||
needed.
|
||||
- A key claim rests only on a PREPRINT or UNVERIFIED source,
|
||||
per the source credibility assessment below.
|
||||
- An obvious counter-perspective or recent development is
|
||||
missing.
|
||||
Otherwise mark them PASS.
|
||||
|
||||
Respond in EXACTLY this format, nothing else:
|
||||
|
||||
VERDICT: <PASS or REVISE>
|
||||
FEEDBACK: <if REVISE, be specific and actionable -- name the gaps
|
||||
and what kind of source would close them; if PASS, write "none">
|
||||
prompt: |
|
||||
Research plan:
|
||||
{{research_plan}}
|
||||
|
||||
Findings under review:
|
||||
{{findings}}
|
||||
|
||||
Source credibility assessment:
|
||||
{{source_assessment}}
|
||||
tools: []
|
||||
state_updates:
|
||||
critique: "{{output}}"
|
||||
next: reflexion_gate
|
||||
|
||||
reflexion_gate:
|
||||
id: reflexion_gate
|
||||
type: script
|
||||
script: scripts/reflexion_gate.py
|
||||
next: synthesize
|
||||
|
||||
synthesize:
|
||||
id: synthesize
|
||||
type: agent
|
||||
agent: report-writer
|
||||
prompt: |
|
||||
Research topic: {{topic}}
|
||||
|
||||
Findings (organized by sub-question, with inline citations):
|
||||
{{findings}}
|
||||
|
||||
Source credibility assessment:
|
||||
{{source_assessment}}
|
||||
|
||||
Produce the final report following your instructions.
|
||||
timeout: 300
|
||||
state_updates:
|
||||
report: "{{output}}"
|
||||
next: verify_sources
|
||||
|
||||
verify_sources:
|
||||
id: verify_sources
|
||||
type: script
|
||||
script: scripts/verify_sources.py
|
||||
next: approve
|
||||
|
||||
approve:
|
||||
id: approve
|
||||
type: approval
|
||||
question: |
|
||||
Research report on: {{topic}}
|
||||
|
||||
{{report}}
|
||||
|
||||
----
|
||||
{{source_check}}
|
||||
----
|
||||
|
||||
Accept this report? Pick "accept" or "reject", or type specific
|
||||
feedback to send the research back for another pass.
|
||||
options:
|
||||
- "accept"
|
||||
- "reject"
|
||||
routes:
|
||||
"accept": end_accepted
|
||||
"reject": end_rejected
|
||||
on_other: incorporate_feedback
|
||||
state_updates:
|
||||
decision: "{{choice}}"
|
||||
|
||||
incorporate_feedback:
|
||||
id: incorporate_feedback
|
||||
type: script
|
||||
script: scripts/incorporate_feedback.py
|
||||
|
||||
end_accepted:
|
||||
id: end_accepted
|
||||
type: end
|
||||
output: "{{report}}"
|
||||
|
||||
end_rejected:
|
||||
id: end_rejected
|
||||
type: end
|
||||
output: "Research on '{{topic}}' was rejected and discarded."
|
||||
@@ -0,0 +1,23 @@
|
||||
# Local knowledge corpus for deep-research
|
||||
|
||||
The `knowledge_lookup` node in `graph.yaml` is a `rag` node that runs
|
||||
hybrid (vector + keyword) retrieval over every file in this directory.
|
||||
Drop your own notes, papers (PDFs), Markdown docs, or text files here
|
||||
and they will be indexed into a per-agent knowledge base on first run.
|
||||
|
||||
Loki supports common file types out of the box: `.md`, `.txt`, `.pdf`,
|
||||
`.html`, and others. Subdirectories are walked recursively.
|
||||
|
||||
A small starter file (`research-style-notes.md`) ships so the RAG
|
||||
node has something non-empty to retrieve against on a clean install.
|
||||
Replace or extend it with your own materials to bias the research
|
||||
phase toward your local context.
|
||||
|
||||
To force the knowledge base to rebuild after you add or change files,
|
||||
delete the cached index:
|
||||
|
||||
```sh
|
||||
rm ~/.config/loki/agents/deep-research/knowledge_lookup.yaml
|
||||
```
|
||||
|
||||
The next run will rebuild from the current contents of this directory.
|
||||
@@ -0,0 +1,49 @@
|
||||
# Research style notes
|
||||
|
||||
These are general principles the `deep-research` agent should keep in
|
||||
mind regardless of topic. Replace this file with your own notes if you
|
||||
want to bias retrieval toward your local context.
|
||||
|
||||
## What "good research" means here
|
||||
|
||||
- **Every factual claim cites a source you actually retrieved.** Never
|
||||
fabricate URLs, page titles, authors, or DOIs.
|
||||
- **Primary sources beat aggregators.** Prefer the original paper, the
|
||||
RFC, the standards body, or the manufacturer over a blog summarizing
|
||||
them.
|
||||
- **Corroboration matters where stakes are high.** If a single source
|
||||
makes a strong claim, look for a second independent source before
|
||||
taking it as established.
|
||||
- **Disagreement is information, not noise.** If two credible sources
|
||||
disagree, report the disagreement and the reasoning on each side.
|
||||
- **Old does not mean wrong.** A 2014 RFC is still authoritative if no
|
||||
newer one has obsoleted it; check before assuming a source is stale.
|
||||
|
||||
## Source-tier heuristics
|
||||
|
||||
The `vet_sources` node uses these rough tiers to weigh credibility.
|
||||
The custom tool `classify_source` (see `tools.sh`) implements this
|
||||
deterministically by hostname / TLD.
|
||||
|
||||
- **HIGH:** government domains (`.gov`, `.mil`), academic institutions
|
||||
(`.edu`, university subdomains), peer-reviewed journals, standards
|
||||
bodies (IETF/RFCs, W3C, ISO, IEEE, NIST), and primary documents from
|
||||
the entities being researched (e.g. a vendor's official spec page).
|
||||
- **PREPRINT:** arXiv, bioRxiv, medRxiv, SSRN. Useful but not yet
|
||||
peer-reviewed; treat numeric claims with extra caution.
|
||||
- **ORGANIZATION (reported as `MEDIUM` by `classify_source`):** established nonprofits, standards-adjacent groups,
|
||||
industry consortia. Reliable for their stated mission but may have a
|
||||
perspective.
|
||||
- **UNVERIFIED:** general web pages, blogs, news aggregators, social
|
||||
media. Useful for leads but should not be the only source for a
|
||||
factual claim.
|
||||
|
||||
## Common pitfalls to flag in critique
|
||||
|
||||
- A claim cited only to a PREPRINT or UNVERIFIED source on a numeric
|
||||
or contested point.
|
||||
- A research-plan question that the findings address only obliquely.
|
||||
- "Findings" that paraphrase a single source three times rather than
|
||||
triangulating.
|
||||
- Citation collisions where two sources are listed but turn out to
|
||||
be the same study reported via different aggregators.
|
||||
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fan-out source for context loading.
|
||||
|
||||
Has no logic of its own. Exists so the static `next: [plan, knowledge_lookup]`
|
||||
list on this node fans out into two parallel branches (the LLM planner and
|
||||
the RAG knowledge lookup) as a single super-step. The validator requires
|
||||
declared parallel-branch script outputs, so we emit an empty JSON object
|
||||
explicitly here.
|
||||
"""
|
||||
import json
|
||||
|
||||
|
||||
def main():
    """Emit an empty JSON object on stdout.

    This node carries no state changes of its own; it only has to produce
    a valid (empty) JSON payload so the node's static fan-out can proceed.
    """
    payload = {}
    print(json.dumps(payload))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Join the per-question map outputs into a single `findings` string.
|
||||
|
||||
The `research_each_question` map writes `question_findings` (an array,
|
||||
one entry per sub-question, in input order). Downstream nodes
|
||||
(`vet_sources`, `critique`, `synthesize`) read `{{findings}}` as a
|
||||
single block, so this script renders the array as a Markdown document
|
||||
with one section per question.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
def load_state():
    """Load the graph state handed to this script by the workflow runner.

    The state is read from the JSON file named by the ``GRAPH_STATE_FILE``
    environment variable when that is set and non-empty; otherwise it is
    parsed from the ``GRAPH_STATE`` environment variable.

    Returns:
        The decoded JSON value (``{}`` when neither variable carries data).

    Raises:
        OSError: if the state file cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    path = os.environ.get("GRAPH_STATE_FILE")
    if path:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # A variable that is set but empty would make json.loads("") raise, so
    # treat it the same as an unset one.
    return json.loads(os.environ.get("GRAPH_STATE") or "{}")
|
||||
|
||||
|
||||
def main():
    """Render the per-question findings as one Markdown document.

    Reads ``questions`` and ``question_findings`` from the graph state and
    prints a JSON object with a single ``findings`` key: one ``## <question>``
    section per question, in question order. Structured (dict/list) entries
    are pretty-printed as JSON. When there are no questions the value is
    the sentence "No findings gathered."
    """
    state = load_state()
    questions = state.get("questions") or []
    per_question = state.get("question_findings") or []

    sections = []
    for idx, q in enumerate(questions):
        # Fewer findings than questions: leave the section body empty.
        body = per_question[idx] if idx < len(per_question) else ""
        if body is None:
            # NOTE(review): presumably a branch that produced nothing is
            # stored as JSON null; render it as empty rather than the
            # literal text "None", which downstream prompts would read as
            # a finding.
            body = ""
        elif isinstance(body, (dict, list)):
            body = json.dumps(body, indent=2)
        sections.append(f"## {q}\n\n{body}")

    findings = "\n\n".join(sections) if sections else "No findings gathered."
    print(json.dumps({"findings": findings}))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fold a reviewer's free-form feedback back into the research loop.
|
||||
|
||||
Runs when the user answers the approval step with their own text
|
||||
instead of "accept" or "reject". That text (saved by the approval node
|
||||
as `decision`) becomes `research_feedback`, and the graph loops back to
|
||||
`research_each_question` for another informed pass (each sub-question is
|
||||
re-researched in parallel with the new feedback in context). The
|
||||
reflexion counter is reset so the user-driven pass gets a fresh revision
|
||||
budget.
|
||||
|
||||
Routing (`_next`): always research_each_question.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
def load_state():
    """Load the graph state handed to this script by the workflow runner.

    The state is read from the JSON file named by the ``GRAPH_STATE_FILE``
    environment variable when that is set and non-empty; otherwise it is
    parsed from the ``GRAPH_STATE`` environment variable.

    Returns:
        The decoded JSON value (``{}`` when neither variable carries data).

    Raises:
        OSError: if the state file cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    path = os.environ.get("GRAPH_STATE_FILE")
    if path:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # A variable that is set but empty would make json.loads("") raise, so
    # treat it the same as an unset one.
    return json.loads(os.environ.get("GRAPH_STATE") or "{}")
|
||||
|
||||
|
||||
def main():
    """Turn the reviewer's free-form reply into feedback for another pass.

    Takes the ``decision`` text from the graph state, prefixes it with a
    fixed instruction, and prints a JSON object that routes to
    ``research_each_question``, resets ``research_attempts`` to 0 and
    stores the combined text as ``research_feedback``.
    """
    reviewer_text = (load_state().get("decision") or "").strip()
    preamble = (
        "The user reviewed the report and asked for changes. Treat "
        "this as the top priority for the next pass:\n\n"
    )
    print(
        json.dumps(
            {
                "_next": "research_each_question",
                "research_attempts": 0,
                "research_feedback": preamble + reviewer_text,
            }
        )
    )


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Entry router for deep-research.
|
||||
|
||||
Reads the caller's prompt from state. If it contains a usable research
|
||||
topic, stores it as `topic` and falls through to the static `next`
|
||||
(bootstrap_research). If the prompt is empty, routes to `ask_topic` so the user can
|
||||
supply one interactively.
|
||||
|
||||
Routing (`_next`):
|
||||
- prompt present -> (no _next; static next: bootstrap_research)
|
||||
- prompt empty -> ask_topic
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
def load_state():
    """Load the graph state handed to this script by the workflow runner.

    The state is read from the JSON file named by the ``GRAPH_STATE_FILE``
    environment variable when that is set and non-empty; otherwise it is
    parsed from the ``GRAPH_STATE`` environment variable.

    Returns:
        The decoded JSON value (``{}`` when neither variable carries data).

    Raises:
        OSError: if the state file cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    path = os.environ.get("GRAPH_STATE_FILE")
    if path:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # A variable that is set but empty would make json.loads("") raise, so
    # treat it the same as an unset one.
    return json.loads(os.environ.get("GRAPH_STATE") or "{}")
|
||||
|
||||
|
||||
def main():
    """Decide whether the run already has a research topic.

    Prints ``{"topic": <prompt>}`` when the state's ``initial_prompt`` is
    non-blank (no ``_next`` is emitted, so the node's static ``next`` edge
    applies); otherwise prints ``{"_next": "ask_topic"}``.
    """
    topic = (load_state().get("initial_prompt") or "").strip()
    result = {"topic": topic} if topic else {"_next": "ask_topic"}
    print(json.dumps(result))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reflexion gate for deep-research.
|
||||
|
||||
Runs after `critique` has reviewed the current research findings. If the
|
||||
critique's verdict is REVISE and the reflexion budget is not spent,
|
||||
loops back to `research_each_question` with the critique attached as
|
||||
`research_feedback`, so the retry is informed rather than a blind
|
||||
re-run. Otherwise it proceeds to `synthesize`.
|
||||
|
||||
Routing (`_next`):
|
||||
- verdict PASS -> synthesize
|
||||
- verdict REVISE, budget remaining -> research_each_question (+ research_feedback)
|
||||
- verdict REVISE, budget spent -> synthesize
|
||||
|
||||
Reflexion is a best-effort quality booster, not a hard gate: once the
|
||||
budget is spent the workflow proceeds anyway, and the human approval
|
||||
step is the final backstop.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
# Automated revision passes allowed. `research_each_question` runs at most
|
||||
# MAX_REFLEXION_REVISIONS + 1 times per user pass. Bump to allow more.
|
||||
MAX_REFLEXION_REVISIONS = 2
|
||||
|
||||
|
||||
def load_state():
    """Load the graph state handed to this script by the workflow runner.

    The state is read from the JSON file named by the ``GRAPH_STATE_FILE``
    environment variable when that is set and non-empty; otherwise it is
    parsed from the ``GRAPH_STATE`` environment variable.

    Returns:
        The decoded JSON value (``{}`` when neither variable carries data).

    Raises:
        OSError: if the state file cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    path = os.environ.get("GRAPH_STATE_FILE")
    if path:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # A variable that is set but empty would make json.loads("") raise, so
    # treat it the same as an unset one.
    return json.loads(os.environ.get("GRAPH_STATE") or "{}")
|
||||
|
||||
|
||||
def as_int(value, default=0):
    """Coerce *value* to ``int``, falling back to *default*.

    Args:
        value: Anything ``int()`` accepts (numbers, numeric strings, ...).
        default: Returned when *value* cannot be converted.

    Returns:
        ``int(value)``, or *default* if the conversion fails.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); Python's json module
        # decodes the non-standard literal ``Infinity`` to that float.
        return default
|
||||
|
||||
|
||||
def parse_verdict(critique):
    """Extract the verdict word from a critique's ``VERDICT:`` line.

    Matching is case-insensitive and tolerates decoration that models
    commonly add around the label or the value, such as Markdown emphasis
    (``**VERDICT:** REVISE``), angle brackets (``VERDICT: <REVISE>``) or
    quotes.

    Args:
        critique: The reviewer's response text. ``None`` is treated as
            empty; any other non-string value is converted with ``str()``.

    Returns:
        The first word after ``VERDICT:``, upper-cased. ``"PASS"`` when no
        verdict line is found, so a malformed critique lets the workflow
        proceed instead of burning the whole revision budget.
    """
    if critique is None:
        return "PASS"
    if not isinstance(critique, str):
        critique = str(critique)
    # After the colon, skip whitespace and wrapper characters (* _ < [ ` ' ")
    # before capturing the verdict word itself.
    match = re.search(
        r"VERDICT\s*:[\s*_<\[`'\"]*([A-Za-z]+)", critique, re.IGNORECASE
    )
    if not match:
        return "PASS"
    return match.group(1).upper()
|
||||
|
||||
|
||||
def main():
    """Route to another research pass or on to synthesis.

    Reads ``critique`` and ``research_attempts`` from the graph state. When
    the verdict is REVISE and fewer than ``MAX_REFLEXION_REVISIONS``
    attempts have been used, prints a JSON object that routes to
    ``research_each_question``, increments ``research_attempts`` and
    attaches the critique as ``research_feedback``. In every other case
    prints ``{"_next": "synthesize"}``.
    """
    state = load_state()
    critique = state.get("critique") or ""
    verdict = parse_verdict(critique)
    attempts = as_int(state.get("research_attempts"))

    # Guard clause: anything other than "REVISE with budget left" moves on.
    if verdict != "REVISE" or attempts >= MAX_REFLEXION_REVISIONS:
        print(json.dumps({"_next": "synthesize"}))
        return

    preamble = (
        "A reviewer judged the previous research pass incomplete. "
        "Address every point in the critique below:\n\n"
    )
    print(
        json.dumps(
            {
                "_next": "research_each_question",
                "research_attempts": attempts + 1,
                "research_feedback": preamble + critique,
            }
        )
    )


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check that the sources cited in the research report are reachable.
|
||||
|
||||
Scans the final report for URLs and DOIs, probes each with a HEAD
|
||||
request, and writes a `source_check` summary into state so the human
|
||||
reviewer sees broken citations at the approval step.
|
||||
|
||||
Times out per request so a slow source cannot stall the graph.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
DOI_RE = re.compile(r"\b(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.IGNORECASE)
|
||||
URL_RE = re.compile(r"https?://[^\s)\]\}\"'>]+")
|
||||
|
||||
|
||||
def load_state():
    """Load the graph state handed to this script by the workflow runner.

    The state is read from the JSON file named by the ``GRAPH_STATE_FILE``
    environment variable when that is set and non-empty; otherwise it is
    parsed from the ``GRAPH_STATE`` environment variable.

    Returns:
        The decoded JSON value (``{}`` when neither variable carries data).

    Raises:
        OSError: if the state file cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    path = os.environ.get("GRAPH_STATE_FILE")
    if path:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # A variable that is set but empty would make json.loads("") raise, so
    # treat it the same as an unset one.
    return json.loads(os.environ.get("GRAPH_STATE") or "{}")
|
||||
|
||||
|
||||
def reachable(url, timeout=5.0):
    """Report whether *url* answers with a 2xx/3xx HTTP status.

    A HEAD request is tried first. If the server answers 405 (Method Not
    Allowed) or 501 (Not Implemented), the probe is repeated with GET,
    because some hosts do not support HEAD at all.

    Args:
        url: The URL to probe.
        timeout: Per-request timeout in seconds. The GET fallback means a
            single call may wait up to twice this long.

    Returns:
        ``True`` for a 2xx/3xx response; ``False`` for any other status,
        any network failure, or a malformed URL. Never raises.
    """

    def _probe(method):
        # Request() itself raises ValueError for URLs without a scheme, so
        # it must be built inside the caller's try block.
        req = urllib.request.Request(url, method=method)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return 200 <= resp.status < 400

    try:
        try:
            return _probe("HEAD")
        except urllib.error.HTTPError as e:
            if e.code not in (405, 501):
                return 200 <= e.code < 400
        # HEAD was refused outright; a GET tells us whether the page exists.
        try:
            return _probe("GET")
        except urllib.error.HTTPError as e:
            return 200 <= e.code < 400
    except Exception:
        # Deliberate best effort: an unverifiable source is reported as
        # unreachable rather than aborting the whole check.
        return False
|
||||
|
||||
|
||||
def _trim_doi(doi):
    """Strip sentence punctuation that the DOI pattern swallowed."""
    doi = doi.rstrip(".,;")
    # A closing parenthesis belongs to the DOI only when it is balanced;
    # otherwise it closed a "(doi: ...)" aside in the surrounding prose.
    while doi.endswith(")") and doi.count(")") > doi.count("("):
        doi = doi[:-1].rstrip(".,;")
    return doi


def main():
    """Probe every URL and DOI cited in the report and summarize the result.

    Reads ``report`` from the graph state, extracts URLs and DOIs, checks
    each one with ``reachable`` and prints a JSON object whose
    ``source_check`` value lists one OK/UNREACHABLE line per source. A DOI
    whose ``https://doi.org/...`` form is already among the cited URLs is
    checked only once.
    """
    state = load_state()
    report = state.get("report") or ""

    urls = sorted({u.rstrip(".,;)") for u in URL_RE.findall(report)})
    # Trim DOIs the same way URLs are trimmed so that "10.1000/x." at the
    # end of a sentence neither yields a broken doi.org URL nor escapes the
    # duplicate check below. A DOI trimmed down to its bare prefix is dropped.
    dois = sorted(
        {
            doi
            for doi in (_trim_doi(d) for d in DOI_RE.findall(report))
            if not doi.endswith("/")
        }
    )
    checked = set(urls)

    results = []
    for url in urls:
        ok = reachable(url)
        results.append(f"  {'OK' if ok else 'UNREACHABLE'}  {url}")
    for doi in dois:
        url = f"https://doi.org/{doi}"
        if url in checked:
            continue
        ok = reachable(url)
        results.append(f"  {'OK' if ok else 'UNREACHABLE'}  DOI {doi} ({url})")

    if not results:
        summary = "No web sources were cited in the report."
    else:
        summary = (
            f"Source reachability ({len(results)} checked):\n"
            + "\n".join(results)
        )

    print(json.dumps({"source_check": summary}))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path
|
||||
|
||||
# @cmd Classify the credibility tier of a web source from its URL.
|
||||
# A deterministic check based on the host and top-level domain. Use it
|
||||
# to weigh how much trust to place in a source before relying on it.
|
||||
# @option --url! The full source URL to classify
|
||||
classify_source() {
    # Map a source URL to a credibility tier using only its host name, and
    # append "<host>: <tier>" to $LLM_OUTPUT. Purely lexical: no network
    # access, so the same URL always yields the same tier.
    # shellcheck disable=SC2154
    local url="$argc_url"
    local host="${url#*://}"    # drop the scheme, if present
    host="${host%%[/?#]*}"      # drop path, query and fragment
    host="${host##*@}"          # drop userinfo (user:pass@)
    host="${host%%:*}"          # drop the port
    host="${host%.}"            # drop a trailing root dot (example.gov.)
    host="$(printf '%s' "$host" | tr '[:upper:]' '[:lower:]')"

    # Order matters: the first matching pattern wins, so the specific
    # preprint and encyclopedia hosts must precede the generic *.org rule.
    local tier
    case "$host" in
        '')
            tier="UNKNOWN - no host could be parsed from the URL" ;;
        *.gov | *.gov.* | *.mil)
            tier="HIGH - government source" ;;
        *.edu | *.edu.* | *.ac.*)
            tier="HIGH - academic institution" ;;
        arxiv.org | *.arxiv.org | biorxiv.org | *.biorxiv.org | medrxiv.org | *.medrxiv.org | ssrn.com | *.ssrn.com)
            tier="PREPRINT - not yet peer reviewed, corroborate before citing" ;;
        wikipedia.org | *.wikipedia.org)
            tier="TERTIARY - encyclopedia, good for orientation not citation" ;;
        *.org | *.org.*)
            tier="MEDIUM - organization site, check for institutional bias" ;;
        *)
            tier="UNVERIFIED - general web source, corroborate before citing" ;;
    esac

    printf '%s: %s\n' "${host:-<none>}" "$tier" >> "$LLM_OUTPUT"
}
|
||||
@@ -1,14 +0,0 @@
|
||||
# Jira AI Agent
|
||||
|
||||
## Overview
|
||||
|
||||
The Jira AI Agent is designed to assist with managing tasks within Jira projects, providing capabilities such as
|
||||
creating, searching, updating, assigning, linking, and commenting on issues. Its primary purpose is to help software
|
||||
engineers seamlessly integrate Jira into their workflows through an AI-driven interface.
|
||||
|
||||
## Configuration
|
||||
This agent uses the official [Atlassian MCP Server](https://github.com/atlassian/atlassian-mcp-server). To use it,
|
||||
ensure you have Node.js v18+ installed to run the local MCP proxy (`mcp-remote`).
|
||||
|
||||
The server uses OAuth 2.0 so it will automatically open your browser for you to sign in to your account. No manual
|
||||
configuration is necessary!
|
||||
@@ -1,37 +0,0 @@
|
||||
name: Jira Agent
|
||||
description: An AI agent that can assist with Jira tasks such as creating issues, searching for issues, and updating issues.
|
||||
version: 0.1.0
|
||||
agent_session: temp
|
||||
mcp_servers:
|
||||
- atlassian
|
||||
instructions: |
|
||||
You are a AI agent designed to assist with managing Jira tasks and helping software engineers utilize and integrate
|
||||
Jira into their workflows. You can create, search, update, assign, link, and comment on issues in Jira.
|
||||
|
||||
## Create Issue (MANDATORY when creating a issue)
|
||||
When a user prompts you to create a Jira issue:
|
||||
1. Prompt the user for what Jira project they want the ticket created in
|
||||
2. If the ticket type requires a parent issue:
|
||||
a. Query Jira for potentially relevant parents
|
||||
b. Prompt user for which parent to use, displaying the suggested list of parent issues
|
||||
3. Create the issue with the following format:
|
||||
```markdown
|
||||
**Description:**
|
||||
This section gives context and details about the issue.
|
||||
**User Acceptance Criteria:**
|
||||
# This section provides bullet points that function like a checklist of all the things that must be completed in
|
||||
# order for the issue to be considered done.
|
||||
* Example criteria one
|
||||
* Example criteria two
|
||||
```
|
||||
4. Ask the user if the issue should be assigned to them
|
||||
a. If yes, then assign the user to the newly created issue
|
||||
|
||||
|
||||
Available tools:
|
||||
{{__tools__}}
|
||||
conversation_starters:
|
||||
- What are the latest issues in my Jira project?
|
||||
- Can you create a new Jira issue for me?
|
||||
- What are my open Jira issues?
|
||||
- Can you search for issues with the label "bug" in my Jira project?
|
||||
@@ -0,0 +1,46 @@
|
||||
# report-writer
|
||||
|
||||
A tiny, focused sub-agent that turns a set of research findings into a
|
||||
single coherent final report. Reads only what it is given — does not
|
||||
do independent research, does not access the web, does not invent
|
||||
facts. It exists as a focused tool for orchestrating agents to
|
||||
delegate the writing phase to.
|
||||
|
||||
## Why a separate agent?
|
||||
|
||||
This is an example of the **agent-as-tool** pattern in graph agents.
|
||||
The `deep-research` graph agent's `synthesize` node is an `agent` node
|
||||
that spawns this one (see `assets/agents/deep-research/graph.yaml`).
|
||||
Separating the role has two practical benefits:
|
||||
|
||||
- The orchestrating agent can use a cheap model (or a high-temperature
|
||||
exploratory one) for the research phase, while letting the writing
|
||||
phase use a different (typically lower-temperature, possibly larger)
|
||||
model dedicated to coherent prose.
|
||||
- The writing prompt is owned by this agent's `config.yaml` rather
|
||||
than buried inside another agent's graph. You can polish it
|
||||
independently without touching the research flow.
|
||||
|
||||
## Standalone use
|
||||
|
||||
You can also use this agent directly if you have a set of findings you
|
||||
want polished:
|
||||
|
||||
```sh
|
||||
loki -a report-writer "Topic: X. Findings: <paste findings here>"
|
||||
```
|
||||
|
||||
It will produce a single Markdown report following the rules in its
|
||||
system prompt: executive summary at the top, grouped sections by
|
||||
related sub-questions, every inline citation preserved verbatim, and a
|
||||
final "Open questions / disagreements" section.
|
||||
|
||||
## What it will NOT do
|
||||
|
||||
- Search the web, fetch URLs, query an MCP server, or use any tool.
|
||||
It has no tools configured.
|
||||
- Invent facts beyond what is in the findings you give it.
|
||||
- Strip or rewrite citations.
|
||||
|
||||
These constraints are the point of the agent existing: a writer that
|
||||
the orchestrator can trust to stay in its lane.
|
||||
@@ -0,0 +1,34 @@
|
||||
name: report-writer
|
||||
description: Polishes research findings into a clear, citation-preserving final report
|
||||
version: 1.0.0
|
||||
temperature: 0.2
|
||||
|
||||
instructions: |
|
||||
You are a technical writer. You will be given:
|
||||
- a research topic
|
||||
- a set of findings, organized per sub-question, with inline
|
||||
citations next to each claim
|
||||
- a source-credibility assessment of the cited sources
|
||||
|
||||
Your job is to produce a single, well-organized final report:
|
||||
|
||||
Rules:
|
||||
- Use ONLY the findings provided. Do not introduce facts from
|
||||
your own memory. Do not speculate beyond what the findings
|
||||
support.
|
||||
- Preserve every inline citation. If a sentence in the findings
|
||||
had a URL or DOI, the equivalent sentence in your report must
|
||||
keep the same citation.
|
||||
- Lead with a 2-3 sentence executive summary at the top.
|
||||
- Organize the body so that related sub-questions are grouped,
|
||||
not strictly one section per question. The findings are raw
|
||||
material; the report should read as a single coherent answer
|
||||
to the original topic.
|
||||
- End with a short "Open questions / disagreements" section
|
||||
naming anything the findings flagged as unresolved or
|
||||
contested.
|
||||
|
||||
Output plain Markdown. No metadata, no JSON wrapper.
|
||||
|
||||
conversation_starters:
|
||||
- "Polish these findings into a cited report"
|
||||
@@ -18,16 +18,15 @@ Sisyphus acts as the primary entry point, capable of handling complex tasks by c
|
||||
- 🛠️ **Tool Integration**: Seamlessly uses system tools for building, testing, and file manipulation.
|
||||
|
||||
## Pro-Tip: Use an IDE MCP Server for Improved Performance
|
||||
Many modern IDEs now include MCP servers that let LLMs perform operations within the IDE itself and use IDE tools. Using
|
||||
an IDE's MCP server dramatically improves the performance of coding agents. So if you have an IDE, try adding that MCP
|
||||
server to your config (see the [MCP Server docs](../../../docs/function-calling/MCP-SERVERS.md) to see how to configure
|
||||
them), and modify the agent definition to look like this:
|
||||
Many modern IDEs (JetBrains, VS Code, Cursor, Zed, etc.) expose MCP servers that let LLMs use IDE tools directly. Using
|
||||
one dramatically improves the performance of coding agents. If you have one, add it to your loki config (see the
|
||||
[MCP Server docs](../../../docs/function-calling/MCP-SERVERS.md)) and reference it in this agent's `mcp_servers:` list:
|
||||
|
||||
```yaml
|
||||
# ...
|
||||
|
||||
mcp_servers:
|
||||
- jetbrains
|
||||
- your-ide-mcp-server
|
||||
|
||||
global_tools:
|
||||
- fs_read.sh
|
||||
|
||||
@@ -119,20 +119,21 @@ instructions: |
|
||||
1. todo__init --goal "Add user profiles API endpoint"
|
||||
2. todo__add --task "Explore existing API patterns"
|
||||
3. todo__add --task "Implement profile endpoint"
|
||||
4. todo__add --task "Verify with build/test"
|
||||
5. agent__spawn --agent explore --prompt "Find existing API endpoint patterns, route structures, and controller conventions. Include code snippets."
|
||||
6. agent__spawn --agent explore --prompt "Find existing data models and database query patterns. Include code snippets."
|
||||
7. agent__collect --id <id1>
|
||||
8. agent__collect --id <id2>
|
||||
9. todo__done --id 1
|
||||
10. agent__spawn --agent coder --prompt "<structured prompt using Coder Delegation Format above, including code snippets from explore results>"
|
||||
11. agent__collect --id <coder_id>
|
||||
12. todo__done --id 2
|
||||
13. run_build
|
||||
14. run_tests
|
||||
15. todo__done --id 3
|
||||
4. agent__spawn --agent explore --prompt "Find existing API endpoint patterns, route structures, and controller conventions. Include code snippets."
|
||||
5. agent__spawn --agent explore --prompt "Find existing data models and database query patterns. Include code snippets."
|
||||
6. agent__collect --id <id1>
|
||||
7. agent__collect --id <id2>
|
||||
8. todo__done --id 1
|
||||
9. agent__spawn --agent coder --prompt "<structured prompt using Coder Delegation Format above, including code snippets from explore results>"
|
||||
10. agent__collect --id <coder_id>
|
||||
11. todo__done --id 2
|
||||
```
|
||||
|
||||
Note: the `coder` agent is a graph agent that runs verification (build +
|
||||
tests) and a bounded fix-loop internally. You do NOT need to spawn a
|
||||
separate build/test step. A `CODER_COMPLETE` outcome means build and
|
||||
tests already passed.
|
||||
|
||||
### Example 2: Architecture/design question (explore + oracle in parallel)
|
||||
|
||||
User: "How should I structure the authentication for this app?"
|
||||
@@ -172,6 +173,22 @@ instructions: |
|
||||
10. **Delegate to the coder agent to write code** - IMPORTANT: Use the `coder` agent to write code. Do not try to write code yourself except for trivial changes
|
||||
11. **Always output a summary of changes when finished** - Make it clear to the user that you've completed your tasks
|
||||
|
||||
## Coder Outcomes
|
||||
|
||||
The `coder` agent is a graph agent that runs the implement -> verify_build
|
||||
-> verify_tests -> fix_loop pipeline internally. It always returns one of
|
||||
three sentinel outcomes:
|
||||
|
||||
- `CODER_COMPLETE` - implementation succeeded with build + tests green.
|
||||
Continue with any follow-up todos.
|
||||
- `CODER_REJECTED` - user rejected the plan at the approval gate (only
|
||||
triggered for high-complexity plans). Do NOT re-spawn coder blindly;
|
||||
ask the user what to change first.
|
||||
- `CODER_FAILED` - the fix-loop exhausted its budget without producing
|
||||
green build/tests. The failure output includes the last build and tests
|
||||
output. Surface this to the user; consider spawning `oracle` for
|
||||
diagnosis if the failure is unclear.
|
||||
|
||||
## When to Do It Yourself
|
||||
|
||||
- Simple command execution
|
||||
|
||||
@@ -73,11 +73,11 @@ def to_args:
|
||||
to_entries | .[] |
|
||||
(.key | split("_") | join("-")) as $key |
|
||||
if .value | type == "array" then
|
||||
.value | .[] | "--\($key) \(. | escape_shell_word)"
|
||||
.value | .[] | "--\($key)=\(. | escape_shell_word)"
|
||||
elif .value | type == "boolean" then
|
||||
if .value then "--\($key)" else "" end
|
||||
else
|
||||
"--\($key) \(.value | escape_shell_word)"
|
||||
"--\($key)=\(.value | escape_shell_word)"
|
||||
end;
|
||||
[ to_args ] | join(" ")
|
||||
EOF
|
||||
|
||||
@@ -70,11 +70,11 @@ def to_args:
|
||||
to_entries | .[] |
|
||||
(.key | split("_") | join("-")) as $key |
|
||||
if .value | type == "array" then
|
||||
.value | .[] | "--\($key) \(. | escape_shell_word)"
|
||||
.value | .[] | "--\($key)=\(. | escape_shell_word)"
|
||||
elif .value | type == "boolean" then
|
||||
if .value then "--\($key)" else "" end
|
||||
else
|
||||
"--\($key) \(.value | escape_shell_word)"
|
||||
"--\($key)=\(.value | escape_shell_word)"
|
||||
end;
|
||||
[ to_args ] | join(" ")
|
||||
EOF
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
# @meta require-tools jira
|
||||
# @describe Query for jira issues using a Jira Query Language (JQL) query
|
||||
# @option --jql-query! The Jira Query Language query to execute
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path
|
||||
|
||||
main() {
|
||||
jira issue ls -q "$argc_jql_query" --plain >> "$LLM_OUTPUT"
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
enabled_mcp_servers: atlassian
|
||||
---
|
||||
You are the librarian for the company's Confluence and Jira knowledge bases. Your job is to help users find and retrieve
|
||||
information from these platforms. Use all tools at your disposal to answer user queries.
|
||||
|
||||
Available Tools:
|
||||
{{__tools__}}
|
||||
@@ -17,16 +17,18 @@ agent_session: null # Set a session to use when starting the agent.
|
||||
name: <agent-name> # Name of the agent, used in the UI and logs
|
||||
description: <description> # Description of the agent, used in the UI
|
||||
version: 1 # Version of the agent
|
||||
# Todo System & Auto-Continuation
|
||||
# These settings help smaller models handle multi-step tasks more reliably.
|
||||
# See docs/TODO-SYSTEM.md for detailed documentation.
|
||||
# Auto-Continue (Todo System)
|
||||
# The auto-continue system provides built-in task tracking for improved reliability.
|
||||
# When enabled, the model can create todo lists and the system will automatically
|
||||
# prompt it to continue when incomplete tasks remain.
|
||||
# See the [Todo System documentation](https://github.com/Dark-Alex-17/loki/wiki/TODO-System) for more information
|
||||
auto_continue: false # Enable automatic continuation when incomplete todos remain
|
||||
max_auto_continues: 10 # Maximum number of automatic continuations before stopping
|
||||
inject_todo_instructions: true # Inject the default todo tool usage instructions into the agent's system prompt
|
||||
continuation_prompt: null # Custom prompt used when auto-continuing (optional; uses default if null)
|
||||
# Sub-Agent Spawning System
|
||||
# Enable this agent to spawn and manage child agents in parallel.
|
||||
# See docs/AGENTS.md for detailed documentation.
|
||||
# See https://github.com/Dark-Alex-17/loki/wiki/Agents for detailed documentation.
|
||||
can_spawn_agents: false # Enable the agent to spawn child agents
|
||||
max_concurrent_agents: 4 # Maximum number of agents that can run simultaneously
|
||||
max_agent_depth: 3 # Maximum nesting depth for sub-agents (prevents runaway spawning)
|
||||
|
||||
@@ -27,18 +27,18 @@ sync_models_url: > # URL to sync model changes from
|
||||
https://raw.githubusercontent.com/Dark-Alex-17/loki/refs/heads/main/models.yaml
|
||||
|
||||
# ---- REPL Prompt ----
|
||||
# Custom REPL left/right prompts; see the [REPL Prompt Documentation](./docs/REPL-PROMPT.md) for more information
|
||||
# Custom REPL left/right prompts; see the [REPL Prompt Documentation](https://github.com/Dark-Alex-17/loki/wiki/REPL-Prompt) for more information
|
||||
left_prompt:
|
||||
'{color.red}{model}){color.green}{?session {?agent {agent}>}{session}{?role /}}{!session {?agent {agent}>}}{role}{?rag @{rag}}{color.cyan}{?session )}{!session >}{color.reset} '
|
||||
right_prompt:
|
||||
'{color.purple}{?session {?consume_tokens {consume_tokens}({consume_percent}%)}{!consume_tokens {consume_tokens}}}{color.reset}'
|
||||
|
||||
# ---- Vault ----
|
||||
# See the [Vault documentation](./docs/VAULT.md) for more information on the Loki vault
|
||||
# See the [Vault documentation](https://github.com/Dark-Alex-17/loki/wiki/Vault) for more information on the Loki vault
|
||||
vault_password_file: null # Path to a file containing the password for the Loki vault (cannot be a secret template)
|
||||
|
||||
# ---- Function Calling ----
|
||||
# See the [Tools documentation](./docs/function-calling/TOOLS.md) for more details
|
||||
# See the [Tools documentation](https://github.com/Dark-Alex-17/loki/wiki/Tools) for more details
|
||||
function_calling: true # Enables or disables function calling (Globally).
|
||||
mapping_tools: # Alias for a tool or toolset
|
||||
fs: 'fs_cat,fs_ls,fs_mkdir,fs_rm,fs_write,fs_read,fs_glob,fs_grep'
|
||||
@@ -64,7 +64,6 @@ visible_tools: # Which tools are visible to be compiled (and a
|
||||
# - get_current_weather.py
|
||||
# - get_current_weather.ts
|
||||
- get_current_weather.sh
|
||||
- query_jira_issues.sh
|
||||
# - search_arxiv.sh
|
||||
# - search_wikipedia.sh
|
||||
# - search_wolframalpha.sh
|
||||
@@ -75,14 +74,24 @@ visible_tools: # Which tools are visible to be compiled (and a
|
||||
# - web_search_tavily.sh
|
||||
|
||||
# ---- MCP Servers ----
|
||||
# See the [MCP Servers documentation](./docs/MCP-SERVERS.md) for more details
|
||||
# See the [MCP Servers documentation](https://github.com/Dark-Alex-17/loki/wiki/MCP-Servers) for more details
|
||||
mcp_server_support: true # Enables or disables MCP servers (globally).
|
||||
mapping_mcp_servers: # Alias for an MCP server or set of servers
|
||||
git: github,gitmcp
|
||||
enabled_mcp_servers: null # Which MCP servers to enable by default (e.g. 'github,slack,ddg-search')
|
||||
|
||||
# ---- Auto-Continue (Todo System) ----
|
||||
# The auto-continue system provides built-in task tracking for improved reliability.
|
||||
# When enabled, the model can create todo lists and the system will automatically
|
||||
# prompt it to continue when incomplete tasks remain.
|
||||
# See the [Todo System documentation](https://github.com/Dark-Alex-17/loki/wiki/TODO-System) for more information
|
||||
auto_continue: false # Enable automatic continuation when incomplete todos remain (default: false)
|
||||
max_auto_continues: 10 # Maximum number of automatic continuations before stopping (default: 10)
|
||||
inject_todo_instructions: true # Inject default todo usage instructions into the system prompt (default: true)
|
||||
continuation_prompt: null # Custom prompt used when auto-continuing. If null, uses built-in default
|
||||
|
||||
# ---- Session ----
|
||||
# See the [Session documentation](./docs/SESSIONS.md) for more information
|
||||
# See the [Session documentation](https://github.com/Dark-Alex-17/loki/wiki/Sessions) for more information
|
||||
save_session: null # Controls the persistence of the session. If true, auto-save; if false, don't auto-save; if null, ask the user what to do
|
||||
compression_threshold: 4000 # Compress the session when the token count reaches or exceeds this threshold
|
||||
summarization_prompt: > # The text prompt used for creating a concise summary of session messages
|
||||
@@ -91,7 +100,7 @@ summary_context_prompt: > # The text prompt used for including the summar
|
||||
'This is a summary of the chat history as a recap: '
|
||||
|
||||
# ---- RAG ----
|
||||
# See the [RAG Docs](./docs/RAG.md) for more details.
|
||||
# See the [RAG Docs](https://github.com/Dark-Alex-17/loki/wiki/RAG) for more details.
|
||||
rag_embedding_model: null # Specifies the embedding model used for context retrieval
|
||||
rag_reranker_model: null # Specifies the reranker model used for sorting retrieved documents; Loki uses Reciprocal Rank Fusion by default
|
||||
rag_top_k: 5 # Specifies the number of documents to retrieve for answering queries
|
||||
@@ -137,7 +146,7 @@ document_loaders:
|
||||
sh -c "yek $1 --json | jq 'map({ path: .filename, contents: .content })'"
|
||||
|
||||
# ---- Clients ----
|
||||
# See the [Clients documentation](./docs/clients/CLIENTS.md) for more details
|
||||
# See the [Clients documentation](https://github.com/Dark-Alex-17/loki/wiki/Clients) for more details
|
||||
clients:
|
||||
# All clients have the following configuration:
|
||||
# - type: xxxx
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
---
|
||||
# Everything in this section is optional
|
||||
############################################
|
||||
## Everything in this section is optional ##
|
||||
############################################
|
||||
|
||||
# Role Configuration
|
||||
name: <role-name> # The name of the role
|
||||
model: openai:gpt-4o # The model to use for this role
|
||||
temperature: 0.2 # The temperature to use for this role when querying the model
|
||||
@@ -8,5 +12,14 @@ enabled_tools: fs_ls,fs_cat # A comma-separated list of tools to enabl
|
||||
enabled_mcp_servers: github,gitmcp # A comma-separated list of MCP servers to enable for this role
|
||||
prompt: null # A custom prompt to use for this role that will immediately query
|
||||
# the model for output instead of using the instructions below
|
||||
# Auto-Continue (Todo System)
|
||||
# The auto-continue system provides built-in task tracking for improved reliability.
|
||||
# When enabled, the model can create todo lists and the system will automatically
|
||||
# prompt it to continue when incomplete tasks remain.
|
||||
# See the [Todo System documentation](https://github.com/Dark-Alex-17/loki/wiki/TODO-System) for more information
|
||||
auto_continue: false # Enable automatic continuation when incomplete todos remain (default: false)
|
||||
max_auto_continues: 10 # Maximum number of automatic continuations before stopping (default: 10)
|
||||
inject_todo_instructions: true # Inject default todo tool usage instructions into the system prompt (default: true)
|
||||
continuation_prompt: null # Custom prompt used when auto-continuing. If null, uses built-in default
|
||||
---
|
||||
You are an expert at doing things. This is where you write the instructions for the role.
|
||||
|
||||
@@ -1,775 +0,0 @@
|
||||
# Agents
|
||||
|
||||
Agents in Loki follow the same style as OpenAI's GPTs. They consist of 3 parts:
|
||||
|
||||
* [Role](./ROLES.md) - Tell the LLM how to behave
|
||||
* [RAG](./RAG.md) - Pre-built knowledge bases specifically for the agent
|
||||
* [Function Calling](./function-calling/TOOLS.md#tools) ([#2](./function-calling/MCP-SERVERS.md)) - Extends the functionality of the LLM through custom functions it can call
|
||||
|
||||

|
||||
|
||||
Agent configuration files are stored in the `agents` subdirectory of your Loki configuration directory. The location of
|
||||
this directory varies between systems so you can use the following command to locate yours:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'agents_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
If you're looking for more example agents, refer to the [built-in agents](../assets/agents).
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Directory Structure](#directory-structure)
|
||||
- [1. Metadata](#1-metadata)
|
||||
- [2. Define the Instructions](#2-define-the-instructions)
|
||||
- [Static Instructions](#static-instructions)
|
||||
- [Special Variables](#special-variables)
|
||||
- [User-Defined Variables](#user-defined-variables)
|
||||
- [Dynamic Instructions](#dynamic-instructions)
|
||||
- [Variables](#variables)
|
||||
- [3. Initializing RAG](#3-initializing-rag)
|
||||
- [4. Building Tools for Agents](#4-building-tools-for-agents)
|
||||
- [Limitations](#limitations)
|
||||
- [.env File Support](#env-file-support)
|
||||
- [Python-Based Agent Tools](#python-based-agent-tools)
|
||||
- [Bash-Based Agent Tools](#bash-based-agent-tools)
|
||||
- [TypeScript-Based Agent Tools](#typescript-based-agent-tools)
|
||||
- [5. Conversation Starters](#5-conversation-starters)
|
||||
- [6. Todo System & Auto-Continuation](#6-todo-system--auto-continuation)
|
||||
- [7. Sub-Agent Spawning System](#7-sub-agent-spawning-system)
|
||||
- [Configuration](#spawning-configuration)
|
||||
- [Spawning & Collecting Agents](#spawning--collecting-agents)
|
||||
- [Task Queue with Dependencies](#task-queue-with-dependencies)
|
||||
- [Active Task Dispatch](#active-task-dispatch)
|
||||
- [Output Summarization](#output-summarization)
|
||||
- [Teammate Messaging](#teammate-messaging)
|
||||
- [Runaway Safeguards](#runaway-safeguards)
|
||||
- [8. User Interaction Tools](#8-user-interaction-tools)
|
||||
- [Available Tools](#user-interaction-available-tools)
|
||||
- [Escalation (Sub-Agent to User)](#escalation-sub-agent-to-user)
|
||||
- [9. Auto-Injected Prompts](#9-auto-injected-prompts)
|
||||
- [Built-In Agents](#built-in-agents)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Directory Structure
|
||||
Agent configurations often have the following directory structure:
|
||||
|
||||
```
|
||||
<loki-config-dir>/agents
|
||||
└── my-agent
|
||||
├── config.yaml
|
||||
├── tools.sh
|
||||
or
|
||||
├── tools.py
|
||||
or
|
||||
├── tools.ts
|
||||
```
|
||||
|
||||
This means that agent configurations often are only two files: the agent configuration file (`config.yaml`), and the
|
||||
tool definitions (`agents/my-agent/tools.sh`, `tools.py`, or `tools.ts`).
|
||||
|
||||
To see a full example configuration file, refer to the [example agent config file](../config.agent.example.yaml).
|
||||
|
||||
The best way to understand how an agent is built is to go step by step in the following manner:
|
||||
|
||||
---
|
||||
|
||||
## 1. Metadata
|
||||
Agent configurations have the following settings available to customize each agent:
|
||||
|
||||
```yaml
|
||||
# Model Configuration
|
||||
model: openai:gpt-4o # Specify the LLM to use
|
||||
temperature: null # Set default temperature parameter, range (0, 1)
|
||||
top_p: null # Set default top-p parameter, with a range of (0, 1) or (0, 2), depending on the model
|
||||
# Agent Metadata Configuration
|
||||
agent_session: null # Set a session to use when starting the agent. (e.g. temp, default); defaults to globally set agent_session
|
||||
# Agent Configuration
|
||||
name: <agent-name> # Name of the agent, used in the UI and logs
|
||||
description: <description> # Description of the agent, used in the UI
|
||||
version: 1 # Version of the agent
|
||||
# Function Calling Configuration
|
||||
mcp_servers: # Optional list of MCP servers that the agent utilizes
|
||||
- github # Corresponds to the name of an MCP server in the `<loki-config-dir>/functions/mcp.json` file
|
||||
global_tools: # Optional list of additional global tools to enable for the agent; i.e. not tools specific to the agent
|
||||
- web_search
|
||||
- fs
|
||||
- python
|
||||
# Todo System & Auto-Continuation (see "Todo System & Auto-Continuation" section below)
|
||||
auto_continue: false # Enable automatic continuation when incomplete todos remain
|
||||
max_auto_continues: 10 # Maximum continuation attempts before stopping
|
||||
inject_todo_instructions: true # Inject todo tool instructions into system prompt
|
||||
continuation_prompt: null # Custom prompt for continuations (optional)
|
||||
# Sub-Agent Spawning (see "Sub-Agent Spawning System" section below)
|
||||
can_spawn_agents: false # Enable spawning child agents
|
||||
max_concurrent_agents: 4 # Max simultaneous child agents
|
||||
max_agent_depth: 3 # Max nesting depth (prevents runaway)
|
||||
inject_spawn_instructions: true # Inject spawning instructions into system prompt
|
||||
summarization_model: null # Model for summarizing sub-agent output (e.g. 'openai:gpt-4o-mini')
|
||||
summarization_threshold: 4000 # Char count above which sub-agent output is summarized
|
||||
escalation_timeout: 300 # Seconds sub-agents wait for escalated user input (default: 5 min)
|
||||
```
|
||||
|
||||
As mentioned previously: Agents utilize function calling to extend a model's capabilities. However, agents operate in
|
||||
an isolated environment, so in order for an agent to use a tool or MCP server that you have defined globally, you must
|
||||
explicitly state which tools and/or MCP servers the agent uses. Otherwise, it is assumed that the agent doesn't use any
|
||||
tools outside its own custom defined tools.
|
||||
|
||||
And if you don't define an `agents/my-agent/tools.sh`, `agents/my-agent/tools.py`, or `agents/my-agent/tools.ts`, then the agent is really just a
|
||||
`role`.
|
||||
|
||||
You'll notice there are no settings for agent-specific tooling. This is because they are handled separately and
|
||||
automatically. See the [Building Tools for Agents](#4-building-tools-for-agents) section below for more information.
|
||||
|
||||
To see a full example configuration file, refer to the [example agent config file](../config.agent.example.yaml).
|
||||
|
||||
## 2. Define the Instructions
|
||||
At their heart, agents function similarly to roles in that they tell the model how to behave. Agent configuration files
|
||||
have the following settings for the instruction definitions:
|
||||
|
||||
```yaml
|
||||
dynamic_instructions: # Whether to use dynamically generated instructions for the agent; if false, static instructions are used. False by default.
|
||||
instructions: # Static instructions for the LLM; These are ignored if dynamic instructions are used
|
||||
variables: # An array of optional variables that the agent expects and uses
|
||||
```
|
||||
|
||||
### Static Instructions
|
||||
By default, Loki agents use statically defined instructions. Think of them as being identical to the instructions for a
|
||||
[role](./ROLES.md#instructions), because they virtually are.
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
instructions: |
|
||||
You are an AI agent designed to demonstrate agentic capabilities
|
||||
```
|
||||
|
||||
Just like roles, agents support variable interpolation at runtime. There are two types of variables that can be
|
||||
interpolated into the instructions at runtime: special variables (like roles have), and user-defined variables. Just
|
||||
like roles, variables are interpolated into your instructions anywhere Loki sees the `{{variable}}` syntax.
|
||||
|
||||
#### Special Variables
|
||||
The following special variables are provided by Loki at runtime and can be injected into your agent's instructions:
|
||||
|
||||
| Name | Description | Example |
|
||||
|-----------------|---------------------------------------------------------------------|----------------------------|
|
||||
| `__os__` | Operating system name | `linux` |
|
||||
| `__os_family__` | Operating system family | `unix` |
|
||||
| `__arch__` | System architecture | `x86_64` |
|
||||
| `__shell__` | The current user's default shell | `bash` |
|
||||
| `__locale__` | The current user's preferred language and region settings | `en-US` |
|
||||
| `__now__` | Current timestamp in ISO 8601 format | `2025-11-07T10:15:44.268Z` |
|
||||
| `__cwd__` | The current working directory | `/tmp` |
|
||||
| `__tools__` | A list of the enabled tools (global + mcp servers + agent-specific) | |
|
||||
|
||||
#### User-Defined Variables
|
||||
Agents also support user-defined variables that can be interpolated into the instructions, and are made available to any
|
||||
agent-specific tools you define (see [Building Tools for Agents](#4-building-tools-for-agents) for more details on how to
|
||||
create agent-specific tooling).
|
||||
|
||||
The `variables` setting in an agent's config has the following fields:
|
||||
|
||||
| Field | Required | Description |
|
||||
|---------------|----------|----------------------------------------------------------------------------------------------------|
|
||||
| `name` | * | The name of the variable |
|
||||
| `description` | * | The description of the field |
|
||||
| `default` | | A default value for the field. If left undefined, the user will be prompted for a value at runtime |
|
||||
|
||||
These variables can be referenced in both the agent's instructions, and in the tool definitions via `LLM_AGENT_VAR_<name>`.
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
instructions: |
|
||||
You are an agent who answers questions about a user's system.
|
||||
|
||||
<tools>
|
||||
{{__tools__}}
|
||||
</tools>
|
||||
|
||||
<system>
|
||||
os: {{__os__}}
|
||||
os_family: {{__os_family__}}
|
||||
arch: {{__arch__}}
|
||||
shell: {{__shell__}}
|
||||
locale: {{__locale__}}
|
||||
now: {{__now__}}
|
||||
cwd: {{__cwd__}}
|
||||
</system>
|
||||
|
||||
<user>
|
||||
username: {{username}}
|
||||
</user>
|
||||
variables:
|
||||
- name: username # Accessible from the tool definitions via the `LLM_AGENT_VAR_USERNAME` environment variable
|
||||
description: Your user name
|
||||
```
|
||||
|
||||
### Dynamic Instructions
|
||||
Sometimes you may find it useful to dynamically generate instructions on startup. Whether that be via a call to Loki
|
||||
itself to generate them, or by some other means. Loki supports this type of behavior using a special function defined
|
||||
in your `agents/my-agent/tools.py`, `agents/my-agent/tools.sh`, or `agents/my-agent/tools.ts`.
|
||||
|
||||
**Example: Instructions for a JSON-reader agent that specializes on each JSON input it receives**
|
||||
`agents/json-reader/tools.py`:
|
||||
```python
|
||||
import json
|
||||
from pathlib import Path
|
||||
from genson import SchemaBuilder
|
||||
|
||||
def _instructions():
|
||||
"""Generates instructions for the agent dynamically"""
|
||||
value = input("Enter a JSON file path OR paste raw JSON: ").strip()
|
||||
if not value:
|
||||
raise SystemExit("A file path or JSON string is required.")
|
||||
|
||||
p = Path(value)
|
||||
if p.exists() and p.is_file():
|
||||
json_file_path = str(p.resolve())
|
||||
json_text = p.read_text(encoding="utf-8")
|
||||
else:
|
||||
try:
|
||||
json.loads(value)
|
||||
except json.JSONDecodeError as e:
|
||||
raise SystemExit(f"Input is neither a file nor valid JSON.\n{e}")
|
||||
json_file_path = "<provided-inline-json>"
|
||||
json_text = value
|
||||
|
||||
try:
|
||||
data = json.loads(json_text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise SystemExit(f"Provided content is not valid JSON.\n{e}")
|
||||
|
||||
builder = SchemaBuilder()
|
||||
builder.add_object(data)
|
||||
json_schema = builder.to_schema()
|
||||
return f"""
|
||||
You are an AI agent that can view and filter JSON data with jq.
|
||||
|
||||
## Context
|
||||
json_file_path: {json_file_path}
|
||||
json_schema: {json.dumps(json_schema, indent=2)}
|
||||
"""
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
`agents/json-reader/tools.sh`:
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
# @meta require-tools jq,genson
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path
|
||||
|
||||
# @cmd Generates instructions for the agent dynamically
|
||||
_instructions() {
|
||||
read -r -p "Enter a JSON file path OR paste raw JSON: " value
|
||||
|
||||
if [[ -z "${value}" ]]; then
|
||||
echo "A file path or JSON string is required" >&2
|
||||
exit 1
|
||||
fi
|
||||
json_file_path=""
|
||||
inline_temp=""
|
||||
cleanup() {
|
||||
[[ -n "${inline_temp:-}" && -f "${inline_temp}" ]] && rm -f "${inline_temp}"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
if [[ -f "${value}" ]]; then
|
||||
json_file_path="$(realpath "${value}")"
|
||||
if ! jq empty "${json_file_path}" >/dev/null 2>&1; then
|
||||
echo "Error: File does not contain valid JSON: ${json_file_path}" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
inline_temp="$(mktemp)"
|
||||
printf "%s" "${value}" > "${inline_temp}"
|
||||
if ! jq empty "${inline_temp}" >/dev/null 2>&1; then
|
||||
echo "Error: Input is neither a file nor valid JSON." >&2
|
||||
exit 1
|
||||
fi
|
||||
json_file_path="<provided-inline-json>"
|
||||
fi
|
||||
|
||||
source_file="${json_file_path}"
|
||||
if [[ "${json_file_path}" == "<provided-inline-json>" ]]; then
|
||||
source_file="${inline_temp}"
|
||||
fi
|
||||
|
||||
json_schema="$(genson < "${source_file}" | jq -c '.')"
|
||||
cat <<EOF >> "$LLM_OUTPUT"
|
||||
You are an AI agent that can view and filter JSON data with jq.
|
||||
|
||||
## Context
|
||||
json_file_path: ${json_file_path}
|
||||
json_schema: ${json_schema}
|
||||
EOF
|
||||
}
|
||||
```
|
||||
|
||||
For more information on how to create custom tools for your agent and the structure of the `agents/my-agent/tools.sh`,
|
||||
`agents/my-agent/tools.py`, or `agents/my-agent/tools.ts` files, refer to the [Building Tools for Agents](#4-building-tools-for-agents) section below.
|
||||
|
||||
#### Variables
|
||||
All the same variable interpolations supported by static instructions are also supported by dynamic instructions. For
|
||||
more information on what variables are available and how to use them, refer to the [Special Variables](#special-variables)
|
||||
and [User-Defined Variables](#user-defined-variables) sections above.
|
||||
|
||||
## 3. Initializing RAG
|
||||
Each agent you create also has a dedicated knowledge base that adds additional context to your queries and helps the LLM
|
||||
answer queries effectively. The documents to load into RAG are defined in the `documents` array of your agent
|
||||
configuration file:
|
||||
|
||||
```yaml
|
||||
documents:
|
||||
- https://www.ohdsi.org/data-standardization/
|
||||
- https://github.com/OHDSI/Vocabulary-v5.0/wiki/**
|
||||
- OMOPCDM_ddl.sql # Relative path to agent (i.e. file lives at '<loki-config-dir>/agents/my-agent/OMOPCDM_ddl.sql')
|
||||
```
|
||||
|
||||
These documents use the same syntax as those you'd define when constructing RAG normally. To see all the available types
|
||||
of documents that Loki supports and how to use custom document loaders, refer to the [RAG documentation](./RAG.md#supported-document-sources).
|
||||
|
||||
Anytime your agent starts up, it will automatically be using the RAG you've defined here.
|
||||
|
||||
## 4. Building Tools for Agents
|
||||
Building tools for agents is virtually identical to building custom tools, with one slight difference: instead of
|
||||
defining a single function that gets executed at runtime (e.g. `main` for bash tools and `run` for Python tools), agent
|
||||
tools define a number of *subcommands*.
|
||||
|
||||
### Limitations
|
||||
You can only utilize one of: a bash-based `<loki-config-dir>/agents/my-agent/tools.sh`, a Python-based
|
||||
`<loki-config-dir>/agents/my-agent/tools.py`, or a TypeScript-based `<loki-config-dir>/agents/my-agent/tools.ts`.
|
||||
However, if it's easier to achieve a task in one language vs the other,
|
||||
you're free to define other scripts in your agent's configuration directory and reference them from the main
|
||||
tools file. **Any scripts *not* named `tools.{py,sh,ts}` will not be picked up by Loki's compiler**, meaning they
|
||||
can be used like any other set of scripts.
|
||||
|
||||
It's important to keep in mind the following:
|
||||
|
||||
* **Do not give agents the same name as an executable**. Loki compiles the tools for each agent into a binary that it
|
||||
temporarily places on your path during execution. If you have a binary with the same name as your agent, then your
|
||||
shell may execute the existing binary instead of your agent's tools
|
||||
* **`LLM_ROOT_DIR` points to the agent's configuration directory**. This is where agents differ slightly from normal
|
||||
tools: The `LLM_ROOT_DIR` environment variable does *not* point to the `functions/tools` directory like it does in
|
||||
global tools. Instead, it points to the agent's configuration directory, making it easier to source scripts and other
|
||||
miscellaneous files
|
||||
|
||||
### .env File Support
|
||||
When Loki loads an agent, it will also search the agent's configuration directory for a `.env` file. If found, all
|
||||
environment variables defined in the file will be made available to the agent's tools.
|
||||
|
||||
### Python-Based Agent Tools
|
||||
Python-based tools are defined exactly the same as they are for custom tool definitions. The only difference is that
|
||||
instead of a single `run` function, you define as many as you like with whatever arguments you like.
|
||||
|
||||
**Example:**
|
||||
`agents/my-agent/tools.py`
|
||||
```python
|
||||
import urllib.request
|
||||
|
||||
def get_ip_info():
|
||||
"""
|
||||
Get your IP information
|
||||
"""
|
||||
with urllib.request.urlopen("https://httpbin.org/ip") as response:
|
||||
data = response.read()
|
||||
return data.decode('utf-8')
|
||||
|
||||
def get_ip_address_from_aws():
|
||||
"""
|
||||
Find your public IP address using AWS
|
||||
"""
|
||||
with urllib.request.urlopen("https://checkip.amazonaws.com") as response:
|
||||
data = response.read()
|
||||
return data.decode('utf-8')
|
||||
```
|
||||
|
||||
Loki automatically compiles these as separate functions for the LLM to call. No extra work is needed. Just make sure you
|
||||
follow all the same steps to define each function as you would when creating custom Python tools.
|
||||
|
||||
For more information on how to build tools in Python, refer to the [custom Python tools documentation](./function-calling/CUSTOM-TOOLS.md#custom-python-based-tools)
|
||||
|
||||
### Bash-Based Agent Tools
|
||||
Bash-based agent tools are virtually identical to custom bash tools, with only one difference. Instead of defining a
|
||||
single entrypoint via the `main` function, you actually define as many subcommands as you like.
|
||||
|
||||
**Example:**
|
||||
`agents/my-agent/tools.sh`
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path
|
||||
# @describe Discover network information about your computer and its place in the internet
|
||||
|
||||
# Use the `@cmd` annotation to define subcommands for your script.
|
||||
# @cmd Get your IP information
|
||||
get_ip_info() {
|
||||
curl -fsSL https://httpbin.org/ip >> "$LLM_OUTPUT"
|
||||
}
|
||||
|
||||
# @cmd Find your public IP address using AWS
|
||||
get_ip_address_from_aws() {
|
||||
curl -fsSL https://checkip.amazonaws.com >> "$LLM_OUTPUT"
|
||||
}
|
||||
```
|
||||
To compile the script so it's executable and testable:
|
||||
```bash
|
||||
$ loki --build-tools
|
||||
```
|
||||
|
||||
Then you can execute your script (assuming your current working directory is `agents/my-agent`):
|
||||
```bash
|
||||
$ ./tools.sh get_ip_info
|
||||
$ ./tools.sh get_ip_address_from_aws
|
||||
```
|
||||
|
||||
All other special annotations (`@env`, `@arg`, `@option`, `@flag`) apply to subcommands as well, so be sure to follow
|
||||
the same syntax and formatting as is used to create custom bash tools globally.
|
||||
|
||||
For more information on how to write, [build and test](function-calling/CUSTOM-BASH-TOOLS.md#execute-and-test-your-bash-tools) tools in bash, refer to the
|
||||
[custom bash tools documentation](function-calling/CUSTOM-BASH-TOOLS.md).
|
||||
|
||||
### TypeScript-Based Agent Tools
|
||||
TypeScript-based agent tools work the same way as TypeScript global tools, with one difference: instead of a single `run` function,
|
||||
you define as many exported functions as you like. Non-exported functions are private helpers and are invisible to the
|
||||
LLM.
|
||||
|
||||
**Example:**
|
||||
`agents/my-agent/tools.ts`
|
||||
```typescript
|
||||
/**
|
||||
* Get your IP information
|
||||
*/
|
||||
export async function get_ip_info(): Promise<string> {
|
||||
const resp = await fetch("https://httpbin.org/ip");
|
||||
return await resp.text();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find your public IP address using AWS
|
||||
*/
|
||||
export async function get_ip_address_from_aws(): Promise<string> {
|
||||
const resp = await fetch("https://checkip.amazonaws.com");
|
||||
return await resp.text();
|
||||
}
|
||||
|
||||
// Non-exported helper — invisible to the LLM
|
||||
function formatResponse(data: string): string {
|
||||
return data.trim();
|
||||
}
|
||||
```
|
||||
|
||||
Loki automatically compiles each exported function as a separate tool for the LLM to call. Just make sure you
|
||||
follow the same JSDoc and parameter conventions as you would when creating custom TypeScript tools.
|
||||
|
||||
TypeScript agent tools also support dynamic instructions via an exported `_instructions()` function:
|
||||
|
||||
```typescript
|
||||
import { readFileSync } from "fs";
|
||||
|
||||
/**
|
||||
* Generates instructions for the agent dynamically
|
||||
*/
|
||||
export function _instructions(): string {
|
||||
const schema = readFileSync("schema.json", "utf-8");
|
||||
return `You are an AI agent that works with the following schema:\n${schema}`;
|
||||
}
|
||||
```
|
||||
|
||||
For more information on how to build tools in TypeScript, refer to the [custom TypeScript tools documentation](function-calling/CUSTOM-TOOLS.md#custom-typescript-based-tools).
|
||||
|
||||
## 5. Conversation Starters
|
||||
It's often helpful to also have some conversation starters so users know what kinds of things the agent is capable of
|
||||
doing. These are available in the REPL via the `.starter` command and are selectable.
|
||||
|
||||
They are defined using the `conversation_starters` setting in your agent's configuration file:
|
||||
|
||||
**Example:**
|
||||
`agents/my-agent/config.yaml`:
|
||||
```yaml
|
||||
conversation_starters:
|
||||
- What is my username?
|
||||
- What is my current shell?
|
||||
- What is my ip?
|
||||
- How much disk space is left on my PC??
|
||||
- How to create an agent?
|
||||
```
|
||||
|
||||

|
||||
|
||||
## 6. Todo System & Auto-Continuation
|
||||
|
||||
Loki includes a built-in task tracking system designed to improve the reliability of agents, especially when using
|
||||
smaller language models. The Todo System helps models:
|
||||
|
||||
- Break complex tasks into manageable steps
|
||||
- Track progress through multi-step workflows
|
||||
- Automatically continue work until all tasks are complete
|
||||
|
||||
### Quick Configuration
|
||||
|
||||
```yaml
|
||||
# agents/my-agent/config.yaml
|
||||
auto_continue: true # Enable auto-continuation
|
||||
max_auto_continues: 10 # Max continuation attempts
|
||||
inject_todo_instructions: true # Include the default todo instructions into prompt
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
1. When `inject_todo_instructions` is enabled, agents receive instructions on using five built-in tools:
|
||||
- `todo__init`: Initialize a todo list with a goal
|
||||
- `todo__add`: Add a task to the list
|
||||
- `todo__done`: Mark a task complete
|
||||
- `todo__list`: View current todo state
|
||||
- `todo__clear`: Clear the entire todo list and reset the goal
|
||||
|
||||
These instructions are a reasonable default that detail how to use Loki's To-Do System. If you wish,
|
||||
you can disable the injection of the default instructions and specify your own instructions for how
|
||||
to use the To-Do System in your main `instructions` for the agent.
|
||||
|
||||
2. When `auto_continue` is enabled and the model stops with incomplete tasks, Loki automatically sends a
|
||||
continuation prompt with the current todo state, nudging the model to continue working.
|
||||
|
||||
3. This continues until all tasks are done or `max_auto_continues` is reached.
|
||||
|
||||
### When to Use
|
||||
|
||||
- Multistep tasks where the model might lose track
|
||||
- Smaller models that need more structure
|
||||
- Workflows requiring guaranteed completion of all steps
|
||||
|
||||
For complete documentation including all configuration options, tool details, and best practices, see the
|
||||
[Todo System Guide](./TODO-SYSTEM.md).
|
||||
|
||||
## 7. Sub-Agent Spawning System
|
||||
|
||||
Loki agents can spawn and manage child agents that run **in parallel** as background tasks inside the same process.
|
||||
This enables orchestrator-style agents that delegate specialized work to other agents, similar to how tools like
|
||||
Claude Code or OpenCode handle complex multi-step tasks.
|
||||
|
||||
For a working example of an orchestrator agent that uses sub-agent spawning, see the built-in
|
||||
[sisyphus](../assets/agents/sisyphus) agent. For an example of the teammate messaging pattern with parallel sub-agents,
|
||||
see the [code-reviewer](../assets/agents/code-reviewer) agent.
|
||||
|
||||
### Spawning Configuration
|
||||
|
||||
| Setting | Type | Default | Description |
|
||||
|-----------------------------|---------|---------------|--------------------------------------------------------------------------------|
|
||||
| `can_spawn_agents` | boolean | `false` | Enable this agent to spawn child agents |
|
||||
| `max_concurrent_agents` | integer | `4` | Maximum number of child agents that can run simultaneously |
|
||||
| `max_agent_depth` | integer | `3` | Maximum nesting depth for sub-agents (prevents runaway spawning chains) |
|
||||
| `inject_spawn_instructions` | boolean | `true` | Inject the default spawning instructions into the agent's system prompt |
|
||||
| `summarization_model` | string | current model | Model to use for summarizing long sub-agent output (e.g. `openai:gpt-4o-mini`) |
|
||||
| `summarization_threshold` | integer | `4000` | Character count above which sub-agent output is summarized before returning |
|
||||
| `escalation_timeout` | integer | `300` | Seconds a sub-agent waits for an escalated user interaction response |
|
||||
|
||||
**Example configuration:**
|
||||
```yaml
|
||||
# agents/my-orchestrator/config.yaml
|
||||
can_spawn_agents: true
|
||||
max_concurrent_agents: 6
|
||||
max_agent_depth: 2
|
||||
inject_spawn_instructions: true
|
||||
summarization_model: openai:gpt-4o-mini
|
||||
summarization_threshold: 3000
|
||||
escalation_timeout: 600
|
||||
```
|
||||
|
||||
### Spawning & Collecting Agents
|
||||
|
||||
When `can_spawn_agents` is enabled, the agent receives tools for spawning and managing child agents:
|
||||
|
||||
| Tool | Description |
|
||||
|------------------|-------------------------------------------------------------------------|
|
||||
| `agent__spawn` | Spawn a child agent in the background. Returns an agent ID immediately. |
|
||||
| `agent__check` | Non-blocking check: is the agent done? Returns `PENDING` or the result. |
|
||||
| `agent__collect` | Blocking wait: wait for an agent to finish, return its output. |
|
||||
| `agent__list` | List all spawned agents and their status. |
|
||||
| `agent__cancel` | Cancel a running agent by ID. |
|
||||
|
||||
The core pattern is **Spawn -> Continue -> Collect**:
|
||||
|
||||
```
|
||||
# 1. Spawn agents in parallel (returns IDs immediately)
|
||||
agent__spawn --agent explore --prompt "Find auth middleware patterns in src/"
|
||||
agent__spawn --agent explore --prompt "Find error handling patterns in src/"
|
||||
|
||||
# 2. Continue your own work while they run
|
||||
|
||||
# 3. Check if done (non-blocking)
|
||||
agent__check --id agent_explore_a1b2c3d4
|
||||
|
||||
# 4. Collect results when ready (blocking)
|
||||
agent__collect --id agent_explore_a1b2c3d4
|
||||
agent__collect --id agent_explore_e5f6g7h8
|
||||
```
|
||||
|
||||
Any agent defined in your `<loki-config-dir>/agents/` directory can be spawned as a child. Child agents:
|
||||
- Run in a fully isolated environment (separate session, config, and tools)
|
||||
- Have their output suppressed from the terminal (no spinner, no tool call logging)
|
||||
- Return their accumulated output to the parent when collected
|
||||
|
||||
### Task Queue with Dependencies
|
||||
|
||||
For complex workflows where tasks have ordering requirements, the spawning system includes a dependency-aware
|
||||
task queue:
|
||||
|
||||
| Tool | Description |
|
||||
|------------------------|-----------------------------------------------------------------------------|
|
||||
| `agent__task_create` | Create a task with optional dependencies and auto-dispatch agent. |
|
||||
| `agent__task_list` | List all tasks with their status, dependencies, and assignments. |
|
||||
| `agent__task_complete` | Mark a task done. Returns newly unblocked tasks and auto-dispatches agents. |
|
||||
| `agent__task_fail` | Mark a task as failed. Dependents remain blocked. |
|
||||
|
||||
```
|
||||
# Create tasks with dependency ordering
|
||||
agent__task_create --subject "Explore existing patterns"
|
||||
agent__task_create --subject "Implement feature" --blocked_by ["task_1"]
|
||||
agent__task_create --subject "Write tests" --blocked_by ["task_2"]
|
||||
|
||||
# Mark tasks complete to unblock dependents
|
||||
agent__task_complete --task_id task_1
|
||||
```
|
||||
|
||||
### Active Task Dispatch
|
||||
|
||||
Tasks can optionally specify an agent to auto-spawn when the task becomes runnable:
|
||||
|
||||
```
|
||||
agent__task_create \
|
||||
--subject "Implement the auth module" \
|
||||
--blocked_by ["task_1"] \
|
||||
--agent coder \
|
||||
--prompt "Implement auth module based on patterns found in task_1"
|
||||
```
|
||||
|
||||
When `task_1` completes and the dependent task becomes unblocked, an agent is automatically spawned with the
|
||||
specified prompt. No manual intervention needed. This enables fully automated multi-step pipelines.
|
||||
|
||||
### Output Summarization
|
||||
|
||||
When a child agent produces long output, it can be automatically summarized before returning to the parent.
|
||||
This keeps parent context windows manageable.
|
||||
|
||||
- If the output exceeds `summarization_threshold` characters (default: 4000), it is sent through an LLM
|
||||
summarization pass
|
||||
- The `summarization_model` setting lets you use a cheaper/faster model for summarization (e.g. `gpt-4o-mini`)
|
||||
- If `summarization_model` is not set, the parent's current model is used
|
||||
- The summarization preserves all actionable information: code snippets, file paths, error messages, and
|
||||
concrete recommendations
|
||||
|
||||
### Teammate Messaging
|
||||
|
||||
All agents (including children) automatically receive tools for **direct sibling-to-sibling messaging**:
|
||||
|
||||
| Tool | Description |
|
||||
|-----------------------|-----------------------------------------------------|
|
||||
| `agent__send_message` | Send a text message to another agent's inbox by ID. |
|
||||
| `agent__check_inbox` | Drain all pending messages from your inbox. |
|
||||
|
||||
This enables coordination patterns where child agents share cross-cutting findings:
|
||||
|
||||
```
|
||||
# Agent A discovers something relevant to Agent B
|
||||
agent__send_message --id agent_reviewer_b1c2d3e4 --message "Found a security issue in auth.rs line 42"
|
||||
|
||||
# Agent B checks inbox before finalizing
|
||||
agent__check_inbox
|
||||
```
|
||||
|
||||
Messages are routed through the parent's supervisor. A parent can message its children, and children can message
|
||||
their siblings. For a working example of the teammate pattern, see the built-in
|
||||
[code-reviewer](../assets/agents/code-reviewer) agent, which spawns file-specific reviewers that share
|
||||
cross-cutting findings with each other.
|
||||
|
||||
### Runaway Safeguards
|
||||
|
||||
The spawning system includes built-in safeguards to prevent runaway agent chains:
|
||||
|
||||
- **`max_concurrent_agents`:** Caps how many agents can run at once (default: 4). Spawn attempts beyond this
|
||||
limit return an error asking the agent to wait or cancel existing agents.
|
||||
- **`max_agent_depth`:** Caps nesting depth (default: 3). A child agent spawning its own child increments the
|
||||
depth counter. Attempts beyond the limit are rejected.
|
||||
- **`can_spawn_agents`:** Only agents with this flag set to `true` can spawn children. By default, spawning is
|
||||
disabled. This means child agents cannot spawn their own children unless you explicitly create them with
|
||||
`can_spawn_agents: true` in their config.
|
||||
|
||||
## 8. User Interaction Tools
|
||||
|
||||
Loki includes built-in tools for agents (and the REPL) to interactively prompt the user for input. These tools
|
||||
are **always available**. No configuration needed. They are automatically injected into every agent and into
|
||||
REPL mode when function calling is enabled.
|
||||
|
||||
### User Interaction Available Tools
|
||||
|
||||
| Tool | Description | Returns |
|
||||
|------------------|-----------------------------------------|----------------------------------|
|
||||
| `user__ask` | Present a single-select list of options | The selected option string |
|
||||
| `user__confirm` | Ask a yes/no question | `"yes"` or `"no"` |
|
||||
| `user__input` | Request free-form text input | The text entered by the user |
|
||||
| `user__checkbox` | Present a multi-select checkbox list | Array of selected option strings |
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `user__ask`: `--question "..." --options ["Option A", "Option B", "Option C"]`
|
||||
- `user__confirm`: `--question "..."`
|
||||
- `user__input`: `--question "..."`
|
||||
- `user__checkbox`: `--question "..." --options ["Option A", "Option B", "Option C"]`
|
||||
|
||||
At the top level (depth 0), these tools render interactive terminal prompts directly using arrow-key navigation,
|
||||
checkboxes, and text input fields.
|
||||
|
||||
### Escalation (Sub-Agent to User)
|
||||
|
||||
When a **child agent** (depth > 0) calls a `user__*` tool, it cannot prompt the terminal directly. Instead,
|
||||
the request is **automatically escalated** to the root agent:
|
||||
|
||||
1. The child agent calls `user__ask(...)` and **blocks**, waiting for a reply
|
||||
2. The root agent sees a `pending_escalations` notification in its next tool results
|
||||
3. The root agent either answers from context or prompts the user itself, then calls
|
||||
`agent__reply_escalation` to unblock the child
|
||||
4. The child receives the reply and continues
|
||||
|
||||
The escalation timeout is configurable via `escalation_timeout` in the agent's `config.yaml` (default: 300
|
||||
seconds / 5 minutes). If the timeout expires, the child receives a fallback message asking it to use its
|
||||
best judgment.
|
||||
|
||||
| Tool | Description |
|
||||
|---------------------------|--------------------------------------------------------------------------|
|
||||
| `agent__reply_escalation` | Reply to a pending child escalation, unblocking the waiting child agent. |
|
||||
|
||||
This tool is automatically available to any agent with `can_spawn_agents: true`.
|
||||
|
||||
## 9. Auto-Injected Prompts
|
||||
|
||||
Loki automatically appends usage instructions to your agent's system prompt for each enabled built-in system.
|
||||
These instructions are injected into both **static and dynamic instructions** after your own instructions,
|
||||
ensuring agents always know how to use their available tools.
|
||||
|
||||
| System | Injected When | Toggle |
|
||||
|--------------------|----------------------------------------------------------------|-----------------------------|
|
||||
| Todo tools | `auto_continue: true` AND `inject_todo_instructions: true` | `inject_todo_instructions` |
|
||||
| Spawning tools | `can_spawn_agents: true` AND `inject_spawn_instructions: true` | `inject_spawn_instructions` |
|
||||
| Teammate messaging | Always (all agents) | None (always injected) |
|
||||
| User interaction | Always (all agents) | None (always injected) |
|
||||
|
||||
If you prefer to write your own instructions for a system, set the corresponding `inject_*` flag to `false`
|
||||
and include your custom instructions in the agent's `instructions` field. The built-in tools will still be
|
||||
available; only the auto-injected prompt text is suppressed.
|
||||
|
||||
## Built-In Agents
|
||||
Loki comes packaged with some useful built-in agents:
|
||||
|
||||
* `coder`: An agent to assist you with all your coding tasks
|
||||
* `code-reviewer`: A [CodeRabbit](https://coderabbit.ai)-style code reviewer that spawns per-file reviewers using the teammate messaging pattern
|
||||
* `demo`: An example agent to use for reference when learning to create your own agents
|
||||
* `explore`: An agent designed to help you explore and understand your codebase
|
||||
* `file-reviewer`: An agent designed to perform code-review on a single file (used by the `code-reviewer` agent)
|
||||
* `jira-helper`: An agent that assists you with all your Jira-related tasks
|
||||
* `oracle`: An agent for high-level architecture, design decisions, and complex debugging
|
||||
* `sisyphus`: A powerhouse orchestrator agent for writing complex code and acting as a natural language interface for your codebase (similar to Claude Code, Gemini CLI, Codex, or OpenCode). Uses sub-agent spawning to delegate to `explore`, `coder`, and `oracle`.
|
||||
* `sql`: A universal SQL agent that enables you to talk to any relational database in natural language
|
||||
@@ -1,211 +0,0 @@
|
||||
# AIChat to Loki Migration Guide
|
||||
Loki originally started as a fork of AIChat but has since evolved into its own separate project with separate goals.
|
||||
|
||||
As a result, there are some changes you'll need to make to your AIChat configuration to be able to use Loki.
|
||||
|
||||
Be sure you've run `loki` at least once so that the Loki configuration directory and subdirectories exist and are
|
||||
populated with the built-in defaults.
|
||||
|
||||
## Global Configuration File
|
||||
You should be able to copy/paste your AIChat configuration file into your Loki configuration directory. Since the
|
||||
location of the Loki configuration directory varies between systems, you can use the following command to locate your
|
||||
config directory:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Then, you'll need to make the following changes:
|
||||
|
||||
* `function_calling` -> `function_calling_support`
|
||||
* `use_tools` -> `enabled_tools`
|
||||
* `agent_prelude` -> `agent_session`
|
||||
* `compress_threshold` -> `compression_threshold`
|
||||
* `summarize_prompt` -> `summarization_prompt`
|
||||
* `summary_prompt` -> `summary_context_prompt`
|
||||
|
||||
## Roles
|
||||
Locate your `roles` directory using the following command:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'roles_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Update any roles that have `use_tools` to `enabled_tools`.
|
||||
|
||||
## Sessions
|
||||
Locate your `sessions` directory using the following command:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'sessions_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Update the following settings:
|
||||
* `use_tools` -> `enabled_tools`
|
||||
* `compress_threshold` -> `compression_threshold`
|
||||
* `summarize_prompt` -> `summarization_prompt`
|
||||
* `summary_prompt` -> `summary_context_prompt`
|
||||
|
||||
---
|
||||
|
||||
# LLM Functions Changes
|
||||
Probably the most significant difference between AIChat and Loki is how tools are handled. So if you cloned the
|
||||
[llm-functions](https://github.com/sigoden/llm-functions) repo, you'll need to make the following changes.
|
||||
|
||||
**Note: JavaScript functions are not supported in Loki.**
|
||||
|
||||
The following guide assumes you're using the `llm-functions` repository as your base for custom functions, and thus
|
||||
follows that directory structure.
|
||||
|
||||
## Agents
|
||||
Agents are now all handled in one place: the `agents` directory (`<loki-config-dir>/agents`):
|
||||
|
||||
```shell
|
||||
loki --info | grep 'agents_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
And instead of separate `index.yaml` and `config.yaml` files, they're now both in a single `config.yaml` file.
|
||||
|
||||
So now for all of your agents, copy all the contents of those directories to the corresponding directory in the Loki
|
||||
`agents` directory. Then make the following changes:
|
||||
|
||||
* Copy the contents of your `<aichat-config-dir>/functions/agents` directory into `<loki-config-dir>/agents`
|
||||
* Merge `index.yaml` into `config.yaml`
|
||||
* If you never created a custom `config.yaml` file, then simply rename `index.yaml` to `config.yaml`
|
||||
* If you've defined an `agent_prelude`, rename that field to `agent_session`
|
||||
* Convert all JavaScript tools to either Python or Bash
|
||||
* For Bash `tools.sh`: Remove the following line:
|
||||
```bash
|
||||
eval "$(argc --argc-eval "$0" "$@")"
|
||||
```
|
||||
* Any `tools.txt` files you have that define what global functions the agent uses are now replaced by the `global_tools`
|
||||
field in the agent's `config.yaml`. So for example: If your `tools.txt` looks like this:
|
||||
```text
|
||||
fs_mkdir.sh
|
||||
fs_ls.sh
|
||||
fs_patch.sh
|
||||
fs_cat.sh
|
||||
```
|
||||
then you need to add the following to your agent's `config.yaml`:
|
||||
```yaml
|
||||
global_tools:
|
||||
- fs_mkdir.sh
|
||||
- fs_ls.sh
|
||||
- fs_patch.sh
|
||||
- fs_cat.sh
|
||||
```
|
||||
* If you have any bash `tools.sh` that depend on the utility scripts in the `llm-functions` repository, they've been
|
||||
replaced by built-in utility scripts. So use the following to replace any matching lines in your `tools.sh` files:
|
||||
```bash
|
||||
##################
|
||||
## Scripts file ##
|
||||
##################
|
||||
ROOT_DIR="${LLM_ROOT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
|
||||
# replace with
|
||||
source "$LLM_PROMPT_UTILS_FILE"
|
||||
|
||||
#######################
|
||||
## guard_path script ##
|
||||
#######################
|
||||
"$ROOT_DIR/utils/guard_path.sh"
|
||||
# replace with
|
||||
guard_path
|
||||
|
||||
############################
|
||||
## guard_operation script ##
|
||||
############################
|
||||
"$ROOT_DIR/utils/guard_operation.sh"
|
||||
# replace with
|
||||
guard_operation
|
||||
|
||||
######################
|
||||
## patch.awk script ##
|
||||
######################
|
||||
awk -f "$ROOT_DIR/utils/patch.awk"
|
||||
# replace with
|
||||
patch_file
|
||||
```
|
||||
|
||||
When you're done with this migration, you should have the following:
|
||||
|
||||
* No more `functions/agents` directory
|
||||
* No `functions/agents.txt` file (Loki assumes that if the agent directory exists, it is loadable)
|
||||
* No `<loki-config-dir>/agents/<agent-name>/tools.txt`
|
||||
* No `<loki-config-dir>/agents/<agent-name>/index.yaml`
|
||||
|
||||
## Functions
|
||||
Loki consolidates much of the `llm-functions` repo functionality into one binary. So this means:
|
||||
|
||||
* There's no need to have `argc` installed anymore
|
||||
* No separate repository to manage
|
||||
* No `tools.txt`
|
||||
* No `functions.json`
|
||||
* No `functions/mcp` directory at all
|
||||
* No `functions/scripts`
|
||||
|
||||
Here's how to migrate your functions over to Loki from the `llm-functions` repository.
|
||||
|
||||
* Copy your AIChat `<aichat-config-dir>/functions` directory into your Loki config directory
|
||||
* Delete the following files and directories from your `<loki-config-dir>/functions` directory:
|
||||
* `scripts/`
|
||||
* `agents.txt`
|
||||
* `functions.json`
|
||||
* `Argcfile.sh`
|
||||
* `README.md` (irrelevant now)
|
||||
* `LICENSE` (irrelevant now)
|
||||
* `utils/guard_operation.sh`
|
||||
* `utils/guard_path.sh`
|
||||
* `utils/patch.awk`
|
||||
* Everything in `tools.txt` now lives in the global config file under the `visible_tools` setting:
|
||||
```text
|
||||
get_current_weather.sh
|
||||
execute_command.sh
|
||||
web_search.sh
|
||||
#execute_py_code.py
|
||||
query_jira_issues.sh
|
||||
```
|
||||
becomes the following in your `<loki-config-dir>/config.yaml`
|
||||
```yaml
|
||||
visible_tools:
|
||||
- get_current_weather.sh
|
||||
- execute_command.sh
|
||||
- web_search.sh
|
||||
# - execute_py_code.py
|
||||
- query_jira_issues.sh
|
||||
```
|
||||
* If you've defined a `functions/mcp.json` file, you can leave it alone.
|
||||
* Similarly to agents, if you have any bash `tools.sh` that depend on the utility scripts in the `llm-functions`
|
||||
repository, they've been replaced by built-in utility scripts. So use the following to replace any matching lines in
|
||||
your `tools.sh` files:
|
||||
```bash
|
||||
##################
|
||||
## Scripts file ##
|
||||
##################
|
||||
ROOT_DIR="${LLM_ROOT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
|
||||
# replace with
|
||||
source "$LLM_PROMPT_UTILS_FILE"
|
||||
|
||||
#######################
|
||||
## guard_path script ##
|
||||
#######################
|
||||
"$ROOT_DIR/utils/guard_path.sh"
|
||||
# replace with
|
||||
guard_path
|
||||
|
||||
############################
|
||||
## guard_operation script ##
|
||||
############################
|
||||
"$ROOT_DIR/utils/guard_operation.sh"
|
||||
# replace with
|
||||
guard_operation
|
||||
|
||||
######################
|
||||
## patch.awk script ##
|
||||
######################
|
||||
awk -f "$ROOT_DIR/utils/patch.awk"
|
||||
# replace with
|
||||
patch_file
|
||||
```
|
||||
|
||||
Refer to the [custom bash tools docs](./function-calling/CUSTOM-BASH-TOOLS.md) to learn how to compile and test bash
|
||||
tools in Loki without needing to use `argc`.
|
||||
@@ -1,113 +0,0 @@
|
||||
# Environment Variables
|
||||
|
||||
Loki is designed to be highly dynamic and customizable. As a result, Loki utilizes a number of environment variables
|
||||
that can be used to modify its behavior at runtime without needing to modify the existing configuration files.
|
||||
|
||||
Loki also supports defining environment variables via a `.env` file in the Loki configuration directory. This directory
|
||||
varies between systems, so you can find the location of your configuration directory using the following command:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Global Configuration Related Variables](#global-configuration-related-variables)
|
||||
- [Client Related Variables](#client-related-variables)
|
||||
- [Files and Directory Related Variables](#files-and-directory-related-variables)
|
||||
- [Agent Related Variables](#agent-related-variables)
|
||||
- [Logging Related Variables](#logging-related-variables)
|
||||
- [Miscellaneous Variables](#miscellaneous-variables)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Global Configuration Related Variables
|
||||
All configuration items in the global config file have corresponding environment variables that can be used to override them at runtime. To see
|
||||
all configuration options and more thorough descriptions, refer to the [example config file](../config.example.yaml).
|
||||
|
||||
Below are the most commonly used configuration settings and their corresponding environment variables:
|
||||
|
||||
| Setting | Environment Variable |
|
||||
|----------------------------|---------------------------------|
|
||||
| `model` | `LOKI_MODEL` |
|
||||
| `temperature` | `LOKI_TEMPERATURE` |
|
||||
| `top_p` | `LOKI_TOP_P` |
|
||||
| `stream` | `LOKI_STREAM` |
|
||||
| `save` | `LOKI_SAVE` |
|
||||
| `editor` | `LOKI_EDITOR` |
|
||||
| `wrap` | `LOKI_WRAP` |
|
||||
| `wrap_code` | `LOKI_WRAP_CODE` |
|
||||
| `save_session` | `LOKI_SAVE_SESSION` |
|
||||
| `compression_threshold` | `LOKI_COMPRESSION_THRESHOLD` |
|
||||
| `function_calling_support` | `LOKI_FUNCTION_CALLING_SUPPORT` |
|
||||
| `enabled_tools` | `LOKI_ENABLED_TOOLS` |
|
||||
| `mcp_server_support` | `LOKI_MCP_SERVER_SUPPORT` |
|
||||
| `enabled_mcp_servers` | `LOKI_ENABLED_MCP_SERVERS` |
|
||||
| `rag_embedding_model` | `LOKI_RAG_EMBEDDING_MODEL` |
|
||||
| `rag_reranker_model` | `LOKI_RAG_RERANKER_MODEL` |
|
||||
| `rag_top_k` | `LOKI_RAG_TOP_K` |
|
||||
| `rag_chunk_size` | `LOKI_RAG_CHUNK_SIZE` |
|
||||
| `rag_chunk_overlap` | `LOKI_RAG_CHUNK_OVERLAP` |
|
||||
| `highlight` | `LOKI_HIGHLIGHT` |
|
||||
| `theme` | `LOKI_THEME` |
|
||||
| `serve_addr` | `LOKI_SERVE_ADDR` |
|
||||
| `user_agent` | `LOKI_USER_AGENT` |
|
||||
| `save_shell_history` | `LOKI_SAVE_SHELL_HISTORY` |
|
||||
| `sync_models_url` | `LOKI_SYNC_MODELS_URL` |
|
||||
|
||||
|
||||
## Client Related Variables
|
||||
The following environment variables are available for clients in Loki:
|
||||
|
||||
| Environment Variable | Description |
|
||||
|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `{client}_API_KEY` | For clients that require an API key, you can define the keys either through environment variables or <br>using the [vault](./VAULT.md). The variables are named after the client to which they apply; <br>e.g. `OPENAI_API_KEY`, `GEMINI_API_KEY`, etc. |
|
||||
| `LOKI_PLATFORM` | Combine with `{client}_API_KEY` to run Loki without a configuration file. <br>This variable is ignored if a configuration file exists. |
|
||||
| `LOKI_PATCH_{client}_CHAT_COMPLETIONS` | Patch chat completion requests to models on the corresponding client; can modify the URL, body, <br>or headers. |
|
||||
| `LOKI_SHELL` | Specify the shell that Loki should be using when executing commands |
|
||||
|
||||
## Files and Directory Related Variables
|
||||
You can also customize the files and directories that Loki loads its configuration files from:
|
||||
|
||||
| Environment Variable | Description | Default Value |
|
||||
|----------------------|------------------------------------------------------------------------|---------------------------------|
|
||||
| `LOKI_CONFIG_DIR` | Customize the location of the Loki configuration directory. | `<user-config-dir>/loki` |
|
||||
| `LOKI_ENV_FILE` | Customize the location of the `.env` file to load at startup. | `<loki-config-dir>/.env` |
|
||||
| `LOKI_CONFIG_FILE` | Customize the location of the global `config.yaml` configuration file. | `<loki-config-dir>/config.yaml` |
|
||||
| `LOKI_ROLES_DIR` | Customize the location of the `roles` directory. | `<loki-config-dir>/roles` |
|
||||
| `LOKI_SESSIONS_DIR` | Customize the location of the `sessions` directory. | `<loki-config-dir>/sessions` |
|
||||
| `LOKI_RAGS_DIR` | Customize the location of the `rags` directory. | `<loki-config-dir>/rags` |
|
||||
| `LOKI_FUNCTIONS_DIR` | Customize the location of the `functions` directory. | `<loki-config-dir>/functions` |
|
||||
|
||||
## Agent Related Variables
|
||||
You can also customize the location of full agent configurations using the following environment variables:
|
||||
|
||||
| Environment Variable | Description |
|
||||
|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `<AGENT_NAME>_CONFIG_FILE` | Customize the location of the agent's configuration file; e.g. `SQL_CONFIG_FILE` |
|
||||
| `<AGENT_NAME>_MODEL` | Customize the `model` used for the agent; e.g. `SQL_MODEL` |
|
||||
| `<AGENT_NAME>_TEMPERATURE` | Customize the `temperature` used for the agent; e.g. `SQL_TEMPERATURE` |
|
||||
| `<AGENT_NAME>_TOP_P` | Customize the `top_p` used for the agent; e.g. `SQL_TOP_P` |
|
||||
| `<AGENT_NAME>_GLOBAL_TOOLS` | Customize the `global_tools` that are enabled for the agent (a JSON string array); e.g. `SQL_GLOBAL_TOOLS` |
|
||||
| `<AGENT_NAME>_MCP_SERVERS` | Customize the `mcp_servers` that are enabled for the agent (a JSON string array); e.g. `SQL_MCP_SERVERS` |
|
||||
| `<AGENT_NAME>_AGENT_SESSION` | Customize the `agent_session` used with the agent; e.g. `SQL_AGENT_SESSION` |
|
||||
| `<AGENT_NAME>_INSTRUCTIONS` | Customize the `instructions` for the agent; e.g. `SQL_INSTRUCTIONS` |
|
||||
| `<AGENT_NAME>_VARIABLES` | Customize the `variables` used for the agent (in JSON format of `[{"key1": "value1", "key2": "value2"}]`); <br>e.g. `SQL_VARIABLES` |
|
||||
|
||||
## Logging Related Variables
|
||||
The following variables can be used to change the log level of Loki or the location of the log file:
|
||||
|
||||
| Environment Variable | Description | Default Value |
|
||||
|----------------------|---------------------------------------------|----------------------------------|
|
||||
| `LOKI_LOG_LEVEL` | Customize the log level of Loki | `INFO` |
|
||||
| `LOKI_LOG_FILE` | Customize the location of the Loki log file | `<user-cache-dir>/loki/loki.log` |
|
||||
|
||||
**Pro-Tip:** You can always tail the Loki logs using the `--tail-logs` flag. If you need to disable color output, you
|
||||
can also pass the `--disable-log-colors` flag.
|
||||
|
||||
## Miscellaneous Variables
|
||||
| Environment Variable | Description | Default Value |
|
||||
|----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|
|
||||
| `AUTO_CONFIRM` | Bypass all `guard_*` checks in the bash prompt helpers; useful for agent composition and routing | |
|
||||
| `LLM_TOOL_DATA_FILE` | Set automatically by Loki on Windows. Points to a temporary file containing the JSON tool call data. <br>Tool scripts (`run-tool.sh`, `run-agent.sh`, etc.) read from this file instead of command-line args <br>to avoid JSON escaping issues when data passes through `cmd.exe` → bash. **Not intended to be set by users.** | |
|
||||
@@ -1,103 +0,0 @@
|
||||
# Macros
|
||||
Macros are essentially Loki "scripts"; that is, predefined sequences of REPL commands that automate repetitive tasks or
|
||||
workflows. Macros run in isolated environments, ensuring that the macros don't inherit any pre-existing role, session,
|
||||
RAG, or agent state, and they will not affect your current context.
|
||||
|
||||
This isolation ensures that your workspace remains clean and unaffected by macro operations.
|
||||
|
||||

|
||||
|
||||
For more information on Loki's REPL, refer to the [REPL](./REPL.md) documentation.
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Macro Definition](#macro-definition)
|
||||
- [Step Definitions](#step-definitions)
|
||||
- [Macro Variables](#macro-variables)
|
||||
- [Built-In Macros](#built-in-macros)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Macro Definition
|
||||
Macros are defined as YAML files in the `macros` subdirectory of your Loki configuration directory. The Loki configuration
|
||||
directory can vary between systems, so to find the location of your macros config directory, you can use the following
|
||||
command:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'macros_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Macro definitions are broken into two parts: the `steps` of the macro, and an optional `variables` section that lets
|
||||
users pass in variables to alter the behavior of the macro at runtime.
|
||||
|
||||
### Step Definitions
|
||||
The step definitions for a macro are straightforward: They are simply the exact commands you would otherwise type in the
|
||||
REPL.
|
||||
|
||||
**Example: Macro to generate a git commit message**
|
||||
`macros/generate-commit-message.yaml`
|
||||
```yaml
|
||||
steps:
|
||||
- .file `git diff` -- generate a git commit message
|
||||
```
|
||||
Usage:
|
||||
```shell
|
||||
$ loki --macro generate-commit-message
|
||||
>> .file `git diff` -- generate a git commit message
|
||||
Add documentation on macros
|
||||
```
|
||||
|
||||
For a full example configuration, refer to the [example macro configuration file](../config.macro.example.yaml) in the root of this project.
|
||||
|
||||
### Macro Variables
|
||||
Sometimes it's useful to be able to modify the behavior of a macro at runtime. This is achieved with the `variables`
|
||||
array of the macro definition.
|
||||
|
||||
Since macros are just Loki scripts, passing variables to a macro works the same way as in any other scripting
|
||||
language: you pass them as arguments alongside your invocation.
|
||||
|
||||
**Example:**
|
||||
```shell
|
||||
$ loki --macro example-variable-macro first_argument second_argument
|
||||
```
|
||||
|
||||
Each variable in the `variables` array has the following properties:
|
||||
* `name` (Required): the name of the variable, which can be referenced in the actual steps of the macro using the
|
||||
`{{name}}` syntax.
|
||||
* `default` (Optional): A default value for the variable if no value is specified. If no default value is defined, and
|
||||
no value is provided for the variable at runtime, Loki will error out.
|
||||
* `rest` (Optional, Boolean): When set to `true`, this variable will collect all remaining arguments passed to the
|
||||
macro. This behavior is only applicable when the variable is the last variable in the list. By default, this is
|
||||
`false`.
|
||||
|
||||
The `variables` array is order-dependent; that is to say that all arguments passed to the macro are positional. So be
|
||||
careful about the ordering if that is important to your macro's invocation.
|
||||
|
||||
**Example: Simple variable example to invoke an agent**
|
||||
`macros/invoke-agent.yaml`
|
||||
```yaml
|
||||
variables:
|
||||
- name: agent # No default value means this must be defined at runtime
|
||||
- name: args
|
||||
rest: true # All remaining arguments to the macro are collected into this variable
|
||||
default: What can you do? # This is used if no value is passed at runtime
|
||||
steps:
|
||||
- .agent {{agent}}
|
||||
- '{{args}}'
|
||||
```
|
||||
Usage:
|
||||
```shell
|
||||
$ loki --macro invoke-agent sql
|
||||
# or
|
||||
$ loki --macro invoke-agent sql What tables are available?
|
||||
```
|
||||
|
||||
For a full example configuration, refer to the [example macro configuration file](../config.macro.example.yaml) in the root of this project.
|
||||
|
||||
## Built-In Macros
|
||||
Loki comes packaged with some useful built-in macros. These also serve as good references for writing your own
|
||||
macros, so be sure to check out the [built-in macro definitions](../assets/macros) if you're looking for more
|
||||
examples.
|
||||
|
||||
* `generate-commit-message` - Generate a Git commit message based on the staged changes in the current directory
|
||||
@@ -1,407 +0,0 @@
|
||||
# Phase 1 Flow Test Plan
|
||||
|
||||
Comprehensive behavioral verification plan comparing the old codebase
|
||||
(`~/code/testing/loki` on the `develop` branch) against the new Phase 1
|
||||
codebase (`~/code/loki`). Every test should produce identical behavior
|
||||
in both codebases unless noted as an intentional improvement.
|
||||
|
||||
## How to run
|
||||
|
||||
For each test case:
|
||||
1. Run the test in the OLD codebase (`cd ~/code/testing/loki && cargo run --`)
|
||||
2. Run the same test in the NEW codebase (`cd ~/code/loki && cargo run --`)
|
||||
3. Compare output/behavior
|
||||
4. Mark PASS/FAIL/IMPROVED
|
||||
|
||||
Legend:
|
||||
- `OLD:` = expected behavior from old codebase
|
||||
- `NEW:` = expected behavior from new codebase (should match unless noted)
|
||||
- `[IMPROVED]` = intentional behavioral improvement in new code
|
||||
|
||||
---
|
||||
|
||||
## 1. Build Baseline
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 1.1 | Compile check | `cargo check` | Zero warnings, zero errors |
|
||||
| 1.2 | Clippy | `cargo clippy` | Zero warnings (excluding pre-existing) |
|
||||
| 1.3 | Tests | `cargo test` | All tests pass |
|
||||
|
||||
---
|
||||
|
||||
## 2. CLI — Info and Listing (early-exit paths)
|
||||
|
||||
These should produce identical output in both codebases.
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 2.1 | System info | `loki --info` | Prints config paths, model, settings |
|
||||
| 2.2 | List models | `loki --list-models` | Prints all available model IDs |
|
||||
| 2.3 | List roles | `loki --list-roles` | Prints role names (no hidden files) |
|
||||
| 2.4 | List sessions | `loki --list-sessions` | Prints session names |
|
||||
| 2.5 | List agents | `loki --list-agents` | Prints agent names, no `.shared` [IMPROVED] |
|
||||
| 2.6 | List RAGs | `loki --list-rags` | Prints RAG names |
|
||||
| 2.7 | List macros | `loki --list-macros` | Prints macro names |
|
||||
| 2.8 | Sync models | `loki --sync-models` | Fetches models.yaml, prints status |
|
||||
|
||||
---
|
||||
|
||||
## 3. CLI — Single-shot Chat
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 3.1 | Basic chat | `loki "What is 2+2?"` | Response printed, exits |
|
||||
| 3.2 | With role | `loki --role coder "hello"` | Role context applied |
|
||||
| 3.3 | With prompt | `loki --prompt "you are a pirate" "hello"` | Temp role applied |
|
||||
| 3.4 | With model | `loki --model <model_id> "hello"` | Uses specified model |
|
||||
| 3.5 | With session | `loki -s test "hello"` | Session created, message saved |
|
||||
| 3.6 | Resume session | `loki -s test "what did I say?"` | Session context preserved |
|
||||
| 3.7 | Dry run | `loki --dry-run "hello"` | Input echoed, no API call |
|
||||
| 3.8 | No stream | `loki --no-stream "hello"` | Response printed all at once |
|
||||
| 3.9 | Empty session | `loki -s test --empty-session "hello"` | Session cleared, fresh start |
|
||||
| 3.10 | Save session | `loki -s test --save-session "hello"` | Forces session save |
|
||||
| 3.11 | Code mode | `loki -c "fibonacci in python"` | Only code output |
|
||||
|
||||
---
|
||||
|
||||
## 4. CLI — File Input
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 4.1 | File + text | `loki -f /etc/hostname "summarize"` | File content included |
|
||||
| 4.2 | File only | `loki -f /etc/hostname` | File sent as input |
|
||||
| 4.3 | Multiple files | `loki -f /etc/hostname -f /etc/os-release "compare"` | Both files included |
|
||||
| 4.4 | Stdin pipe | `echo "hello" \| loki "summarize"` | Stdin included |
|
||||
|
||||
---
|
||||
|
||||
## 5. CLI — Shell Execute
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 5.1 | Generate command | `loki -e "list files in /tmp"` | Shell command generated |
|
||||
| 5.2 | Describe mode | Press 'd' when prompted | Explanation shown |
|
||||
| 5.3 | Execute mode | Press 'y' when prompted | Command executed |
|
||||
| 5.4 | Dry run | `loki -e --dry-run "list files"` | Input shown, no execution |
|
||||
|
||||
---
|
||||
|
||||
## 6. CLI — Agent (non-interactive)
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 6.1 | Agent chat | `loki -a coder "write hello world in python"` | Agent tools available, response |
|
||||
| 6.2 | Agent + session | `loki -a coder -s test "hello"` | Agent with specific session |
|
||||
| 6.3 | Agent variables | `loki -a demo --agent-variable key val "hello"` | Variable injected |
|
||||
| 6.4 | Agent MCP | `loki -a <mcp-agent> "use the server"` | MCP servers start, tools work |
|
||||
| 6.5 | Build tools | `loki -a coder --build-tools` | Tools compiled, exits |
|
||||
|
||||
---
|
||||
|
||||
## 7. CLI — Macros
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 7.1 | Execute macro | `loki --macro generate-commit-message` | Macro executes |
|
||||
|
||||
---
|
||||
|
||||
## 8. CLI — Vault (early-exit)
|
||||
|
||||
| # | Test | Command | Expected |
|
||||
|---|---|---|---|
|
||||
| 8.1 | Add secret | `loki --add-secret test-secret` | Prompts for value, saves |
|
||||
| 8.2 | Get secret | `loki --get-secret test-secret` | Prints decrypted value |
|
||||
| 8.3 | List secrets | `loki --list-secrets` | Lists all secret names |
|
||||
| 8.4 | Delete secret | `loki --delete-secret test-secret` | Deletes, confirms |
|
||||
|
||||
---
|
||||
|
||||
## 9. REPL — Startup and Exit
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 9.1 | Start REPL | `loki` | Welcome message shown |
|
||||
| 9.2 | Exit command | Type `.exit` | Clean exit |
|
||||
| 9.3 | Ctrl+D | Press Ctrl+D | Clean exit |
|
||||
| 9.4 | Ctrl+C | Press Ctrl+C | Hint message, stays in REPL |
|
||||
| 9.5 | Prelude role | Set `repl_prelude: "role:coder"` in config, start REPL | Role auto-loaded, prompt changes |
|
||||
| 9.6 | Prelude session | Set `repl_prelude: "mysession:coder"` in config, start REPL | Session+role auto-loaded |
|
||||
|
||||
---
|
||||
|
||||
## 10. REPL — Basic Chat
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 10.1 | Chat message | Type `hello` | Response streamed |
|
||||
| 10.2 | Continue | Type `.continue` after response | Continuation generated |
|
||||
| 10.3 | Regenerate | Type `.regenerate` | New response generated |
|
||||
| 10.4 | Copy | Type `.copy` | Last response copied to clipboard |
|
||||
| 10.5 | Multi-line | Type `:::`, then multi-line, then `:::` | Multi-line sent as one message |
|
||||
| 10.6 | Empty input | Press Enter on empty line | No action |
|
||||
| 10.7 | Help | Type `.help` | Help text shown |
|
||||
| 10.8 | Info | Type `.info` | System info printed |
|
||||
|
||||
---
|
||||
|
||||
## 11. REPL — Roles
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 11.1 | Enter role | `.role coder` | Prompt changes, role active |
|
||||
| 11.2 | One-shot role | `.role coder write hello world` | Response with role, then returns to no-role |
|
||||
| 11.3 | Role info | `.info role` (while in role) | Role details shown |
|
||||
| 11.4 | Edit role | `.edit role` (while in role) | Editor opens |
|
||||
| 11.5 | Save role | `.save role myname` | Role saved to file |
|
||||
| 11.6 | Exit role | `.exit role` | Prompt resets, role cleared |
|
||||
| 11.7 | Create new role | `.role newname` (non-existent) | Editor opens for new role |
|
||||
| 11.8 | Role + MCP | `.role <mcp-role>` | MCP servers start with spinner, tools available |
|
||||
| 11.9 | Exit role + MCP | `.exit role` (from MCP role) | MCP servers stop, global MCP restored |
|
||||
| 11.10 | Role in session | `.session test` then `.role coder` | Role applied within session |
|
||||
|
||||
---
|
||||
|
||||
## 12. REPL — Sessions
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 12.1 | Temp session | `.session` | Temp session started |
|
||||
| 12.2 | Named session | `.session mytest` | Named session created/resumed |
|
||||
| 12.3 | Session info | `.info session` | Session details shown |
|
||||
| 12.4 | Edit session | `.edit session` | Editor opens |
|
||||
| 12.5 | Save session | `.save session myname` | Session saved |
|
||||
| 12.6 | Empty session | `.empty session` | Messages cleared |
|
||||
| 12.7 | Compress session | `.compress session` | Compression runs with spinner |
|
||||
| 12.8 | Exit session | `.exit session` | Session exited |
|
||||
| 12.9 | Carry-over prompt | Send message, then `.session test` | "incorporate last Q&A?" prompt |
|
||||
| 12.10 | Session + MCP | `.session <mcp-session>` | MCP servers start |
|
||||
| 12.11 | Already in session | `.session` while in session | Error: "Already in a session" |
|
||||
|
||||
---
|
||||
|
||||
## 13. REPL — Agents
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 13.1 | Start agent | `.agent coder` | Tools compiled, prompt changes, agent active |
|
||||
| 13.2 | Agent + session | `.agent coder mysession` | Agent with specific session |
|
||||
| 13.3 | Agent variables | `.agent demo key=value` | Variable set, available in tools |
|
||||
| 13.4 | Agent info | `.info agent` | Agent details shown |
|
||||
| 13.5 | Starter list | `.starter` | Conversation starters listed |
|
||||
| 13.6 | Starter select | `.starter 1` | Starter message sent |
|
||||
| 13.7 | Edit agent config | `.edit agent-config` | Editor opens |
|
||||
| 13.8 | Exit agent | `.exit agent` | Agent cleared, prompt resets |
|
||||
| 13.9 | Agent + MCP | `.agent <mcp-agent>` | MCP servers start, tools available |
|
||||
| 13.10 | MCP disabled | `.agent <mcp-agent>` with mcp_server_support=false | Error, agent blocked [IMPROVED] |
|
||||
| 13.11 | Tool execution | Send message that triggers tool call | Tool executes, result returned |
|
||||
| 13.12 | Global tools | Agent with `global_tools` configured | Global tools available alongside agent tools |
|
||||
| 13.13 | Tool file priority | Delete .ts, have .sh | .sh used [IMPROVED] |
|
||||
| 13.14 | Clear todo | `.clear todo` (in agent with auto-continue) | Todo list cleared |
|
||||
| 13.15 | Auto-continuation | Agent with auto_continue=true, create todos | Agent continues until todos done |
|
||||
| 13.16 | Already in agent | `.agent coder` while agent active | Error: "Already in an agent" |
|
||||
|
||||
---
|
||||
|
||||
## 14. REPL — Sub-Agent Spawning and Escalation
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 14.1 | Spawn sub-agent | Use agent with can_spawn_agents=true, trigger spawn | Sub-agent starts in background |
|
||||
| 14.2 | Check sub-agent | Call agent__check with agent ID | Returns PENDING or result |
|
||||
| 14.3 | Collect sub-agent | Call agent__collect with agent ID | Blocks until done, returns output |
|
||||
| 14.4 | List sub-agents | Call agent__list | Shows all spawned agents + status |
|
||||
| 14.5 | Cancel sub-agent | Call agent__cancel with agent ID | Agent cancelled |
|
||||
| 14.6 | Escalation | Sub-agent calls user__ask | Parent gets notification |
|
||||
| 14.7 | Reply escalation | Parent calls agent__reply_escalation | Sub-agent unblocked |
|
||||
| 14.8 | Max depth | Spawn beyond max_agent_depth | Error: "Max agent depth exceeded" |
|
||||
| 14.9 | Max concurrent | Spawn beyond max_concurrent_agents | Error: capacity reached |
|
||||
| 14.10 | Teammate messaging | Sub-agent sends message to sibling | Message delivered via inbox |
|
||||
|
||||
---
|
||||
|
||||
## 15. REPL — RAG
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 15.1 | Init RAG | `.rag <name>` | RAG initialized/loaded |
|
||||
| 15.2 | RAG info | `.info rag` | RAG details shown |
|
||||
| 15.3 | RAG sources | `.sources rag` (after a query) | Citation sources listed |
|
||||
| 15.4 | Edit RAG docs | `.edit rag-docs` | Editor opens |
|
||||
| 15.5 | Rebuild RAG | `.rebuild rag` | RAG rebuilt |
|
||||
| 15.6 | Exit RAG | `.exit rag` | RAG cleared |
|
||||
| 15.7 | RAG embeddings | Send query with RAG active | Embeddings included in context |
|
||||
|
||||
---
|
||||
|
||||
## 16. REPL — MCP Servers
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 16.1 | Global MCP start | Start REPL with `enabled_mcp_servers` configured | Servers start |
|
||||
| 16.2 | MCP search | LLM calls `mcp__search_<server>` | Tools found and ranked |
|
||||
| 16.3 | MCP describe | LLM calls `mcp__describe_<server>` tool_name | Schema returned |
|
||||
| 16.4 | MCP invoke | LLM calls `mcp__invoke_<server>` tool args | Tool executed, result returned |
|
||||
| 16.5 | Change servers | `.set enabled_mcp_servers <other>` | Old stopped, new started |
|
||||
| 16.6 | Disable MCP | `.set mcp_server_support false` | MCP tools removed |
|
||||
| 16.7 | Enable MCP | `.set mcp_server_support true` | MCP tools restored |
|
||||
| 16.8 | Role MCP switch | Enter role with MCP X, exit, enter role with MCP Y | X stops, Y starts |
|
||||
| 16.9 | Null servers | `.set enabled_mcp_servers null` | All MCP servers stop, tools removed |
|
||||
|
||||
---
|
||||
|
||||
## 17. REPL — Settings (.set)
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 17.1 | Temperature | `.set temperature 0.5` | Temperature changed |
|
||||
| 17.2 | Top-p | `.set top_p 0.9` | Top-p changed |
|
||||
| 17.3 | Model | `.set model <name>` | Model switched |
|
||||
| 17.4 | Dry run | `.set dry_run true` | Dry run enabled |
|
||||
| 17.5 | Stream | `.set stream false` | Streaming disabled |
|
||||
| 17.6 | Save | `.set save false` | Auto-save disabled |
|
||||
| 17.7 | Highlight | `.set highlight false` | Syntax highlighting disabled |
|
||||
| 17.8 | Save session | `.set save_session true` | Session auto-save enabled |
|
||||
| 17.9 | Null value | `.set temperature null` | Temperature reset to default |
|
||||
| 17.10 | Compression threshold | `.set compression_threshold 2000` | Threshold changed |
|
||||
| 17.11 | Max output tokens | `.set max_output_tokens 4096` | Max tokens set |
|
||||
| 17.12 | Enabled tools | `.set enabled_tools all` | All tools enabled |
|
||||
| 17.13 | Function calling | `.set function_calling_support false` | Function calling disabled |
|
||||
|
||||
---
|
||||
|
||||
## 18. REPL — Tab Completion
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 18.1 | Role completion | `.role<TAB>` | Shows role names |
|
||||
| 18.2 | Agent completion | `.agent<TAB>` | Shows agent names (no .shared) [IMPROVED] |
|
||||
| 18.3 | Session completion | `.session<TAB>` | Shows session names |
|
||||
| 18.4 | RAG completion | `.rag<TAB>` | Shows RAG names |
|
||||
| 18.5 | Macro completion | `.macro<TAB>` | Shows macro names |
|
||||
| 18.6 | Model completion | `.model<TAB>` | Shows model names with descriptions |
|
||||
| 18.7 | Set keys | `.set <TAB>` | Shows all setting names |
|
||||
| 18.8 | Set values | `.set temperature <TAB>` | Shows current/suggested value |
|
||||
| 18.9 | Enabled tools | `.set enabled_tools <TAB>` | Shows tools (no user__/mcp_/todo__/agent__) [IMPROVED] |
|
||||
| 18.10 | MCP servers | `.set enabled_mcp_servers <TAB>` | Shows configured servers + mappings [IMPROVED] |
|
||||
| 18.11 | Delete types | `.delete <TAB>` | Shows: role, session, rag, macro, agent-data |
|
||||
| 18.12 | Vault cmds | `.vault <TAB>` | Shows: add, get, update, delete, list |
|
||||
|
||||
---
|
||||
|
||||
## 19. REPL — Delete
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 19.1 | Delete role | `.delete role` | Shows role picker, deletes selected |
|
||||
| 19.2 | Delete session | `.delete session` | Shows session picker, deletes |
|
||||
| 19.3 | Delete RAG | `.delete rag` | Shows RAG picker, deletes |
|
||||
| 19.4 | Delete macro | `.delete macro` | Shows macro picker, deletes |
|
||||
| 19.5 | Delete agent data | `.delete agent-data` | Shows agent picker, deletes data |
|
||||
|
||||
---
|
||||
|
||||
## 20. REPL — Vault
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 20.1 | Add secret | `.vault add mysecret` | Prompts for value, saves |
|
||||
| 20.2 | Get secret | `.vault get mysecret` | Prints decrypted value |
|
||||
| 20.3 | Update secret | `.vault update mysecret` | Prompts for new value |
|
||||
| 20.4 | Delete secret | `.vault delete mysecret` | Deletes |
|
||||
| 20.5 | List secrets | `.vault list` | Lists all secret names |
|
||||
|
||||
---
|
||||
|
||||
## 21. REPL — Macros and File
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 21.1 | Execute macro | `.macro generate-commit-message` | Macro runs |
|
||||
| 21.2 | Create macro | `.macro newname` (non-existent) | Editor opens |
|
||||
| 21.3 | File include | `.file /etc/hostname -- summarize this` | File included, query sent |
|
||||
| 21.4 | URL include | `.file https://example.com -- summarize` | URL fetched, content included |
|
||||
|
||||
---
|
||||
|
||||
## 22. REPL — Edit Commands
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 22.1 | Edit config | `.edit config` | Config file opens in editor |
|
||||
| 22.2 | Edit role | `.edit role` (in role) | Role file opens in editor |
|
||||
| 22.3 | Edit session | `.edit session` (in session) | Session file opens in editor |
|
||||
| 22.4 | Edit agent config | `.edit agent-config` (in agent) | Agent config opens in editor |
|
||||
| 22.5 | Edit RAG docs | `.edit rag-docs` (in RAG) | RAG docs opens in editor |
|
||||
|
||||
---
|
||||
|
||||
## 23. Session Compression and Autoname
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 23.1 | Auto-compress | Set low compression_threshold, send many messages | "Compressing the session." shown |
|
||||
| 23.2 | Manual compress | `.compress session` | Compression runs with spinner |
|
||||
| 23.3 | Auto-name | Start temp session, send messages | Session auto-named |
|
||||
|
||||
---
|
||||
|
||||
## 24. Error Handling
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 24.1 | Invalid role | `.role nonexistent_role_xxxxxxx` | Error shown, REPL continues |
|
||||
| 24.2 | Invalid model | `.set model nonexistent_model` | Error shown, REPL continues |
|
||||
| 24.3 | No session active | `.info session` (no session) | Error or empty |
|
||||
| 24.4 | No agent active | `.info agent` (no agent) | Error or empty |
|
||||
| 24.5 | Already in session | `.session` then `.session` again | Error: "Already in a session" |
|
||||
| 24.6 | Already in agent | `.agent coder` then `.agent coder` | Error: "Already in an agent" |
|
||||
| 24.7 | Unknown command | `.nonexistent` | Error message shown |
|
||||
| 24.8 | Tool failure | Trigger tool that fails | Error returned to LLM as tool result |
|
||||
|
||||
---
|
||||
|
||||
## 25. MCP Lifecycle State Transitions (Critical)
|
||||
|
||||
These test the most bug-prone area of the migration.
|
||||
|
||||
| # | Test | Steps | Expected |
|
||||
|---|---|---|---|
|
||||
| 25.1 | Role A→B MCP swap | Enter role with MCP-A, exit, enter role with MCP-B | A stops, B starts, B tools work |
|
||||
| 25.2 | Role MCP→no MCP | Enter role with MCP, exit role | MCP stops, global MCP restored |
|
||||
| 25.3 | No MCP→Role MCP | Start REPL (no MCP), enter role with MCP | MCP starts, tools work |
|
||||
| 25.4 | Agent MCP lifecycle | Start agent with MCP, use tools, exit agent | Agent MCP starts, works, stops on exit |
|
||||
| 25.5 | Session MCP | Start session with MCP config | MCP starts for session |
|
||||
| 25.6 | Global→Agent→Global | Start with global MCP-A, enter agent with MCP-B, exit agent | A→B→A transitions clean |
|
||||
| 25.7 | MCP mapping resolution | Role has `enabled_mcp_servers: alias`, mapping configured | Alias resolved, correct servers start |
|
||||
| 25.8 | MCP disabled + agent | Agent requires MCP, mcp_server_support=false | Error blocks agent start [IMPROVED] |
|
||||
|
||||
---
|
||||
|
||||
## Intentional Improvements (NEW ≠ OLD, by design)
|
||||
|
||||
| # | What changed | Old behavior | New behavior |
|
||||
|---|---|---|---|
|
||||
| I.1 | Agent list hides `.shared` | `.shared` shown in completions | `.shared` hidden |
|
||||
| I.2 | Tool file priority | Filesystem order (non-deterministic) | Priority: .sh > .py > .ts > .js |
|
||||
| I.3 | MCP disabled + agent | Warning printed, agent starts anyway | Error, agent blocked |
|
||||
| I.4 | Role MCP disabled warning | Warning always shown (even if role has no MCP) | Warning only when role actually has MCP |
|
||||
| I.5 | Enabled tools completions | Shows internal tools (user__, mcp_, etc.) | Internal tools hidden |
|
||||
| I.6 | MCP server completions | Only mapping aliases | Both configured servers + aliases |
|
||||
|
||||
---
|
||||
|
||||
## Test Execution Notes
|
||||
|
||||
- Run tests in order — some depend on state from previous tests
|
||||
(e.g., session tests create sessions that later tests reference)
|
||||
- For MCP tests, ensure at least one MCP server is configured in
|
||||
`~/.config/loki/functions/mcp.json`
|
||||
- For agent tests, use built-in agents (coder, demo, explore)
|
||||
- For sub-agent tests, use the sisyphus agent (has can_spawn_agents)
|
||||
- For RAG tests, configure a RAG with test documents
|
||||
- For vault tests, use temporary secret names to avoid polluting
|
||||
the real vault
|
||||
- Compare error messages between old and new — they may differ
|
||||
slightly in wording but should convey the same meaning
|
||||
@@ -1,727 +0,0 @@
|
||||
# Phase 2 Implementation Plan: Engine + Emitter
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 1 splits `Config` into `AppState` + `RequestContext`. Phase 2 takes the unified state and introduces the **Engine** — a single core function that replaces CLI's `start_directive()` and REPL's `ask()` — plus an **Emitter trait** that abstracts output away from direct stdout writes. After this phase, CLI and REPL both call `Engine::run()` with different `Emitter` implementations and behave identically to today. The API server in Phase 4 will plug in without touching core logic.
|
||||
|
||||
**Estimated effort:** ~1 week
|
||||
**Risk:** Low-medium. The work is refactoring existing well-tested code paths into a shared shape. Most of the risk is in preserving exact terminal rendering behavior.
|
||||
**Depends on:** Phase 1 Steps 0–10 complete (`GlobalConfig` eliminated, `RequestContext` wired through all entry points).
|
||||
|
||||
---
|
||||
|
||||
## Why Phase 2 Exists
|
||||
|
||||
Today's CLI and REPL have two near-identical pipelines that diverge in five specific places. The divergences are accidents of history, not intentional design:
|
||||
|
||||
1. **Streaming flag handling.** `start_directive` forces non-streaming when extracting code; `ask` never extracts code.
|
||||
2. **Auto-continuation loop.** `ask` has complex logic for `auto_continue_count`, todo inspection, and continuation prompt injection. `start_directive` has none.
|
||||
3. **Session compression.** `ask` triggers `maybe_compress_session` and awaits completion; `start_directive` never compresses.
|
||||
4. **Session autoname.** `ask` calls `maybe_autoname_session` after each turn; `start_directive` doesn't.
|
||||
5. **Cleanup on exit.** `start_directive` calls `exit_session()` at the end; `ask` lets the REPL loop handle it.
|
||||
|
||||
Four of these five divergences are bugs waiting to happen — they mean agents behave differently in CLI vs REPL mode, sessions don't get compressed in CLI even when they should, and auto-continuation is silently unavailable from the CLI. Phase 2 collapses both pipelines into one `Engine::run()` that handles all five behaviors uniformly, with per-request flags to control what's active (e.g., `auto_continue: bool` on `RunRequest`).
|
||||
|
||||
The Emitter trait exists to decouple the rendering pipeline from its destination. Today, streaming output is hardcoded to write to the terminal via `crossterm`. An `Emitter` implementation can also feed an axum SSE stream, collect events for a JSON response, or capture everything for a test. The Engine sends semantic events; Emitters decide how to present them.
|
||||
|
||||
---
|
||||
|
||||
## The Architecture After Phase 2
|
||||
|
||||
```
|
||||
┌─────────┐ ┌─────────┐ ┌─────────┐
|
||||
│ CLI │ │ REPL │ │ API │ (Phase 4)
|
||||
└────┬────┘ └────┬────┘ └────┬────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Engine::run(ctx, req, emitter) │
|
||||
│ ┌────────────────────────────────────────────┐ │
|
||||
│ │ 1. Apply CoreCommand (if any) │ │
|
||||
│ │ 2. Build Input from req │ │
|
||||
│ │ 3. apply_prelude (first turn only) │ │
|
||||
│ │ 4. before_chat_completion │ │
|
||||
│ │ 5. Stream or buffered LLM call │ │
|
||||
│ │ ├─ emit Started │ │
|
||||
│ │ ├─ emit AssistantDelta (per chunk) │ │
|
||||
│ │ ├─ emit ToolCall │ │
|
||||
│ │ ├─ execute tool │ │
|
||||
│ │ ├─ emit ToolResult │ │
|
||||
│ │ └─ loop on tool results │ │
|
||||
│ │ 6. after_chat_completion │ │
|
||||
│ │ 7. maybe_compress_session │ │
|
||||
│ │ 8. maybe_autoname_session │ │
|
||||
│ │ 9. Auto-continuation (if applicable) │ │
|
||||
│ │ 10. emit Finished │ │
|
||||
│ └────────────────────────────────────────────┘ │
|
||||
└──────────────────────────────────────────────────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
TerminalEmitter TerminalEmitter JsonEmitter / SseEmitter
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
### `Engine`
|
||||
|
||||
```rust
|
||||
pub struct Engine {
|
||||
pub app: Arc<AppState>,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
pub fn new(app: Arc<AppState>) -> Self { Self { app } }
|
||||
|
||||
pub async fn run(
|
||||
&self,
|
||||
ctx: &mut RequestContext,
|
||||
req: RunRequest,
|
||||
emitter: &dyn Emitter,
|
||||
) -> Result<RunOutcome, CoreError>;
|
||||
}
|
||||
```
|
||||
|
||||
`Engine` is intentionally a thin wrapper around `Arc<AppState>`. All per-turn state lives on `RequestContext`, so the engine itself has no per-call fields. This makes it cheap to clone and makes `Engine::run` trivially testable.
|
||||
|
||||
### `RunRequest`
|
||||
|
||||
```rust
|
||||
pub struct RunRequest {
|
||||
pub input: Option<UserInput>,
|
||||
pub command: Option<CoreCommand>,
|
||||
pub options: RunOptions,
|
||||
}
|
||||
|
||||
pub struct UserInput {
|
||||
pub text: String,
|
||||
pub files: Vec<FileInput>,
|
||||
pub media: Vec<MediaInput>,
|
||||
pub continuation: Option<ContinuationKind>,
|
||||
}
|
||||
|
||||
pub enum ContinuationKind {
|
||||
Continue,
|
||||
Regenerate,
|
||||
}
|
||||
|
||||
pub struct RunOptions {
|
||||
pub stream: Option<bool>,
|
||||
pub extract_code: bool,
|
||||
pub auto_continue: bool,
|
||||
pub compress_session: bool,
|
||||
pub autoname_session: bool,
|
||||
pub apply_prelude: bool,
|
||||
pub with_embeddings: bool,
|
||||
pub cancel: CancellationToken,
|
||||
}
|
||||
|
||||
impl RunOptions {
|
||||
pub fn cli() -> Self { /* today's start_directive defaults */ }
|
||||
pub fn repl_turn() -> Self { /* today's ask defaults */ }
|
||||
pub fn api_oneshot() -> Self { /* API one-shot defaults */ }
|
||||
pub fn api_session() -> Self { /* API session defaults */ }
|
||||
}
|
||||
```
|
||||
|
||||
Two things to notice:
|
||||
|
||||
1. **`input` is `Option`.** A `RunRequest` can carry just a `command` (e.g., `.role explain`) with no user text, just an input (a plain prompt), or both (the `.role <name> <text>` form that activates a role and immediately sends a prompt through it). The engine handles all three shapes with one code path.
|
||||
|
||||
2. **`RunOptions` is the knob panel that replaces the five divergences.** CLI today has `auto_continue: false, compress_session: false, autoname_session: false`; REPL has all three `true`. Phase 2 exposes these as explicit options with factory constructors for each frontend's conventional defaults. This also means you can now run a CLI one-shot with auto-continuation by constructing `RunOptions::cli()` and flipping `auto_continue = true` — a capability that doesn't exist today.
|
||||
|
||||
### `CoreCommand`
|
||||
|
||||
```rust
|
||||
pub enum CoreCommand {
|
||||
// State setters
|
||||
SetModel(String),
|
||||
UsePrompt(String),
|
||||
UseRole { name: String, trailing_text: Option<String> },
|
||||
UseSession(Option<String>),
|
||||
UseAgent { name: String, session: Option<String>, variables: Vec<(String, String)> },
|
||||
UseRag(Option<String>),
|
||||
|
||||
// Exit commands
|
||||
ExitRole,
|
||||
ExitSession,
|
||||
ExitRag,
|
||||
ExitAgent,
|
||||
|
||||
// State queries
|
||||
Info(InfoScope),
|
||||
RagSources,
|
||||
|
||||
// Config mutation
|
||||
Set { key: String, value: String },
|
||||
|
||||
// Session actions
|
||||
CompressSession,
|
||||
EmptySession,
|
||||
SaveSession { name: Option<String> },
|
||||
EditSession,
|
||||
|
||||
// Role actions
|
||||
SaveRole { name: Option<String> },
|
||||
EditRole,
|
||||
|
||||
// RAG actions
|
||||
EditRagDocs,
|
||||
RebuildRag,
|
||||
|
||||
// Agent actions
|
||||
EditAgentConfig,
|
||||
ClearTodo,
|
||||
StarterList,
|
||||
StarterRun(usize),
|
||||
|
||||
// File input shortcut
|
||||
IncludeFiles { paths: Vec<String>, trailing_text: Option<String> },
|
||||
|
||||
// Macro execution
|
||||
Macro { name: String, args: Vec<String> },
|
||||
|
||||
// Vault
|
||||
VaultAdd(String),
|
||||
VaultGet(String),
|
||||
VaultUpdate(String),
|
||||
VaultDelete(String),
|
||||
VaultList,
|
||||
|
||||
// Miscellaneous
|
||||
EditConfig,
|
||||
Authenticate,
|
||||
Delete(DeleteKind),
|
||||
Copy,
|
||||
Help,
|
||||
}
|
||||
|
||||
pub enum InfoScope {
|
||||
System,
|
||||
Role,
|
||||
Session,
|
||||
Rag,
|
||||
Agent,
|
||||
}
|
||||
|
||||
pub enum DeleteKind {
|
||||
Role(String),
|
||||
Session(String),
|
||||
Rag(String),
|
||||
Macro(String),
|
||||
AgentData(String),
|
||||
}
|
||||
```
|
||||
|
||||
This enum captures all 37 dot-commands identified during the codebase exploration. Three categories deserve special attention:
|
||||
|
||||
- **LLM-triggering commands** (`UsePrompt`, `UseRole` with trailing_text, `IncludeFiles` with trailing_text, `StarterRun`, `Macro` that contains LLM calls, and the continuation variants `Continue`/`Regenerate` expressed via `UserInput.continuation`) — these don't just mutate state; they produce a full run through the LLM pipeline. The engine treats them as `RunRequest { command: Some(_), input: Some(_), .. }` — command runs first, then input flows through.
|
||||
|
||||
- **Asynchronous commands that return immediately** (`EditConfig`, `EditRole`, `EditRagDocs`, `EditAgentConfig`, most `Vault*`, `Delete`) — these are side-effecting but don't produce an LLM interaction. The engine handles them, reports the result through an `Info` event (or an `Error` event on failure), and returns without invoking the LLM path.
|
||||
|
||||
- **Context-dependent commands** (`ClearTodo`, `StarterList`, `StarterRun`, `EditAgentConfig`, etc.) — these require a specific scope (e.g., an active agent). The engine validates the precondition before executing and returns a `CoreError::InvalidState { expected: "active agent", found: "no active agent" }` if the precondition fails.
|
||||
|
||||
### `Emitter` trait and `Event` enum
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait Emitter: Send + Sync {
|
||||
async fn emit(&self, event: Event<'_>) -> Result<(), EmitError>;
|
||||
}
|
||||
|
||||
pub enum Event<'a> {
|
||||
// Lifecycle
|
||||
Started { request_id: Uuid, session_id: Option<SessionId>, agent: Option<&'a str> },
|
||||
Finished { outcome: &'a RunOutcome },
|
||||
|
||||
// Assistant output
|
||||
AssistantDelta(&'a str),
|
||||
AssistantMessageEnd { full_text: &'a str },
|
||||
|
||||
// Tool calls
|
||||
ToolCall { id: &'a str, name: &'a str, args: &'a str },
|
||||
ToolResult { id: &'a str, name: &'a str, result: &'a str, is_error: bool },
|
||||
|
||||
// Auto-continuation
|
||||
AutoContinueTriggered { count: usize, max: usize, remaining_todos: usize },
|
||||
|
||||
// Session lifecycle signals
|
||||
SessionCompressing,
|
||||
SessionCompressed { tokens_saved: Option<usize> },
|
||||
SessionAutonamed(&'a str),
|
||||
|
||||
// Informational
|
||||
Info(&'a str),
|
||||
Warning(&'a str),
|
||||
|
||||
// Errors
|
||||
Error(&'a CoreError),
|
||||
}
|
||||
|
||||
pub enum EmitError {
|
||||
ClientDisconnected,
|
||||
WriteFailed(std::io::Error),
|
||||
}
|
||||
```
|
||||
|
||||
Three implementations ship in Phase 2; two are stubs, one is real:
|
||||
|
||||
- **`TerminalEmitter`** (real) — wraps today's `SseHandler` → `markdown_stream`/`raw_stream` path. This is the bulk of Phase 2's work; see "Terminal rendering details" below.
|
||||
- **`NullEmitter`** (stub, for tests) — drops all events on the floor.
|
||||
- **`CollectingEmitter`** (stub, for tests and future JSON API) — appends events to a `Vec<OwnedEvent>` for later inspection.
|
||||
|
||||
The `JsonEmitter` and `SseEmitter` implementations land in **Phase 4** when the API server comes online.
|
||||
|
||||
### `RunOutcome`
|
||||
|
||||
```rust
|
||||
pub struct RunOutcome {
|
||||
pub request_id: Uuid,
|
||||
pub session_id: Option<SessionId>,
|
||||
pub final_message: Option<String>,
|
||||
pub tool_call_count: usize,
|
||||
pub turns: usize,
|
||||
pub compressed: bool,
|
||||
pub autonamed: Option<String>,
|
||||
pub auto_continued: usize,
|
||||
}
|
||||
```
|
||||
|
||||
`RunOutcome` is what CLI/REPL ignore but the future API returns as JSON. It records everything the caller might want to know about what happened during the run.
|
||||
|
||||
### `CoreError`
|
||||
|
||||
```rust
|
||||
pub enum CoreError {
|
||||
InvalidRequest { msg: String },
|
||||
InvalidState { expected: String, found: String },
|
||||
NotFound { what: String, name: String },
|
||||
Cancelled,
|
||||
ProviderError { provider: String, msg: String },
|
||||
ToolError { tool: String, msg: String },
|
||||
EmitterError(EmitError),
|
||||
Io(std::io::Error),
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
|
||||
impl CoreError {
|
||||
pub fn is_retryable(&self) -> bool { /* ... */ }
|
||||
pub fn http_status(&self) -> u16 { /* for future API use */ }
|
||||
pub fn terminal_message(&self) -> String { /* for TerminalEmitter */ }
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Terminal Rendering Details
|
||||
|
||||
The `TerminalEmitter` is the most delicate part of Phase 2 because it has to preserve every pixel of today's REPL/CLI behavior. Here's the mental model:
|
||||
|
||||
**Today's flow:**
|
||||
```
|
||||
LLM client → mpsc::Sender<SseEvent> → SseHandler → render_stream
|
||||
├─ markdown_stream (if highlight)
|
||||
└─ raw_stream (else)
|
||||
```
|
||||
|
||||
Both `markdown_stream` and `raw_stream` write directly to stdout via `crossterm`, managing cursor positions, line clears, and incremental markdown parsing themselves.
|
||||
|
||||
**Target flow:**
|
||||
```
|
||||
LLM client → mpsc::Sender<SseEvent> → SseHandler → TerminalEmitter::emit(Event::AssistantDelta)
|
||||
├─ (internal) markdown_stream state machine
|
||||
└─ (internal) raw_stream state machine
|
||||
```
|
||||
|
||||
The `TerminalEmitter` owns a `Mutex<StreamRenderState>` that wraps the existing `markdown_stream`/`raw_stream` state. (A `RefCell` would not work here: the `Emitter` trait requires `Send + Sync`, and `RefCell` is not `Sync`.) Each `emit(AssistantDelta)` call feeds the chunk into this state machine exactly as `SseHandler`'s receive loop does today. The result is that the exact same crossterm calls happen in the exact same order — we've just moved them behind a trait.
|
||||
|
||||
**Things that migrate 1:1 into `TerminalEmitter`:**
|
||||
- Spinner start/stop on first delta
|
||||
- Cursor positioning for line reprint during code block growth
|
||||
- Syntax highlighting invocation via `MarkdownRender`
|
||||
- Color/dim output for tool call banners
|
||||
- Final newline + cursor reset on `AssistantMessageEnd`
|
||||
|
||||
**Things that the engine handles, not the emitter:**
|
||||
- Tool call *execution* (still lives in the engine loop)
|
||||
- Session state mutations (engine calls `before_chat_completion` / `after_chat_completion` on `RequestContext`)
|
||||
- Auto-continuation decisions (engine inspects agent runtime)
|
||||
- Compression and autoname decisions (engine)
|
||||
|
||||
**Things the emitter decides, not the engine:**
|
||||
- Whether to suppress ToolCall rendering (sub-agents in today's code suppress their own output; TerminalEmitter respects a `verbose: bool` flag)
|
||||
- How to format errors (TerminalEmitter uses colored stderr; JsonEmitter will use structured JSON)
|
||||
- Whether to show a spinner at all (disabled for non-TTY output)
|
||||
|
||||
**One gotcha:** today's `SseHandler` itself produces the `mpsc` channel that LLM clients push into. In the new model, `SseHandler` becomes an internal helper inside the engine's streaming path that converts `mpsc::Receiver<SseEvent>` into `Emitter::emit(Event::AssistantDelta(...))` calls. No LLM client code changes — they still push into the same channel type. Only the consumer side of the channel changes.
|
||||
|
||||
---
|
||||
|
||||
## The Engine::run Pipeline
|
||||
|
||||
Here's the full pipeline in pseudocode, annotated with which frontend controls each behavior via `RunOptions`:
|
||||
|
||||
```rust
|
||||
impl Engine {
|
||||
pub async fn run(
|
||||
&self,
|
||||
ctx: &mut RequestContext,
|
||||
req: RunRequest,
|
||||
emitter: &dyn Emitter,
|
||||
) -> Result<RunOutcome, CoreError> {
|
||||
let request_id = Uuid::new_v4();
|
||||
let mut outcome = RunOutcome::new(request_id);
|
||||
|
||||
emitter.emit(Event::Started { request_id, session_id: ctx.session_id(), agent: ctx.agent_name() }).await?;
|
||||
|
||||
// 1. Execute command (if any). Commands may be LLM-triggering, mutating, or informational.
|
||||
if let Some(command) = req.command {
|
||||
self.dispatch_command(ctx, command, emitter, &req.options).await?;
|
||||
}
|
||||
|
||||
// 2. Early return if there's no user input (pure command)
|
||||
let Some(user_input) = req.input else {
|
||||
emitter.emit(Event::Finished { outcome: &outcome }).await?;
|
||||
return Ok(outcome);
|
||||
};
|
||||
|
||||
// 3. Apply prelude on first turn of a fresh context (CLI/REPL only)
|
||||
if req.options.apply_prelude && !ctx.prelude_applied {
|
||||
apply_prelude(ctx, &req.options.cancel).await?;
|
||||
ctx.prelude_applied = true;
|
||||
}
|
||||
|
||||
// 4. Build Input from user_input + ctx
|
||||
let input = build_input(ctx, user_input, &req.options).await?;
|
||||
|
||||
// 5. Wait for any in-progress compression to finish (REPL-style block)
|
||||
while ctx.is_compressing_session() {
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
// 6. Enter the turn loop
|
||||
self.run_turn(ctx, input, &req.options, emitter, &mut outcome).await?;
|
||||
|
||||
// 7. Maybe compress session
|
||||
if req.options.compress_session && ctx.session_needs_compression() {
|
||||
emitter.emit(Event::SessionCompressing).await?;
|
||||
compress_session(ctx).await?;
|
||||
outcome.compressed = true;
|
||||
emitter.emit(Event::SessionCompressed { tokens_saved: None }).await?;
|
||||
}
|
||||
|
||||
// 8. Maybe autoname session
|
||||
if req.options.autoname_session {
|
||||
if let Some(name) = maybe_autoname_session(ctx).await? {
|
||||
outcome.autonamed = Some(name.clone());
|
||||
emitter.emit(Event::SessionAutonamed(&name)).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// 9. Auto-continuation (agents only)
|
||||
if req.options.auto_continue {
|
||||
if let Some(continuation) = self.check_auto_continue(ctx) {
|
||||
emitter.emit(Event::AutoContinueTriggered { .. }).await?;
|
||||
outcome.auto_continued += 1;
|
||||
// Recursive call with continuation prompt
|
||||
let next_req = RunRequest {
|
||||
input: Some(UserInput::from_continuation(continuation)),
|
||||
command: None,
|
||||
options: req.options.clone(),
|
||||
};
|
||||
return Box::pin(self.run(ctx, next_req, emitter)).await;
|
||||
}
|
||||
}
|
||||
|
||||
emitter.emit(Event::Finished { outcome: &outcome }).await?;
|
||||
Ok(outcome)
|
||||
}
|
||||
|
||||
async fn run_turn(
|
||||
&self,
|
||||
ctx: &mut RequestContext,
|
||||
mut input: Input,
|
||||
options: &RunOptions,
|
||||
emitter: &dyn Emitter,
|
||||
outcome: &mut RunOutcome,
|
||||
) -> Result<(), CoreError> {
|
||||
loop {
|
||||
outcome.turns += 1;
|
||||
|
||||
before_chat_completion(ctx, &input);
|
||||
|
||||
let client = input.create_client(ctx)?;
|
||||
let (output, tool_results) = if should_stream(&input, options) {
|
||||
stream_chat_completion(ctx, &input, client, emitter, &options.cancel).await?
|
||||
} else {
|
||||
buffered_chat_completion(ctx, &input, client, options.extract_code, &options.cancel).await?
|
||||
};
|
||||
|
||||
after_chat_completion(ctx, &input, &output, &tool_results);
|
||||
outcome.tool_call_count += tool_results.len();
|
||||
|
||||
if tool_results.is_empty() {
|
||||
outcome.final_message = Some(output);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Emit each tool call and result
|
||||
for result in &tool_results {
|
||||
emitter.emit(Event::ToolCall { .. }).await?;
|
||||
emitter.emit(Event::ToolResult { .. }).await?;
|
||||
}
|
||||
|
||||
// Loop: feed tool results back in
|
||||
input = input.merge_tool_results(output, tool_results);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key design decisions in this pipeline:**
|
||||
|
||||
1. **Command dispatch happens first.** A `RunRequest` that carries both a command and input runs the command first (mutating `ctx`), then the input flows through the now-updated context. This lets `.role explain "tell me about X"` work as a single atomic operation — the role is activated, then the prompt is sent under the new role.
|
||||
|
||||
2. **Tool loop is iterative, not recursive.** Today both `start_directive` and `ask` recursively call themselves after tool results. The new `run_turn` uses a `loop` instead, which is cleaner, avoids stack growth on long tool chains, and makes cancellation handling simpler. Auto-continuation remains recursive because it's a full new turn with a new prompt, not just a tool-result continuation.
|
||||
|
||||
3. **Cancellation is checked at every await point.** `options.cancel: CancellationToken` is threaded into every async call. On cancellation, the engine emits `Event::Error(CoreError::Cancelled)` and returns. Today's `AbortSignal` pattern gets wrapped in a `CancellationToken` adapter during the migration.
|
||||
|
||||
4. **Session state hooks fire at the same points as today.** `before_chat_completion` and `after_chat_completion` continue to exist on `RequestContext`, called from the same places in the same order. The refactor doesn't change their semantics.
|
||||
|
||||
5. **Emitter errors don't abort the run.** If the emitter's output destination disconnects (e.g., the client closes the browser tab), the engine keeps running to completion so session state is correctly persisted, but it stops emitting events. `EmitError::ClientDisconnected` is special-cased: once it occurs, subsequent emits are swallowed rather than propagated, so the `emitter.emit(..).await?` calls in the pseudocode above must not be read as aborting the run on disconnect. Session save and tool execution still happen.
|
||||
|
||||
---
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
This phase is structured as **extract, unify, rewrite frontends** — similar to Phase 1's facade pattern. The old functions stay in place until the new Engine is proven by tests and manual verification.
|
||||
|
||||
### Step 1: Create the core types
|
||||
|
||||
Add the new files without wiring them into anything:
|
||||
|
||||
- `src/engine/mod.rs` — module root
|
||||
- `src/engine/engine.rs` — `Engine` struct + `run` method (initially `unimplemented!()`)
|
||||
- `src/engine/request.rs` — `RunRequest`, `UserInput`, `RunOptions`, `ContinuationKind`, `RunOutcome`
|
||||
- `src/engine/command.rs` — `CoreCommand` enum + sub-enums
|
||||
- `src/engine/error.rs` — `CoreError` enum
|
||||
- `src/engine/emitter.rs` — `Emitter` trait + `Event` enum + `EmitError`
|
||||
- `src/engine/emitters/mod.rs` — emitter module
|
||||
- `src/engine/emitters/null.rs` — `NullEmitter` (test stub)
|
||||
- `src/engine/emitters/collecting.rs` — `CollectingEmitter` (test stub)
|
||||
- `src/engine/emitters/terminal.rs` — `TerminalEmitter` (initially `unimplemented!()`)
|
||||
|
||||
Register `pub mod engine;` in `src/main.rs`. Code compiles but nothing calls it yet.
|
||||
|
||||
**Verification:** `cargo check` clean, `cargo test` passes.
|
||||
|
||||
### Step 2: Implement `TerminalEmitter` against existing render code
|
||||
|
||||
Before wiring the engine, build the `TerminalEmitter` by wrapping today's `SseHandler` + `markdown_stream` + `raw_stream` + `MarkdownRender` + `Spinner` code. Don't change any of those modules — just construct a `TerminalEmitter` that holds the state they need and forwards `emit(Event::AssistantDelta(...))` into them.
|
||||
|
||||
```rust
|
||||
pub struct TerminalEmitter {
|
||||
render_state: Mutex<StreamRenderState>,
|
||||
options: TerminalEmitterOptions,
|
||||
}
|
||||
|
||||
pub struct TerminalEmitterOptions {
|
||||
pub highlight: bool,
|
||||
pub theme: Option<String>,
|
||||
pub verbose_tool_calls: bool,
|
||||
pub show_spinner: bool,
|
||||
}
|
||||
|
||||
impl TerminalEmitter {
|
||||
pub fn new_from_app(app: &AppState, working_mode: WorkingMode) -> Self { /* ... */ }
|
||||
}
|
||||
```
|
||||
|
||||
Implement `Emitter` for it, mapping each `Event` variant to the appropriate crossterm operation:
|
||||
|
||||
| Event | TerminalEmitter action |
|
||||
|---|---|
|
||||
| `Started` | Start spinner |
|
||||
| `AssistantDelta(chunk)` | Stop spinner (if first), feed chunk into render state |
|
||||
| `AssistantMessageEnd { full_text }` | Flush render state, emit trailing newline |
|
||||
| `ToolCall { name, args }` | Print dimmed `⚙ Using <name>` banner if verbose |
|
||||
| `ToolResult { .. }` | Print dimmed result summary if verbose |
|
||||
| `AutoContinueTriggered` | Print yellow `⟳ Continuing (N/M, R todos remaining)` to stderr |
|
||||
| `SessionCompressing` | Print `Compressing session...` to stderr |
|
||||
| `SessionCompressed` | Print `Session compressed.` to stderr |
|
||||
| `SessionAutonamed` | Print `Session auto-named: <name>` to stderr |
|
||||
| `Info(msg)` | Print to stdout |
|
||||
| `Warning(msg)` | Print yellow to stderr |
|
||||
| `Error(e)` | Print red to stderr |
|
||||
| `Finished` | Flush any pending trailing newline; otherwise a no-op |
|
||||
|
||||
**Verification:** write integration tests that construct a `TerminalEmitter`, feed it a sequence of events manually, and compare captured stdout/stderr to golden outputs. Use `assert_cmd` or similar to snapshot the rendered output of each event variant.
|
||||
|
||||
### Step 3: Implement `Engine::run` without wiring it
|
||||
|
||||
Implement `Engine::run` and `Engine::run_turn` following the pseudocode above. Use the existing helper functions (`before_chat_completion`, `after_chat_completion`, `apply_prelude`, `create_client`, `call_chat_completions`, `call_chat_completions_streaming`, `maybe_compress_session`, `maybe_autoname_session`) unchanged, just called through `ctx` instead of `&GlobalConfig`.
|
||||
|
||||
**Implementing `dispatch_command`** is the largest sub-task here because it needs to match all 37 `CoreCommand` variants and invoke the right `ctx` methods. Most variants are straightforward one-liners that call a corresponding method on `RequestContext`. A few need special handling:
|
||||
|
||||
- `CoreCommand::UseRole { name, trailing_text }` — activate role, then if `trailing_text` is `Some`, the outer `run` will flow through with the trailing text as `UserInput.text`.
|
||||
- `CoreCommand::IncludeFiles` — reads files, converts to `FileInput` list, attaches to `ctx`'s next input (or fails if no input is provided).
|
||||
- `CoreCommand::StarterRun(id)` — looks up the starter text on the active agent, fails if no agent.
|
||||
- `CoreCommand::Macro` — delegates to `macro_execute`, which may itself call `Engine::run` internally for LLM-triggering macros.
|
||||
|
||||
**Verification:** write unit tests for `dispatch_command` using `NullEmitter`. Each test activates a command and asserts the expected state mutation on `ctx`. This is ~37 tests, one per variant, and they catch the bulk of regressions early.
|
||||
|
||||
Then write a handful of integration tests for `Engine::run` with `CollectingEmitter`, asserting the expected event sequence for:
|
||||
- Plain prompt, no tools, streaming
|
||||
- Plain prompt, no tools, non-streaming
|
||||
- Prompt that triggers 2 tool calls
|
||||
- Prompt that triggers auto-continuation (mock the LLM response)
|
||||
- Prompt on a session that crosses the compression threshold
|
||||
- Command-only request (`.info`)
|
||||
- Command + prompt request (`.role explain "..."`)
|
||||
|
||||
### Step 4: Wire CLI to `Engine::run`
|
||||
|
||||
Replace `main.rs::start_directive` with a thin wrapper:
|
||||
|
||||
```rust
|
||||
async fn start_directive(
|
||||
app: Arc<AppState>,
|
||||
ctx: &mut RequestContext,
|
||||
input_text: String,
|
||||
files: Vec<String>,
|
||||
code_mode: bool,
|
||||
) -> Result<()> {
|
||||
let engine = Engine::new(app.clone());
|
||||
let emitter = TerminalEmitter::new_from_app(&app, WorkingMode::Cmd);
|
||||
|
||||
let req = RunRequest {
|
||||
input: Some(UserInput::from_text_and_files(input_text, files)),
|
||||
command: None,
|
||||
options: {
|
||||
let mut o = RunOptions::cli();
|
||||
o.extract_code = code_mode && !*IS_STDOUT_TERMINAL;
|
||||
o
|
||||
},
|
||||
};
|
||||
|
||||
match engine.run(ctx, req, &emitter).await {
|
||||
Ok(_outcome) => Ok(()),
|
||||
Err(CoreError::Cancelled) => Ok(()),
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Verification:** manual smoke test. Run `loki "hello"`, `loki --code "write a rust hello world"`, `loki --role explain "what is TCP"`. All should produce identical output to before the change.
|
||||
|
||||
### Step 5: Wire REPL to `Engine::run`
|
||||
|
||||
Replace `repl/mod.rs::ask` with a wrapper that calls the engine. The REPL's outer loop that reads lines and calls `run_repl_command` stays. `run_repl_command` for non-dot-command lines constructs a `RunRequest { input: Some(...), .. }` and calls `Engine::run`. Dot-commands get parsed into `CoreCommand` and called as `RunRequest { command: Some(...), input: None, .. }` (or with input if they carry trailing text).
|
||||
|
||||
```rust
|
||||
// In Repl:
|
||||
async fn handle_line(&mut self, line: &str) -> Result<()> {
|
||||
let req = if let Some(rest) = line.strip_prefix('.') {
|
||||
parse_dot_command_to_run_request(rest, &self.ctx)?
|
||||
} else {
|
||||
RunRequest {
|
||||
input: Some(UserInput::from_text(line.to_string())),
|
||||
command: None,
|
||||
options: RunOptions::repl_turn(),
|
||||
}
|
||||
};
|
||||
|
||||
match self.engine.run(&mut self.ctx, req, &self.emitter).await {
|
||||
Ok(_) => Ok(()),
|
||||
Err(CoreError::Cancelled) => Ok(()),
|
||||
Err(e) => {
|
||||
self.emitter.emit(Event::Error(&e)).await.ok();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Verification:** manual smoke test of the REPL. Run through a typical session:
|
||||
1. `loki` → REPL starts
|
||||
2. `hello` → plain prompt works
|
||||
3. `.role explain` → role activates
|
||||
4. `what is TCP` → responds under the role
|
||||
5. `.session` → session starts
|
||||
6. Several messages → conversation continues
|
||||
7. `.info session` → info prints
|
||||
8. `.compress session` → compression runs
|
||||
9. `.agent sisyphus` → agent activates with sub-agents
|
||||
10. `write a hello world in rust` → tool calls + output
|
||||
11. `.exit agent` → agent exits, previous session still active
|
||||
12. `.exit` → REPL exits
|
||||
|
||||
Every interaction should behave identically to pre-Phase-2. Any visual difference is a bug.
|
||||
|
||||
### Step 6: Delete the old `start_directive` and `ask`
|
||||
|
||||
Once CLI and REPL both route through `Engine::run` and all tests/smoke tests pass, delete the old function bodies. Remove any now-unused imports. Run `cargo check` and `cargo test`.
|
||||
|
||||
**Verification:** full test suite green, no dead code warnings.
|
||||
|
||||
### Step 7: Tidy and document
|
||||
|
||||
- Add rustdoc comments on `Engine`, `RunRequest`, `RunOptions`, `Emitter`, `Event`, `CoreCommand`, `CoreError`.
|
||||
- Add an `examples/` subdirectory under `src/engine/` showing how to call the engine with each emitter.
|
||||
- Update `docs/AGENTS.md` with a note that auto-continuation now lives in the shared engine rather than being REPL-only code (it remains off by default in `RunOptions::cli()` until a CLI flag is added in a follow-up).
|
||||
- Update `docs/REST-API-ARCHITECTURE.md` to remove any "in Phase 2" placeholders.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Watch Items
|
||||
|
||||
| Risk | Severity | Mitigation |
|
||||
|---|---|---|
|
||||
| **Terminal rendering regressions** | High | Golden-file snapshot tests for every `Event` variant. Manual smoke tests across all common REPL flows. Keep `TerminalEmitter` as a thin wrapper — no logic changes in the render code itself. |
|
||||
| **Auto-continuation recursion limits** | Medium | The new `Engine::run` uses `Box::pin` for the auto-continuation recursive call. Verify with a mock LLM that `max_auto_continues = 100` doesn't blow the stack. |
|
||||
| **Cancellation during tool execution** | Medium | Tool execution currently uses `AbortSignal`; the new path uses `CancellationToken`. Write a shim that translates. Write a test that cancels mid-tool-call and verifies graceful cleanup (no orphaned subprocesses, no leaked file descriptors). |
|
||||
| **Command parsing fidelity** | Medium | The dot-command parser in today's REPL is hand-written and has edge cases. Port the parsing code verbatim into a dedicated `parse_dot_command_to_run_request` function with unit tests for every edge case found in today's code. |
|
||||
| **Macro execution recursion** | Medium | `.macro` can invoke LLM calls, which now go through `Engine::run`, which can invoke more macros. Verify there's a recursion depth limit or cycle detection; add one if missing. |
|
||||
| **Emitter error propagation** | Low | Emitter errors (ClientDisconnected) should NOT abort session save logic. Engine must continue executing after the first `EmitError::ClientDisconnected` — just stop emitting. Write a test that simulates a disconnected emitter mid-response and asserts the session is still correctly persisted. |
|
||||
| **Spinner interleaving with tool output** | Low | Today's spinner is tightly coupled to the stream handler. If the new order of operations fires a tool call before the spinner is stopped, you'll get garbled output. Test this specifically. |
|
||||
| **Feature flag: `auto_continue` in CLI** | Low | After Phase 2, CLI *could* support auto-continuation but it's not exposed. Decision: leave it off by default in `RunOptions::cli()`, add a `--auto-continue` flag in a separate follow-up if desired. Don't sneak behavior changes into this refactor. |
|
||||
|
||||
---
|
||||
|
||||
## What Phase 2 Does NOT Do
|
||||
|
||||
- **No new features.** Everything that worked before works the same way after.
|
||||
- **No API server.** `JsonEmitter` and `SseEmitter` are placeholders — Phase 4 implements them.
|
||||
- **No `SessionStore` abstraction.** That's Phase 3.
|
||||
- **No `ToolScope` unification.** That landed in Phase 1 Step 6.5.
|
||||
- **No changes to LLM client code.** `call_chat_completions` and `call_chat_completions_streaming` keep their existing signatures.
|
||||
- **No MCP factory pooling.** That's Phase 5.
|
||||
- **No dot-command syntax changes.** The REPL still accepts exactly the same dot-commands; they just parse into `CoreCommand` instead of being hand-dispatched in `run_repl_command`.
|
||||
|
||||
The sole goal of Phase 2 is: **extract the pipeline into Engine::run, route CLI and REPL through it, and prove via tests and smoke tests that nothing regressed.**
|
||||
|
||||
---
|
||||
|
||||
## Entry Criteria (from Phase 1)
|
||||
|
||||
Before starting Phase 2, Phase 1 must be complete:
|
||||
|
||||
- [ ] `GlobalConfig` type alias is removed
|
||||
- [ ] `AppState` and `RequestContext` are the only state holders
|
||||
- [ ] All 91 callsites in the original migration table have been updated
|
||||
- [ ] `cargo test` passes with no `Config`-based tests remaining
|
||||
- [ ] CLI and REPL manual smoke tests pass identically to pre-Phase-1
|
||||
|
||||
## Exit Criteria (Phase 2 complete)
|
||||
|
||||
- [ ] `src/engine/` module exists with Engine, Emitter, Event, CoreCommand, RunRequest, RunOutcome, CoreError
|
||||
- [ ] `TerminalEmitter` implemented and wrapping all existing render paths
|
||||
- [ ] `NullEmitter` and `CollectingEmitter` implemented
|
||||
- [ ] `start_directive` in main.rs is a thin wrapper around `Engine::run`
|
||||
- [ ] REPL's per-line handler routes through `Engine::run`
|
||||
- [ ] All 37 `CoreCommand` variants implemented with unit tests
|
||||
- [ ] Integration tests for the 7 engine scenarios listed in Step 3
|
||||
- [ ] Manual smoke tests for CLI and REPL match pre-Phase-2 behavior
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] Phase 3 (SessionStore abstraction) can begin
|
||||
@@ -1,607 +0,0 @@
|
||||
# Phase 3 Implementation Plan: SessionStore Abstraction
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 3 extracts session persistence behind a trait so that CLI, REPL, and the future API server all resolve sessions through the same interface. The file-based YAML storage that exists today remains the only implementation in Phase 3 — no database, no schema migration, no new on-disk format. What changes is that session identity becomes **UUID-primary with optional name-based aliases**, direct `std::fs::write` calls disappear from `Session::save()`, and concurrent access to the same session is properly serialized.
|
||||
|
||||
After Phase 3, Phase 4 (REST API) can plug in without touching any persistence code: `POST /v1/sessions` returns a UUID, subsequent requests address sessions by that UUID, and CLI/REPL users continue typing `.session my-project` without noticing the internal change.
|
||||
|
||||
**Estimated effort:** ~3–5 days
|
||||
**Risk:** Low. Storage semantics don't change; we're re-shaping the API surface around existing YAML files.
|
||||
**Depends on:** Phase 1 complete, Phase 2 complete (Engine needs to call through the new store, not raw `Session::load`).
|
||||
|
||||
---
|
||||
|
||||
## Why This Phase Exists
|
||||
|
||||
Today's `Session::load()` and `Session::save()` embed the file layout, the filename-is-the-identity assumption, and the absence of concurrency control directly in the type. Three things break when you try to run this in a multi-tenant server:
|
||||
|
||||
1. **No UUID identity.** Two API clients both start a "project" session and collide on the filename. You can't safely let clients name sessions freely.
|
||||
|
||||
2. **No concurrency control.** Two concurrent requests to the same session do `load → mutate → save` with no coordination. The later save clobbers the earlier one's changes.
|
||||
|
||||
3. **No abstraction seam.** Every callsite computes paths itself via `Config::session_file(name)` and calls `Session::load()` / `.save()` directly. There's no single place to swap in alternate storage, add caching, or instrument persistence.
|
||||
|
||||
Phase 3 fixes all three without breaking anything users currently do.
|
||||
|
||||
---
|
||||
|
||||
## The Architecture After Phase 3
|
||||
|
||||
```
|
||||
┌────────┐ ┌────────┐ ┌────────┐
|
||||
│ CLI │ │ REPL │ │ API │ (Phase 4)
|
||||
└───┬────┘ └───┬────┘ └───┬────┘
|
||||
└──────────┼──────────┘
|
||||
▼
|
||||
┌──────────────────────┐
|
||||
│ Engine │
|
||||
└──────────┬───────────┘
|
||||
▼
|
||||
┌──────────────────────┐
|
||||
│ SessionStore trait │
|
||||
└──────────┬───────────┘
|
||||
▼
|
||||
┌──────────────────────┐
|
||||
│ FileSessionStore │ (Phase 3: the only impl)
|
||||
│ — UUID primary │
|
||||
│ — name alias index │
|
||||
│ — per-session mutex │
|
||||
│ — atomic writes │
|
||||
└──────────┬───────────┘
|
||||
▼
|
||||
~/.config/loki/sessions/
|
||||
by-id/<uuid>/state.yaml
|
||||
by-name/<alias> → <uuid> (text file containing the UUID)
|
||||
agents/<agent>/sessions/
|
||||
by-id/<uuid>/state.yaml
|
||||
by-name/<alias> → <uuid>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
### `SessionId`
|
||||
|
||||
```rust
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
|
||||
pub struct SessionId(Uuid);
|
||||
|
||||
impl SessionId {
|
||||
pub fn new() -> Self { Self(Uuid::new_v4()) }
|
||||
pub fn as_uuid(&self) -> Uuid { self.0 }
|
||||
pub fn to_string(&self) -> String { self.0.to_string() }
|
||||
pub fn parse(s: &str) -> Result<Self, SessionIdError> { /* ... */ }
|
||||
}
|
||||
```
|
||||
|
||||
UUID v4 by default. Newtype so we can't accidentally pass arbitrary strings where a session ID is expected, and so the on-disk format can evolve without breaking callers.
|
||||
|
||||
### `SessionAlias`
|
||||
|
||||
```rust
|
||||
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
|
||||
pub struct SessionAlias(String);
|
||||
|
||||
impl SessionAlias {
|
||||
pub fn new(s: impl Into<String>) -> Result<Self, AliasError>;
|
||||
pub fn as_str(&self) -> &str { &self.0 }
|
||||
}
|
||||
```
|
||||
|
||||
Wraps the human-readable names users type in `.session my-project`. Validation rejects path traversal (`..`), slashes, null bytes, and anything that would produce an invalid filename. This is the CLI/REPL compatibility layer — existing `sessions/my-project.yaml` files continue to work, the alias system just maps them to auto-generated UUIDs on first access.
|
||||
|
||||
### `SessionHandle`
|
||||
|
||||
```rust
|
||||
pub struct SessionHandle {
|
||||
id: SessionId,
|
||||
alias: Option<SessionAlias>,
|
||||
is_agent: Option<String>,
|
||||
state: Arc<tokio::sync::Mutex<Session>>,
|
||||
store: Arc<dyn SessionStore>,
|
||||
dirty: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl SessionHandle {
|
||||
pub fn id(&self) -> SessionId { self.id }
|
||||
pub fn alias(&self) -> Option<&SessionAlias> { self.alias.as_ref() }
|
||||
pub async fn lock(&self) -> SessionGuard<'_>;
|
||||
pub fn mark_dirty(&self);
|
||||
pub async fn save(&self) -> Result<(), StoreError>;
|
||||
pub async fn rename(&mut self, new_alias: SessionAlias) -> Result<(), StoreError>;
|
||||
}
|
||||
|
||||
pub struct SessionGuard<'a> {
|
||||
session: MutexGuard<'a, Session>,
|
||||
handle: &'a SessionHandle,
|
||||
}
|
||||
|
||||
impl SessionGuard<'_> {
|
||||
pub fn get(&self) -> &Session { &self.session }
|
||||
pub fn get_mut(&mut self) -> &mut Session {
|
||||
self.handle.mark_dirty();
|
||||
&mut self.session
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
A `SessionHandle` is what callers pass around. It wraps:
|
||||
- The stable `SessionId` (never changes after creation)
|
||||
- An optional `SessionAlias` (can be renamed; users see this in `.info session`)
|
||||
- An optional `is_agent` marker (the owning agent's name when the session is agent-scoped, `None` for global sessions) so the store knows which directory to read/write
|
||||
- A shared `Arc<Mutex<Session>>` that serializes access within the process
|
||||
- A backpointer to the store so `save()`, `rename()`, etc. work without the caller knowing the storage type
|
||||
- A dirty flag that auto-sets on `get_mut()` and clears after successful save
|
||||
|
||||
The `lock()` / `SessionGuard` pattern is important: it makes the "you must lock before touching state" rule compiler-enforced. Today's code mutates `Config.session` freely because the whole `Config` is behind an `RwLock`. After Phase 3, mutating a session requires going through `handle.lock().await.get_mut()`, which acquires the per-session mutex. Two concurrent requests to the same session serialize automatically.
|
||||
|
||||
### `SessionStore` trait
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait SessionStore: Send + Sync {
|
||||
/// Create a new session. If `alias` is provided, register it in the
|
||||
/// alias index. Fails with AliasInUse if the alias already exists.
|
||||
async fn create(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
alias: Option<SessionAlias>,
|
||||
initial: Session,
|
||||
) -> Result<SessionHandle, StoreError>;
|
||||
|
||||
/// Open an existing session by UUID.
|
||||
async fn open(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
id: SessionId,
|
||||
) -> Result<SessionHandle, StoreError>;
|
||||
|
||||
/// Open an existing session by alias, or create it if it doesn't exist.
|
||||
/// This is the CLI/REPL compatibility path.
|
||||
async fn open_or_create_by_alias(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
alias: SessionAlias,
|
||||
initial_factory: impl FnOnce() -> Session + Send,
|
||||
) -> Result<SessionHandle, StoreError>;
|
||||
|
||||
/// Resolve an alias to its UUID without loading the session.
|
||||
async fn resolve_alias(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
alias: &SessionAlias,
|
||||
) -> Result<Option<SessionId>, StoreError>;
|
||||
|
||||
/// Persist the current in-memory state of a handle back to storage.
|
||||
/// Atomically — no torn writes.
|
||||
async fn save(&self, handle: &SessionHandle) -> Result<(), StoreError>;
|
||||
|
||||
/// Rename a session's alias. The UUID and session state are unchanged.
|
||||
async fn rename(
|
||||
&self,
|
||||
handle: &SessionHandle,
|
||||
new_alias: SessionAlias,
|
||||
) -> Result<(), StoreError>;
|
||||
|
||||
/// Delete a session permanently. Both the state file and any alias
|
||||
/// pointing at it are removed.
|
||||
async fn delete(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
id: SessionId,
|
||||
) -> Result<(), StoreError>;
|
||||
|
||||
/// List all sessions in a scope (global or per-agent). Returns UUIDs
|
||||
/// paired with their aliases if any.
|
||||
async fn list(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
) -> Result<Vec<SessionMeta>, StoreError>;
|
||||
}
|
||||
|
||||
pub struct SessionMeta {
|
||||
pub id: SessionId,
|
||||
pub alias: Option<SessionAlias>,
|
||||
pub last_modified: SystemTime,
|
||||
pub is_autoname: bool,
|
||||
}
|
||||
|
||||
pub enum StoreError {
|
||||
NotFound { id: Option<SessionId>, alias: Option<String> },
|
||||
AliasInUse(String),
|
||||
InvalidAlias(String),
|
||||
Io(std::io::Error),
|
||||
Serde(serde_yaml::Error),
|
||||
Concurrent, // best-effort optimistic check
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
```
|
||||
|
||||
### `FileSessionStore`
|
||||
|
||||
```rust
|
||||
pub struct FileSessionStore {
|
||||
root: PathBuf, // ~/.config/loki/
|
||||
agents_root: PathBuf, // ~/.config/loki/agents/
|
||||
handles: Mutex<HashMap<(Option<String>, SessionId), Weak<Mutex<Session>>>>,
|
||||
}
|
||||
```
|
||||
|
||||
The `handles` map is the in-process cache that enforces "one `Arc<Mutex<Session>>` per live session per process." If two callers `open()` the same session, they get two `SessionHandle`s pointing at the same underlying mutex, so their locks serialize. When the last handle drops, the weak ref fails on the next lookup and the store re-reads from disk.
|
||||
|
||||
---
|
||||
|
||||
## The On-Disk Layout
|
||||
|
||||
### New layout (Phase 3 target)
|
||||
|
||||
```
|
||||
~/.config/loki/sessions/
|
||||
by-id/
|
||||
<uuid>/
|
||||
state.yaml
|
||||
by-name/
|
||||
my-project → text file containing the UUID
|
||||
another-chat → text file containing the UUID
|
||||
```
|
||||
|
||||
Agent sessions mirror this inside each agent's directory:
|
||||
|
||||
```
|
||||
~/.config/loki/agents/sisyphus/sessions/
|
||||
by-id/
|
||||
<uuid>/
|
||||
state.yaml
|
||||
by-name/
|
||||
my-project → UUID
|
||||
```
|
||||
|
||||
### Backward compatibility
|
||||
|
||||
The migration is lazy and non-destructive. On `FileSessionStore` startup, we do NOT rewrite the directory. On the first `open_or_create_by_alias("my-project")` call, the store checks:
|
||||
|
||||
1. **New layout hit:** is there a `by-name/my-project` alias file? Read the UUID, open `by-id/<uuid>/state.yaml`.
|
||||
2. **Legacy layout hit:** is there a `sessions/my-project.yaml`? Generate a fresh UUID, create `by-id/<uuid>/state.yaml` from the legacy content (atomic copy), write `by-name/my-project` pointing to the new UUID, and leave the legacy file in place. The legacy file becomes stale but untouched.
|
||||
3. **Neither:** create fresh.
|
||||
|
||||
This means users upgrading from pre-Phase-3 builds never lose data, and they can downgrade during the migration window (their old files are still readable by the old code because we haven't deleted them). A `loki migrate sessions` command can later do a clean sweep to remove the legacy files — but that's an operational convenience, not a requirement of Phase 3.
|
||||
|
||||
**Deleting a migrated session** (the `.delete` REPL command) also deletes the legacy file if it still exists, so users don't see orphan entries in `list_sessions()`.
|
||||
|
||||
**Autoname temp sessions** (today: `sessions/_/20231201T123456-autoname.yaml`) map cleanly to the new layout — they get UUIDs like any other session, and their alias is the generated `20231201T123456-autoname` string. The `_/` prefix from today's path is replaced by the `SessionMeta::is_autoname` flag, which the store sets to `true` when it recognizes the naming pattern during migration.
|
||||
|
||||
### Atomic writes
|
||||
|
||||
Today's `Session::save()` is `std::fs::write(path, yaml)` — if the process dies mid-write, you get a truncated YAML file that can't be loaded. The new `FileSessionStore::save()` uses the standard tempfile-and-rename pattern:
|
||||
|
||||
```rust
|
||||
async fn save(&self, handle: &SessionHandle) -> Result<(), StoreError> {
|
||||
let session = handle.state.lock().await;
|
||||
let yaml = serde_yaml::to_string(&*session)?;
|
||||
let target = self.state_path(handle.is_agent.as_deref(), handle.id);
|
||||
let tmp = target.with_extension("yaml.tmp");
|
||||
tokio::fs::write(&tmp, yaml).await?;
|
||||
tokio::fs::rename(&tmp, &target).await?;
|
||||
handle.dirty.store(false, Ordering::Release);
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
`rename` is atomic on POSIX filesystems and on Windows NTFS (via `MoveFileEx`). Either the old content or the new content is visible to readers; never a half-written file.
|
||||
|
||||
---
|
||||
|
||||
## Concurrency Model
|
||||
|
||||
Three layers, each with a clear responsibility:
|
||||
|
||||
1. **Process-level: per-session `Arc<Mutex<Session>>`.** Two handles to the same session share one mutex. Inside one process, concurrent access to the same session is serialized automatically. This is enough for CLI (single request) and REPL (single user, but multiple async tasks like background compression).
|
||||
|
||||
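2. **Inter-process: filesystem rename atomicity.** Two separate Loki processes (unlikely today but possible for someone running CLI and REPL simultaneously on the same state) can't corrupt files because writes go through tempfile+rename. The later writer wins cleanly; the earlier writer's changes are lost but the file is always readable. (This assumes two writers never write the same temp file at the same time; because `save()` derives a fixed `state.yaml.tmp` name from the target path, the temp file name should be made unique per writer for the guarantee to hold across processes.)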
|
||||
|
||||
3. **Optimistic conflict detection (optional, Phase 5+):** If we later decide to add "you edited this session somewhere else, please reload" UX, we can add an `mtime` check on load/save and surface `StoreError::Concurrent` when the on-disk mtime doesn't match the value we read at `open()` time. This is deliberately not built in Phase 3 — it's a UX improvement for later, not a correctness requirement.
|
||||
|
||||
For Phase 3, layers 1 and 2 together are sufficient for everything up through "many concurrent API sessions, each addressing different UUIDs." The remaining case — "multiple API requests on the same session UUID at the same time" — is not a gap either: the per-session mutex in layer 1 serializes those requests, which is the desired behavior. The second request waits its turn and sees the first request's updates.
|
||||
|
||||
---
|
||||
|
||||
## Engine and Callsite Changes
|
||||
|
||||
### Before Phase 3
|
||||
|
||||
```rust
|
||||
// In REPL command handler:
|
||||
Config::use_session_safely(&config, Some("my-project"), abort_signal)?;
|
||||
// later:
|
||||
config.write().session.as_mut().unwrap().add_message(...);
|
||||
// later:
|
||||
Config::save_session_safely(&config, None)?;
|
||||
```
|
||||
|
||||
### After Phase 3
|
||||
|
||||
```rust
|
||||
// In CoreCommand::UseSession handler inside Engine::dispatch_command:
|
||||
let alias = SessionAlias::new("my-project")?;
|
||||
let handle = self.app.sessions.open_or_create_by_alias(
|
||||
ctx.agent_name(),
|
||||
alias,
|
||||
|| Session::new_default(ctx.model_id(), ctx.role_name()),
|
||||
).await?;
|
||||
ctx.session = Some(handle);
|
||||
|
||||
// later, during the chat loop:
|
||||
{
|
||||
let mut guard = handle.lock().await;
|
||||
guard.get_mut().add_message(input, output);
|
||||
}
|
||||
handle.save().await?; // fires when the turn completes
|
||||
```
|
||||
|
||||
The `RequestContext.session: Option<Session>` field becomes `RequestContext.session: Option<SessionHandle>`. All 13 session-touching callsites identified during the codebase exploration get rewritten to go through the handle instead of direct access.
|
||||
|
||||
### The 13 callsites and their new shapes
|
||||
|
||||
| Current location | Current call | New call |
|
||||
|---|---|---|
|
||||
| `Config::use_session` | `Session::load` or `Session::new` | `store.open_or_create_by_alias(...)` |
|
||||
| `Config::use_session_safely` | take/replace pattern on `config.session` | `ctx.session = Some(handle)` |
|
||||
| `Config::exit_session` | `session.exit()` (maybe saves) | `if handle.dirty() { handle.save().await? }; ctx.session = None` |
|
||||
| `Config::empty_session` | `session.clear_messages()` | `handle.lock().await.get_mut().clear_messages()` |
|
||||
| `Config::save_session` | `session.save()` with name logic | `handle.rename(alias).await?; handle.save().await?` |
|
||||
| `Config::compress_session` | mutates session, relies on dirty flag | `handle.lock().await.get_mut().compress(...)?; handle.save().await?` |
|
||||
| `Config::maybe_autoname_session` | spawns task, mutates session | same, but via handle |
|
||||
| `Config::delete` (kind="session") | `remove_file` on path | `store.delete(agent, id).await?` |
|
||||
| `Config::after_chat_completion` | `session.add_message(...)` | via handle |
|
||||
| `Config::apply_prelude` | may `use_session` | via store |
|
||||
| `Agent::init` / `use_agent` | may load agent session | via store, with `agent=Some(name)` |
|
||||
| `.session` REPL command | via `use_session_safely` | via store |
|
||||
| `.delete session` REPL command | via `Config::delete` | via store |
|
||||
|
||||
Most of these are one-liner changes since the store's API mirrors the semantics of today's methods. The subtle ones are:
|
||||
|
||||
- **`exit_session`** has "save if dirty and `save_session != Some(false)`" logic plus "prompt for name if temp session" UX. The prompt lives in the REPL layer (it calls `inquire::Text`), not in the store. After the refactor, the REPL reads the dirty flag from the handle, prompts for a name if needed, calls `handle.rename()` if the user provided one, then calls `handle.save()`.
|
||||
|
||||
- **`compress_session`** runs asynchronously today — it spawns a task that holds a clone of `GlobalConfig` and writes back via `config.write()`. After the refactor, the task holds an `Arc<SessionHandle>` and does `handle.lock().await.get_mut().compress(...)` followed by `handle.save().await`. The per-session mutex prevents the compression task from clobbering concurrent turn writes.
|
||||
|
||||
- **`maybe_autoname_session`** is the same story as compression: spawn task, mutate through handle, save through store.
|
||||
|
||||
---
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
### Step 1: Create the types without wiring
|
||||
|
||||
Add new files:
|
||||
|
||||
- `src/session/mod.rs` — module root
|
||||
- `src/session/id.rs` — `SessionId`, `SessionAlias`
|
||||
- `src/session/store.rs` — `SessionStore` trait, `StoreError`, `SessionMeta`
|
||||
- `src/session/handle.rs` — `SessionHandle`, `SessionGuard`
|
||||
- `src/session/file_store.rs` — `FileSessionStore` implementation
|
||||
|
||||
Move the existing `Session` struct from `src/config/session.rs` to `src/session/session.rs`. Keep the pub re-export at `src/config::Session` so no external callers break during the migration. The struct itself is unchanged — same fields, same YAML format, same methods. This is purely a module reorganization.
|
||||
|
||||
Register `pub mod session;` in `src/main.rs` and add `pub sessions: Arc<dyn SessionStore>` to `AppState`. Initialize it in `AppState::init()` with `FileSessionStore::new(config_dir)`.
|
||||
|
||||
**Verification:** `cargo check` clean, `cargo test` passes. Nothing uses the new types yet.
|
||||
|
||||
### Step 2: Implement `FileSessionStore` against the new layout
|
||||
|
||||
Build the file-based implementation:
|
||||
|
||||
- `state_path(agent, id) → ~/.config/loki/[agents/<agent>/]sessions/by-id/<uuid>/state.yaml`
|
||||
- `alias_path(agent, alias) → ~/.config/loki/[agents/<agent>/]sessions/by-name/<alias>`
|
||||
- `legacy_path(agent, alias) → ~/.config/loki/[agents/<agent>/]sessions/<alias>.yaml`
|
||||
|
||||
Implement `create`, `open`, `open_or_create_by_alias`, `resolve_alias`, `save`, `rename`, `delete`, `list`. The `open_or_create_by_alias` method is the most complex — it has the lazy-migration logic that checks new layout, then legacy layout, then falls through to creation.
|
||||
|
||||
**Unit tests for `FileSessionStore`:**
|
||||
- Create + open roundtrip
|
||||
- Create with alias + open_or_create_by_alias finds it
|
||||
- Lazy migration from legacy `.yaml` file
|
||||
- Delete removes both new and legacy paths
|
||||
- Rename updates alias index without touching state file
|
||||
- List returns both new-layout and legacy-layout sessions
|
||||
- Atomic write: kill the process mid-write (simulated by injected failure) and verify no torn YAML
|
||||
|
||||
These tests use `tempfile::TempDir` so they don't touch the real config directory.
|
||||
|
||||
**Verification:** Unit tests pass. `cargo check` clean.
|
||||
|
||||
### Step 3: Add `SessionHandle` and integrate with `RequestContext`
|
||||
|
||||
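Change `RequestContext.session` from `Option<Session>` to `Option<SessionHandle>`. This is a sweeping type change across the codebase — every callsite that does `ctx.session.as_ref()` now has to lock the handle before it can read the session (conceptually `handle.lock().await.get()`; note that `.await` cannot be used inside a plain `Option::map` closure, so the rewrite is not purely mechanical).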
|
||||
|
||||
The cleanest way to minimize the blast radius is to add a thin compatibility layer on `RequestContext`:
|
||||
|
||||
```rust
|
||||
impl RequestContext {
|
||||
pub async fn session_read<F, R>(&self, f: F) -> Option<R>
|
||||
where F: FnOnce(&Session) -> R {
|
||||
let handle = self.session.as_ref()?;
|
||||
let guard = handle.lock().await;
|
||||
Some(f(guard.get()))
|
||||
}
|
||||
|
||||
pub async fn session_write<F, R>(&mut self, f: F) -> Option<R>
|
||||
where F: FnOnce(&mut Session) -> R {
|
||||
let handle = self.session.as_ref()?;
|
||||
let mut guard = handle.lock().await;
|
||||
Some(f(guard.get_mut()))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Most callsites become `ctx.session_read(|s| s.model_id.clone()).await` or `ctx.session_write(|s| s.add_message(...)).await`. A few that need to hold the guard across await points (e.g., compression) use `handle.lock()` directly.
|
||||
|
||||
**Verification:** `cargo check` clean. Existing REPL functions still work because the old method names get forwarded through the compatibility helpers.
|
||||
|
||||
### Step 4: Rewrite the 13 session callsites to use the store
|
||||
|
||||
Go through each callsite in the inventory table and rewrite it:
|
||||
|
||||
1. `Config::use_session` → `Engine::dispatch_command` for `CoreCommand::UseSession`
|
||||
2. `Config::use_session_safely` → same, with extra ctx reset logic
|
||||
3. `Config::exit_session` → `Engine::dispatch_command` for `CoreCommand::ExitSession`
|
||||
4. ... and so on
|
||||
|
||||
Where possible, move the logic INTO `Engine::dispatch_command` rather than leaving it on `Config`. This is consistent with Phase 2's direction — core logic lives in the engine, not on state containers.
|
||||
|
||||
For each rewrite:
|
||||
- Delete the old method from `Config`
|
||||
- Add the new handler in `Engine::dispatch_command`
|
||||
- Update any callers that still reference the old method name
|
||||
- Run `cargo check` after each file to catch issues incrementally
|
||||
|
||||
**Verification:** After each rewrite, `cargo check` + the relevant integration tests from Phase 2. The Phase 2 `CollectingEmitter` tests for session-touching scenarios are especially important here — they're the regression net.
|
||||
|
||||
### Step 5: Remove the compatibility helpers from `RequestContext`
|
||||
|
||||
Once all 13 callsites are rewritten, the `session_read` / `session_write` helpers should have no callers left — their only users were the old session methods we just deleted. Remove them. Any remaining compile errors point at callsites we missed.
|
||||
|
||||
**Verification:** `cargo check` clean, all of Phase 2's tests still pass, plus the new `FileSessionStore` unit tests.
|
||||
|
||||
### Step 6: Add the integration tests for concurrent access
|
||||
|
||||
These are the tests that prove Phase 3 actually solved the concurrency problem:
|
||||
|
||||
```rust
|
||||
#[tokio::test]
|
||||
async fn concurrent_opens_share_one_mutex() {
|
||||
let store = FileSessionStore::new(tempdir);
|
||||
let id = SessionId::new();
|
||||
// ... create initial session ...
|
||||
|
||||
let h1 = store.open(None, id).await.unwrap();
|
||||
let h2 = store.open(None, id).await.unwrap();
|
||||
|
||||
// Both handles should point at the same Arc<Mutex<Session>>
|
||||
let lock1 = h1.lock().await;
|
||||
// Try to lock h2 — should block
|
||||
let try_lock = tokio::time::timeout(
|
||||
Duration::from_millis(50),
|
||||
h2.lock(),
|
||||
).await;
|
||||
assert!(try_lock.is_err(), "h2 should block while h1 holds the lock");
|
||||
drop(lock1);
|
||||
let _lock2 = h2.lock().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn concurrent_writes_serialize_without_loss() {
|
||||
let store = Arc::new(FileSessionStore::new(tempdir));
|
||||
let id = create_initial_session(&store).await;
|
||||
|
||||
let tasks: Vec<_> = (0..100).map(|i| {
|
||||
let store = store.clone();
|
||||
tokio::spawn(async move {
|
||||
let handle = store.open(None, id).await.unwrap();
|
||||
{
|
||||
let mut guard = handle.lock().await;
|
||||
guard.get_mut().add_message(
|
||||
Input::from_str(format!("msg-{i}")),
|
||||
format!("reply-{i}"),
|
||||
);
|
||||
}
|
||||
handle.save().await.unwrap();
|
||||
})
|
||||
}).collect();
|
||||
|
||||
for t in tasks { t.await.unwrap(); }
|
||||
|
||||
let handle = store.open(None, id).await.unwrap();
|
||||
let guard = handle.lock().await;
|
||||
assert_eq!(guard.get().messages.len(), 200); // 100 user + 100 assistant
|
||||
}
|
||||
```
|
||||
|
||||
The second test specifically verifies that the per-session mutex serialization prevents lost updates — the flaw in today's code.
|
||||
|
||||
**Verification:** Both tests pass. `cargo test` green overall.
|
||||
|
||||
### Step 7: Legacy migration smoke test
|
||||
|
||||
Copy a real user's `sessions/my-project.yaml` file into a test fixture directory. Run `FileSessionStore::open_or_create_by_alias("my-project")` and assert:
|
||||
|
||||
- A new `by-id/<uuid>/state.yaml` exists with identical content
|
||||
- A new `by-name/my-project` file exists containing the UUID
|
||||
- The original `sessions/my-project.yaml` is still there, untouched
|
||||
- A second `open_or_create_by_alias("my-project")` call reuses the same UUID (idempotent)
|
||||
|
||||
**Verification:** Test passes with real fixture data including a session that has compressed messages and agent variables.
|
||||
|
||||
### Step 8: Manual smoke test
|
||||
|
||||
Run through a full REPL session covering every session-touching command:
|
||||
|
||||
1. `loki` → REPL starts, `.session foo` → new session created, check `by-id/` and `by-name/foo` exist
|
||||
2. Several messages → check `state.yaml` updates atomically
|
||||
3. `.save session bar` → check alias renamed, UUID unchanged
|
||||
4. `.empty session` → messages cleared, file still exists
|
||||
5. `.exit session` → session closed
|
||||
6. `loki --session bar` from command line → same UUID resumes
|
||||
7. `.delete` then choose session → both new and legacy files gone
|
||||
8. Agent with `.agent sisyphus my-work` → agent-scoped session in `agents/sisyphus/sessions/`
|
||||
9. Auto-continuation in an agent → compression fires, concurrent writes serialize cleanly
|
||||
|
||||
Every interaction should behave identically to pre-Phase-3.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Watch Items
|
||||
|
||||
| Risk | Severity | Mitigation |
|
||||
|---|---|---|
|
||||
| **Legacy file discovery** | Medium | The migration path must handle every legacy layout: `sessions/<name>.yaml`, `sessions/_/<timestamp>-<autoname>.yaml`, and agent-scoped `agents/<agent>/sessions/<name>.yaml`. Write a fixture test for each variant. |
|
||||
| **Alias collisions during migration** | Medium | If two processes simultaneously migrate the same legacy session, they could create two different UUIDs. Mitigation: the `open_or_create_by_alias` path should acquire a file lock on the alias file itself during creation, not just rely on the store's in-memory map. |
|
||||
| **`RequestContext.session` type change blast radius** | Medium | Using the compatibility helpers (`session_read` / `session_write`) in Step 3 contains the blast radius. Only remove them in Step 5 once everything compiles. |
|
||||
| **Session::save deadlock via re-entry** | Medium | If `Session::compress()` or `add_message()` internally trigger anything that tries to re-lock the session's mutex, we get a deadlock. Audit every `Session` method called inside a `guard.get_mut()` scope to make sure none of them take the lock again. The same applies to `handle.save()`: `FileSessionStore::save` locks `handle.state` itself, so it must only be called after the `SessionGuard` has been dropped. Document the invariant in `SessionHandle` rustdoc. |
|
||||
| **Tempfile cleanup on crash** | Low | If the process dies after writing `.yaml.tmp` but before the rename, we leave a stray file. On startup, `FileSessionStore::new` should sweep `by-id/*/state.yaml.tmp` files and remove them. |
|
||||
| **Alias index corruption** | Low | If `by-name/foo` contains garbage (not a valid UUID), treat it as a missing alias and log a warning. Don't crash the process. |
|
||||
| **Serde compatibility with old files** | Low | The `Session` struct's serde shape doesn't change in Phase 3, so old YAML files deserialize identically. Verify with a fixture test that includes every optional field set. |
|
||||
| **CLI `--session <uuid>` vs `--session <alias>` ambiguity** | Low | `SessionId::parse` recognizes UUID format; fall back to treating the argument as an alias if parsing fails. Document in `--help`. |
|
||||
| **Concurrent delete while handle held** | Low | If one task is using a handle while another deletes the session, the first task's save will fail (file missing). This is acceptable behavior — log a warning and return `StoreError::NotFound`. Tests should cover this. |
|
||||
|
||||
---
|
||||
|
||||
## What Phase 3 Does NOT Do
|
||||
|
||||
- **No schema migration.** YAML format stays identical. `Session` struct unchanged.
|
||||
- **No database.** `FileSessionStore` is the only implementation.
|
||||
- **No session TTL / eviction.** Sessions live until explicitly deleted.
|
||||
- **No cross-process locking.** Two Loki processes can still race, but writes are atomic so files never corrupt.
|
||||
- **No session encryption.** Vault handles secrets; sessions are plain YAML.
|
||||
- **No session sharing between users.** Each user has their own config directory.
|
||||
- **No optimistic concurrency (mtime check).** Deferred to Phase 5+ as a UX enhancement.
|
||||
- **No session versioning / rollback.** Deferred.
|
||||
- **No changes to `Session::build_messages()`, compression logic, or autoname generation.** The behaviors that read/mutate `Session` stay the same — only how they're reached changes.
|
||||
|
||||
The sole goal of Phase 3 is: **route all session persistence through a `SessionStore` trait with UUID-primary identity, lazy migration from the legacy layout, per-session mutex serialization, and atomic writes.**
|
||||
|
||||
---
|
||||
|
||||
## Entry Criteria (from Phase 2)
|
||||
|
||||
- [ ] `Engine::run` is the only path to the LLM pipeline
|
||||
- [ ] `CoreCommand::UseSession`, `ExitSession`, `EmptySession`, `CompressSession`, `SaveSession`, `EditSession` are all implemented and tested
|
||||
- [ ] `CollectingEmitter` integration tests cover session-touching scenarios
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] CLI and REPL manual smoke tests match pre-Phase-2 behavior
|
||||
|
||||
## Exit Criteria (Phase 3 complete)
|
||||
|
||||
- [ ] `src/session/` module exists with `SessionStore` trait, `FileSessionStore`, `SessionId`, `SessionAlias`, `SessionHandle`, `SessionGuard`
|
||||
- [ ] `AppState.sessions: Arc<dyn SessionStore>` is wired in
|
||||
- [ ] `RequestContext.session: Option<SessionHandle>` (not `Option<Session>`)
|
||||
- [ ] All 13 session callsites go through the store; no direct `Session::load` or `Session::save` calls remain outside `FileSessionStore`
|
||||
- [ ] Legacy layout files are lazily migrated on first access
|
||||
- [ ] New layout (`by-id/<uuid>/state.yaml` + `by-name/<alias>`) is the canonical on-disk format for all new sessions
|
||||
- [ ] Atomic writes via tempfile+rename
|
||||
- [ ] Per-session mutex serialization verified by concurrent-write integration tests
|
||||
- [ ] Legacy fixture test passes (existing user data still loads)
|
||||
- [ ] Full REPL smoke test covers every session command
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] Phase 4 (REST API) can address sessions by UUID without touching persistence code
|
||||
@@ -1,824 +0,0 @@
|
||||
# Phase 4 Implementation Plan: REST API Server
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 4 introduces a `--serve` mode that starts an HTTP server exposing Loki's functionality as a RESTful API. The server is a thin axum layer on top of `Engine::run()` — most of the work is mapping HTTP requests into `RunRequest`s, mapping `Emitter` events into JSON or Server-Sent Events, and providing baseline auth, cancellation, and graceful shutdown. By the end of this phase, Loki can run as a backend service that multiple clients can talk to simultaneously, each with their own session.
|
||||
|
||||
**Estimated effort:** ~1–2 weeks
|
||||
**Risk:** Low–medium. The core pipeline (Engine) is unchanged; the risk is in the HTTP layer's correctness around streaming, cancellation, and concurrent session handling.
|
||||
**Depends on:** Phases 1–3 complete. `SessionStore` with UUID identity, `Engine::run()` as the pipeline entrypoint, `Emitter` trait with working `TerminalEmitter` + `CollectingEmitter`.
|
||||
|
||||
---
|
||||
|
||||
## Why Phase 4 Exists
|
||||
|
||||
After Phase 3, everything the API server needs is already in place:
|
||||
- `AppState` is a clonable `Arc` holding global services, safe to share across concurrent HTTP handlers.
|
||||
- `RequestContext` is per-request mutable state with no hidden global singletons.
|
||||
- `Engine::run()` is the single pipeline entrypoint that works for any frontend.
|
||||
- `SessionStore` serves sessions by UUID with per-session mutex serialization.
|
||||
- `Emitter` trait decouples output from destination.
|
||||
|
||||
What's missing is the last mile: accepting HTTP requests, routing them to `Engine::run()`, and turning `Event`s into HTTP responses. This phase builds exactly that.
|
||||
|
||||
The mental model is "Loki as a backend service." A frontend developer should be able to `curl -X POST http://localhost:3400/v1/completions -d '{"prompt":"hello"}'` and get a sensible response. A JavaScript app should be able to `POST` to `/v1/sessions/:id/completions` with `"stream": true` and read the `text/event-stream` response for live token streaming (using a streaming `fetch` reader rather than the browser `EventSource` API, which only issues `GET` requests and cannot send an `Authorization` header). An automation script should be able to maintain session state across many requests by passing back the same session UUID.
|
||||
|
||||
---
|
||||
|
||||
## The Architecture After Phase 4
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ loki --serve --port 3400 │
|
||||
│ ┌───────────────────────────────────────┐ │
|
||||
│ │ axum Router │ │
|
||||
│ │ ┌─────────────┐ ┌────────────────┐ │ │
|
||||
│ │ │ Middleware│ │ Handlers │ │ │
|
||||
│ │ │ - Auth │ │ /v1/* │ │ │
|
||||
│ │ │ - Trace │ │ │ │ │
|
||||
│ │ │ - CORS │ │ │ │ │
|
||||
│ │ │ - Limit │ │ │ │ │
|
||||
│ │ └──────┬──────┘ └────────┬───────┘ │ │
|
||||
│ └─────────┼──────────────────┼──────────┘ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌───────────────────────────────────┐ │
|
||||
│ │ Arc<AppState> (shared) │ │
|
||||
│ └────────────────┬──────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────────────┐ │
|
||||
│ │ Per-request RequestContext + │ │
|
||||
│ │ JsonEmitter or SseEmitter │ │
|
||||
│ └────────────────┬──────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────────────┐ │
|
||||
│ │ Engine::run() │ │
|
||||
│ └───────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Surface
|
||||
|
||||
### Versioning
|
||||
|
||||
All endpoints live under `/v1/`. The version prefix lets us ship breaking changes later without breaking existing clients. `/v2/` endpoints can coexist with `/v1/` indefinitely.
|
||||
|
||||
### Endpoint summary
|
||||
|
||||
```
|
||||
Authentication
|
||||
POST /v1/auth/check # validate API key, returns subject info
|
||||
|
||||
Metadata
|
||||
GET /v1/models # list available LLM models
|
||||
GET /v1/agents # list installed agents
|
||||
GET /v1/roles # list installed roles
|
||||
GET /v1/rags # list standalone RAGs
|
||||
GET /v1/info # server build info, health
|
||||
|
||||
One-shot completions
|
||||
POST /v1/completions # stateless completion (no session)
|
||||
|
||||
Sessions
|
||||
POST /v1/sessions # create a new session (returns UUID)
|
||||
GET /v1/sessions # list sessions visible to this caller
|
||||
GET /v1/sessions/:id # get session metadata + message history
|
||||
DELETE /v1/sessions/:id # delete a session
|
||||
POST /v1/sessions/:id/completions # send a prompt into a session
|
||||
POST /v1/sessions/:id/compress # manually trigger compression
|
||||
POST /v1/sessions/:id/empty # clear messages (keep session record)
|
||||
|
||||
Role attachment
|
||||
POST /v1/sessions/:id/role # activate role on session
|
||||
DELETE /v1/sessions/:id/role # detach role
|
||||
|
||||
Agent attachment
|
||||
POST /v1/sessions/:id/agent # activate agent on session
|
||||
DELETE /v1/sessions/:id/agent # deactivate agent
|
||||
|
||||
RAG attachment
|
||||
POST /v1/sessions/:id/rag # attach standalone RAG
|
||||
DELETE /v1/sessions/:id/rag # detach RAG
|
||||
POST /v1/rags/:name/rebuild # rebuild a RAG index
|
||||
```
|
||||
|
||||
### Request/response shapes
|
||||
|
||||
**One-shot completion:**
|
||||
|
||||
```
|
||||
POST /v1/completions
|
||||
Content-Type: application/json
|
||||
Authorization: Bearer <api-key>
|
||||
|
||||
{
|
||||
"prompt": "Explain TCP handshake",
|
||||
"model": "openai:gpt-4o", // optional: overrides default
|
||||
"role": "explain", // optional: apply role for this one request
|
||||
"agent": "oracle", // optional: run through an agent (no session retention)
|
||||
"stream": false, // optional: SSE vs JSON
|
||||
"files": [ // optional: file attachments
|
||||
{"path": "/abs/path/doc.pdf"},
|
||||
{"url": "https://example.com/x"}
|
||||
],
|
||||
"temperature": 0.7, // optional override
|
||||
"auto_continue": false // optional: enable agent auto-continuation
|
||||
}
|
||||
```
|
||||
|
||||
**Non-streaming response (default):**
|
||||
|
||||
```json
|
||||
{
|
||||
"request_id": "7a1b...",
|
||||
"session_id": null,
|
||||
"final_message": "The TCP handshake is a three-way protocol ...",
|
||||
"tool_calls": [
|
||||
{"id": "tc_1", "name": "web_search", "args": "...", "result": "...", "is_error": false}
|
||||
],
|
||||
"turns": 2,
|
||||
"compressed": false,
|
||||
"auto_continued": 0,
|
||||
"usage": {
|
||||
"input_tokens": 120,
|
||||
"output_tokens": 458
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Streaming response** (`Accept: text/event-stream` or `stream: true`):
|
||||
|
||||
```
|
||||
event: started
|
||||
data: {"request_id":"7a1b...","session_id":null}
|
||||
|
||||
event: assistant_delta
|
||||
data: {"text":"The TCP "}
|
||||
|
||||
event: assistant_delta
|
||||
data: {"text":"handshake is "}
|
||||
|
||||
event: tool_call
|
||||
data: {"id":"tc_1","name":"web_search","args":"..."}
|
||||
|
||||
event: tool_result
|
||||
data: {"id":"tc_1","name":"web_search","result":"...","is_error":false}
|
||||
|
||||
event: assistant_delta
|
||||
data: {"text":" a three-way protocol..."}
|
||||
|
||||
event: finished
|
||||
data: {"outcome":{"turns":2,"tool_calls":1,"compressed":false}}
|
||||
```
|
||||
|
||||
**Create session:**
|
||||
|
||||
```
|
||||
POST /v1/sessions
|
||||
|
||||
{
|
||||
"alias": "my-project", // optional; UUID-only if omitted
|
||||
"role": "explain", // optional: pre-attach a role
|
||||
"agent": "sisyphus", // optional: pre-attach an agent
|
||||
"rag": "mydocs", // optional: pre-attach a RAG
|
||||
"model": "openai:gpt-4o" // optional: pre-set model
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"alias": "my-project",
|
||||
"agent": "sisyphus",
|
||||
"role": "explain",
|
||||
"rag": "mydocs",
|
||||
"model": "openai:gpt-4o",
|
||||
"created_at": "2026-04-10T15:32:11Z"
|
||||
}
|
||||
```
|
||||
|
||||
**Session completion:**
|
||||
|
||||
```
|
||||
POST /v1/sessions/550e8400-.../completions
|
||||
|
||||
{
|
||||
"prompt": "what was the bug we found yesterday?",
|
||||
"stream": true,
|
||||
"auto_continue": true
|
||||
}
|
||||
```
|
||||
|
||||
Returns the same shape as `/v1/completions`, but with `session_id` populated and agent runtime state preserved across calls.
|
||||
|
||||
**Error responses** (standard across all endpoints):
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": "session_not_found",
|
||||
"message": "No session with id 550e8400-...",
|
||||
"request_id": "7a1b..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
HTTP status codes map from `CoreError::http_status()` (defined in Phase 2):
|
||||
- `InvalidRequest` → 400
|
||||
- `Unauthorized` → 401
|
||||
- `NotFound` → 404
|
||||
- `InvalidState` → 409 (expected state doesn't match)
|
||||
- `Cancelled` → 499 (client-closed request, borrowed from nginx)
|
||||
- `ProviderError` → 502 (upstream LLM failed)
|
||||
- `ToolError` → 500
|
||||
- `Other` → 500
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
### `ApiConfig`
|
||||
|
||||
```rust
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct ApiConfig {
|
||||
pub enabled: bool,
|
||||
pub listen_addr: SocketAddr,
|
||||
pub auth: AuthConfig,
|
||||
pub cors: CorsConfig,
|
||||
pub limits: LimitsConfig,
|
||||
pub request_timeout_seconds: u64,
|
||||
pub shutdown_grace_seconds: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub enum AuthConfig {
|
||||
Disabled, // dev only
|
||||
StaticKeys { keys: Vec<AuthKeyEntry> }, // simple key list
|
||||
// future: JwtIssuer { ... }, OAuthIntrospect { ... }
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct AuthKeyEntry {
|
||||
pub subject: String, // for logs
|
||||
pub key_hash: String, // bcrypt or argon2 hash
|
||||
pub scopes: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct CorsConfig {
|
||||
pub allowed_origins: Vec<String>, // empty = no CORS
|
||||
pub allow_credentials: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct LimitsConfig {
|
||||
pub max_body_bytes: usize, // request body limit
|
||||
pub max_concurrent_requests: usize, // semaphore
|
||||
pub rate_limit_per_minute: Option<usize>, // optional per-subject
|
||||
}
|
||||
```
|
||||
|
||||
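`ApiConfig` loads from `config.yaml` under a new top-level `api:` block. It's NOT part of `AppConfig` because it only matters in `--serve` mode; in CLI/REPL mode it's ignored. Note that the `auth.mode` key in the example below selects the `AuthConfig` variant, which requires the enum to be internally tagged (`#[serde(tag = "mode")]`); with serde's default external tagging the example would not deserialize.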
|
||||
|
||||
```yaml
|
||||
# config.yaml
|
||||
api:
|
||||
enabled: false # false = --serve refuses to start without explicit enable
|
||||
listen_addr: "127.0.0.1:3400"
|
||||
auth:
|
||||
mode: StaticKeys
|
||||
keys:
|
||||
- subject: "alice"
|
||||
key_hash: "$argon2id$..."
|
||||
scopes: ["read", "write"]
|
||||
cors:
|
||||
allowed_origins: []
|
||||
allow_credentials: false
|
||||
limits:
|
||||
max_body_bytes: 1048576 # 1 MiB
|
||||
max_concurrent_requests: 64
|
||||
rate_limit_per_minute: null
|
||||
request_timeout_seconds: 300 # 5 minutes default
|
||||
shutdown_grace_seconds: 30
|
||||
```
|
||||
|
||||
### `ApiState`
|
||||
|
||||
```rust
|
||||
#[derive(Clone)]
|
||||
pub struct ApiState {
|
||||
pub app: Arc<AppState>,
|
||||
pub engine: Arc<Engine>,
|
||||
pub config: Arc<ApiConfig>,
|
||||
pub request_counter: Arc<AtomicU64>,
|
||||
pub active_requests: Arc<Semaphore>,
|
||||
}
|
||||
```
|
||||
|
||||
`ApiState` is the axum-friendly wrapper that every handler receives via the `State` extractor. It's clonable (cheap — all fields are `Arc` or atomic) and thread-safe. Handlers get a clone per request.
|
||||
|
||||
### `JsonEmitter`
|
||||
|
||||
Phase 2 promised `JsonEmitter` and `SseEmitter` as deferred deliverables. Phase 4 implements them.
|
||||
|
||||
```rust
|
||||
pub struct JsonEmitter {
|
||||
events: Mutex<Vec<OwnedEvent>>,
|
||||
tool_calls: Mutex<Vec<ToolCallRecord>>,
|
||||
final_message: Mutex<Option<String>>,
|
||||
outcome: Mutex<Option<RunOutcome>>,
|
||||
}
|
||||
|
||||
impl JsonEmitter {
|
||||
pub fn new() -> Self { /* ... */ }
|
||||
|
||||
/// Consume the emitter and return the JSON response body.
|
||||
pub fn into_response(self) -> serde_json::Value { /* ... */ }
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Emitter for JsonEmitter {
|
||||
async fn emit(&self, event: Event<'_>) -> Result<(), EmitError> {
|
||||
match event {
|
||||
Event::AssistantDelta(text) => { /* accumulate */ }
|
||||
Event::AssistantMessageEnd { full_text } => { /* set final_message */ }
|
||||
Event::ToolCall { .. } | Event::ToolResult { .. } => { /* record */ }
|
||||
Event::Finished { outcome } => { /* store */ }
|
||||
_ => { /* record as event */ }
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The non-streaming HTTP handler creates a `JsonEmitter`, calls `Engine::run`, and then calls `.into_response()` to get the final JSON body.
|
||||
|
||||
### `SseEmitter`
|
||||
|
||||
```rust
|
||||
pub struct SseEmitter {
|
||||
sender: mpsc::Sender<Result<axum::response::sse::Event, axum::Error>>,
|
||||
client_disconnected: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Emitter for SseEmitter {
|
||||
async fn emit(&self, event: Event<'_>) -> Result<(), EmitError> {
|
||||
if self.client_disconnected.load(Ordering::Relaxed) {
|
||||
return Err(EmitError::ClientDisconnected);
|
||||
}
|
||||
let sse_event = to_sse_event(&event)?;
|
||||
self.sender
|
||||
.send(Ok(sse_event))
|
||||
.await
|
||||
.map_err(|_| {
|
||||
self.client_disconnected.store(true, Ordering::Relaxed);
|
||||
EmitError::ClientDisconnected
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn to_sse_event(event: &Event<'_>) -> Result<axum::response::sse::Event, serde_json::Error> {
|
||||
let (name, data) = match event {
|
||||
Event::Started { .. } => ("started", serde_json::to_string(event)?),
|
||||
Event::AssistantDelta(text) => ("assistant_delta", json!({ "text": text }).to_string()),
|
||||
Event::AssistantMessageEnd { .. } => ("assistant_message_end", serde_json::to_string(event)?),
|
||||
Event::ToolCall { .. } => ("tool_call", serde_json::to_string(event)?),
|
||||
Event::ToolResult { .. } => ("tool_result", serde_json::to_string(event)?),
|
||||
Event::AutoContinueTriggered { .. } => ("auto_continue_triggered", serde_json::to_string(event)?),
|
||||
Event::SessionCompressing => ("session_compressing", "{}".to_string()),
|
||||
Event::SessionCompressed { .. } => ("session_compressed", serde_json::to_string(event)?),
|
||||
Event::SessionAutonamed(_) => ("session_autonamed", serde_json::to_string(event)?),
|
||||
Event::Info(msg) => ("info", json!({ "message": msg }).to_string()),
|
||||
Event::Warning(msg) => ("warning", json!({ "message": msg }).to_string()),
|
||||
Event::Error(err) => ("error", serde_json::to_string(err)?),
|
||||
Event::Finished { outcome } => ("finished", serde_json::to_string(outcome)?),
|
||||
};
|
||||
Ok(axum::response::sse::Event::default().event(name).data(data))
|
||||
}
|
||||
```
|
||||
|
||||
The streaming handler creates an mpsc channel, hands the sender half to an `SseEmitter`, and returns an `axum::response::sse::Sse` wrapping the receiver half. axum streams each event as it's emitted, with automatic flushing. If the client disconnects, the send fails, `client_disconnected` is set, and subsequent emits return `ClientDisconnected` — which the engine respects by continuing to completion without emitting further (Phase 2 designed this behavior in).
|
||||
|
||||
---
|
||||
|
||||
## Middleware Stack
|
||||
|
||||
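The axum router wraps handlers in a layered middleware stack. Order matters because middleware is applied outside-in on requests, inside-out on responses. With `Router::layer`, each call wraps everything added before it, so the last `.layer(...)` below (CORS) is the outermost and sees the request first, while the first one (`auth`) is the innermost and runs just before the handler.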
|
||||
|
||||
```rust
|
||||
let router = Router::new()
|
||||
.route("/v1/auth/check", post(handlers::auth_check))
|
||||
.route("/v1/models", get(handlers::list_models))
|
||||
.route("/v1/agents", get(handlers::list_agents))
|
||||
.route("/v1/roles", get(handlers::list_roles))
|
||||
.route("/v1/rags", get(handlers::list_rags))
|
||||
.route("/v1/info", get(handlers::info))
|
||||
.route("/v1/completions", post(handlers::one_shot_completion))
|
||||
.route("/v1/sessions", post(handlers::create_session).get(handlers::list_sessions))
|
||||
.route("/v1/sessions/:id", get(handlers::get_session).delete(handlers::delete_session))
|
||||
.route("/v1/sessions/:id/completions", post(handlers::session_completion))
|
||||
.route("/v1/sessions/:id/compress", post(handlers::compress_session))
|
||||
.route("/v1/sessions/:id/empty", post(handlers::empty_session))
|
||||
.route("/v1/sessions/:id/role", post(handlers::set_role).delete(handlers::clear_role))
|
||||
.route("/v1/sessions/:id/agent", post(handlers::set_agent).delete(handlers::clear_agent))
|
||||
.route("/v1/sessions/:id/rag", post(handlers::set_rag).delete(handlers::clear_rag))
|
||||
.route("/v1/rags/:name/rebuild", post(handlers::rebuild_rag))
|
||||
.layer(middleware::from_fn_with_state(state.clone(), middleware::auth))
|
||||
.layer(middleware::from_fn(middleware::request_id))
|
||||
.layer(middleware::from_fn_with_state(state.clone(), middleware::concurrency_limit))
|
||||
.layer(middleware::from_fn(middleware::tracing))
|
||||
.layer(middleware::from_fn(middleware::error_handler))
|
||||
.layer(tower_http::timeout::TimeoutLayer::new(Duration::from_secs(
|
||||
state.config.request_timeout_seconds,
|
||||
)))
|
||||
.layer(tower_http::limit::RequestBodyLimitLayer::new(state.config.limits.max_body_bytes))
|
||||
.layer(cors_layer(&state.config.cors))
|
||||
.with_state(state);
|
||||
```
|
||||
|
||||
### Middleware responsibilities
|
||||
|
||||
**auth** — Validates `Authorization: Bearer <key>` header against the configured auth provider. Compares against stored hashes (bcrypt/argon2), never plaintext. On success, attaches an `AuthContext { subject, scopes }` to request extensions. On failure, returns 401 immediately without calling the handler. If `AuthConfig::Disabled`, synthesizes an `AuthContext { subject: "anonymous", scopes: vec!["*"] }` for local dev.
|
||||
|
||||
**request_id** — Generates a UUID request ID, attaches it to request extensions for downstream correlation, emits it as `X-Request-Id` in the response headers. Used by tracing and error handlers.
|
||||
|
||||
**concurrency_limit** — Tries to acquire a permit from the `state.active_requests` semaphore, waiting at most a short timeout. If no permit frees up in that window (the server is saturated), returns 503 Service Unavailable instead of queueing the request indefinitely. This protects against runaway connection counts exhausting resources.
|
||||
|
||||
**tracing** — Wraps the request in a `tracing::Span` carrying the request ID, subject, method, path, and session ID if present. Every log line and every tool call emitted during the request carries this span context. Essential for debugging production issues.
|
||||
|
||||
**error_handler** — Catches `CoreError` from handler results and maps to proper HTTP responses using `CoreError::http_status()` and a JSON error body. Ensures no handler leaks an `anyhow::Error` or raw `?` into an axum 500.
|
||||
|
||||
**timeout** — Overall request deadline. After N seconds (default 300), the request is aborted. This is a backstop — the engine's per-request cancellation token is the primary cancellation mechanism.
|
||||
|
||||
**body limit** — Rejects requests larger than the configured max. Default 1 MiB is enough for prompts with several files attached; adjustable in config.
|
||||
|
||||
**cors** — Attaches `Access-Control-Allow-Origin` headers for cross-origin browsers. Empty allowed origins = no CORS headers emitted (safe default). `allow_credentials: true` enables cookie/auth forwarding.
|
||||
|
||||
### What's NOT in middleware
|
||||
|
||||
- **Rate limiting per subject** — deferred. The `rate_limit_per_minute` config option is wired through but the middleware is a stub in Phase 4. Real rate limiting with sliding windows lands in a follow-up.
|
||||
- **Request/response logging** — use the tracing middleware's output; don't add a separate HTTP log layer.
|
||||
- **Metrics** — deferred to Phase 4.5 (Prometheus endpoint). Phase 4 just exposes counters in `ApiState`.
|
||||
- **Content negotiation** — Phase 4 assumes JSON requests. `Accept: text/event-stream` is the only alternate content type we handle, and only on completion endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Handler Pattern
|
||||
|
||||
Every handler follows the same shape:
|
||||
|
||||
```rust
|
||||
pub async fn session_completion(
|
||||
State(state): State<ApiState>,
|
||||
Extension(auth): Extension<AuthContext>,
|
||||
Extension(request_id): Extension<Uuid>,
|
||||
Path(session_id): Path<String>,
|
||||
Json(req): Json<CompletionRequest>,
|
||||
) -> Result<Response, ApiError> {
|
||||
// 1. Parse domain types
|
||||
let session_id = SessionId::parse(&session_id)
|
||||
.map_err(|_| ApiError::bad_request("invalid session id"))?;
|
||||
|
||||
// 2. Open the session handle
|
||||
let handle = state.app.sessions.open(None, session_id).await
|
||||
.map_err(|e| match e {
|
||||
StoreError::NotFound { .. } => ApiError::not_found("session", &session_id.to_string()),
|
||||
other => ApiError::from(other),
|
||||
})?;
|
||||
|
||||
// 3. Build RequestContext from AppState + session
|
||||
let mut ctx = RequestContext::new(state.app.clone(), WorkingMode::Api);
|
||||
ctx.session = Some(handle);
|
||||
ctx.auth = Some(auth);
|
||||
|
||||
// 4. Build cancellation token that fires on client disconnect
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
// 5. Convert the HTTP request to a RunRequest
|
||||
let run_req = RunRequest {
|
||||
input: Some(UserInput::from_api(req.prompt, req.files)?),
|
||||
command: None,
|
||||
options: {
|
||||
let mut o = if req.session_active {
|
||||
RunOptions::api_session()
|
||||
} else {
|
||||
RunOptions::api_oneshot()
|
||||
};
|
||||
o.stream = req.stream;
|
||||
o.auto_continue = req.auto_continue.unwrap_or(false);
|
||||
o.cancel = cancel.clone();
|
||||
o
|
||||
},
|
||||
};
|
||||
|
||||
// 6. Branch on streaming vs JSON
|
||||
if req.stream {
|
||||
// Create SseEmitter + channel, spawn engine task, return Sse response
|
||||
let (tx, rx) = mpsc::channel(32);
|
||||
let emitter = SseEmitter::new(tx);
|
||||
let engine = state.engine.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let _ = engine.run(&mut ctx, run_req, &emitter).await;
|
||||
// Emitter Drop closes the channel; Sse stream ends naturally
|
||||
});
|
||||
|
||||
Ok(Sse::new(ReceiverStream::new(rx))
|
||||
.keep_alive(KeepAlive::default())
|
||||
.into_response())
|
||||
} else {
|
||||
// Use JsonEmitter synchronously, return JSON body
|
||||
let emitter = JsonEmitter::new();
|
||||
state.engine.run(&mut ctx, run_req, &emitter).await
|
||||
.map_err(ApiError::from)?;
|
||||
Ok(Json(emitter.into_response()).into_response())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The streaming path spawns a background task because axum needs to return the `Response` (with the SSE stream) before the engine finishes its work. The task owns the `ctx` and `emitter`, runs to completion, and naturally terminates when the engine returns. The channel closing signals the end of the stream to axum.
|
||||
|
||||
The non-streaming path runs synchronously in the handler task because we need the full result before returning the response body.
|
||||
|
||||
---
|
||||
|
||||
## Cancellation and Client Disconnect
|
||||
|
||||
Two cancellation sources, one unified mechanism:
|
||||
|
||||
1. **Client disconnect during streaming.** axum signals this by dropping the SSE receiver. The next `SseEmitter::emit` call fails with `ClientDisconnected`, which the engine handles by stopping further emits but continuing to completion so session state is persisted correctly.
|
||||
|
||||
2. **Request timeout.** The outer tower timeout layer fires after N seconds, dropping the handler's future. This cancels any pending awaits in the engine, which propagates through tokio cancellation. Active tool calls (especially bash/python/typescript subprocesses) need to be killed cleanly — this is the same concern as Phase 2's Ctrl-C handling.
|
||||
|
||||
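The engine's `CancellationToken` handles both cases uniformly. For streaming, the handler watches the SSE sender's `closed()` signal and triggers `cancel.cancel()` when the client goes away. For timeout, tower drops the handler's future; because merely dropping a `CancellationToken` does not cancel it, the handler must hold a `DropGuard` (from `cancel.clone().drop_guard()`) so that the drop fires any `cancelled()` waiters in the engine. Note that the timeout layer only reaches the non-streaming path this way: a streaming handler has already returned its response, and its engine run lives in a separately spawned task that the timeout layer cannot drop.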
|
||||
|
||||
```rust
|
||||
// Inside the streaming handler:
|
||||
let cancel_for_disconnect = cancel.clone();
|
||||
let send_tx = tx.clone();
|
||||
tokio::spawn(async move {
|
||||
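send_tx.closed().await; // resolves when receiver drops. NOTE: this clone also keeps the channel open, so this task must be aborted when the engine finishes, or the SSE stream never ends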
|
||||
cancel_for_disconnect.cancel();
|
||||
});
|
||||
```
|
||||
|
||||
**Tool call cancellation** is the interesting case. A running bash/python/typescript subprocess must be killed when `cancel` fires. The existing tool execution code uses `AbortSignal` from the `abort_on_ctrlc` crate; Phase 2's shim layer adapts it to `CancellationToken`. Phase 4 doesn't need to change this — it just needs to verify that the adapter is still firing correctly when cancellation comes from HTTP disconnect instead of Ctrl-C.
|
||||
|
||||
---
|
||||
|
||||
## Per-Request State Isolation
|
||||
|
||||
The critical correctness property: **two concurrent requests must not share mutable state.** The architecture from Phases 1–3 makes this structural rather than something we have to police:
|
||||
|
||||
- `AppState` is `Arc`-wrapped and contains only immutable config and shared services (vault, RAG cache, MCP factory, session store).
|
||||
- `RequestContext` is constructed fresh in each handler — two requests get two independent contexts.
|
||||
- `SessionHandle` uses per-session `Mutex` serialization — two concurrent requests on the *same* session wait their turn (by design).
|
||||
- `McpFactory` acquires handles via per-key sharing — two requests using the same MCP server share one process; two using different servers get independent processes.
|
||||
- `RagCache` shares `Arc<Rag>` via weak refs — same sharing property.
|
||||
|
||||
The one place where the architecture can't help us is **agent runtime isolation**. Two concurrent API requests on two different sessions, both running agents, must get two fully independent `AgentRuntime`s with their own supervisors, inboxes, todo lists, and escalation queues. Phase 1 Step 6.5 made this work by putting `AgentRuntime` on `RequestContext`, which is already per-request. Phase 4 just needs to verify nothing regresses.
|
||||
|
||||
**Integration test for this:** spin up 10 concurrent requests, each running a different agent with tools, and assert that each one gets its own tool call history, its own todo list, and its own eventual response. Use a mock LLM so the test is deterministic.
|
||||
|
||||
---
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
### Step 1: Add dependencies and scaffolding
|
||||
|
||||
Add to `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
axum = { version = "0.8", features = ["macros"] }
|
||||
tower = "0.5"
|
||||
tower-http = { version = "0.6", features = ["cors", "limit", "timeout", "trace"] }
|
||||
argon2 = "0.5"
|
||||
```
|
||||
|
||||
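`hyper` is already present. `tokio-stream` is also required (the SSE handler wraps the channel receiver in `ReceiverStream`); add it if it is not already a dependency.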
|
||||
|
||||
Create module structure:
|
||||
|
||||
- `src/api/mod.rs` — module root, `serve()` entrypoint
|
||||
- `src/api/config.rs` — `ApiConfig`, `AuthConfig`, etc.
|
||||
- `src/api/state.rs` — `ApiState`
|
||||
- `src/api/auth.rs` — middleware + `AuthContext`
|
||||
- `src/api/middleware.rs` — other middlewares (request_id, tracing, concurrency_limit, error_handler)
|
||||
- `src/api/error.rs` — `ApiError` + conversion from `CoreError`
|
||||
- `src/api/emitters/json.rs` — `JsonEmitter`
|
||||
- `src/api/emitters/sse.rs` — `SseEmitter`
|
||||
- `src/api/handlers/mod.rs` — handler module root
|
||||
- `src/api/handlers/completions.rs` — one-shot and session completions
|
||||
- `src/api/handlers/sessions.rs` — session CRUD
|
||||
- `src/api/handlers/metadata.rs` — list models/agents/roles/rags
|
||||
- `src/api/handlers/scope.rs` — role/agent/rag attachment endpoints
|
||||
- `src/api/handlers/rag.rs` — rebuild endpoint
|
||||
|
||||
Register `pub mod api;` in `src/main.rs`. Add a `--serve` CLI flag that calls `api::serve(app_state).await`.
|
||||
|
||||
**Verification:** `cargo check` clean with empty handler stubs returning 501 Not Implemented.
|
||||
|
||||
### Step 2: Implement auth middleware and error handling
|
||||
|
||||
Build the auth middleware against `AuthConfig::StaticKeys` using argon2 for verification. Implement `ApiError` with `IntoResponse` that produces the JSON error body. Implement `From<CoreError>` for `ApiError` using `CoreError::http_status()` and `CoreError::message()` (these methods are expected to exist on `CoreError` from Phase 2; if either is still missing when Phase 4 starts, add it here).
|
||||
|
||||
Write unit tests:
|
||||
- Valid key → handler runs, `AuthContext` is attached
|
||||
- Invalid key → 401
|
||||
- Missing key → 401
|
||||
- `AuthConfig::Disabled` → anonymous context synthesized
|
||||
|
||||
**Verification:** Auth tests pass. `curl -H "Authorization: Bearer <valid-key>" http://localhost:3400/v1/info` returns info; without the header returns 401.
|
||||
|
||||
### Step 3: Implement `JsonEmitter` and `SseEmitter`
|
||||
|
||||
Both are relatively mechanical. `JsonEmitter` accumulates events into a buffer and exposes `into_response()`. `SseEmitter` converts each event to an axum SSE frame and pushes into an mpsc channel.
|
||||
|
||||
Write unit tests using `NullEmitter` → feed a scripted sequence of events → assert the resulting JSON or SSE frames.
|
||||
|
||||
**Verification:** Both emitters have unit tests that drive a scripted `Event` sequence and compare to golden outputs.
|
||||
|
||||
### Step 4: Implement metadata handlers
|
||||
|
||||
Start with the easy endpoints: `GET /v1/models`, `/v1/agents`, `/v1/roles`, `/v1/rags`, `/v1/info`. These don't call the engine — they just read from `AppState` and return JSON.
|
||||
|
||||
**Verification:** `curl` each endpoint and inspect output. Write integration tests that spin up the router and hit each endpoint.
|
||||
|
||||
### Step 5: Implement session CRUD handlers
|
||||
|
||||
`POST /v1/sessions` creates via `SessionStore::create`. `GET /v1/sessions` lists via `SessionStore::list`. `GET /v1/sessions/:id` reads metadata + message history via `SessionStore::open` + handle lock. `DELETE /v1/sessions/:id` calls `SessionStore::delete`.
|
||||
|
||||
These handlers don't call the engine either. They're thin wrappers around `SessionStore`.
|
||||
|
||||
**Verification:** Create a session via POST, list it, read it, delete it, confirm 404 after delete. All through `curl`.
|
||||
|
||||
### Step 6: Implement one-shot completion handler
|
||||
|
||||
`POST /v1/completions` is the first engine-calling handler. It constructs a fresh `RequestContext` with no session, builds a `RunRequest` from the HTTP body, and calls `Engine::run` with either `JsonEmitter` or `SseEmitter` based on the `stream` flag.
|
||||
|
||||
This is where the streaming infrastructure first gets exercised end-to-end. Test both modes:
|
||||
|
||||
```bash
|
||||
# Non-streaming
|
||||
curl -X POST http://localhost:3400/v1/completions \
|
||||
-H "Authorization: Bearer <key>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"prompt":"hello"}'
|
||||
|
||||
# Streaming
|
||||
curl -N -X POST http://localhost:3400/v1/completions \
|
||||
-H "Authorization: Bearer <key>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Accept: text/event-stream" \
|
||||
-d '{"prompt":"hello","stream":true}'
|
||||
```
|
||||
|
||||
**Verification:** Both modes work with a real LLM. Disconnect the streaming client mid-response (Ctrl-C on curl) and verify the engine task gets cancelled cleanly — no orphaned MCP subprocesses, no hung tool executions.
|
||||
|
||||
### Step 7: Implement session completion handler
|
||||
|
||||
`POST /v1/sessions/:id/completions` is the same as one-shot but with a session attached. The handler calls `store.open(id)`, builds a context with `ctx.session = Some(handle)`, and proceeds as before. Session state is automatically persisted by the engine at the end of the turn.
|
||||
|
||||
Concurrent request test: spin up 10 concurrent `curl` commands all hitting the same session. Assert:
|
||||
- All 10 complete successfully
|
||||
- The session has 10 message pairs appended in some order (serialized by the per-session mutex)
|
||||
- No lost updates, no corrupted YAML
|
||||
|
||||
**Verification:** Concurrent test passes reliably. Run it 100 times in a loop to catch races.
|
||||
|
||||
### Step 8: Implement scope attachment handlers
|
||||
|
||||
`POST /v1/sessions/:id/role`, `/agent`, `/rag` and their `DELETE` counterparts. Each one opens the session handle, constructs a `RunRequest` with a `CoreCommand` variant (`UseRole`, `UseAgent`, `UseRag`), and calls the engine with no input — just the command. The engine dispatches the command, mutates state, and the session is persisted.
|
||||
|
||||
**Verification:** `POST /v1/sessions/<id>/role {"name":"explain"}` activates the role. Subsequent completion on the session uses the role. `DELETE /v1/sessions/<id>/role` clears it.
|
||||
|
||||
### Step 9: Implement miscellaneous handlers
|
||||
|
||||
`POST /v1/sessions/:id/compress`, `/empty`, `POST /v1/rags/:name/rebuild`. Same pattern: translate to `CoreCommand` and dispatch.
|
||||
|
||||
**Verification:** All endpoints respond correctly.
|
||||
|
||||
### Step 10: Graceful shutdown
|
||||
|
||||
axum's graceful shutdown requires a signal future. Wire it up:
|
||||
|
||||
```rust
|
||||
pub async fn serve(app: Arc<AppState>, config: ApiConfig) -> Result<()> {
|
||||
let state = ApiState::new(app, config);
|
||||
let router = build_router(state.clone());
|
||||
let listener = tokio::net::TcpListener::bind(state.config.listen_addr).await?;
|
||||
|
||||
let shutdown_signal = async {
|
||||
tokio::signal::ctrl_c().await.ok();
|
||||
info!("Received shutdown signal, draining requests...");
|
||||
};
|
||||
|
||||
axum::serve(listener, router)
|
||||
.with_graceful_shutdown(shutdown_signal)
|
||||
.await?;
|
||||
|
||||
info!("Draining active sessions...");
|
||||
tokio::time::timeout(
|
||||
Duration::from_secs(state.config.shutdown_grace_seconds),
|
||||
drain_active_requests(&state),
|
||||
).await.ok();
|
||||
|
||||
info!("Shutdown complete.");
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
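`drain_active_requests` waits for the semaphore to return to full capacity, bounded by `shutdown_grace_seconds`. After the grace period, any remaining requests must be force-cancelled; the snippet above only discards the timeout result, so that cancellation has to be triggered explicitly when the timeout elapses. Note also that `axum::serve(...).with_graceful_shutdown(...)` itself waits for in-flight connections to finish without a deadline, so the grace period must bound that await as well for the limit to hold.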
|
||||
|
||||
**Verification:** Start server, send a long streaming request, hit Ctrl-C. The server should finish the in-flight request (up to the grace period) before exiting, not cut it off mid-stream.
|
||||
|
||||
### Step 11: Configuration loading and docs
|
||||
|
||||
Wire `ApiConfig` through `config.yaml` parsing. Add a default `api.enabled: false` so the server refuses to start without explicit opt-in. Document the config shape, endpoint schemas, and auth setup in `docs/REST-API-SERVER.md`.
|
||||
|
||||
**Verification:** Start with `api.enabled: false` → fatal error with helpful message. Start with `api.enabled: true` + no auth keys → fatal error demanding at least one key (unless `AuthConfig::Disabled` is explicit).
|
||||
|
||||
### Step 12: Integration test suite
|
||||
|
||||
Write a comprehensive integration test suite in `tests/api/` that exercises the full HTTP surface with a mock LLM:
|
||||
|
||||
- Auth: valid, invalid, missing, disabled
|
||||
- Metadata: list each resource type
|
||||
- Session lifecycle: create → list → read → delete
|
||||
- One-shot completion: JSON + SSE
|
||||
- Session completion: single + concurrent
|
||||
- Scope attachment: role, agent, rag (set + clear)
|
||||
- Cancellation: client disconnect mid-stream, timeout expiry
|
||||
- Graceful shutdown: in-flight requests complete within grace period
|
||||
- Concurrent sessions: 20 sessions, each with a few turns, all running at once
|
||||
|
||||
Use `reqwest` as the test client. Spin up the server on a random port per test. The mock LLM lives as a fake `Client` implementation that returns scripted responses.
|
||||
|
||||
**Verification:** All tests pass. CI runs them on every PR.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Watch Items
|
||||
|
||||
| Risk | Severity | Mitigation |
|
||||
|---|---|---|
|
||||
| **SSE client disconnect detection lag** | High | The mpsc channel's `closed()` signal is the primary disconnect detector. Verify it fires within 1s of a real client disconnect. Add an integration test with `reqwest` that opens a stream, receives a few events, drops the connection, and asserts the engine's cancellation token fires within 2s. |
|
||||
| **Concurrent session writes losing data** | High | Phase 3's per-session mutex handles this structurally. Verify with the 100-concurrent-writers integration test from Phase 3 adapted to hit the HTTP layer. |
|
||||
| **Orphaned tool subprocesses on timeout** | High | Tool execution must respect the cancellation token. Test: start a completion that triggers a bash tool running `sleep 60`, timeout at 5s, verify the `sleep` process is killed (not reparented to init). |
|
||||
| **Auth key storage** | High | Store argon2 hashes, never plaintext. Rotate via config reload (future). Log subject (not key) on every request. Audit: no `println!` of any part of the key anywhere. |
|
||||
| **Streaming body size growth** | Medium | A long session with many tool calls produces a lot of SSE frames. Verify the mpsc channel size (32) is enough; if not, backpressure causes the engine task to block on emit. Document in the emitter: `emit()` can await. |
|
||||
| **CORS misconfiguration** | Medium | Default to no CORS. Require explicit origin allowlist. Log warnings on wildcard usage. Browser-accessible deployments should use a reverse proxy to handle CORS. |
|
||||
| **Auth bypass via malformed header** | Medium | Use axum's `Authorization` typed header extractor, not raw string parsing. Reject unknown schemes (only Bearer accepted). |
|
||||
| **Rate limit stub** | Low | Document that `rate_limit_per_minute` is not yet implemented. Add an issue for follow-up. Protect against DoS with `max_concurrent_requests` in the meantime. |
|
||||
| **Session metadata leak across users** | Low | `GET /v1/sessions` lists all sessions regardless of caller identity in Phase 4. Document this limitation: Phase 4's auth is coarse-grained (anyone with a valid key sees all sessions). Per-subject session ownership lands in a follow-up phase. Treat a Phase 4 deployment as single-tenant for now: every valid key has access to every session. |
|
||||
| **Body size abuse** | Low | `max_body_bytes` caps payload. File uploads (not yet supported) would need separate multipart handling. |
|
||||
| **Port binding failure** | Low | Fail fast with clear error if the configured port is in use or unreachable. Don't silently retry. |
|
||||
|
||||
---
|
||||
|
||||
## What Phase 4 Does NOT Do
|
||||
|
||||
- **No WebSocket support.** SSE is sufficient for server-to-client streaming; WebSockets would add bidirectional complexity we don't need. Client-to-server commands use regular HTTP POST.
|
||||
- **No multi-tenancy.** All sessions are visible to any authenticated caller. Per-subject session ownership is a follow-up.
|
||||
- **No rate limiting.** `rate_limit_per_minute` config exists but is a stub.
|
||||
- **No metrics endpoint.** Counters are in memory; Prometheus scraping lands later.
|
||||
- **No API versioning beyond `/v1/`.** Breaking changes would introduce `/v2/`.
|
||||
- **No JWT or OAuth.** Static API keys only. JWT introspection can extend `AuthConfig` later.
|
||||
- **No request signing.** Bearer tokens over HTTPS (users provide their own TLS termination via reverse proxy).
|
||||
- **No admin endpoints.** Server management (reload config, view metrics, kill sessions) is not exposed.
|
||||
- **No file upload.** File references in requests use absolute paths or URLs that the server fetches; no multipart uploads in Phase 4.
|
||||
- **No MCP tool exposure over API.** The API calls the engine, which runs tools internally. Direct "execute this tool" API endpoints don't exist and are not planned.
|
||||
|
||||
---
|
||||
|
||||
## Entry Criteria (from Phase 3)
|
||||
|
||||
- [ ] `SessionStore` trait is the only path to session persistence
|
||||
- [ ] `FileSessionStore` is wired into `AppState.sessions`
|
||||
- [ ] Concurrent-write integration test from Phase 3 passes
|
||||
- [ ] All session-touching callsites go through the store
|
||||
- [ ] `Engine::run` handles `RunOptions::api_oneshot()` and `RunOptions::api_session()` modes
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
|
||||
## Exit Criteria (Phase 4 complete)
|
||||
|
||||
- [ ] `--serve` flag starts an HTTP server on the configured port
|
||||
- [ ] `src/api/` module exists with all handlers, middleware, emitters
|
||||
- [ ] `JsonEmitter` and `SseEmitter` implemented and tested
|
||||
- [ ] Auth middleware validates argon2-hashed API keys
|
||||
- [ ] All 19 endpoints listed in the API surface are implemented and return sensible responses
|
||||
- [ ] Concurrent-session integration test passes (20 sessions, multiple turns, parallel)
|
||||
- [ ] Client disconnect during streaming triggers engine cancellation within 2s
|
||||
- [ ] Request timeout fires at the configured deadline
|
||||
- [ ] Graceful shutdown drains in-flight requests within the grace period
|
||||
- [ ] Tool subprocesses are killed on cancellation, not orphaned
|
||||
- [ ] `docs/REST-API-SERVER.md` documents config, endpoints, and auth setup
|
||||
- [ ] Full integration test suite in `tests/api/` passes
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] Phase 5 (Tool Scope Pooling) can optimize the hot path without changing the API surface
|
||||
@@ -1,755 +0,0 @@
|
||||
# Phase 5 Implementation Plan: Tool Scope Pooling and Lifecycle
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 5 turns the trivial no-pool `McpFactory` from Phase 1 Step 6.5 into a production-grade pooling layer with idle timeouts, a background reaper, health checks, and graceful shutdown integration. The architecture doesn't change — `McpFactory::acquire()` is still the only entry point, `Arc<McpServerHandle>` is still the reference type — but the factory now aggressively shares MCP subprocesses across scopes to keep warm-path latency near zero.
|
||||
|
||||
**Estimated effort:** ~1 week
|
||||
**Risk:** Medium. The pooling logic has subtle ordering concerns (handle Drop → idle pool vs teardown → reaper eviction). Get those wrong and you leak processes or double-free.
|
||||
**Depends on:** Phases 1–4 complete. Phase 4 is important because it's the first workload where pooling actually matters — CLI and REPL don't generate enough concurrent scope transitions to justify the complexity.
|
||||
|
||||
---
|
||||
|
||||
## Why Phase 5 Exists
|
||||
|
||||
After Phase 4 lands, the API server works correctly but has a performance problem: every API session activates its own MCP processes, and when the session closes, those processes tear down immediately. A realistic production workload — 20 concurrent users each sending a burst of requests — spawns and kills MCP subprocesses at an unsustainable rate. For servers like `github` that take 1–2 seconds to start (subprocess + stdio handshake + OAuth + `tools/list`), every API call adds visible cold-start latency.
|
||||
|
||||
The architectural framing for the fix was already designed in Phase 1 Step 6.5 and Phase 1's "MCP Lifecycle Policy" section:
|
||||
|
||||
1. **Layer 1: active Arc reference counting.** Already done in Phase 1. Scopes hold `Arc<McpServerHandle>`; the last drop triggers teardown.
|
||||
2. **Layer 2: idle grace period.** Not yet implemented. After the last Arc drops, the handle moves to an idle pool with a timestamp instead of tearing down. A background reaper evicts entries that have been idle past the configured threshold.
|
||||
3. **Acquisition order.** `acquire(key)` checks the active map first, then the idle pool (revival = zero latency), then spawns fresh.
|
||||
|
||||
Phase 5 implements Layer 2 + the reaper + the revival logic + the health check + graceful shutdown integration. No changes to the caller API. No changes to any other phase's code.
|
||||
|
||||
**This is a pure optimization phase.** Correctness is unchanged; only performance improves.
|
||||
|
||||
---
|
||||
|
||||
## The Architecture After Phase 5
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ McpFactory │
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────────┐ │
|
||||
│ │ active: │ │ idle: │ │
|
||||
│ │ HashMap<K, │ │ HashMap<K, │ │
|
||||
│ │ Weak<H>> │ │ IdleEntry> │ │
|
||||
│ └──────┬───────┘ └────────┬─────────┘ │
|
||||
│ │ │ │
|
||||
│ │ upgrade() │ remove() │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────────────────────────────┐ │
|
||||
│ │ acquire(key): │ │
|
||||
│ │ 1. Try active.upgrade() → share │ │
|
||||
│ │ 2. Try idle.remove() → revive │ │
|
||||
│ │ 3. Spawn fresh subprocess │ │
|
||||
│ └──────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────┐ │
|
||||
│ │ Background reaper (tokio::spawn): │ │
|
||||
│ │ every cleanup_interval: │ │
|
||||
│ │ walk idle, evict stale entries │ │
|
||||
│ │ (optional: health check) │ │
|
||||
│ └──────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────┘
|
||||
│
|
||||
│ Arc<McpServerHandle>
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ scope's ToolScope │
|
||||
│ (CLI/REPL/API request)│
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
### `McpFactory` (expanded)
|
||||
|
||||
```rust
|
||||
pub struct McpFactory {
|
||||
active: Mutex<HashMap<McpServerKey, Weak<McpServerHandleInner>>>,
|
||||
idle: Mutex<HashMap<McpServerKey, IdleEntry>>,
|
||||
config: McpFactoryConfig,
|
||||
shutdown: Arc<AtomicBool>,
|
||||
reaper_handle: Mutex<Option<JoinHandle<()>>>,
|
||||
}
|
||||
|
||||
struct IdleEntry {
|
||||
handle: Arc<McpServerHandleInner>,
|
||||
idle_since: Instant,
|
||||
last_health_check: Option<Instant>,
|
||||
}
|
||||
|
||||
pub struct McpFactoryConfig {
|
||||
pub idle_timeout: Duration,
|
||||
pub cleanup_interval: Duration,
|
||||
pub max_idle_servers: Option<usize>,
|
||||
pub health_check: Option<HealthCheckPolicy>,
|
||||
}
|
||||
|
||||
pub struct HealthCheckPolicy {
|
||||
pub interval: Duration,
|
||||
pub timeout: Duration,
|
||||
pub on_failure: HealthFailureAction,
|
||||
}
|
||||
|
||||
pub enum HealthFailureAction {
|
||||
Evict,
|
||||
EvictAndLog,
|
||||
LogOnly,
|
||||
}
|
||||
```
|
||||
|
||||
The factory grows three new pieces of state compared to Phase 1's stub:
|
||||
|
||||
- **`idle` map** — stores handles that nobody currently owns but that we've decided to keep warm.
|
||||
- **`shutdown` flag** — tells the reaper to exit and prevents new inserts into `idle` during drain.
|
||||
- **`reaper_handle`** — the `JoinHandle` of the background task, aborted and then awaited during graceful shutdown.
|
||||
|
||||
### `McpServerHandle` (refined)
|
||||
|
||||
Phase 1's `Arc<McpServerHandle>` becomes `Arc<McpServerHandleInner>`, and we add a `Drop` impl on the inner type that handles the "return to idle pool" logic:
|
||||
|
||||
```rust
|
||||
pub struct McpServerHandleInner {
|
||||
key: McpServerKey,
|
||||
service: RwLock<RunningService<RoleClient, ()>>,
|
||||
factory: Weak<McpFactory>,
|
||||
spawned_at: Instant,
|
||||
returning_to_pool: AtomicBool,
|
||||
}
|
||||
|
||||
impl Drop for McpServerHandleInner {
|
||||
fn drop(&mut self) {
|
||||
// If we're already returning to pool (revived from idle),
|
||||
// don't re-insert — the factory is handling it.
|
||||
if self.returning_to_pool.load(Ordering::Acquire) {
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(factory) = self.factory.upgrade() else {
|
||||
// Factory is gone — just let the service die via its own drop.
|
||||
return;
|
||||
};
|
||||
|
||||
if factory.shutdown.load(Ordering::Acquire) {
|
||||
// Shutting down — don't put it back in idle, just die.
|
||||
return;
|
||||
}
|
||||
|
||||
// Take ownership of self.service and move to idle pool.
|
||||
// This requires unsafe or a different ownership structure — see
|
||||
// "The Drop trick" section below.
|
||||
factory.return_to_idle(self);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**The Drop trick** — the issue is that `Drop::drop` can't actually move `self`'s fields out without `unsafe`, but we need to move the `RunningService` into the idle pool. The clean solution is to wrap the service in an `Option<RunningService>`:
|
||||
|
||||
```rust
|
||||
pub struct McpServerHandleInner {
|
||||
key: McpServerKey,
|
||||
service: Mutex<Option<RunningService<RoleClient, ()>>>, // Option so we can take() in Drop
|
||||
factory: Weak<McpFactory>,
|
||||
spawned_at: Instant,
|
||||
}
|
||||
|
||||
impl Drop for McpServerHandleInner {
|
||||
fn drop(&mut self) {
|
||||
let Some(factory) = self.factory.upgrade() else { return; };
|
||||
if factory.shutdown.load(Ordering::Acquire) { return; }
|
||||
|
||||
// Take the service out. After this, self.service is None.
|
||||
let service = match self.service.get_mut().take() {
|
||||
Some(s) => s,
|
||||
None => return, // Already taken — e.g., by shutdown drain.
|
||||
};
|
||||
|
||||
// Spawn a task to move it into the idle pool (can't await in Drop).
|
||||
let key = self.key.clone();
|
||||
let factory = factory.clone();
|
||||
tokio::spawn(async move {
|
||||
factory.accept_returning_handle(key, service).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
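This has the right shape but introduces a subtle race: the `tokio::spawn` inside `Drop` runs asynchronously, so if a new `acquire(key)` arrives between the Drop and the spawned task completing, it won't find the handle in `idle` yet and will spawn a fresh subprocess. That's acceptable — it's slightly wasteful but not incorrect, and the race window is microseconds. Note that `tokio::spawn` panics when called outside a Tokio runtime, so the last handle must be dropped on a runtime thread (or `Drop` should check `Handle::try_current()` first and fall back to letting the service die).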
|
||||
|
||||
An alternative that avoids the race: use a dedicated `return_tx: mpsc::UnboundedSender<ReturningHandle>` on the factory, push synchronously into it from Drop, and a single "idle manager" task owns the idle map. This is cleaner because the idle map only mutates from one task, but it adds a coordination point. **Recommendation: start with the `tokio::spawn` approach; switch to the mpsc pattern only if the race causes visible issues.**
|
||||
|
||||
### `McpServerHandle` (the public Arc wrapper)
|
||||
|
||||
```rust
|
||||
pub struct McpServerHandle(Arc<McpServerHandleInner>);
|
||||
|
||||
impl McpServerHandle {
|
||||
pub async fn call_tool(&self, tool: &str, args: Value) -> Result<ToolResult> {
|
||||
let guard = self.0.service.lock().await;
|
||||
let service = guard.as_ref().ok_or(McpError::HandleDrained)?;
|
||||
service.call_tool(tool, args).await
|
||||
}
|
||||
|
||||
pub async fn list_tools(&self) -> Result<Vec<ToolSpec>> {
|
||||
let guard = self.0.service.lock().await;
|
||||
let service = guard.as_ref().ok_or(McpError::HandleDrained)?;
|
||||
service.list_tools().await
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for McpServerHandle {
|
||||
fn clone(&self) -> Self { Self(self.0.clone()) }
|
||||
}
|
||||
```
|
||||
|
||||
Callers get a `McpServerHandle` (which is `Arc<Inner>` internally) from `acquire()`. Cloning is cheap. Dropping the last clone fires the `Drop` on `Inner`, which returns the underlying service to the idle pool or kills it.
|
||||
|
||||
---
|
||||
|
||||
## The `acquire` Path
|
||||
|
||||
Three cases in order:
|
||||
|
||||
```rust
|
||||
impl McpFactory {
|
||||
pub async fn acquire(&self, key: &McpServerKey) -> Result<McpServerHandle> {
|
||||
// Case 1: Active share
|
||||
{
|
||||
let active = self.active.lock();
|
||||
if let Some(weak) = active.get(key) {
|
||||
if let Some(inner) = weak.upgrade() {
|
||||
metrics::mcp_acquire_hit_active();
|
||||
return Ok(McpServerHandle(inner));
|
||||
}
|
||||
// Weak is dangling; let it fall through.
|
||||
}
|
||||
}
|
||||
|
||||
// Case 2: Revive from idle
|
||||
{
|
||||
let mut idle = self.idle.lock();
|
||||
if let Some(entry) = idle.remove(key) {
|
||||
metrics::mcp_acquire_hit_idle(entry.idle_since.elapsed());
|
||||
let inner = self.revive_idle_entry(entry);
|
||||
// Re-register in active map.
|
||||
self.active.lock().insert(key.clone(), Arc::downgrade(&inner));
|
||||
return Ok(McpServerHandle(inner));
|
||||
}
|
||||
}
|
||||
|
||||
// Case 3: Spawn fresh
|
||||
metrics::mcp_acquire_miss();
|
||||
let inner = self.spawn_new(key).await?;
|
||||
self.active.lock().insert(key.clone(), Arc::downgrade(&inner));
|
||||
Ok(McpServerHandle(inner))
|
||||
}
|
||||
|
||||
fn revive_idle_entry(&self, entry: IdleEntry) -> Arc<McpServerHandleInner> {
|
||||
// Wrap the handle in a fresh Arc. The IdleEntry held an Arc; we're
|
||||
// just transferring ownership here.
|
||||
entry.handle
|
||||
}
|
||||
|
||||
async fn spawn_new(&self, key: &McpServerKey) -> Result<Arc<McpServerHandleInner>> {
|
||||
let spec = self.resolve_spec(key)?;
|
||||
let service = McpServer::start(&spec).await?;
|
||||
let inner = Arc::new(McpServerHandleInner {
|
||||
key: key.clone(),
|
||||
service: Mutex::new(Some(service)),
|
||||
factory: Arc::downgrade(&self.weak_self()),
|
||||
spawned_at: Instant::now(),
|
||||
});
|
||||
Ok(inner)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Concurrency in `acquire`:** the `active.lock()` critical section is short — just a hashmap lookup and maybe an insert. It never holds across an `.await`. The `idle.lock()` critical section is equally short. The `spawn_new` path is the expensive one (subprocess spawn + stdio handshake + `tools/list`), and it runs OUTSIDE any lock. This means two concurrent `acquire(key)` calls that both miss can both spawn fresh, producing two subprocesses for the same key briefly. Once both register themselves in `active`, the second insert clobbers the first, and the first handle's Drop returns it to the idle pool. The net effect is one "wasted" spawn per race, which is acceptable.
|
||||
|
||||
If you want to eliminate the race entirely, add a per-key `OnceCell`-style coordinator:
|
||||
|
||||
```rust
|
||||
pending: Mutex<HashMap<McpServerKey, broadcast::Receiver<Arc<McpServerHandleInner>>>>,
|
||||
```
|
||||
|
||||
A caller that misses both active and idle checks `pending` — if another task is already spawning, it subscribes to the broadcast and waits. The first spawner publishes the result. Clean but adds a layer of complexity. Start simple; add this if races become a problem in practice.
|
||||
|
||||
---
|
||||
|
||||
## The Reaper Task
|
||||
|
||||
```rust
|
||||
async fn reaper_loop(factory: Arc<McpFactory>) {
|
||||
let mut ticker = interval(factory.config.cleanup_interval);
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
|
||||
if factory.shutdown.load(Ordering::Acquire) {
|
||||
info!("Reaper exiting (shutdown requested)");
|
||||
return;
|
||||
}
|
||||
|
||||
factory.evict_stale_idle().await;
|
||||
|
||||
if let Some(policy) = &factory.config.health_check {
|
||||
factory.run_health_checks(policy).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl McpFactory {
|
||||
async fn evict_stale_idle(&self) {
|
||||
let now = Instant::now();
|
||||
let timeout = self.config.idle_timeout;
|
||||
|
||||
// Phase 1: collect stale keys while holding the lock briefly.
|
||||
let stale: Vec<McpServerKey> = {
|
||||
let idle = self.idle.lock();
|
||||
idle.iter()
|
||||
.filter(|(_, entry)| now.duration_since(entry.idle_since) >= timeout)
|
||||
.map(|(k, _)| k.clone())
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Phase 2: remove them from the idle map and terminate.
|
||||
for key in stale {
|
||||
let entry = {
|
||||
let mut idle = self.idle.lock();
|
||||
idle.remove(&key)
|
||||
};
|
||||
if let Some(entry) = entry {
|
||||
self.terminate_idle_handle(entry).await;
|
||||
metrics::mcp_idle_evicted();
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: enforce max_idle_servers cap via LRU.
|
||||
if let Some(max) = self.config.max_idle_servers {
|
||||
self.enforce_max_idle(max).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn enforce_max_idle(&self, max: usize) {
|
||||
let victims: Vec<(McpServerKey, Instant)> = {
|
||||
let idle = self.idle.lock();
|
||||
if idle.len() <= max {
|
||||
return;
|
||||
}
|
||||
let mut entries: Vec<_> = idle.iter()
|
||||
.map(|(k, v)| (k.clone(), v.idle_since))
|
||||
.collect();
|
||||
entries.sort_by_key(|(_, t)| *t); // oldest first
|
||||
entries.into_iter().take(idle.len() - max).collect()
|
||||
};
|
||||
|
||||
for (key, _) in victims {
|
||||
let entry = self.idle.lock().remove(&key);
|
||||
if let Some(entry) = entry {
|
||||
self.terminate_idle_handle(entry).await;
|
||||
metrics::mcp_lru_evicted();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn terminate_idle_handle(&self, entry: IdleEntry) {
|
||||
// Take the service out of the Arc<Inner> and cancel it.
|
||||
// At this point, there are no other Arc refs — it's just us.
|
||||
if let Ok(inner) = Arc::try_unwrap(entry.handle) {
|
||||
if let Some(service) = inner.service.into_inner().take() {
|
||||
service.cancel().await.ok();
|
||||
}
|
||||
}
|
||||
// If try_unwrap fails, something else grabbed a ref — skip, it'll
|
||||
// return to idle on its own Drop.
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Timing:** The reaper runs on a tokio `interval` ticker that fires every `cleanup_interval`. The default is 30 seconds. Setting it too low wastes CPU; setting it too high means idle servers linger longer than `idle_timeout`. The tradeoff is a worst-case idle lifetime of `idle_timeout + cleanup_interval`.
|
||||
|
||||
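**`Arc::try_unwrap`** is the key to safe teardown. By the time the reaper decides to evict an entry, the only Arc to that `Inner` should be the one in the `IdleEntry`. Any concurrent `acquire(key)` would have removed it from the idle map first. So `try_unwrap` should normally succeed — but if it doesn't (e.g., the health-check path still holds its own clone of the handle when it calls `terminate_idle_handle`), we skip the teardown. The entry has already been removed from the idle map at that point, so the service only comes back when that last reference is dropped and the handle's `Drop` returns it to the idle pool, where a later reaper cycle can evict it.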
|
||||
|
||||
---
|
||||
|
||||
## The Health Check Path
|
||||
|
||||
```rust
|
||||
impl McpFactory {
|
||||
async fn run_health_checks(&self, policy: &HealthCheckPolicy) {
|
||||
let now = Instant::now();
|
||||
let candidates: Vec<McpServerKey> = {
|
||||
let idle = self.idle.lock();
|
||||
idle.iter()
|
||||
.filter(|(_, entry)| {
|
||||
entry.last_health_check
|
||||
.map(|t| now.duration_since(t) >= policy.interval)
|
||||
.unwrap_or(true)
|
||||
})
|
||||
.map(|(k, _)| k.clone())
|
||||
.collect()
|
||||
};
|
||||
|
||||
for key in candidates {
|
||||
let handle = {
|
||||
let idle = self.idle.lock();
|
||||
idle.get(&key).map(|e| e.handle.clone())
|
||||
};
|
||||
let Some(handle) = handle else { continue };
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
policy.timeout,
|
||||
self.ping_handle(&handle),
|
||||
).await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(())) => {
|
||||
let mut idle = self.idle.lock();
|
||||
if let Some(entry) = idle.get_mut(&key) {
|
||||
entry.last_health_check = Some(now);
|
||||
}
|
||||
metrics::mcp_health_ok();
|
||||
}
|
||||
Ok(Err(e)) | Err(_) => {
|
||||
metrics::mcp_health_failed();
|
||||
match policy.on_failure {
|
||||
HealthFailureAction::Evict | HealthFailureAction::EvictAndLog => {
|
||||
let entry = self.idle.lock().remove(&key);
|
||||
if let Some(entry) = entry {
|
||||
self.terminate_idle_handle(entry).await;
|
||||
}
|
||||
if matches!(policy.on_failure, HealthFailureAction::EvictAndLog) {
|
||||
warn!(key = ?key, error = ?e, "evicted unhealthy MCP server");
|
||||
}
|
||||
}
|
||||
HealthFailureAction::LogOnly => {
|
||||
warn!(key = ?key, error = ?e, "MCP server failed health check");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn ping_handle(&self, handle: &Arc<McpServerHandleInner>) -> Result<()> {
|
||||
let guard = handle.service.lock().await;
|
||||
let service = guard.as_ref().ok_or(McpError::HandleDrained)?;
|
||||
// `list_tools` is cheap and standard across all MCP servers.
|
||||
service.list_tools().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Health checks are optional (`health_check: None` disables them). When enabled, they run on every reaper tick, but only for idle entries that have never been checked or whose last check was at least `policy.interval` ago. Because only the idle map is walked, servers that are currently in active use are never pinged.
|
||||
|
||||
---
|
||||
|
||||
## Graceful Shutdown Integration
|
||||
|
||||
The factory coordinates with the process shutdown signal (Ctrl-C for CLI, SIGTERM for server mode). When shutdown fires:
|
||||
|
||||
1. Set `factory.shutdown = true`. Any subsequent `acquire()` still works but new handles won't be returned to idle on Drop.
|
||||
2. Cancel the reaper's `JoinHandle`.
|
||||
3. Drain the idle pool: walk it, call `terminate_idle_handle` for each entry.
|
||||
4. Wait for active handles to drop naturally as their scopes finish. If there's a shutdown grace period (Phase 4's `shutdown_grace_seconds`), bound the wait with that.
|
||||
|
||||
```rust
|
||||
impl McpFactory {
|
||||
pub async fn shutdown(&self, grace: Duration) {
|
||||
info!("McpFactory entering shutdown");
|
||||
self.shutdown.store(true, Ordering::Release);
|
||||
|
||||
// Stop the reaper.
|
||||
if let Some(handle) = self.reaper_handle.lock().take() {
|
||||
handle.abort();
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
// Drain the idle pool immediately.
|
||||
let idle_entries: Vec<IdleEntry> = {
|
||||
let mut idle = self.idle.lock();
|
||||
idle.drain().map(|(_, v)| v).collect()
|
||||
};
|
||||
for entry in idle_entries {
|
||||
self.terminate_idle_handle(entry).await;
|
||||
}
|
||||
|
||||
// Wait for active scopes to release their handles.
|
||||
let deadline = Instant::now() + grace;
|
||||
while Instant::now() < deadline {
|
||||
if self.active_count() == 0 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
// Force-terminate any remaining active handles.
|
||||
let remaining = self.active_count();
|
||||
if remaining > 0 {
|
||||
warn!(count = remaining, "force-terminating MCP servers after grace period");
|
||||
self.force_terminate_active().await;
|
||||
}
|
||||
|
||||
info!("McpFactory shutdown complete");
|
||||
}
|
||||
|
||||
fn active_count(&self) -> usize {
|
||||
let active = self.active.lock();
|
||||
active.values().filter(|w| w.strong_count() > 0).count()
|
||||
}
|
||||
|
||||
async fn force_terminate_active(&self) {
|
||||
// Walk the active map, upgrade the weak refs, and call cancel
|
||||
// directly on the underlying service. This is a last resort.
|
||||
let handles: Vec<Arc<McpServerHandleInner>> = {
|
||||
let active = self.active.lock();
|
||||
active.values().filter_map(|w| w.upgrade()).collect()
|
||||
};
|
||||
for handle in handles {
|
||||
if let Ok(inner) = Arc::try_unwrap(handle) {
|
||||
if let Some(service) = inner.service.into_inner().take() {
|
||||
service.cancel().await.ok();
|
||||
}
|
||||
}
|
||||
// If try_unwrap fails, we can't force-kill without leaking
|
||||
// the service. Log and move on.
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Phase 4's `serve()` function calls `factory.shutdown(grace)` after the axum server has stopped accepting new requests. This chains cleanly: axum drains requests → factory drains idle pool → factory waits for active scopes to release their handles (force-terminating any that remain after the grace period) → process exits.
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
Add to `config.yaml`:
|
||||
|
||||
```yaml
|
||||
mcp_pool:
|
||||
idle_timeout_seconds: 300 # how long idle servers stay warm (default: 300 for --serve, 0 for CLI/REPL)
|
||||
cleanup_interval_seconds: 30 # how often the reaper runs
|
||||
max_idle_servers: 50 # LRU cap (null = unbounded)
|
||||
health_check:
|
||||
interval_seconds: 60
|
||||
timeout_seconds: 5
|
||||
on_failure: EvictAndLog # or Evict, LogOnly
|
||||
```
|
||||
|
||||
Per-server overrides live in `functions/mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"github": { "command": "...", "idle_timeout_seconds": 900 },
|
||||
"filesystem": { "command": "...", "idle_timeout_seconds": 60 },
|
||||
"jira": { "command": "...", "idle_timeout_seconds": 300 }
|
||||
}
|
||||
```
|
||||
|
||||
The per-server override wins over the global config. The resolution is: look up the server spec, check if it has `idle_timeout_seconds`, use that if present, else use `mcp_pool.idle_timeout_seconds`, else use the mode default (0 for CLI/REPL, 300 for `--serve`).
|
||||
|
||||
**Mode defaults** are critical because they preserve Phase 1 Step 6.5's behavior. CLI and REPL users get `idle_timeout = 0`, which means the factory behaves exactly like the no-pool version — drop = terminate. The pool is inert for single-user scenarios. Only `--serve` mode turns it on by default. This avoids regressing REPL users, who don't want idle MCP subprocesses lingering in the background.
|
||||
|
||||
```rust
|
||||
pub fn default_idle_timeout(mode: WorkingMode) -> Duration {
|
||||
match mode {
|
||||
WorkingMode::Cmd | WorkingMode::Repl => Duration::ZERO,
|
||||
WorkingMode::Api => Duration::from_secs(300),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Metrics
|
||||
|
||||
Phase 5 is the right time to add basic observability counters. They're cheap and the factory is where the interesting operational questions live.
|
||||
|
||||
```rust
|
||||
mod metrics {
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
pub static MCP_SPAWNED: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_ACQUIRE_ACTIVE_HIT: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_ACQUIRE_IDLE_HIT: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_ACQUIRE_MISS: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_IDLE_EVICTED: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_LRU_EVICTED: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_HEALTH_OK: AtomicU64 = AtomicU64::new(0);
|
||||
pub static MCP_HEALTH_FAILED: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
pub fn mcp_acquire_hit_active() { MCP_ACQUIRE_ACTIVE_HIT.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_acquire_hit_idle(age: Duration) {
|
||||
MCP_ACQUIRE_IDLE_HIT.fetch_add(1, Ordering::Relaxed);
|
||||
// In a real metrics system, record a histogram of age for revival latency.
|
||||
}
|
||||
pub fn mcp_acquire_miss() { MCP_ACQUIRE_MISS.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_spawned() { MCP_SPAWNED.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_idle_evicted() { MCP_IDLE_EVICTED.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_lru_evicted() { MCP_LRU_EVICTED.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_health_ok() { MCP_HEALTH_OK.fetch_add(1, Ordering::Relaxed); }
|
||||
pub fn mcp_health_failed() { MCP_HEALTH_FAILED.fetch_add(1, Ordering::Relaxed); }
|
||||
|
||||
pub fn snapshot() -> MetricsSnapshot {
|
||||
MetricsSnapshot {
|
||||
spawned: MCP_SPAWNED.load(Ordering::Relaxed),
|
||||
acquire_active_hit: MCP_ACQUIRE_ACTIVE_HIT.load(Ordering::Relaxed),
|
||||
acquire_idle_hit: MCP_ACQUIRE_IDLE_HIT.load(Ordering::Relaxed),
|
||||
acquire_miss: MCP_ACQUIRE_MISS.load(Ordering::Relaxed),
|
||||
idle_evicted: MCP_IDLE_EVICTED.load(Ordering::Relaxed),
|
||||
lru_evicted: MCP_LRU_EVICTED.load(Ordering::Relaxed),
|
||||
health_ok: MCP_HEALTH_OK.load(Ordering::Relaxed),
|
||||
health_failed: MCP_HEALTH_FAILED.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Expose the snapshot via `GET /v1/info/mcp` in the API server (piggybacks on Phase 4's `/v1/info`). CLI/REPL users can inspect via a new `.info mcp` dot-command.
|
||||
|
||||
**Derived metrics worth computing:**
|
||||
- Hit rate = `(active_hit + idle_hit) / (active_hit + idle_hit + miss)` — should be >0.9 for a well-tuned pool.
|
||||
- Revival latency distribution — how old were idle entries when revived? Informs tuning of `idle_timeout`.
|
||||
- Eviction rate — how often is the pool churning?
|
||||
|
||||
None of this is Prometheus-compatible yet; that integration is a follow-up. For Phase 5, plain counters are enough to diagnose issues.
|
||||
|
||||
---
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
### Step 1: Expand `McpFactory` to support the idle pool
|
||||
|
||||
Add the `idle` map, `shutdown` flag, and `reaper_handle` fields. Keep the existing `active` map. Don't change any caller code yet.
|
||||
|
||||
Implement `acquire()` with the three-case logic (active → idle → spawn). At this point the idle pool is always empty because nothing puts anything in it, so the logic reduces to Phase 1's behavior. Tests should still pass.
|
||||
|
||||
**Verification:** `cargo check` + existing Phase 1 tests pass.
|
||||
|
||||
### Step 2: Implement `Drop` on `McpServerHandleInner` with return-to-idle
|
||||
|
||||
Switch `service` to `Mutex<Option<RunningService>>`. Implement `Drop` that spawns a task to call `factory.accept_returning_handle(key, service)`. The factory method inserts into `idle`.
|
||||
|
||||
At this point, dropped handles start populating the idle pool. The reaper isn't running yet, so idle entries accumulate without bound.
|
||||
|
||||
**Verification:** Manual test: acquire a handle, drop it, assert the idle map now has the entry. Then acquire the same key again and assert it comes from idle (not a fresh spawn).
|
||||
|
||||
### Step 3: Implement the reaper task
|
||||
|
||||
Add `reaper_loop` and `evict_stale_idle`. Start the reaper in `McpFactory::new()` via `tokio::spawn`, store the `JoinHandle`. Default `idle_timeout` based on working mode.
|
||||
|
||||
**Verification:** Unit test with a tiny timeout (e.g., 100ms) — acquire, drop, wait 200ms, assert the idle map is empty. Use a mock MCP server (or a no-op `RunningService` for tests).
|
||||
|
||||
### Step 4: Add configuration plumbing
|
||||
|
||||
Parse `mcp_pool` from `config.yaml` into `McpFactoryConfig`. Parse per-server `idle_timeout_seconds` overrides from `functions/mcp.json`. Wire everything through `AppState::init()`.
|
||||
|
||||
**Verification:** Config tests that verify defaults, overrides, and mode-specific behavior.
|
||||
|
||||
### Step 5: Implement health checks
|
||||
|
||||
Add `run_health_checks`, `ping_handle`, and the `HealthCheckPolicy` config. Wire into the reaper loop. Default is `None` (disabled).
|
||||
|
||||
**Verification:** Unit test with a mock MCP server that returns an error on `list_tools` after N calls — verify the factory evicts it and logs.
|
||||
|
||||
### Step 6: Implement graceful shutdown
|
||||
|
||||
Add `McpFactory::shutdown(grace)`. Wire into Phase 4's `serve()` shutdown sequence and into the CLI/REPL exit path (for clean subprocess termination).
|
||||
|
||||
**Verification:** Start the API server, send several requests to warm up the pool, send SIGTERM, verify all MCP subprocesses terminate within the grace period (use `ps` or process tree inspection).
|
||||
|
||||
### Step 7: Expose metrics
|
||||
|
||||
Add the atomic counters, the snapshot function, and the `.info mcp` dot-command. Add `GET /v1/info/mcp` handler in the API server.
|
||||
|
||||
**Verification:** `.info mcp` shows sensible numbers after a few REPL turns. `/v1/info/mcp` returns JSON. Hit rate climbs over time as the pool warms.
|
||||
|
||||
### Step 8: Load testing
|
||||
|
||||
Write a test harness that spins up `--serve` mode and fires 100 concurrent completion requests, each using a mix of 2–3 MCP servers, across a pool of 10 different server configurations. Assert:
|
||||
|
||||
- No test failures
|
||||
- No orphaned subprocesses (check `ps` before and after)
|
||||
- MCP spawn count stays low (hit rate >80%)
|
||||
- p99 latency for the warm path is <200ms, measured excluding upstream LLM latency
|
||||
|
||||
This is the practical validation that Phase 5 delivered on its performance promise.
|
||||
|
||||
**Verification:** Load test passes. Metrics snapshot shows expected hit rate.
|
||||
|
||||
### Step 9: Document tuning knobs
|
||||
|
||||
Update `docs/function-calling/MCP-SERVERS.md` with the new config options and tuning guidance:
|
||||
|
||||
- How to choose `idle_timeout` for different workloads
|
||||
- When to enable health checks
|
||||
- How to read the metrics
|
||||
- What the `max_idle_servers` cap protects against
|
||||
|
||||
Add an "MCP Pool Lifecycle" section to `docs/REST-API-ARCHITECTURE.md` describing the production topology.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Watch Items
|
||||
|
||||
| Risk | Severity | Mitigation |
|
||||
|---|---|---|
|
||||
| **Drop-race between `acquire` and `return_to_idle`** | Medium | The `tokio::spawn` inside Drop runs asynchronously. If an `acquire(key)` fires between Drop and the spawned task completing, it misses the idle pool and spawns fresh. Acceptable for correctness; monitor hit rate metrics, switch to the mpsc coordinator pattern if races show up in production. |
|
||||
| **`Arc::try_unwrap` failing in `terminate_idle_handle`** | Medium | If something holds an extra Arc to an idle entry (shouldn't happen under normal flow), `try_unwrap` returns `Err` and we skip eviction. The entry stays in the idle map forever. Mitigation: log every such failure with a WARN. Write a test that verifies the shape never produces such extra refs. |
|
||||
| **`tokio::time::interval` drift** | Low | `interval` drifts if the system is under load — a tick can be delayed. This means `cleanup_interval` is a lower bound, not a guarantee. For a 30-second interval this is irrelevant; document it. |
|
||||
| **Reaper task panic** | Medium | If the reaper task panics (unreachable under normal flow, but possible under library bugs), the pool stops cleaning up. Mitigation: run the reaper under a supervisor task that awaits its `tokio::task::JoinHandle` and respawns the reaper if the handle resolves to a panic. Add a metric for reaper restarts. |
|
||||
| **MCP server state on revival** | High | Reviving a server from idle assumes it's still in the same state it was when it went idle. Most MCP servers are stateless (they reload config on each tool call), but some might maintain in-memory state that's stale after 5 minutes of idle. Mitigation: health checks during idle provide an early warning; document that pool idle is only safe for stateless servers. |
|
||||
| **Credential rotation** | High | If the user rotates their GitHub token (or any MCP-server-side credential), the idle pool entries hold the old credential baked into the subprocess env. A rotation requires restarting affected MCP servers. Mitigation: expose a `.reload mcp` REPL command and `POST /v1/mcp/reload` API that clears the idle pool, forcing fresh spawns with the new credentials on next acquire. |
|
||||
| **Per-server timeout resolution** | Low | The `idle_timeout` lookup (per-server override → pool default → mode default) happens at `return_to_idle` time. Changing config at runtime won't affect already-idle entries. Document this; config reload flushes idle pool. |
|
||||
| **`max_idle_servers` thrashing** | Medium | If the cap is set too low relative to the working set, every new `acquire` evicts an old idle entry, destroying the hit rate. Default to 50, document the signal: rising eviction rate + falling hit rate = raise the cap. |
|
||||
| **Subprocess leak on factory drop** | High | If `AppState` (which owns `McpFactory`) drops without calling `shutdown()`, the idle pool's `Arc` references are released and the handles' `Drop` impls run, but the factory's `Weak` self-reference is already dead, so nothing puts them back in idle — they just terminate via `RunningService::drop`. Verify this actually fires cleanly (not via the tokio::spawn hack). Add a test. |
|
||||
|
||||
---
|
||||
|
||||
## What Phase 5 Does NOT Do
|
||||
|
||||
- **No LLM response caching.** The factory pools MCP subprocesses, not LLM responses.
|
||||
- **No distributed pooling.** A single factory instance owns its pool. Running multiple Loki server instances means each has its own pool; MCP processes are not shared across hosts.
|
||||
- **No background server restart on crash.** If an MCP subprocess dies while idle, the reaper's health check (when enabled — it is off by default) evicts it; the next `acquire` spawns fresh. There's no "always keep N warm" preflight.
|
||||
- **No OAuth token refresh for MCP.** If a server uses OAuth and its token expires during an idle period, the next `acquire` gets an expired handle. The server must handle its own refresh, or the user must rotate and `.reload mcp`.
|
||||
- **No Prometheus integration.** Plain atomic counters; Prometheus support is a follow-up.
|
||||
- **No adaptive tuning.** `idle_timeout` is a fixed config value, not auto-adjusted based on usage patterns.
|
||||
- **No cross-process coordination.** Two Loki processes running `--serve` on the same host each have independent pools. They can't share MCP subprocesses across processes.
|
||||
- **No changes to the factory's public API.** `acquire()` still takes `&McpServerKey`, still returns `McpServerHandle`. Callers don't notice Phase 5 happened.
|
||||
|
||||
The sole goal of Phase 5 is: **make the warm path free by keeping recently-used MCP subprocesses alive, with automatic eviction of stale ones, a background reaper, health checks, and graceful shutdown integration.**
|
||||
|
||||
---
|
||||
|
||||
## Entry Criteria (from Phase 4)
|
||||
|
||||
- [ ] API server runs in production-like conditions
|
||||
- [ ] Concurrent request handling verified by integration tests
|
||||
- [ ] `McpFactory::acquire()` is the only MCP acquisition path
|
||||
- [ ] Phase 4's integration test suite passes
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
|
||||
## Exit Criteria (Phase 5 complete)
|
||||
|
||||
- [ ] `McpFactory` has the idle map and reaper task
|
||||
- [ ] `McpServerHandleInner::Drop` returns handles to the idle pool instead of terminating
|
||||
- [ ] Reaper evicts idle entries past `idle_timeout`
|
||||
- [ ] `max_idle_servers` LRU cap enforced
|
||||
- [ ] Optional health checks working and configurable
|
||||
- [ ] Per-server `idle_timeout_seconds` overrides parsed and respected
|
||||
- [ ] Mode-specific defaults (CLI/REPL = 0, API = 300) preserve pre-Phase-5 behavior
|
||||
- [ ] Graceful shutdown drains the pool within the grace period
|
||||
- [ ] Metrics counters exposed via `.info mcp` and `GET /v1/info/mcp`
|
||||
- [ ] Load test shows hit rate >0.8 and no orphaned subprocesses
|
||||
- [ ] `docs/function-calling/MCP-SERVERS.md` documents the pool config
|
||||
- [ ] `docs/REST-API-ARCHITECTURE.md` "MCP Pool Lifecycle" section updated
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] Phase 6 (production hardening) can proceed
|
||||
@@ -1,744 +0,0 @@
|
||||
# Phase 6 Implementation Plan: Production Hardening
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 6 closes out the refactor by picking up every "deferred to production hardening" item from Phases 1–5 and delivering a Loki build that's safe to run as a multi-tenant service. The preceding phases made Loki *functionally* a server — Phase 6 makes it *operationally* a server. That means real rate limiting instead of a stub, per-subject session ownership instead of flat visibility, Prometheus metrics instead of in-memory counters, structured JSON logging, deployment manifests, security headers, config validation, and operational runbooks.
|
||||
|
||||
This is the final phase. After it lands, Loki v1 is production-ready: you can run `loki --serve` in a container behind a reverse proxy, scrape its metrics from Prometheus, route requests through a rate limiter, and have multiple tenants share the same instance without seeing each other's data.
|
||||
|
||||
**Estimated effort:** ~1 week
|
||||
**Risk:** Low. Most of the work is applying well-known patterns (sliding-window rate limiting, row-level authz, Prometheus, structured logging) on top of the architecture the previous phases already built. No new core types, no new pipelines.
|
||||
**Depends on:** Phases 1–5 complete. The API server runs, MCP pool works, sessions are UUID-keyed.
|
||||
|
||||
---
|
||||
|
||||
## Why Phase 6 Exists
|
||||
|
||||
Phases 4 and 5 got the API server running with correct semantics, but several explicit gaps were called out as "stubs" or "follow-ups." A Phase 4 deployment is usable for a trusted single-tenant context (an internal tool, a personal server) but unsafe for anything else:
|
||||
|
||||
- **Anyone with a valid API key can see every session.** Phase 4 flagged this as "single-tenant-per-key." In a multi-tenant deployment where Alice and Bob both have keys, Alice can list Bob's sessions and read their messages. This is a security issue, not a feature gap.
|
||||
- **No real rate limiting.** Phase 4's `max_concurrent_requests` semaphore caps parallelism but doesn't throttle per-subject request rates. A single runaway client can exhaust the whole concurrency budget.
|
||||
- **No metrics for external observability.** Phase 5 added in-memory counters, but they're only reachable via the `.info mcp` dot-command or a one-shot JSON endpoint. Production needs Prometheus scraping so alerting and dashboards work.
|
||||
- **Logs aren't structured.** The `tracing` spans from Phase 4 middleware emit human-readable text. Aggregators like Loki (the other one), Datadog, or CloudWatch want JSON with correlation IDs.
|
||||
- **No deployment story.** There's no Dockerfile, no systemd unit, no documented way to actually run the thing in production. Every deploying team has to reinvent this.
|
||||
- **Security headers missing.** Phase 4's CORS handles cross-origin; it doesn't set `X-Content-Type-Options`, `X-Frame-Options`, or similar defaults that a browser-facing endpoint should have.
|
||||
- **No config validation at startup.** Mistyped config values produce runtime errors hours after deployment instead of failing fast at startup.
|
||||
- **Operational procedures are undocumented.** How do you rotate auth keys? How do you reload MCP credentials? What's the runbook when the MCP hit rate drops? None of this is written down.
|
||||
|
||||
Phase 6 delivers answers to all of the above. It's the "you can actually deploy this" phase.
|
||||
|
||||
---
|
||||
|
||||
## What Phase 6 Delivers
|
||||
|
||||
Grouped by theme rather than by dependency order. Each item is independently valuable and can be worked in parallel.
|
||||
|
||||
### Security and isolation
|
||||
|
||||
1. **Per-subject session ownership** — every session records the authenticated subject that created it; reads/writes are authz-checked against the caller's subject.
|
||||
2. **Scope-based authorization** — `AuthContext.scopes` are enforced per endpoint (e.g., `read:sessions`, `write:sessions`, `admin:mcp`). Phase 4's middleware already populates scopes; Phase 6 adds the enforcement.
|
||||
3. **JWT support** — extends `AuthConfig` with a `Jwt { issuer, audience, jwks_url }` variant that validates tokens against a JWKS endpoint and extracts subject + scopes from claims.
|
||||
4. **Security headers middleware** — `X-Content-Type-Options: nosniff`, `X-Frame-Options: DENY`, `Referrer-Policy: strict-origin`, optional HSTS when behind HTTPS.
|
||||
5. **Audit logging** — structured audit events for every authenticated request (subject, action, target, result), written to a dedicated sink so they survive log rotation.
|
||||
|
||||
### Throughput and fairness
|
||||
|
||||
6. **Per-subject rate limiting** — sliding-window limiter keyed by subject. Enforces `rate_limit_per_minute` and related config. Returns `429 Too Many Requests` with a `Retry-After` header.
|
||||
7. **Per-subject concurrency limit** — subject-scoped semaphore so one noisy neighbor can't exhaust the global concurrency budget.
|
||||
8. **Backpressure signal** — expose a `/healthz/ready` endpoint that returns 503 when the server is saturated, so upstream load balancers can drain traffic.
|
||||
|
||||
### Observability
|
||||
|
||||
9. **Structured JSON logging** — every log line is JSON with `timestamp`, `level`, `target`, `request_id`, `subject`, `session_id`, and `fields`. Routes through `tracing_subscriber` with `fmt::layer().json()`.
|
||||
10. **Prometheus metrics endpoint** — `/metrics` exposing the existing Phase 5 counters plus new HTTP metrics (`http_requests_total`, `http_request_duration_seconds`, `http_requests_in_flight`), MCP metrics (`mcp_pool_size`, `mcp_acquire_latency_seconds` histogram), and session metrics (`sessions_active_total`, `sessions_created_total`).
|
||||
11. **Liveness and readiness probes** — `/healthz/live` for process liveness (always 200 unless shutting down), `/healthz/ready` for dependency readiness (config loaded, MCP pool initialized, storage writable).
|
||||
|
||||
### Operability
|
||||
|
||||
12. **Config validation at startup** — a dedicated `ApiConfig::validate()` that checks every field against a schema and fails fast with a readable error message listing *all* problems, not just the first one.
|
||||
13. **SIGHUP config reload** — reloads auth keys, log level, and rate limit settings without restarting the server. Does NOT reload MCP pool config (requires restart because the pool holds live subprocesses).
|
||||
14. **Dockerfile + multi-stage build** — minimal runtime image based on `debian:bookworm-slim` with the compiled binary, config directory, and non-root user.
|
||||
15. **systemd service unit** — with `Type=notify`, sandboxing directives, and resource limits.
|
||||
16. **docker-compose example** — for local development with nginx-as-TLS-terminator in front.
|
||||
17. **Kubernetes manifests** — Deployment, Service, ConfigMap, Secret, HorizontalPodAutoscaler.
|
||||
|
||||
### Documentation
|
||||
|
||||
18. **Operational runbook** (`docs/RUNBOOK.md`) — documented procedures for common scenarios.
|
||||
19. **Deployment guide** (`docs/DEPLOYMENT.md`) — end-to-end instructions for each deployment target.
|
||||
20. **Security guide** (`docs/SECURITY.md`) — threat model, hardening checklist, key rotation procedures.
|
||||
|
||||
---
|
||||
|
||||
## Core Type Additions
|
||||
|
||||
Most of Phase 6 hangs off existing types. A few new concepts need introducing.
|
||||
|
||||
### `AuthContext` enrichment
|
||||
|
||||
Phase 4 defined `AuthContext { subject: String, scopes: Vec<String> }`. Phase 6 extends it:
|
||||
|
||||
```rust
|
||||
pub struct AuthContext {
|
||||
pub subject: String,
|
||||
pub scopes: Scopes,
|
||||
pub key_id: Option<String>, // for audit log correlation
|
||||
pub claims: Option<JwtClaims>, // present when auth mode is Jwt
|
||||
}
|
||||
|
||||
pub struct Scopes(HashSet<String>);
|
||||
|
||||
impl Scopes {
|
||||
pub fn has(&self, scope: &str) -> bool;
|
||||
pub fn has_any(&self, required: &[&str]) -> bool;
|
||||
pub fn has_all(&self, required: &[&str]) -> bool;
|
||||
}
|
||||
|
||||
pub enum Scope {
|
||||
ReadSessions, // "read:sessions"
|
||||
WriteSessions, // "write:sessions"
|
||||
ReadAgents, // "read:agents"
|
||||
RunAgents, // "run:agents"
|
||||
ReadModels, // "read:models"
|
||||
AdminMcp, // "admin:mcp"
|
||||
AdminSessions, // "admin:sessions" — can see all users' sessions
|
||||
}
|
||||
```
|
||||
|
||||
The `Scope` enum provides typed constants for the well-known scope strings used in the handlers. Custom scopes (for callers to define their own access tiers) continue to work as raw strings.
|
||||
|
||||
### `SessionOwnership` in the session store
|
||||
|
||||
The session metadata needs to record who owns each session so reads/writes can be authorized:
|
||||
|
||||
```rust
|
||||
pub struct SessionMeta {
|
||||
pub id: SessionId,
|
||||
pub alias: Option<SessionAlias>,
|
||||
pub owner: Option<String>, // subject that created it; None = legacy
|
||||
pub last_modified: SystemTime,
|
||||
pub is_autoname: bool,
|
||||
}
|
||||
```
|
||||
|
||||
On disk, the ownership field goes into the session's YAML file under a reserved `_meta` block:
|
||||
|
||||
```yaml
|
||||
_meta:
|
||||
owner: "alice"
|
||||
created_at: "2026-04-10T15:32:11Z"
|
||||
created_by_key_id: "key_3f2a..."
|
||||
# ... rest of session fields unchanged
|
||||
```
|
||||
|
||||
The `SessionStore` trait gets enriched `open` and `list` signatures (both take the caller's `AuthContext`) and one new method, `set_owner`:
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait SessionStore: Send + Sync {
|
||||
// existing methods unchanged except:
|
||||
async fn open(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
id: SessionId,
|
||||
caller: Option<&AuthContext>, // NEW: for authz check
|
||||
) -> Result<SessionHandle, StoreError>;
|
||||
|
||||
async fn list(
|
||||
&self,
|
||||
agent: Option<&str>,
|
||||
caller: Option<&AuthContext>, // NEW: for filtering
|
||||
) -> Result<Vec<SessionMeta>, StoreError>;
|
||||
|
||||
// NEW: transfer ownership (e.g., admin reassignment)
|
||||
async fn set_owner(
|
||||
&self,
|
||||
id: SessionId,
|
||||
new_owner: Option<String>,
|
||||
) -> Result<(), StoreError>;
|
||||
}
|
||||
```
|
||||
|
||||
`caller: None` means internal or legacy access (CLI/REPL) — skip authz entirely. `caller: Some(...)` means an API call — enforce ownership.
|
||||
|
||||
**Authz rules:**
|
||||
- Own session: full access.
|
||||
- Other subject's session: denied unless caller has `admin:sessions` scope.
|
||||
- Legacy sessions with `owner: None`: accessible to anyone (grandfathered); the first mutation by an authenticated caller sets the owner to that caller, so legacy sessions get claimed going forward.
|
||||
- `list`: returns only sessions owned by the caller (or all if they have `admin:sessions`).
|
||||
|
||||
### `RateLimiter` and `SubjectConcurrencyLimiter`
|
||||
|
||||
```rust
|
||||
pub struct RateLimiter {
|
||||
windows: DashMap<String, SlidingWindow>,
|
||||
config: RateLimitConfig,
|
||||
}
|
||||
|
||||
struct SlidingWindow {
|
||||
bucket_a: AtomicU64,
|
||||
bucket_b: AtomicU64,
|
||||
last_reset: AtomicU64,
|
||||
}
|
||||
|
||||
pub struct RateLimitConfig {
|
||||
pub per_minute: u32,
|
||||
pub burst: u32,
|
||||
}
|
||||
|
||||
impl RateLimiter {
|
||||
pub fn check(&self, subject: &str) -> Result<(), RateLimitError>;
|
||||
}
|
||||
|
||||
pub struct RateLimitError {
|
||||
pub retry_after: Duration,
|
||||
pub limit: u32,
|
||||
pub remaining: u32,
|
||||
}
|
||||
|
||||
pub struct SubjectConcurrencyLimiter {
|
||||
semaphores: DashMap<String, Arc<Semaphore>>,
|
||||
per_subject: usize,
|
||||
}
|
||||
|
||||
impl SubjectConcurrencyLimiter {
|
||||
pub async fn acquire(&self, subject: &str) -> OwnedSemaphorePermit;
|
||||
}
|
||||
```
|
||||
|
||||
Both live in `ApiState` and are applied via middleware. Rate limiting runs first (cheap atomic operations), then concurrency acquisition (may block briefly).
|
||||
|
||||
### `MetricsRegistry`
|
||||
|
||||
```rust
|
||||
pub struct MetricsRegistry {
|
||||
pub http_requests_total: IntCounterVec,
|
||||
pub http_request_duration: HistogramVec,
|
||||
pub http_requests_in_flight: IntGaugeVec,
|
||||
pub sessions_active: IntGauge,
|
||||
pub sessions_created_total: IntCounter,
|
||||
pub mcp_pool_size: IntGaugeVec,
|
||||
pub mcp_acquire_latency: HistogramVec,
|
||||
pub mcp_spawns_total: IntCounter,
|
||||
pub mcp_idle_evictions_total: IntCounter,
|
||||
pub auth_failures_total: IntCounterVec,
|
||||
pub rate_limit_rejections_total: IntCounterVec,
|
||||
}
|
||||
```
|
||||
|
||||
Built on top of the `prometheus` crate. Exposed via `GET /metrics` with the Prometheus text exposition format. The registry bridges Phase 5's atomic counters into the Prometheus types without requiring Phase 5's code to change — Phase 5 keeps its simple counters, and Phase 6 reads them on each scrape to populate the corresponding Prometheus metrics.
|
||||
|
||||
### `AuditLogger`
|
||||
|
||||
```rust
|
||||
pub struct AuditLogger {
|
||||
sink: AuditSink,
|
||||
}
|
||||
|
||||
pub enum AuditSink {
|
||||
Stderr, // default
|
||||
File { path: PathBuf, rotation: Rotation },
|
||||
Syslog { facility: String },
|
||||
}
|
||||
|
||||
pub struct AuditEvent<'a> {
|
||||
pub timestamp: OffsetDateTime,
|
||||
pub request_id: Uuid,
|
||||
pub subject: Option<&'a str>,
|
||||
pub action: AuditAction,
|
||||
pub target: Option<&'a str>,
|
||||
pub result: AuditResult,
|
||||
pub details: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub enum AuditAction {
|
||||
SessionCreate,
|
||||
SessionRead,
|
||||
SessionUpdate,
|
||||
SessionDelete,
|
||||
AgentActivate,
|
||||
ToolExecute,
|
||||
McpReload,
|
||||
ConfigReload,
|
||||
AuthFailure,
|
||||
RateLimitRejection,
|
||||
}
|
||||
|
||||
pub enum AuditResult {
|
||||
Success,
|
||||
Denied { reason: String },
|
||||
Error { message: String },
|
||||
}
|
||||
|
||||
impl AuditLogger {
|
||||
pub fn log(&self, event: AuditEvent<'_>);
|
||||
}
|
||||
```
|
||||
|
||||
Audit events are emitted from handler middleware after request completion. The audit stream is deliberately separate from the regular tracing logs because audit logs have stricter retention/integrity requirements in regulated environments — you want to be able to pipe them to WORM storage or a SIEM without mixing in debug logs.
|
||||
|
||||
---
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
### Step 1: Per-subject session ownership
|
||||
|
||||
The highest-impact security fix. No new deps, no new config — just enriching existing types.
|
||||
|
||||
1. Add `owner: Option<String>` and `created_by_key_id: Option<String>` to the session YAML `_meta` block. Serde skip if absent (backward compat for legacy files).
|
||||
2. Update `SessionStore::create` to record the caller's subject.
|
||||
3. Update `SessionStore::open` to take `caller: Option<&AuthContext>` and enforce ownership.
|
||||
4. Update `SessionStore::list` to filter by caller subject (unless caller has `admin:sessions` scope).
|
||||
5. Add `SessionStore::set_owner` for admin reassignment.
|
||||
6. Implement the "claim on first mutation" behavior for legacy sessions.
|
||||
7. Update all API handlers to pass the `AuthContext` through to store calls.
|
||||
8. Add integration tests: Alice creates a session, Bob tries to read it (403), admin Claire can read it (200), Alice's `list` returns only her own, Claire's `list` with `admin:sessions` scope returns everything.
|
||||
|
||||
**Verification:** all new authz tests pass. CLI/REPL tests still pass because they pass `caller: None`.
|
||||
|
||||
### Step 2: Scope-based authorization for endpoints
|
||||
|
||||
Phase 4's middleware attaches `AuthContext` with a `scopes: Vec<String>` field but handlers don't check it. Phase 6 adds the enforcement.
|
||||
|
||||
1. Change `AuthContext.scopes` from `Vec<String>` to a `Scopes(HashSet<String>)` newtype with `has`/`has_any`/`has_all` methods.
|
||||
2. Define the `Scope` enum with well-known constants.
|
||||
3. Add a `require_scope` helper and a `#[require_scope("read:sessions")]` proc macro (or a handler-side check if proc macros add too much complexity).
|
||||
4. Annotate every handler with the required scope(s):
|
||||
- `GET /v1/sessions` → `read:sessions`
|
||||
- `POST /v1/sessions` → `write:sessions`
|
||||
- `GET /v1/sessions/:id` → `read:sessions`
|
||||
- `DELETE /v1/sessions/:id` → `write:sessions`
|
||||
- `POST /v1/sessions/:id/completions` → `write:sessions` + `run:agents` (if the session has an agent)
|
||||
- `POST /v1/rags/:name/rebuild` → `admin:mcp`
|
||||
- `GET /v1/agents`, `/v1/roles`, `/v1/rags`, `/v1/models` → `read:agents`, `read:roles`, etc.
|
||||
- `/metrics` → `admin:metrics` (or unauthenticated if the endpoint is bound to a private network)
|
||||
5. Document the scope model in `docs/SECURITY.md`.
|
||||
|
||||
**Verification:** per-endpoint authz tests. A key with only `read:sessions` can list and read but not write.
|
||||
|
||||
### Step 3: JWT support in `AuthConfig`
|
||||
|
||||
Extend the auth mode enum:
|
||||
|
||||
```rust
|
||||
pub enum AuthConfig {
|
||||
Disabled,
|
||||
StaticKeys { keys: Vec<AuthKeyEntry> },
|
||||
Jwt(JwtConfig),
|
||||
}
|
||||
|
||||
pub struct JwtConfig {
|
||||
pub issuer: String,
|
||||
pub audience: String,
|
||||
pub jwks_url: String,
|
||||
pub jwks_refresh_interval: Duration,
|
||||
pub subject_claim: String, // e.g., "sub"
|
||||
pub scopes_claim: String, // e.g., "scope" or "permissions"
|
||||
pub leeway_seconds: u64,
|
||||
}
|
||||
```
|
||||
|
||||
1. Add `jsonwebtoken` to dependencies (`reqwest` is already present).
|
||||
2. Implement a `JwksCache` that fetches `jwks_url` on startup and refreshes every `jwks_refresh_interval`. Uses `reqwest` with a short timeout. Refreshes in the background via `tokio::spawn`.
|
||||
3. The auth middleware branches on `AuthConfig`: `StaticKeys` continues to work, `Jwt` calls `jsonwebtoken::decode` with the cached JWKS.
|
||||
4. Extract subject from the configured claim name. Extract scopes from either a space-separated string (`scope` claim) or an array claim (`permissions`).
|
||||
5. Handle key rotation gracefully: if decoding fails with "unknown key ID," trigger an immediate JWKS refresh (debounced to once per minute) and retry once.
|
||||
6. Integration tests with a fake JWKS endpoint (use `mockito` or `wiremock`).
|
||||
|
||||
**Verification:** valid JWT authenticates; expired JWT rejected; invalid signature rejected; JWKS refresh handles key rotation.
|
||||
|
||||
### Step 4: Real rate limiting
|
||||
|
||||
Replace the Phase 4 stub with a working sliding-window implementation.
|
||||
|
||||
1. Add `dashmap` dependency for the per-subject map (lock-free reads/writes).
|
||||
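2. Implement `SlidingWindow` with two adjacent one-minute buckets; the effective count is the current bucket's count plus a weighted share of the previous bucket's count, where the weight shrinks as the current window elapses: `effective = current + previous × (1 − elapsed_fraction)`, with `elapsed_fraction` (0.0–1.0) being how far into the current window we are.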
|
||||
3. Add `RateLimiter::check(subject) -> Result<(), RateLimitError>`.
|
||||
4. Write middleware that calls `check` before dispatching to handlers. On `Err`, return 429 with `Retry-After` header.
|
||||
5. Add `rate_limit_per_minute` and `rate_limit_burst` config fields. Reasonable defaults: 60/min, burst 10.
|
||||
6. Expose per-subject current rate as a gauge in the Prometheus registry.
|
||||
7. Integration test: fire N+1 requests as the same subject within a minute, assert the N+1th gets 429.
|
||||
|
||||
**Verification:** rate limiting works correctly across subjects; non-limited subjects aren't affected; burst allowance works.
|
||||
|
||||
### Step 5: Per-subject concurrency limiter
|
||||
|
||||
Complements rate limiting — rate limits the *count* of requests over time, concurrency limits the *simultaneous* count.
|
||||
|
||||
1. Implement `SubjectConcurrencyLimiter` with a `DashMap<String, Arc<Semaphore>>`.
|
||||
2. Lazy-init semaphores per subject with `per_subject_concurrency` slots (default 8).
|
||||
3. Middleware acquires a permit per request. If the subject's semaphore is full, queue briefly (`acquire_owned` wrapped in a short `tokio::time::timeout` — `try_acquire_owned` returns immediately and never waits), then return 503 if still full.
|
||||
4. Garbage-collect unused semaphores periodically (an entry with no waiters and all of its permits available is not in use and can be removed).
|
||||
5. Integration test: fire 10 concurrent requests as one subject with `per_subject_concurrency: 5`, assert that at most 5 run at once and the remaining 5 either wait for a permit or receive 503.
|
||||
|
||||
**Verification:** no subject can exceed its concurrency budget; other subjects unaffected.
|
||||
|
||||
### Step 6: Prometheus metrics endpoint
|
||||
|
||||
1. Add `prometheus` crate.
|
||||
2. Implement `MetricsRegistry` with the metrics listed in the types section.
|
||||
3. Wire metric updates into existing code:
|
||||
- HTTP middleware: `http_requests_total.inc()` on response, `http_request_duration.observe(elapsed)`, `http_requests_in_flight.inc()/dec()`
|
||||
- Session creation: `sessions_created_total.inc()`, `sessions_active.set(store.count())`
|
||||
- MCP factory: read the Phase 5 atomic counters on scrape and populate the Prometheus types
|
||||
4. Add `GET /metrics` handler that writes the Prometheus text exposition format.
|
||||
5. Auth policy for `/metrics`: configurable — either it requires the `admin:metrics` scope, or it is served without auth on a separate listener bound to a loopback or private-network address via `metrics_listen_addr: "127.0.0.1:9090"` (recommended).
|
||||
6. Integration test: scrape `/metrics`, parse the response, assert expected metrics are present with sensible values.
|
||||
|
||||
**Verification:** Prometheus scraping works; metrics increment correctly.
|
||||
|
||||
### Step 7: Structured JSON logging
|
||||
|
||||
Replace the default `tracing_subscriber` format with JSON output.
|
||||
|
||||
1. Add a `log_format: Text | Json` config field, default `Text` for CLI/REPL, `Json` for `--serve` mode.
|
||||
2. Configure `tracing_subscriber::fmt::layer().json()` conditionally.
|
||||
3. Ensure every span has a `request_id` field (already present from Phase 4 middleware).
|
||||
4. Add `subject` and `session_id` as span fields when present, so they get included in every child log line automatically.
|
||||
5. Add a `log_level` config field that SIGHUP reloads at runtime (see Step 12).
|
||||
6. Integration test: capture stdout during a request, parse as JSON, assert the fields are present and correctly scoped.
|
||||
|
||||
**Verification:** `loki --serve` produces one-line-per-event JSON output suitable for log aggregators.
|
||||
|
||||
### Step 8: Audit logging
|
||||
|
||||
Dedicated sink for security-relevant events.
|
||||
|
||||
1. Implement `AuditLogger` with `Stderr`, `File`, and `Syslog` sinks. Start with just `Stderr` and `File` — `Syslog` via `syslog` crate can follow.
|
||||
2. Emit audit events from:
|
||||
- Auth middleware: `AuditAction::AuthFailure` on any auth rejection
|
||||
- Rate limiter: `AuditAction::RateLimitRejection` on 429
|
||||
- Session handlers: `AuditAction::SessionCreate/Read/Update/Delete`
|
||||
- Agent handlers: `AuditAction::AgentActivate`
|
||||
- MCP reload endpoint: `AuditAction::McpReload`
|
||||
3. Audit events are JSON lines with a schema documented in `docs/SECURITY.md`.
|
||||
4. Audit events don't interfere with the main tracing stream — they go to the configured audit sink independently.
|
||||
5. File rotation via `tracing-appender` or manual rotation with size + date cap.
|
||||
|
||||
**Verification:** every security-relevant action produces an audit event; failures include a `reason`.
|
||||
|
||||
### Step 9: Security headers and misc middleware
|
||||
|
||||
1. Add a `security_headers` middleware layer that attaches:
|
||||
- `X-Content-Type-Options: nosniff`
|
||||
- `X-Frame-Options: DENY`
|
||||
- `Referrer-Policy: strict-origin-when-cross-origin`
|
||||
- `Strict-Transport-Security: max-age=31536000; includeSubDomains` (only when `api.force_https: true`)
|
||||
- Do NOT set CSP — this is an API, not a browser app; CSP would confuse clients.
|
||||
2. Remove `Server: ...` and other fingerprinting headers.
|
||||
3. Handle `OPTIONS` preflight correctly (Phase 4's CORS layer does this; verify).
|
||||
|
||||
**Verification:** inspect the response headers with `curl -I`; an automated test asserts each required header is present.
|
||||
|
||||
### Step 10: Config validation at startup
|
||||
|
||||
A single `ApiConfig::validate()` method that checks every field and aggregates ALL errors before failing.
|
||||
|
||||
1. Implement validation for:
|
||||
- `listen_addr` is parseable and bindable
|
||||
- `auth.mode` has a valid configuration (e.g., `StaticKeys` with non-empty key list, `Jwt` with reachable JWKS URL)
|
||||
- `auth.keys[].key_hash` starts with `$argon2id$` (catches plaintext keys)
|
||||
- `rate_limit_per_minute > 0` and `burst > 0`
|
||||
- `max_body_bytes > 0` and `< 100 MiB` (sanity)
|
||||
- `request_timeout_seconds > 0` and `< 3600`
|
||||
- `shutdown_grace_seconds >= 0`
|
||||
- `cors.allowed_origins` entries are valid URLs or `"*"`
|
||||
2. Return a `ConfigValidationError` that lists every problem, not just the first.
|
||||
3. Call `validate()` in `serve()` before binding the listener.
|
||||
4. Test: a deliberately-broken config produces an error listing all problems.
|
||||
|
||||
**Verification:** startup validation catches common mistakes; error message is actionable.
|
||||
|
||||
### Step 11: Health check endpoints
|
||||
|
||||
1. `GET /healthz/live` — always returns 200 OK unless the process is in graceful shutdown. Body: `{"status":"ok"}`. No auth required.
|
||||
2. `GET /healthz/ready` — returns 200 OK when fully initialized and not saturated, otherwise 503 Service Unavailable. Readiness criteria:
|
||||
- `AppState` fully initialized
|
||||
- Session store writable (attempt a probe write to a reserved path)
|
||||
- MCP pool initialized (at least the factory is alive)
|
||||
- Concurrency semaphore has at least 10% available (not saturated)
|
||||
3. Both endpoints are unauthenticated and unmetered — load balancers hit them constantly.
|
||||
4. Document in `docs/DEPLOYMENT.md` how Kubernetes, systemd, and other supervisors should use these.
|
||||
|
||||
**Verification:** endpoints return correct status under various load conditions.
|
||||
|
||||
### Step 12: SIGHUP config reload
|
||||
|
||||
Reload a subset of config without restarting.
|
||||
|
||||
1. Reloadable fields:
|
||||
- Auth keys (StaticKeys mode)
|
||||
- JWT config (including JWKS URL)
|
||||
- Log level
|
||||
- Rate limit config
|
||||
- Per-subject concurrency limits
|
||||
- Audit logger sink
|
||||
2. NOT reloadable (requires full restart):
|
||||
- Listen address
|
||||
- MCP pool config (pool holds live subprocesses)
|
||||
- Session storage paths
|
||||
- TLS certs (use a reverse proxy)
|
||||
3. Implementation: SIGHUP handler that re-reads `config.yaml`, validates it, and atomically swaps the affected fields in `ApiState`. Uses `arc-swap` crate for lock-free swaps.
|
||||
4. Audit every reload: `AuditAction::ConfigReload` with before/after diff summary.
|
||||
5. Document: rotation procedures for auth keys, logging level adjustments, etc.
|
||||
|
||||
**Verification:** start server, modify `config.yaml`, send SIGHUP, assert new config is in effect without dropped requests.
|
||||
|
||||
### Step 13: Deployment manifests
|
||||
|
||||
#### 13a. Dockerfile
|
||||
|
||||
Multi-stage build for a minimal runtime image:
|
||||
|
||||
```dockerfile
|
||||
# Build stage
|
||||
FROM rust:1.82-slim AS builder
|
||||
WORKDIR /build
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY src ./src
|
||||
COPY assets ./assets
|
||||
RUN cargo build --release --bin loki
|
||||
|
||||
# Runtime stage
|
||||
FROM debian:bookworm-slim
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
tini \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
RUN useradd --system --home /loki --shell /bin/false loki
|
||||
COPY --from=builder /build/target/release/loki /usr/local/bin/loki
|
||||
COPY --from=builder /build/assets /opt/loki/assets
|
||||
USER loki
|
||||
WORKDIR /loki
|
||||
ENV LOKI_CONFIG_DIR=/loki/config
|
||||
EXPOSE 3400
|
||||
ENTRYPOINT ["/usr/bin/tini", "--"]
|
||||
CMD ["/usr/local/bin/loki", "--serve"]
|
||||
```
|
||||
|
||||
Build args can be added to target specific architectures. The result is a ~100 MB image.
|
||||
|
||||
#### 13b. systemd unit
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Loki AI Server
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=/usr/local/bin/loki --serve
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
User=loki
|
||||
Group=loki
|
||||
|
||||
# Sandboxing
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
PrivateDevices=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=true
|
||||
ReadWritePaths=/var/lib/loki
|
||||
ProtectKernelTunables=true
|
||||
ProtectKernelModules=true
|
||||
ProtectControlGroups=true
|
||||
RestrictSUIDSGID=true
|
||||
RestrictRealtime=true
|
||||
LockPersonality=true
|
||||
|
||||
# Resource limits
|
||||
LimitNOFILE=65536
|
||||
LimitNPROC=512
|
||||
MemoryMax=4G
|
||||
|
||||
# Reload
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
`Type=notify` requires Loki to call `sd_notify(READY=1)` after successful startup — add this with the `sd-notify` crate.
|
||||
|
||||
#### 13c. docker-compose example
|
||||
|
||||
For local development with TLS via nginx:
|
||||
|
||||
```yaml
|
||||
version: "3.9"
|
||||
services:
|
||||
loki:
|
||||
build: .
|
||||
environment:
|
||||
LOKI_CONFIG_DIR: /loki/config
|
||||
volumes:
|
||||
- ./config:/loki/config:ro
|
||||
- loki_data:/loki/data
|
||||
ports:
|
||||
- "127.0.0.1:3400:3400"
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3400/healthz/live"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
volumes:
|
||||
- ./deploy/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
- ./deploy/certs:/etc/nginx/certs:ro
|
||||
ports:
|
||||
- "443:443"
|
||||
depends_on:
|
||||
- loki
|
||||
|
||||
volumes:
|
||||
loki_data:
|
||||
```
|
||||
|
||||
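Include a sample `nginx.conf` that terminates TLS and forwards to `loki:3400`. Note that the `healthcheck` above runs `curl` inside the `loki` container, so the runtime image must install `curl` (the Dockerfile in 13a does not) or the check must use a different probe.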
|
||||
|
||||
#### 13d. Kubernetes manifests
|
||||
|
||||
Provide `deploy/k8s/` with:
|
||||
- `namespace.yaml`
|
||||
- `deployment.yaml` (3 replicas, resource requests/limits, liveness/readiness probes)
|
||||
- `service.yaml` (ClusterIP)
|
||||
- `configmap.yaml` (non-secret config)
|
||||
- `secret.yaml` (API keys, JWT config)
|
||||
- `hpa.yaml` (HorizontalPodAutoscaler based on CPU + custom metric for requests/sec)
|
||||
- `ingress.yaml` (optional example using nginx-ingress)
|
||||
|
||||
Document storage strategy: sessions use a PVC mounted at `/loki/data`; RAG embeddings use a read-only ConfigMap or a separate PVC.
|
||||
|
||||
**Verification:** each deployment target produces a running Loki that passes health checks.
|
||||
|
||||
### Step 14: Operational runbook
|
||||
|
||||
Write `docs/RUNBOOK.md` with sections for:
|
||||
|
||||
- **Starting and stopping** the server
|
||||
- **Rotating auth keys** (StaticKeys mode) — edit config, SIGHUP, verify in audit log
|
||||
- **Rotating auth keys** (Jwt mode) — update JWKS at issuer, Loki auto-refreshes
|
||||
- **Rotating MCP credentials** — update env vars, `POST /v1/mcp/reload` (new endpoint in this phase) or restart
|
||||
- **Diagnosing high latency** — check MCP hit rate, check LLM provider latency, check concurrency saturation
|
||||
- **Diagnosing auth failures** — audit log `AuthFailure` events, check key hash, check JWKS reachability
|
||||
- **Diagnosing rate limit rejections** — check per-subject counter, adjust limit or identify runaway client
|
||||
- **Diagnosing orphaned MCP subprocesses** — `ps aux | grep loki`, check logs for `McpFactory shutdown complete`
|
||||
- **Diagnosing session corruption** — check `.yaml.tmp` files (should not exist when server is idle), inspect session YAML for validity
|
||||
- **Backup and restore** — tar the `sessions/` and `agents/` directories
|
||||
- **Scaling horizontally** — each replica has its own MCP pool and session store; share sessions via a shared filesystem (NFS/EFS), or defer cross-replica sharing to a database-backed SessionStore (not in this phase)
|
||||
- **Incident response** — what logs to collect, what metrics to snapshot, how to reach a minimal reproducing state
|
||||
|
||||
**Verification:** walk through each procedure on a test deployment; fix any unclear steps.
|
||||
|
||||
### Step 15: Deployment and security guides
|
||||
|
||||
`docs/DEPLOYMENT.md` — step-by-step for Docker, systemd, docker-compose, Kubernetes. Pre-flight checklist, first-time setup, upgrade procedure.
|
||||
|
||||
`docs/SECURITY.md` — threat model, hardening checklist, scope model, audit event schema, key rotation, reverse proxy configuration, network security recommendations, CVE reporting contact.
|
||||
|
||||
Cross-reference from `README.md` and add a "Production Deployment" section to the README that points to both docs.
|
||||
|
||||
**Verification:** a developer unfamiliar with Loki can deploy it successfully using only the docs.
|
||||
|
||||
---
|
||||
|
||||
## Risks and Watch Items
|
||||
|
||||
| Risk | Severity | Mitigation |
|
||||
|---|---|---|
|
||||
| **Session ownership migration breaks legacy users** | Medium | Legacy sessions with `owner: None` stay readable by anyone; they get claimed forward on first mutation. Document this in `RUNBOOK.md`. Add a one-shot migration CLI command (`loki migrate sessions --claim-to <subject>`) that assigns ownership of all unowned sessions to a specific subject. |
|
||||
| **JWT JWKS fetch failures block startup** | Medium | The JWKS URL should be reachable at startup; if it is not, log an error and fall back to "reject all" mode until a fetch succeeds, while a retry loop with exponential backoff runs in the background. Do NOT crash on JWKS failure. |
|
||||
| **Rate limiter DashMap growth** | Low | Per-subject windows accumulate forever without cleanup. Add a background reaper that removes entries with zero recent activity every few minutes. Cap total entries at 100k as a safety valve. |
|
||||
| **Prometheus metric cardinality explosion** | Low | `http_requests_total` with per-path labels could explode if routes have dynamic segments (`/v1/sessions/:id`). Use route templates as labels, not concrete paths. Validate label sets at registration. |
|
||||
| **Audit log retention compliance** | Low | Audit logs might need to be retained for regulatory reasons. Phase 6 provides the emission; retention is the operator's responsibility. Document this in `SECURITY.md`. |
|
||||
| **SIGHUP reload partial failure** | Medium | If the new config is invalid, don't swap it in — keep the old config running. Log the validation error. The operator can fix the file and SIGHUP again. Never leave the server in an inconsistent state. |
|
||||
| **Docker image size** | Low | `debian:bookworm-slim` is ~80 MB; final image ~100 MB. If smaller is needed, use `distroless/cc-debian12` for a ~35 MB image at the cost of not having `tini` or debugging tools. Document both options. |
|
||||
| **systemd Type=notify missing implementation** | Medium | Adding `sd_notify` requires the `sd-notify` crate AND calling it after listener bind. If the call is missing, the unit stays in "activating" until the start timeout expires, after which systemd treats the service as failed. Add an integration test that fakes systemd and asserts the notification is sent. |
|
||||
| **Kubernetes pod disruption** | Low | HPA scales down during low traffic, but in-flight requests on the terminating pod must complete gracefully. Set `terminationGracePeriodSeconds` to at least `shutdown_grace_seconds + 10`. Document in `DEPLOYMENT.md`. |
|
||||
| **Running under a reverse proxy** | Low | CORS, `Host` header handling, `X-Forwarded-For` for rate limiter subject identification. Document the expected proxy config (trust `X-Forwarded-*` headers only from trusted proxies). |
|
||||
|
||||
---
|
||||
|
||||
## What Phase 6 Does NOT Do
|
||||
|
||||
- **No multi-region replication.** Loki is a single-instance service; scale out by running multiple instances behind a load balancer, each with its own pool. Cross-instance state sharing is not in scope.
|
||||
- **No database-backed session store.** `FileSessionStore` is still the only implementation. A `PostgresSessionStore` is a clean extension point (`SessionStore` trait is already there) but belongs to a follow-up.
|
||||
- **No cluster coordination.** Each Loki instance is independent. Running Loki in a "cluster" mode where instances share work is a separate project.
|
||||
- **No advanced ML observability.** LLM call costs, token usage trends, provider error rates — these are tracked as counters but not aggregated into dashboards. Follow-up work.
|
||||
- **No built-in TLS termination.** Use a reverse proxy (nginx, Caddy, Traefik, a cloud load balancer). Supporting TLS in-process adds complexity and key management concerns that reverse proxies solve better.
|
||||
- **No SAML or LDAP.** Only StaticKeys and JWT. SAML/LDAP integration can extend `AuthConfig` later.
|
||||
- **No plugin system.** Extensions to auth, storage, or middleware require forking and rebuilding. A dynamic plugin loader is explicitly out of scope.
|
||||
- **No multi-tenancy beyond session ownership.** Tenants share the same process, same MCP pool, same RAG cache, same resources. Strict tenant isolation (separate processes per tenant) requires orchestration outside Loki.
|
||||
- **No cost accounting per tenant.** LLM API calls are tracked per-subject in audit logs but not aggregated into billing-grade cost reports.
|
||||
|
||||
---
|
||||
|
||||
## Entry Criteria (from Phase 5)
|
||||
|
||||
- [ ] `McpFactory` pooling works and has metrics
|
||||
- [ ] Graceful shutdown drains the MCP pool
|
||||
- [ ] Phase 5 load test passes (hit rate >0.8, no orphaned subprocesses)
|
||||
- [ ] Phase 4 API integration test suite passes
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
|
||||
## Exit Criteria (Phase 6 complete — v1 ready)
|
||||
|
||||
- [ ] Per-subject session ownership enforced; integration tests prove Alice can't read Bob's sessions
|
||||
- [ ] Scope-based authorization enforced on every endpoint
|
||||
- [ ] JWT authentication works with a real JWKS endpoint
|
||||
- [ ] Real rate limiting replaces the Phase 4 stub; 429 responses include `Retry-After`
|
||||
- [ ] Per-subject concurrency limiter prevents noisy-neighbor saturation
|
||||
- [ ] Prometheus `/metrics` endpoint scrapes cleanly
|
||||
- [ ] Structured JSON logs emitted in `--serve` mode
|
||||
- [ ] Audit events written for all security-relevant actions
|
||||
- [ ] Security headers set on all responses
|
||||
- [ ] Config validation fails fast at startup with readable errors
|
||||
- [ ] `/healthz/live` and `/healthz/ready` endpoints work
|
||||
- [ ] SIGHUP reloads auth keys, log level, and rate limits without restart
|
||||
- [ ] Dockerfile produces a minimal runtime image
|
||||
- [ ] systemd unit with `Type=notify` works correctly
|
||||
- [ ] docker-compose example runs end-to-end with TLS via nginx
|
||||
- [ ] Kubernetes manifests deploy successfully
|
||||
- [ ] `docs/RUNBOOK.md` covers all common operational scenarios
|
||||
- [ ] `docs/DEPLOYMENT.md` guides a first-time deployer to success
|
||||
- [ ] `docs/SECURITY.md` documents threat model, scopes, and hardening
|
||||
- [ ] `cargo check`, `cargo test`, `cargo clippy` all clean
|
||||
- [ ] End-to-end production smoke test: deploy to Kubernetes, send real traffic, scrape metrics, rotate a key, induce a failure, observe recovery
|
||||
|
||||
---
|
||||
|
||||
## v1 Release Summary
|
||||
|
||||
After Phase 6 lands, Loki v1 has transformed from a single-user CLI tool into a production-ready multi-tenant AI service. Here's what the v1 release notes should say:
|
||||
|
||||
**New in Loki v1:**
|
||||
|
||||
- **REST API** — full HTTP surface for completions, sessions, agents, roles, RAGs, and metadata. Streaming via Server-Sent Events, synchronous via JSON.
|
||||
- **Multi-tenant sessions** — UUID-primary identity with optional human-readable aliases. Per-subject ownership with scope-based access control.
|
||||
- **Concurrent safety** — per-session mutex serialization, per-MCP-server Arc sharing, per-agent runtime isolation. Run dozens of concurrent requests without corruption.
|
||||
- **MCP pooling** — recently-used MCP subprocesses stay warm across requests. Near-zero warm-path latency. Configurable idle timeout and LRU cap.
|
||||
- **Authentication** — static API keys or JWT with JWKS. Argon2-hashed credentials. Scope-based authorization per endpoint.
|
||||
- **Observability** — Prometheus metrics, structured JSON logging with correlation IDs, dedicated audit log stream.
|
||||
- **Rate limiting** — sliding-window per subject with configurable limits and burst allowance.
|
||||
- **Graceful shutdown** — in-flight requests complete within a grace period; MCP subprocesses terminate cleanly; session state is persisted.
|
||||
- **Deployment manifests** — Dockerfile, systemd unit, docker-compose example, Kubernetes manifests.
|
||||
- **Full documentation** — runbook, deployment guide, security guide, API reference.
|
||||
|
||||
**Backward compatibility:**
|
||||
|
||||
CLI and REPL continue to work identically to pre-v1 builds. Existing `config.yaml`, `roles/`, `sessions/`, `agents/`, `rags/`, and `functions/` directories are read-compatible. The legacy session layout is migrated lazily on first access without destroying the old files.
|
||||
|
||||
**What's next (v2+):**
|
||||
|
||||
- Database-backed session store for cross-instance sharing
|
||||
- Native TLS termination option
|
||||
- SAML / LDAP authentication extensions
|
||||
- Per-tenant cost accounting and quotas
|
||||
- Dynamic plugin system for custom auth, storage, and middleware
|
||||
- Multi-region replication
|
||||
- WebSocket transport alongside SSE
|
||||
@@ -1,232 +0,0 @@
|
||||
# Loki QA Checklist
|
||||
|
||||
Behavioral verification checklist for the REST API refactor.
|
||||
Run after each step or phase to confirm existing functionality
|
||||
is preserved.
|
||||
|
||||
## How to use
|
||||
|
||||
- [ ] = not yet verified for current step
|
||||
- [x] = verified working
|
||||
- SKIP = not applicable to current step
|
||||
|
||||
Check each item manually in the REPL and/or CLI. If a check
|
||||
fails, stop and investigate before proceeding.
|
||||
|
||||
---
|
||||
|
||||
## 1. Build & Test Baseline
|
||||
|
||||
- [ ] `cargo check` — zero warnings, zero errors
|
||||
- [ ] `cargo clippy` — zero warnings
|
||||
- [ ] `cargo test` — all tests pass (63 as of Step 8g)
|
||||
|
||||
## 2. CLI — Basic Operations
|
||||
|
||||
- [ ] `loki "hello"` — single-shot chat works, response printed
|
||||
- [ ] `loki --role <name> "hello"` — role applied, response uses role context
|
||||
- [ ] `loki --session <name> "hello"` — session created/resumed, response saved
|
||||
- [ ] `loki --model <model_id> "hello"` — specified model used
|
||||
- [ ] `loki --prompt "you are a pirate" "hello"` — temp role applied
|
||||
- [ ] `loki --info` — system info printed, exits cleanly
|
||||
- [ ] `loki --list-models` — model list printed
|
||||
- [ ] `loki --list-roles` — role list printed (no hidden files)
|
||||
- [ ] `loki --list-sessions` — session list printed
|
||||
- [ ] `loki --list-agents` — agent list printed (no `.shared` directory)
|
||||
- [ ] `loki --dry-run "hello"` — no API call, input echoed
|
||||
- [ ] `loki --no-stream "hello"` — non-streaming response
|
||||
|
||||
## 3. CLI — File Input
|
||||
|
||||
- [ ] `loki --file /tmp/test.txt "summarize"` — file content included
|
||||
- [ ] `loki --file /tmp/test.txt` — file content sent without extra text
|
||||
|
||||
## 4. CLI — Agent (non-interactive)
|
||||
|
||||
- [ ] `loki --agent <name> "do something"` — agent starts, tools available, response returned
|
||||
- [ ] Agent MCP servers start (if configured)
|
||||
- [ ] Agent tool calls execute correctly (e.g., execute_command)
|
||||
|
||||
## 5. CLI — Shell Execute
|
||||
|
||||
- [ ] `loki -e "list files in /tmp"` — shell command generated
|
||||
- [ ] Shell command explanation shown (describe mode)
|
||||
- [ ] Shell command execution works when confirmed
|
||||
|
||||
## 6. CLI — Macro
|
||||
|
||||
- [ ] `loki --macro <name> "input"` — macro executes
|
||||
|
||||
## 7. REPL — Startup & Exit
|
||||
|
||||
- [ ] `loki` — REPL starts, welcome message shown
|
||||
- [ ] `.exit` — REPL exits cleanly
|
||||
- [ ] Ctrl+D — REPL exits cleanly
|
||||
- [ ] Ctrl+C — prints exit hint, does not exit
|
||||
|
||||
## 8. REPL — Chat
|
||||
|
||||
- [ ] Type a message — response printed
|
||||
- [ ] `.continue` — continues previous response
|
||||
- [ ] `.regenerate` — regenerates last response
|
||||
- [ ] `.copy` — copies last response to clipboard
|
||||
|
||||
## 9. REPL — Roles
|
||||
|
||||
- [ ] `.role <name>` — switches to role, prompt changes
|
||||
- [ ] `.role <name> <text>` — one-shot role message
|
||||
- [ ] `.info role` — shows role info
|
||||
- [ ] `.edit role` — opens editor for current role
|
||||
- [ ] `.save role <name>` — saves current role
|
||||
- [ ] `.exit role` — exits role, prompt resets
|
||||
- [ ] Role with MCP servers — servers start on `.role <name>`
|
||||
- [ ] Role with MCP servers — MCP tools available in chat
|
||||
- [ ] `.exit role` with MCP — servers stop, MCP tools removed
|
||||
|
||||
## 10. REPL — Sessions
|
||||
|
||||
- [ ] `.session` — starts temp session
|
||||
- [ ] `.session <name>` — starts/resumes named session
|
||||
- [ ] `.info session` — shows session info
|
||||
- [ ] `.edit session` — opens editor
|
||||
- [ ] `.save session <name>` — saves session
|
||||
- [ ] `.empty session` — clears messages
|
||||
- [ ] `.compress session` — compresses session
|
||||
- [ ] `.exit session` — exits session
|
||||
- [ ] Session with MCP servers — servers start
|
||||
- [ ] Session carry-over prompt — "incorporate last Q&A?" appears when applicable
|
||||
|
||||
## 11. REPL — Agents
|
||||
|
||||
- [ ] `.agent <name>` — agent starts, tools compiled, prompt changes
|
||||
- [ ] `.agent <name> <session>` — agent starts with specific session
|
||||
- [ ] `.agent <name> key=value` — agent starts with variables
|
||||
- [ ] `.info agent` — shows agent info
|
||||
- [ ] `.starter` — shows conversation starters
|
||||
- [ ] `.starter <n>` — executes starter
|
||||
- [ ] `.edit agent-config` — opens agent config editor
|
||||
- [ ] `.exit agent` — exits agent cleanly
|
||||
- [ ] Agent with MCP servers — servers start
|
||||
- [ ] Agent tool calls work (execute_command, fs_read, etc.)
|
||||
- [ ] Agent global tools work (tools listed in `global_tools`)
|
||||
- [ ] Agent tool file changes picked up on restart (delete .ts, .sh used instead)
|
||||
- [ ] Auto-continuation works (todo list drives continuation)
|
||||
- [ ] `.clear todo` — clears todo list
|
||||
|
||||
## 12. REPL — Sub-Agent Escalation
|
||||
|
||||
- [ ] Parent agent spawns sub-agent via tool call
|
||||
- [ ] Sub-agent runs at depth > 0
|
||||
- [ ] Sub-agent escalation: sub-agent calls user__ask → parent gets notification
|
||||
- [ ] Parent calls agent__reply_escalation → sub-agent unblocked, resumes
|
||||
- [ ] Multiple pending escalations shown in notification
|
||||
- [ ] Max depth enforcement — sub-agent spawn rejected beyond max_agent_depth
|
||||
|
||||
## 13. REPL — RAG
|
||||
|
||||
- [ ] `.rag <name>` — initializes/loads RAG
|
||||
- [ ] `.info rag` — shows RAG info
|
||||
- [ ] `.sources rag` — shows citation sources
|
||||
- [ ] `.edit rag-docs` — modify RAG documents
|
||||
- [ ] `.rebuild rag` — rebuilds RAG index
|
||||
- [ ] `.exit rag` — exits RAG
|
||||
- [ ] RAG embeddings used in chat (search results included)
|
||||
|
||||
## 14. REPL — MCP Servers
|
||||
|
||||
- [ ] MCP servers start at REPL init (if globally enabled)
|
||||
- [ ] `.set enabled_mcp_servers <name>` — changes active servers
|
||||
- [ ] `.set mcp_server_support true/false` — toggles support
|
||||
- [ ] MCP tool invocation works (mcp__invoke_<server>)
|
||||
- [ ] MCP tool search works (mcp__search_<server>)
|
||||
- [ ] MCP tool describe works (mcp__describe_<server>)
|
||||
|
||||
## 15. REPL — Settings
|
||||
|
||||
- [ ] `.set temperature 0.5` — changes temperature
|
||||
- [ ] `.set top_p 0.9` — changes top_p
|
||||
- [ ] `.set model <name>` — changes model
|
||||
- [ ] `.set dry_run true` — enables dry run
|
||||
- [ ] `.set stream false` — disables streaming
|
||||
- [ ] `.set save true/false` — toggles save
|
||||
- [ ] `.set highlight true/false` — toggles highlighting
|
||||
- [ ] `.set save_session true/false/null` — changes session save behavior
|
||||
- [ ] `.set compression_threshold <n>` — changes threshold
|
||||
|
||||
## 16. REPL — Tab Completion
|
||||
|
||||
- [ ] `.role<TAB>` — shows role names (no hidden files)
|
||||
- [ ] `.agent<TAB>` — shows agent names (no `.shared` directory)
|
||||
- [ ] `.session<TAB>` — shows session names
|
||||
- [ ] `.rag<TAB>` — shows RAG names
|
||||
- [ ] `.macro<TAB>` — shows macro names
|
||||
- [ ] `.model<TAB>` — shows model names with descriptions
|
||||
- [ ] `.set <TAB>` — shows setting names
|
||||
- [ ] `.set temperature <TAB>` — shows current value
|
||||
- [ ] `.set enabled_tools <TAB>` — shows tool names
|
||||
- [ ] `.set enabled_mcp_servers <TAB>` — shows server names
|
||||
|
||||
## 17. REPL — Delete
|
||||
|
||||
- [ ] `.delete role <name>` — deletes role
|
||||
- [ ] `.delete session <name>` — deletes session
|
||||
- [ ] `.delete rag <name>` — deletes RAG
|
||||
- [ ] `.delete macro <name>` — deletes macro
|
||||
- [ ] `.delete agent-data <name>` — deletes agent data
|
||||
|
||||
## 18. REPL — Vault
|
||||
|
||||
- [ ] `.vault list` — lists secrets
|
||||
- [ ] `.vault add <name>` — adds secret
|
||||
- [ ] `.vault get <name>` — retrieves secret
|
||||
- [ ] `.vault update <name>` — updates secret
|
||||
- [ ] `.vault delete <name>` — deletes secret
|
||||
|
||||
## 19. REPL — Prelude
|
||||
|
||||
- [ ] `repl_prelude: "role:coder"` — auto-loads role on REPL start
|
||||
- [ ] `repl_prelude: "session:mysession"` — auto-loads session
|
||||
- [ ] `repl_prelude: "mysession:coder"` — auto-loads session with role
|
||||
|
||||
## 20. REPL — Miscellaneous
|
||||
|
||||
- [ ] `.help` — shows help text
|
||||
- [ ] `.info` — shows system info
|
||||
- [ ] `.authenticate` — OAuth flow (if configured)
|
||||
- [ ] `.file <path>` — includes file in next message
|
||||
- [ ] `.file <url>` — fetches URL content
|
||||
- [ ] Unknown command — shows error message
|
||||
- [ ] Multi-line input (:::) — works correctly
|
||||
- [ ] Ctrl+O — opens editor for input buffer
|
||||
|
||||
## 21. Session Compression & Autoname
|
||||
|
||||
- [ ] Session auto-compression triggers when threshold exceeded
|
||||
- [ ] Compression message shown ("Compressing the session.")
|
||||
- [ ] Session auto-naming triggers for new sessions
|
||||
- [ ] Auto-continuation after compression works (agent resumes)
|
||||
|
||||
## 22. Error Handling
|
||||
|
||||
- [ ] Invalid role name — error shown, REPL continues
|
||||
- [ ] Invalid model name — error shown, REPL continues
|
||||
- [ ] Network error during chat — error shown, REPL continues
|
||||
- [ ] MCP server crash — error shown, REPL continues
|
||||
- [ ] Tool execution failure — error returned to LLM as tool result
|
||||
|
||||
---
|
||||
|
||||
## Phase-specific notes
|
||||
|
||||
### Phase 1 (Steps 3-10): Config split into AppState + RequestContext
|
||||
|
||||
Known bridge-window limitations (acceptable until Steps 9-10):
|
||||
- `ReplCompleter`/`ReplPrompt` still hold `GlobalConfig`
|
||||
- `Input` still holds `GlobalConfig` internally
|
||||
- `eval_tool_calls` still takes `&GlobalConfig`
|
||||
- Dual sync (`sync_ctx_to_config`/`sync_config_to_ctx`) required
|
||||
|
||||
### Post-Phase 1 verification focus:
|
||||
- All items above should work identically to pre-refactor behavior
|
||||
- No new warnings or errors in build
|
||||
- Performance should be equivalent (no observable slowdown)
|
||||
@@ -1,307 +0,0 @@
|
||||
# RAG
|
||||
Retrieval Augmented Generation (RAG) is a method of minimizing LLM hallucinations and extending the model's context
|
||||
without consuming a significant portion of the context length. It uses documents and other additional resources that you
|
||||
provide to give the model more context for all of your prompts.
|
||||
|
||||
Loki has a built-in vector database and full-text search engine to support RAG knowledge bases for your queries.
|
||||
|
||||
The generated knowledge bases are stored in the `rags` subdirectory of your Loki configuration directory. The location of
|
||||
this directory varies by system, so you can use the following command to find your RAG directory:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'rags_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Usage](#usage)
|
||||
- [Persistent RAG](#persistent-rag)
|
||||
- [Ephemeral RAG](#ephemeral-rag)
|
||||
- [How It Works](#how-it-works)
|
||||
- [1. Build](#1-build)
|
||||
- [2. Lookup](#2-lookup)
|
||||
- [2a. Reranking (Optional)](#2a-reranking-optional)
|
||||
- [3. Prompt](#3-prompt)
|
||||
- [Supported Document Sources](#supported-document-sources)
|
||||
- [Document Loaders](#document-loaders)
|
||||
- [Document Loader Usage](#document-loader-usage)
|
||||
- [Advanced Customizations](#advanced-customizations)
|
||||
- [Embedding Model](#embedding-model)
|
||||
- [Reranker](#reranker)
|
||||
- [Chunk Size](#chunk-size)
|
||||
- [Trade-Offs](#chunk-size-trade-offs)
|
||||
- [Chunk Overlap](#chunk-overlap)
|
||||
- [Top K](#top-k)
|
||||
- [Trade-Offs](#top-k-trade-offs)
|
||||
- [RAG Template](#rag-template)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
There are two ways to use RAG in Loki: A persistent RAG that can be loaded on-demand for queries, and an ephemeral one for
|
||||
adding RAG to a single specific query.
|
||||
|
||||
### Persistent RAG
|
||||
In the REPL, persistent RAG is initialized via the `.rag` command:
|
||||
|
||||

|
||||
|
||||
The generated RAG is then saved to the `rags` subdirectory of the Loki configuration, and can then be loaded whenever you
|
||||
want that knowledge base via either `.rag <name>` or `loki --rag <RAG>`.
|
||||
|
||||
### Ephemeral RAG
|
||||
Short-lived RAG that is only used for a single session or query is loaded using `.file`/`--file`.
|
||||
|
||||
You can use it to either execute a prompt from a file, or for temporary RAG. The difference is the usage of the `--`
|
||||
separator. If you only specify a filename and no `--` separator, Loki will know to read the file contents and pass them
|
||||
as a query to the model. Otherwise, the `--` separator is read to indicate that this is the end of the list of documents
|
||||
to load into the ephemeral RAG, and what follows is the query to pass to the model.
|
||||
|
||||
```shell
|
||||
.file prompt.md # Read the file as a prompt
|
||||
.file %% -- translate the last reply to italian
|
||||
.file `git diff` -- generate a commit message
|
||||
```
|
||||
|
||||

|
||||
|
||||
Once the session ends, this RAG will no longer be accessible and is only visible to the current session.
|
||||
|
||||
#### The `%%` Document Type
|
||||
In addition to the usual documents that can be specified for persistent RAG, ephemeral RAG has a special `%%` value.
|
||||
This value references the content of the last reply. So you can use it like this:
|
||||
|
||||
```shell
|
||||
.file %% -- translate the last reply to italian
|
||||
```
|
||||
|
||||
The `--` indicates that this is the end of your documents and the beginning of your prompt.
|
||||
|
||||
#### The `cmd` Document Type
|
||||
Loki also lets you use command outputs for ephemeral RAG input. Simply enclose the command in backticks:
|
||||
|
||||
```shell
|
||||
.file `git diff` -- generate a commit message
|
||||
```
|
||||
|
||||
The `--` indicates that this is the end of your documents and the beginning of your prompt.
|
||||
|
||||
## How It Works
|
||||
#### 1. Build
|
||||
When you define RAG, Loki will first "build" the RAG. This means that Loki will consume the documents you specified and
|
||||
generate [embeddings](https://huggingface.co/spaces/hesamation/primer-llm-embedding) for that text. This essentially just means that Loki translates the document into a language
|
||||
the LLM can understand.
|
||||
|
||||
These embeddings are then stored in an in-memory vector database.
|
||||
|
||||
#### 2. Lookup
|
||||
Loki sits between you and the model. So when you submit a prompt to the model, before Loki ever sends it, it will first
|
||||
convert your prompt into embeddings (LLM language), and look for relevant snippets of text in the vector database.
|
||||
|
||||
Loki then passes the top `n`-snippets of text that it finds in the vector database as additional context to the model
|
||||
before your prompt.
|
||||
|
||||
#### 2a. Reranking (Optional)
|
||||
The lookup for relevant snippets of texts uses embeddings to find text that is semantically similar to your prompt, and
|
||||
returns the top `n`-results. This often works fairly well, however these top results aren't always the most relevant for
|
||||
answering the specific query.
|
||||
|
||||
Reranking takes these initial results (say, the top 20-100 text snippets) and re-scores them using a more
|
||||
sophisticated model. The reranker model will rank documents by their actual usefulness for answering the query to ensure
|
||||
the most relevant context is passed to the model alongside your query.
|
||||
|
||||
This reranking model can be customized for each RAG you build in Loki. See the [Reranker](#reranker) section
|
||||
below for more details on how to customize this.
|
||||
|
||||
#### 3. Prompt
|
||||
Finally, the text snippets that were looked up in RAG are passed to the model as additional context to your prompt,
|
||||
giving the model query-specific context to answer your question.
|
||||
|
||||
## Supported Document Sources
|
||||
Loki supports a number of document sources that can be used for RAG:
|
||||
|
||||
| Source | Example | Comments |
|
||||
|--------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| Files | `/tmp/dir1/file1;/tmp/dir1/file2` | |
|
||||
| Directory | `/tmp/dir` | Picks up all files in a directory and all its subdirectories |
|
||||
| Directory (extensions) | `/tmp/dir2/**/*.{md,txt}` | Finds all files in all subdirectories with the specified extensions |
|
||||
| Recursive Filename | `/tmp/*/LOKI.md` | The following files will be picked up: <br><ul><li>`/tmp/dir1/LOKI.md`</li><li>`/tmp/dir2/subdir1/LOKI.md`</li><li>`/tmp/dir2/subdir2/LOKI.md`</li></ul> |
|
||||
| URL | `https://www.ohdsi.org/data-standardization/` | Downloads and loads the specified webpage into the <br>knowledge base |
|
||||
| Recursive URL (Websites) | `https://github.com/OHDSI/Vocabulary-v5.0/wiki/**` | Crawls all pages under the given URL and loads them <br>into the knowledge base |
|
||||
| Document Loader (custom) | `jina:https://cloud.google.com/bigquery/docs/reference/standard-sql/` | Use a custom document loader to parse the given document |
|
||||
|
||||
## Document Loaders
|
||||
Loki only has built-in support for loading text files. But that functionality can be extended to read all kinds of files
|
||||
into your knowledge bases. These custom loaders are used both by RAG and for documents specified using the
|
||||
`.file`/`--file` flags.
|
||||
|
||||
In the global configuration file, you can specify loaders for specific document types using the `document_loaders`
|
||||
setting. Each loader is defined by specifying a name and then a command that Loki will execute to load the document.
|
||||
|
||||
The following variables are interpolated at runtime by Loki and can be used as placeholders in your command definitions:
|
||||
* `$1` (Required) - The input file
|
||||
* `$2` (Optional) - The output file. If omitted, `stdout` is used as the output destination
|
||||
|
||||
**Note:** It is your responsibility to ensure that any tools used to parse documents into text that Loki can read are
|
||||
installed on your system and are available on your `$PATH`. Loki does not have any built-in way of installing
|
||||
dependencies for document loaders for you.
|
||||
|
||||
The following are some example loaders:
|
||||
```yaml
|
||||
document_loaders:
|
||||
pdf: 'pdftotext $1 -' # Use pdftotext to convert a PDF file to text
|
||||
# (see https://poppler.freedesktop.org for details on how to install pdftotext)
|
||||
docx: 'pandoc --to plain $1' # Use pandoc to convert a .docx file to text
|
||||
# (see https://pandoc.org for details on how to install pandoc)
|
||||
jina: 'curl -fsSL https://r.jina.ai/$1 -H "Authorization: Bearer {{JINA_API_KEY}}"' # Use Jina to translate a website into text;
|
||||
# Requires a Jina API key to be added to the Loki vault
|
||||
git: > # Use yek to load a git repository into the knowledgebase (https://github.com/bodo-run/yek)
|
||||
sh -c "yek $1 --json | jq 'map({ path: .filename, contents: .content })'"
|
||||
```
|
||||
|
||||
### Document Loader Usage
|
||||
Once you have your loaders defined, you can specify when Loki should use them by prefixing any RAG file/directory/URI
|
||||
with the name of the loader.
|
||||
|
||||
**Example: Load a git repo into RAG**
|
||||

|
||||
|
||||
**Example: Use pdf loader for ephemeral RAG**
|
||||
```shell
|
||||
$ loki --file pdf:some-file.pdf
|
||||
```
|
||||
|
||||
## Advanced Customizations
|
||||
For those familiar with RAG, Loki exposes a handful of advanced global settings that can be used to tweak your default
|
||||
RAG configurations.
|
||||
|
||||
### Embedding Model
|
||||
When Loki queries your RAG knowledge bases, it needs to first convert your query into embeddings. By default, Loki uses
|
||||
the same embedding model that was used to create the knowledge base in the first place.
|
||||
|
||||
This can be customized to any other embedding model available in your configured clients by setting the
|
||||
`rag_embedding_model` setting in your global Loki configuration file:
|
||||
|
||||
```yaml
|
||||
rag_embedding_model: null # Specifies the embedding model used for context retrieval
|
||||
```
|
||||
|
||||
### Reranker
|
||||
By default, Loki uses [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/docs/reference/elasticsearch/rest-apis/reciprocal-rank-fusion) to merge vector and keyword search results.
|
||||
|
||||
You can change the default reranker model to any other reranking model in your configured clients. To change the default
|
||||
reranker model, simply change the value of the `rag_reranker_model` setting in your global configuration file:
|
||||
|
||||
```yaml
|
||||
rag_reranker_model: null # By default (null), Reciprocal Rank Fusion (RRF) is used to merge vector and keyword search results
|
||||
```
|
||||
|
||||
### Chunk Size
|
||||
In the context of RAG, the chunk size is the maximum length of each text chunk (measured in characters) that is created
|
||||
when splitting documents. In Loki, this defaults to `2000` characters.
|
||||
|
||||
You can specify a different global default by setting the `rag_chunk_size` property in your global configuration file:
|
||||
|
||||
```yaml
|
||||
rag_chunk_size: null # Defines the size of chunks for document processing in characters
|
||||
```
|
||||
|
||||
#### Chunk Size Trade-Offs
|
||||
Keep in mind the following trade-offs when changing the chunk size:
|
||||
|
||||
* **Smaller chunks (e.g. 256 characters):** More precise retrieval, better semantic focus, but may lack context or split
|
||||
important information
|
||||
* **Larger chunks (e.g. 1024 characters):** More context preserved, fewer chunks to manage, but less precise matching
|
||||
and more noise in retrieved documents
|
||||
|
||||
### Chunk Overlap
|
||||
Chunk overlap in RAG is the number of characters that overlap between consecutive chunks to maintain continuity.
|
||||
|
||||
---
|
||||
|
||||
**Example:** If the following sentence is cut off at the end of one chunk
|
||||
|
||||
`I was doing fine until someone brought up`
|
||||
|
||||
You'll ideally want that full sentence to be picked up at the beginning of the next chunk to make sure the full meaning
|
||||
is captured. So in this example, if your chunk overlap is 42 characters, then the start of the next chunk would look
|
||||
like this:
|
||||
|
||||
`I was doing fine until someone brought up the game. <next sentence>`
|
||||
|
||||
---
|
||||
|
||||
Often, this value is 10%-20% of the chunk size.
|
||||
|
||||
By default, in Loki, this value is 5% of the chunk size. You can override this and specify the default chunk overlap (in
|
||||
characters) that Loki should use as a global default by setting the `rag_chunk_overlap` property in the global Loki
|
||||
configuration file:
|
||||
|
||||
```yaml
|
||||
rag_chunk_overlap: null # Defines the overlap between chunks
|
||||
```
|
||||
|
||||
### Top K
|
||||
In RAG, `top_k` represents the top `k`-chunks to return from the vector database query. Think of it like if you search
|
||||
something on Google and only care about the top 10 results, that's what you'll use for your context.
|
||||
|
||||
In Loki, the default value for this is `5`. You can customize this global default by setting the `rag_top_k` property in
|
||||
your global configuration file:
|
||||
|
||||
```yaml
|
||||
rag_top_k: 5 # Specifies the number of documents to retrieve for answering queries
|
||||
```
|
||||
|
||||
#### Top K Trade-Offs
|
||||
When customizing this value, keep in mind the following trade-offs so you get the best performance:
|
||||
|
||||
* **Lower top_k (e.g. 3):** Faster, more focused context, lower cost, but risks missing relevant information
|
||||
* **Higher top_k (e.g. 10):** More comprehensive coverage, but more noise, higher latency, increased token costs, and
|
||||
potential context window constraints
|
||||
|
||||
### RAG Template
|
||||
When you use RAG in Loki, after Loki performs the lookup for relevant chunks of text to add as context to your query, it
|
||||
will add the retrieved text chunks as context to your query before sending it to the model. The format of this context
|
||||
is determined by the `rag_template` setting in your global Loki configuration file.
|
||||
|
||||
This template utilizes three placeholders:
|
||||
* `__INPUT__`: The user's actual query
|
||||
* `__CONTEXT__`: The context retrieved from RAG
|
||||
* `__SOURCES__`: A numbered list of the source file paths or URLs that the retrieved context came from
|
||||
|
||||
These placeholders are replaced with the corresponding values into the template and make up what's actually passed to
|
||||
the model at query-time. The `__SOURCES__` placeholder enables the model to cite which documents its answer is based on,
|
||||
which is especially useful when building knowledge-base assistants that need to provide verifiable references.
|
||||
|
||||
The default template that Loki uses is the following:
|
||||
|
||||
```text
|
||||
Answer the query based on the context while respecting the rules. (user query, some textual context and rules, all inside xml tags)
|
||||
|
||||
<context>
|
||||
__CONTEXT__
|
||||
</context>
|
||||
|
||||
<sources>
|
||||
__SOURCES__
|
||||
</sources>
|
||||
|
||||
<rules>
|
||||
- If you don't know, just say so.
|
||||
- If you are not sure, ask for clarification.
|
||||
- Answer in the same language as the user query.
|
||||
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
||||
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
||||
- Answer directly and without using xml tags.
|
||||
- When using information from the context, cite the relevant source from the <sources> section.
|
||||
</rules>
|
||||
|
||||
<user_query>
|
||||
__INPUT__
|
||||
</user_query>
|
||||
```
|
||||
|
||||
You can customize this template by specifying the `rag_template` setting in your global Loki configuration file. Your
|
||||
template *must* include both the `__INPUT__` and `__CONTEXT__` placeholders in order for it to be valid. The
|
||||
`__SOURCES__` placeholder is optional. If it is omitted, source references will not be included in the prompt.
|
||||
@@ -1,117 +0,0 @@
|
||||
# Customize REPL Prompt
|
||||
|
||||
The prompt you see when you start the Loki REPL can be customized to your liking. This is achieved via the `left_prompt`
|
||||
and `right_prompt` settings in the global Loki configuration file:
|
||||
|
||||
```yaml
|
||||
left_prompt: '{color.red}{model}){color.green}{?session {?agent {agent}>}{session}{?role /}}{!session {?agent {agent}>}}{role}{?rag @{rag}}{color.cyan}{?session )}{!session >}{color.reset} '
|
||||
right_prompt: '{color.purple}{?session {?consume_tokens {consume_tokens}({consume_percent}%)}{!consume_tokens {consume_tokens}}}{color.reset}'
|
||||
```
|
||||
|
||||
The location of the global configuration file differs between systems, so you can use the following command to find your
|
||||
global configuration file's location:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_file' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Syntax](#syntax)
|
||||
- [Variables](#variables)
|
||||
<!--toc:end-->
|
||||
|
||||
## Syntax
|
||||
The syntax for the prompts consists of plain text and templates contained in `{...}`. The plain text is
|
||||
printed exactly as given.
|
||||
|
||||
The syntax for the templates `{...}` is as follows:
|
||||
|
||||
* `{variable}` - Replaced with the value of `variable`
|
||||
* `{?variable <template>}` - Evaluate the `<template>` when `variable` is evaluated to `true`
|
||||
* `{!variable <template>}` - Evaluate the `<template>` when `variable` is evaluated to `false`
|
||||
|
||||
Where a `<template>` is another expression consisting of plain text and/or more special computations inside `{...}`.
|
||||
|
||||
Variables are evaluated for "truthiness"; that is, if a variable is undefined, it is considered to be the exact same
|
||||
as if that variable's value was `false`.
|
||||
|
||||
**Example 1: Simple Boolean Usage**
|
||||
For the prompt `{?variable yay}{!variable boo}`, if `variable=true`, then the output will be
|
||||
```
|
||||
yay
|
||||
```
|
||||
|
||||
And if `variable=false`:
|
||||
```
|
||||
boo
|
||||
```
|
||||
|
||||
**Example 2: Nested Expressions**
|
||||
For the prompt `{?variable {!variable2 yay}>}`, and assuming
|
||||
* `variable=true`
|
||||
* `variable2=false`
|
||||
the output will be
|
||||
```
|
||||
yay>
|
||||
```
|
||||
|
||||
If `variable2=true`, the output will be empty.
|
||||
|
||||
If `variable=false`, the output will be empty.
|
||||
|
||||
## Variables
|
||||
The following variables and output modifiers are available to you when you're creating your prompts:
|
||||
|
||||
```yaml
|
||||
# Model Variables
|
||||
model: openai:gpt-4 # The active model's full name
|
||||
client_name: openai # The name of the client serving the active model
|
||||
model_name: gpt-4 # The aliased name of the active model
|
||||
max_input_tokens: 4096 # The maximum number of input tokens for the active model
|
||||
|
||||
# Configuration Variables
|
||||
temperature: 1.0 # The temperature for the active model
|
||||
top_p: 0.9 # The top_p for the active model
|
||||
dry_run: true # Whether the given command is flagged to be a dry run
|
||||
stream: false # Whether streaming responses are enabled
|
||||
save: true # Whether shell history is saved
|
||||
wrap: 120 # The number of characters to allow before wrapping around output to the next line
|
||||
|
||||
# Role Variables
|
||||
role: code # The active role
|
||||
|
||||
# Session Variables
|
||||
session: temp # The name of the active session
|
||||
dirty: false # Whether the session settings have been updated but not persisted
|
||||
consume_tokens: 200 # The number of tokens consumed
|
||||
consume_percent: 1% # The percentage of the maximum input tokens that has been consumed
|
||||
user_messages_len: 0 # The total number of sent user messages
|
||||
|
||||
# RAG Variables
|
||||
rag: temp # The name of the active RAG
|
||||
|
||||
# Agent Variables
|
||||
agent: todo-sh # The name of the active agent
|
||||
|
||||
# ANSI COLORS
|
||||
color.reset:
|
||||
color.black:
|
||||
color.dark_gray:
|
||||
color.red:
|
||||
color.light_red:
|
||||
color.green:
|
||||
color.light_green:
|
||||
color.yellow:
|
||||
color.light_yellow:
|
||||
color.blue:
|
||||
color.light_blue:
|
||||
color.purple:
|
||||
color.light_purple:
|
||||
color.magenta:
|
||||
color.light_magenta:
|
||||
color.cyan:
|
||||
color.light_cyan:
|
||||
color.white:
|
||||
color.light_gray:
|
||||
```
|
||||
@@ -1,260 +0,0 @@
|
||||
# Loki REPL Guide
|
||||
In addition to being a CLI, Loki also has a built-in REPL (Read-Eval-Print Loop). This enables users to quickly try
|
||||
out prompts, commands, configurations, and everything in between without having to modify the same command every time.
|
||||
|
||||
You can enter the REPL by simply typing `loki` without any follow-up flags or arguments.
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Features](#features)
|
||||
- [REPL Commands](#repl-commands)
|
||||
- [`.model` - Change the current LLM](#model---change-the-current-llm)
|
||||
- [`.role` - Role management](#role---role-management)
|
||||
- [`.prompt` - Set a temporary role using a prompt](#prompt---set-a-temporary-role-using-a-prompt)
|
||||
- [`.session` - Session management](#session---session-management)
|
||||
- [`.agent` - Chat with an AI agent](#agent---chat-with-an-ai-agent)
|
||||
- [`.rag` - Chat with documents](#rag---chat-with-documents)
|
||||
- [`.macro` - Execute a macro](#macro---execute-a-macro)
|
||||
- [`.file` - Read files and use them as input](#file---read-files-and-use-them-as-input)
|
||||
- [`.vault` - Manage the Loki vault](#vault---manage-the-loki-vault)
|
||||
- [`.continue` - Continue the previous response](#continue---continue-the-previous-response)
|
||||
- [`.regenerate` - Regenerate the last response](#regenerate---regenerate-the-last-response)
|
||||
- [`.copy` - Copy the last response to your clipboard](#copy---copy-the-last-response-to-your-clipboard)
|
||||
- [`.set` - Adjust runtime settings](#set---adjust-runtime-settings)
|
||||
- [`.edit` - Modify configuration files](#edit---modify-configuration-files)
|
||||
- [`.delete` - Delete configurations from Loki](#delete---delete-configurations-from-loki)
|
||||
- [`.info` - Display information about the current mode](#info---display-information-about-the-current-mode)
|
||||
- [`.authenticate` - Authenticate the current model client via OAuth](#authenticate---authenticate-the-current-model-client-via-oauth)
|
||||
- [`.exit` - Exit an agent/role/session/rag or the Loki REPL itself](#exit---exit-an-agentrolesessionrag-or-the-loki-repl-itself)
|
||||
- [`.help` - Show the help guide](#help---show-the-help-guide)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
The REPL has features that are intended to make your Loki experience as easy and as enjoyable as possible! This includes
|
||||
things like
|
||||
|
||||
* **Tab Autocompletion:** Every command in the REPL (i.e. everything that starts with a `.`) has fuzzy search auto
|
||||
completions.
|
||||
* `.<tab>` to complete REPL commands
|
||||
* `.model <tab>` to complete chat models
|
||||
* `.set <tab>` to complete configuration keys
|
||||
* `.set key <tab>` to complete configuration values
|
||||
* **Multi-Line Prompts:** You can also type prompts that span more than one line to help organize your thoughts. This
|
||||
can be done in the following ways:
|
||||
* `Ctrl-o` to open the current input buffer in your preferred editor (either the value of `editor` or `$EDITOR`)
|
||||
* You can paste multi-line text
|
||||
* You can type `:::` to start multi-line editing, and use `:::` to finish it.
|
||||
* And finally, you can use hotkeys like `{ctrl/shift/alt}+enter` or `ctrl-j` to insert a new line directly in the
|
||||
REPL.
|
||||
* **History Search:** Press `ctrl+r` to search the REPL history, and navigate it with `↑↓`
|
||||
* **Configurable Keybindings:** You can switch between `emacs` style keybindings or `vi` style keybindings
|
||||
* [**Custom REPL Prompt:**](./REPL-PROMPT.md) You can even customize the REPL prompt to display information about the
|
||||
current context in the prompt
|
||||
* **Built-in user interaction tools:** When function calling is enabled in the REPL, the `user__ask`, `user__confirm`,
|
||||
`user__input`, and `user__checkbox` tools are always available for interactive prompts. These are not injected in the
|
||||
one-shot CLI mode.
|
||||
|
||||
---
|
||||
|
||||
## REPL Commands
|
||||
All REPL commands begin with a `.` to indicate that they're not part of a prompt. The following list details the
|
||||
commands available in Loki:
|
||||
|
||||
### `.model` - Change the current LLM
|
||||
When browsing models in the REPL, use the following legend to understand the purpose of each column in the model table:
|
||||
```
|
||||
openai:gpt-4o 128000 / 4096 | 5 / 15 👁 ⚒
|
||||
| | | | | | └─ supports function calling
|
||||
| | | | | └─ supports vision (multi-modal)
|
||||
| | | | └─ output price ($/1M)
|
||||
| | | └─ input price ($/1M)
|
||||
| | |
|
||||
| | └─ max output tokens
|
||||
| └─ max input tokens
|
||||
└─ model id
|
||||
```
|
||||

|
||||
|
||||
For more information about how to add models to Loki, refer to the [clients documentation](./clients/CLIENTS.md).
|
||||
|
||||
### `.role` - Role management
|
||||
Loki offers the following commands to manage your roles:
|
||||
|
||||
| Command | Description |
|
||||
|--------------|-------------------------------------------------------------------------|
|
||||
| `.role` | Create or switch to a role |
|
||||
| `.info role` | Show information about the active role |
|
||||
| `.edit role` | Open the active role's configuration file in your preferred text editor |
|
||||
| `.save role` | Save the active role and its configurations to a configuration file |
|
||||
| `.exit role` | Exit the active role |
|
||||
|
||||

|
||||
|
||||
For more information about roles in Loki and how to build them, refer to the [roles documentation](./ROLES.md).
|
||||
|
||||
### `.prompt` - Set a temporary role using a prompt
|
||||
If you need to create a temporary role that you want to discard after use, use `.prompt`. `.prompt`-based roles
|
||||
cannot be persisted to a file and saved.
|
||||
|
||||

|
||||
|
||||
### `.session` - Session management
|
||||
Use the following commands to manage sessions in Loki:
|
||||
|
||||
| Command | Description |
|
||||
|---------------------|---------------------------------------------------------------------------------------------|
|
||||
| `.session` | Start or switch to a session |
|
||||
| `.empty session` | Clear all messages for the active session |
|
||||
| `.compress session` | Compress the session messages using the `summarization_prompt` setting in the global config |
|
||||
| `.info session` | Display information about the active session |
|
||||
| `.edit session` | Open the active session's configuration in your preferred text editor |
|
||||
| `.save session` | Save the active session to a `session` configuration file |
|
||||
| `.exit session` | Exit the active session |
|
||||
|
||||

|
||||
|
||||
For more information on sessions and how to use them in Loki, refer to the [sessions documentation](./SESSIONS.md).
|
||||
|
||||
### `.agent` - Chat with an AI agent
|
||||
Loki lets you build OpenAI GPT-style agents. The following commands let you interact with and manage your agents in
|
||||
Loki:
|
||||
|
||||
| Command | Description |
|
||||
|----------------------|-----------------------------------------------------------------------------------------------|
|
||||
| `.agent` | Use an agent |
|
||||
| `.starter` | Display and use conversation starters for the active agent |
|
||||
| `.clear todo` | Clear the todo list and stop auto-continuation (requires `auto_continue: true` on the agent) |
|
||||
| `.edit agent-config` | Open the agent configuration in your preferred text editor |
|
||||
| `.info agent` | Display information about the active agent |
|
||||
| `.exit agent` | Leave the active agent |
|
||||
|
||||

|
||||
|
||||
For more information on agents in Loki and how to create them, refer to the [agents documentation](./AGENTS.md).
|
||||
|
||||
### `.rag` - Chat with documents
|
||||
RAG (Retrieval Augmented Generation) enables you to load documents into the LLM so you can ask questions about them or
|
||||
complete tasks using the documents as additional context.
|
||||
|
||||
| Command | Description |
|
||||
|------------------|------------------------------------------------------------------------------|
|
||||
| `.rag` | Initialize or access a RAG |
|
||||
| `.edit rag-docs` | Add or remove documents from the active RAG using your preferred text editor |
|
||||
| `.rebuild rag` | Rebuild the active RAG to accommodate document changes |
|
||||
| `.sources rag` | Show a works-cited list of the sources used in the last query |
|
||||
| `.info rag` | Display information about the active RAG |
|
||||
| `.exit rag` | Exit the active RAG |
|
||||
|
||||

|
||||
|
||||
For more information about RAG in Loki and how to utilize it, refer to the [rag documentation](./RAG.md).
|
||||
|
||||
### `.macro` - Execute a macro
|
||||
Macros in Loki are like "scripts" of commands that can be run in isolated environments; that means they do not use any
|
||||
active settings and use the same settings they had when written. They are created/executed using the `.macro <name>`
|
||||
command.
|
||||
|
||||

|
||||
|
||||
For more information on macros in Loki and how to create them, refer to the [macros documentation](./MACROS.md).
|
||||
|
||||
### `.file` - Read files and use them as input
|
||||
Loki lets you specify any number of documents that you can load and use as ephemeral RAG to chat with the LLM. To see
|
||||
what files or values you can pass to it, simply run the command `.file` with no arguments:
|
||||
|
||||
```shell
|
||||
openai:gpt-4o)> .file
|
||||
Usage: .file <file|dir|url|%%|cmd>... [-- <text>...]
|
||||
```
|
||||
|
||||

|
||||
|
||||
For more information about ephemeral RAG, refer to the [ephemeral RAG documentation](./RAG.md#ephemeral-rag).
|
||||
|
||||
### `.vault` - Manage the Loki vault
|
||||
The Loki vault lets users store sensitive secrets and credentials securely so that there are no plaintext secrets
|
||||
anywhere in your configurations.
|
||||
|
||||

|
||||
|
||||
For more information about the Loki vault, refer to the [vault documentation](./VAULT.md).
|
||||
|
||||
### `.continue` - Continue the previous response
|
||||
When you have a response that exceeds the context length, you can use the `.continue` command to continue the generation
|
||||
of the last response.
|
||||
|
||||

|
||||
|
||||
### `.regenerate` - Regenerate the last response
|
||||
If your response is ever interrupted, or you want to try generating it again, you can use the `.regenerate` command to do
|
||||
this without having to retype your query:
|
||||
|
||||

|
||||
|
||||
### `.copy` - Copy the last response to your clipboard
|
||||
If you're trying to copy the last response (like copying some code), you can use the `.copy` command to copy the entire
|
||||
last response to your system clipboard:
|
||||
|
||||

|
||||
|
||||
### `.set` - Adjust runtime settings
|
||||
You can use `.set` to adjust select settings at runtime. This is useful when you're experimenting with settings and want
|
||||
to know how they'll affect Loki. To persist the changes you make, be sure to update them in the global configuration
|
||||
file.
|
||||
|
||||

|
||||
|
||||
### `.edit` - Modify configuration files
|
||||
The `.edit` command lets you modify configuration files for the current mode of the REPL. It will open the selected
|
||||
configuration in your preferred text editor. It lets you modify the following configurations:
|
||||
|
||||
* `.edit config` - Modify the global configuration
|
||||
* `.edit role` - Modify the active role's configuration
|
||||
* `.edit session` - Modify the active session's configuration
|
||||
* `.edit agent-config` - Modify the active agent's configuration
|
||||
* `.edit rag-docs` - Add or remove documents from the active RAG
|
||||
|
||||
### `.delete` - Delete configurations from Loki
|
||||
The `.delete` command allows you to delete entities in Loki without having to directly run `rm -rf` on the configuration
|
||||
directory or file corresponding to the target entity. You can use it to delete the following entities:
|
||||
|
||||
* `.delete role` - Delete select roles
|
||||
* `.delete session` - Delete select sessions
|
||||
* `.delete macro` - Delete select macros
|
||||
* `.delete rag` - Delete select RAGs
|
||||
* `.delete agent-data` - Delete select agents' configurations and all of their tools
|
||||
|
||||
### `.info` - Display information about the current mode
|
||||
The `.info` command provides useful information about different modes that Loki may be operating in. It's helpful if you
|
||||
want a quick understanding of the system info, a role's configuration, an agent's configuration, etc.
|
||||
|
||||
The following entities are supported:
|
||||
|
||||
| Command | Description |
|
||||
|-----------------|-------------------------------------------------------------|
|
||||
| `.info` | Display system information (identical to the `--info` flag) |
|
||||
| `.info role` | Display information about the active role |
|
||||
| `.info session` | Display information about the active session |
|
||||
| `.info agent` | Display information about the active agent |
|
||||
| `.info rag` | Display information about the active RAG |
|
||||
|
||||
### `.authenticate` - Authenticate the current model client via OAuth
|
||||
The `.authenticate` command will start the OAuth flow for the current model client if:
|
||||
* The client supports OAuth (See the [clients documentation](./clients/CLIENTS.md#providers-that-support-oauth) for supported clients)
|
||||
* The client is configured in your Loki configuration to use OAuth via the `auth: oauth` property
|
||||
|
||||
### `.exit` - Exit an agent/role/session/rag or the Loki REPL itself
|
||||
The `.exit` command is used to move between modes in the Loki REPL.
|
||||
|
||||
| Command | Description |
|
||||
|-----------------|-------------------------|
|
||||
| `.exit role` | Exit the active role |
|
||||
| `.exit session` | Exit the active session |
|
||||
| `.exit agent` | Exit the active agent |
|
||||
| `.exit rag` | Exit the active RAG |
|
||||
| `.exit` | Exit the Loki REPL |
|
||||
|
||||
### `.help` - Show the help guide
|
||||
Just like with any shell or REPL, you sometimes need a little help and want to know what commands are available to you.
|
||||
That's when you use the `.help` command.
|
||||
@@ -1,572 +0,0 @@
|
||||
# Architecture Plan: Loki REST API Service Mode
|
||||
|
||||
## The Core Problem
|
||||
|
||||
Today, Loki's `Config` struct is a god object — it holds both server-wide configuration (LLM providers, vault, tool definitions) and per-interaction mutable state (current role, session, agent, supervisor, inbox, tool tracker) in one `Arc<RwLock<Config>>`. CLI and REPL both mutate this singleton directly. Adding a third interface (REST API) that handles concurrent users makes this untenable.
|
||||
|
||||
## Design Pattern: Engine + Context + Emitter
|
||||
|
||||
The refactor splits Loki into three layers:
|
||||
|
||||
```
|
||||
┌─────────┐ ┌─────────┐ ┌─────────┐
|
||||
│ CLI │ │ REPL │ │ API │ ← Thin adapters (frontends)
|
||||
└────┬────┘ └────┬────┘ └────┬────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────────────────────┐
|
||||
│ RunRequest + Emitter │ ← Uniform request shape
|
||||
└──────────────┬───────────────┘
|
||||
▼
|
||||
┌──────────────────────────────┐
|
||||
│ Engine::run() │ ← Single core entrypoint
|
||||
│ (input → messages → LLM │
|
||||
│ → tool loop → events) │
|
||||
└──────────────┬───────────────┘
|
||||
│
|
||||
┌────────────┼────────────┐
|
||||
▼ ▼ ▼
|
||||
AppState RequestContext SessionStore
|
||||
(global, (per-request, (file-backed,
|
||||
immutable) mutable) per-session lock)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Split Config → AppState (global) + RequestContext (per-request)
|
||||
|
||||
### AppState — created once at startup, wrapped in `Arc`, never mutated during requests:
|
||||
|
||||
```rust
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub config: Arc<AppConfig>, // deserialized config.yaml (frozen)
|
||||
pub providers: ProviderRegistry, // LLM client configs + OAuth tokens
|
||||
pub vault: Arc<VaultService>, // encrypted credential storage (internal locking)
|
||||
pub tools: Arc<ToolRegistry>, // tool definitions, function dirs, visible_tools
|
||||
pub mcp_global: Arc<McpGlobalConfig>, // global MCP settings (not live instances)
|
||||
pub sessions: Arc<dyn SessionStore>, // file-backed session persistence
|
||||
pub rag_defaults: RagDefaults, // embedding model, chunk size, etc.
|
||||
}
|
||||
```
|
||||
|
||||
### RequestContext — created per CLI invocation, per REPL turn, or per API request:
|
||||
|
||||
```rust
|
||||
pub struct RequestContext {
|
||||
pub app: Arc<AppState>, // borrows global state
|
||||
pub request_id: Uuid,
|
||||
pub mode: FrontendMode, // Cli | Repl | Api
|
||||
pub cancel: CancellationToken, // unified cancellation
|
||||
|
||||
// per-request mutable state (was on Config)
|
||||
pub session: SessionHandle,
|
||||
pub convo: ConversationState, // messages, last_message, tool_call_tracker
|
||||
pub agent: Option<AgentRuntime>, // supervisor, MCP instances, inbox, escalation
|
||||
pub overrides: Overrides, // model, role, rag, dry_run, etc.
|
||||
pub auth: Option<AuthContext>, // API-only; None for CLI/REPL
|
||||
}
|
||||
|
||||
pub struct Overrides {
|
||||
pub role: Option<String>,
|
||||
pub model: Option<String>,
|
||||
pub rag: Option<RagConfig>,
|
||||
pub agent: Option<AgentSpec>,
|
||||
pub dry_run: bool,
|
||||
pub macro_mode: bool,
|
||||
}
|
||||
```
|
||||
|
||||
### What changes for existing code
|
||||
|
||||
Every function that currently takes `&GlobalConfig` (i.e., `Arc<RwLock<Config>>`) and calls `.read()` / `.write()` gets refactored to take `&AppState` for reads and `&mut RequestContext` for mutations. The `config.write().set_model(...)` pattern becomes `ctx.overrides.model = Some(...)`.
|
||||
|
||||
### REPL special case
|
||||
|
||||
The REPL keeps a long-lived `RequestContext` that persists across turns (just like today's Config singleton does). State-changing dot-commands (`.model`, `.role`, `.session`) mutate the REPL's own context. This preserves current behavior exactly.
|
||||
|
||||
---
|
||||
|
||||
## 2. Unified Dispatch: The Engine
|
||||
|
||||
Instead of `start_directive()` in `main.rs` and `ask()` in `repl/mod.rs` being separate code paths, both call one core function:
|
||||
|
||||
```rust
|
||||
pub struct Engine {
|
||||
pub app: Arc<AppState>,
|
||||
pub agent_factory: Arc<dyn AgentFactory>,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
pub async fn run(
|
||||
&self,
|
||||
ctx: &mut RequestContext,
|
||||
req: RunRequest,
|
||||
emitter: &dyn Emitter,
|
||||
) -> Result<RunOutcome, CoreError> {
|
||||
// 1. Apply any CoreCommand (set role, model, session, etc.)
|
||||
// 2. Build Input from req.input + ctx (role messages, session history, RAG)
|
||||
// 3. Create LLM client from provider registry
|
||||
// 4. call_chat_completions[_streaming](), emitting events via emitter
|
||||
// 5. Tool result loop (recursive)
|
||||
// 6. Persist session updates
|
||||
// 7. Return outcome (session_id, message_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RunRequest {
|
||||
pub input: UserInput, // text, files, media
|
||||
pub command: Option<CoreCommand>, // normalized dot-command
|
||||
pub stream: bool,
|
||||
}
|
||||
|
||||
pub enum CoreCommand {
|
||||
SetRole(String),
|
||||
SetModel(String),
|
||||
StartSession { name: Option<String> },
|
||||
StartAgent { name: String, variables: HashMap<String, String> },
|
||||
Continue,
|
||||
Regenerate,
|
||||
CompressSession,
|
||||
Info,
|
||||
// ... one variant per REPL dot-command
|
||||
}
|
||||
```
|
||||
|
||||
### How frontends use it
|
||||
|
||||
| Frontend | Context lifetime | How it calls Engine |
|
||||
|---|---|---|
|
||||
| CLI | Single invocation, then exit | Creates `RequestContext`, calls `engine.run()` once, exits |
|
||||
| REPL | Long-lived across turns | Keeps `RequestContext`, calls `engine.run()` per line, dot-commands become `CoreCommand` variants |
|
||||
| API | Per HTTP request, but session persists | Loads `RequestContext` from `SessionStore` per request, calls `engine.run()`, persists back |
|
||||
|
||||
---
|
||||
|
||||
## 3. Output Abstraction: The Emitter Trait
|
||||
|
||||
The core never writes to stdout or formats JSON. It emits structured semantic events:
|
||||
|
||||
```rust
|
||||
pub enum Event<'a> {
|
||||
Started { request_id: Uuid, session_id: Uuid },
|
||||
AssistantDelta(&'a str), // streaming token
|
||||
AssistantMessageEnd { full_text: &'a str },
|
||||
ToolCall { name: &'a str, args: &'a str },
|
||||
ToolResult { name: &'a str, result: &'a str },
|
||||
Info(&'a str),
|
||||
Error(CoreError),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Emitter: Send + Sync {
|
||||
async fn emit(&self, event: Event<'_>) -> Result<(), EmitError>;
|
||||
}
|
||||
```
|
||||
|
||||
### Three implementations
|
||||
|
||||
- **`TerminalEmitter`** — wraps the existing `SseHandler` → `markdown_stream` / `raw_stream` logic. Renders to terminal with crossterm. Used by both CLI and REPL.
|
||||
- **`JsonEmitter`** — collects all events, returns a JSON response body at the end. Used by non-streaming API requests.
|
||||
- **`SseEmitter`** — converts each `Event` to an SSE frame, pushes into a `tokio::sync::mpsc` channel that axum streams to the client. Used by streaming API requests.
|
||||
|
||||
---
|
||||
|
||||
## 4. Session Isolation for API
|
||||
|
||||
### Session IDs
|
||||
|
||||
UUID-based for API consumers. CLI/REPL keep human-readable names as aliases.
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait SessionStore: Send + Sync {
|
||||
async fn create(&self, alias: Option<&str>) -> Result<SessionHandle>;
|
||||
async fn open(&self, id: SessionId) -> Result<SessionHandle>;
|
||||
async fn open_by_name(&self, name: &str) -> Result<SessionHandle>; // CLI/REPL compat
|
||||
}
|
||||
```
|
||||
|
||||
### File layout
|
||||
|
||||
```
|
||||
~/.config/loki/sessions/
|
||||
by-id/<uuid>/state.yaml # canonical storage
|
||||
by-name/<name> -> <uuid> # symlink or mapping file for CLI/REPL
|
||||
```
|
||||
|
||||
### Concurrency
|
||||
|
||||
Each `SessionHandle` holds a `tokio::sync::Mutex` so two concurrent API requests to the same session serialize properly. For v1 this is sufficient — no need for a database.
|
||||
|
||||
---
|
||||
|
||||
## 5. Tool Scope Isolation (formerly "Agent Isolation")
|
||||
|
||||
**Correction:** An earlier version of this document singled out agents as the owner of "live tool and MCP runtime." That was wrong. Loki allows MCP servers and tools to be configured at **every** `RoleLike` level — global, role, session, and agent — with resolution priority `Agent > Session > Role > Global`. Agents aren't uniquely coupled to MCP lifecycle; they're just the most visibly coupled scope in today's code.
|
||||
|
||||
The correct abstraction is **`ToolScope`**: every active `RoleLike` owns one. A `ToolScope` is a self-contained unit holding the resolved function declarations, live MCP runtime handles, and the tool-call tracker for whichever scope is currently on top of the stack.
|
||||
|
||||
### Today's behavior (to match in v1)
|
||||
|
||||
`McpRegistry::reinit()` is already **diff-based**: given a new enabled-server list, it stops only the servers that are no longer needed, leaves still-needed ones alive, and starts only the missing ones. This is correct single-tenant behavior but the registry is a process-wide singleton, so two concurrent consumers with different MCP sets trample each other.
|
||||
|
||||
### Target design
|
||||
|
||||
```rust
|
||||
pub struct ToolScope {
|
||||
pub functions: Functions, // resolved declarations for this scope
|
||||
pub mcp_runtime: McpRuntime, // live handles to MCP processes
|
||||
pub tool_tracker: ToolCallTracker, // per-scope call tracking
|
||||
}
|
||||
|
||||
pub struct McpRuntime {
|
||||
servers: HashMap<String, Arc<McpServerHandle>>, // live, ref-counted
|
||||
}
|
||||
|
||||
pub struct McpFactory {
|
||||
shared_servers: Mutex<HashMap<McpServerKey, Weak<McpServerHandle>>>,
|
||||
}
|
||||
|
||||
impl McpFactory {
|
||||
/// Produce a runtime with handles for the requested enabled servers.
|
||||
/// Shared across ToolScopes via Arc when configs match; isolated when they differ.
|
||||
pub async fn build_runtime(&self, enabled: &[String]) -> Result<McpRuntime>;
|
||||
}
|
||||
```
|
||||
|
||||
**`McpFactory` lives on `AppState`.** It does NOT hold any live servers itself — it holds weak refs so that when the last `ToolScope` using a given server drops its `Arc`, the process is torn down.
|
||||
|
||||
**`ToolScope` lives on `RequestContext`.** It replaces the current `functions`, `tool_call_tracker`, and (implicit) global `mcp_registry` fields. Every active scope — whether that's "just the REPL with its global MCP set" or "an agent with its own MCP set" — owns exactly one `ToolScope`.
|
||||
|
||||
### Scope transitions
|
||||
|
||||
When a `RoleLike` activates or exits:
|
||||
|
||||
1. Resolve the effective enabled-tool and enabled-MCP-server lists using priority `Agent > Session > Role > Global`.
|
||||
2. Ask `McpFactory::build_runtime(enabled)` for an `McpRuntime`. The factory reuses existing `Arc<McpServerHandle>`s where keys match; spawns new processes where they don't.
|
||||
3. Construct a new `ToolScope` with the runtime + resolved `Functions`.
|
||||
4. Assign it to `ctx.tool_scope`. The old `ToolScope` drops; any `Arc<McpServerHandle>`s with no other references shut down their processes.
|
||||
|
||||
This preserves today's diff-based behavior for single-tenant (REPL) and makes it correct for multi-tenant (API).
|
||||
|
||||
### Sharing vs isolation (the key property)
|
||||
|
||||
`McpServerKey` encodes server name + command + args + env vars. Two `ToolScope`s requesting the **same key** share the same `Arc<McpServerHandle>`. Two requesting **different keys** (e.g., different per-user API keys baked into the env) get separate processes. This gives us:
|
||||
|
||||
- **Isolation by default** — different configs = different processes, no cross-tenant leakage
|
||||
- **Sharing by coincidence** — identical configs = one process, ref-counted
|
||||
- **Clean cleanup** — processes die automatically when the last scope releases them
|
||||
|
||||
### Agent-specific state
|
||||
|
||||
Agents still own some state that's genuinely agent-only (not in `ToolScope`): the supervisor, inbox, escalation queue, optional todo list, sub-agent handles, and the parent/child tree. That state lives in an `AgentRuntime`:
|
||||
|
||||
```rust
|
||||
pub struct AgentRuntime {
|
||||
pub spec: AgentSpec,
|
||||
pub rag: Option<Arc<Rag>>, // shared across sibling sub-agents
|
||||
pub supervisor: Supervisor,
|
||||
pub inbox: Arc<Inbox>,
|
||||
pub escalation_queue: Arc<EscalationQueue>, // root-shared for user interaction
|
||||
pub todo_list: Option<TodoList>, // present only when auto_continue: true
|
||||
pub self_agent_id: String,
|
||||
pub parent_supervisor: Option<Arc<Supervisor>>,
|
||||
pub current_depth: usize,
|
||||
pub auto_continue_count: usize,
|
||||
}
|
||||
```
|
||||
|
||||
Three things to notice in this shape:
|
||||
|
||||
1. **`todo_list: Option<TodoList>`** — today's code eagerly allocates a `TodoList::default()` for every agent, but the todo tools and auto-continuation prompts are only exposed when `auto_continue: true`. Switching to `Option` lets us skip the allocation entirely for agents that don't opt in, and makes the "is this agent using todos?" question a type-level check rather than a config lookup. The semantics users see are unchanged.
|
||||
|
||||
2. **`rag: Option<Arc<Rag>>`** — agent RAG is an `Arc`, not an owned `Rag`. Today, every sub-agent of the same type independently calls `Rag::load()` and deserializes its own copy of the embeddings from disk. That means a parent spawning 4 parallel siblings of the same agent type pays the deserialize cost 5 times and holds 5 copies of identical vectors in memory. Sharing via `Arc` fixes both.
|
||||
|
||||
3. **No `mcp_runtime`** — MCP lives on `ToolScope`, not here. Agents get their tools through `ctx.tool_scope` like everyone else.
|
||||
|
||||
An `AgentRuntime` goes into `ctx.agent_runtime` **in addition to** the `ToolScope` — they're orthogonal concerns. An agent has both a `ToolScope` (its resolved tools + MCP) and an `AgentRuntime` (its supervision/messaging/RAG/todo state).
|
||||
|
||||
### RAG Cache (unified for standalone + agent RAG)
|
||||
|
||||
RAG in Loki comes from exactly two places today:
|
||||
|
||||
1. **Standalone RAG**, attached via the `.rag <name>` REPL command or the equivalent API call. Persists across role/session switches. Lives in `ctx.rag: Option<Arc<Rag>>`.
|
||||
2. **Agent RAG**, loaded from the `documents:` field of an agent's `config.yaml` when the agent is activated. Lives in `ctx.agent_runtime.rag: Option<Arc<Rag>>` for the agent's lifetime.
|
||||
|
||||
Roles and Sessions do **not** own RAG — the `Role` and `Session` structs have no RAG fields. This is true today and the refactor preserves it.
|
||||
|
||||
Since both standalone and agent RAGs are ultimately `Arc<Rag>` instances loaded from disk YAML files, a single cache can serve both. `AppState` holds one:
|
||||
|
||||
```rust
|
||||
pub struct AppState {
|
||||
pub config: Arc<AppConfig>,
|
||||
pub vault: GlobalVault,
|
||||
pub mcp_factory: Arc<McpFactory>,
|
||||
pub rag_cache: Arc<RagCache>,
|
||||
}
|
||||
|
||||
pub struct RagCache {
|
||||
entries: RwLock<HashMap<RagKey, Weak<Rag>>>,
|
||||
}
|
||||
|
||||
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
|
||||
pub enum RagKey {
|
||||
Named(String), // standalone RAG: rags/<name>.yaml
|
||||
Agent(String), // agent-owned RAG: agents/<name>/rag.yaml
|
||||
}
|
||||
|
||||
impl RagCache {
|
||||
/// Returns a shared Arc<Rag> for the given key. If another scope
|
||||
/// holds a live reference, returns that exact Arc. Otherwise loads
|
||||
/// from disk, stores a Weak for future sharing, returns a fresh Arc.
|
||||
/// Concurrent first-load is serialized via per-key locks.
|
||||
pub async fn load(&self, key: &RagKey) -> Result<Option<Arc<Rag>>>;
|
||||
|
||||
/// Invalidates the cache entry. Called by rebuild_rag / edit_rag_docs
|
||||
/// so the next load reads from disk. Does NOT affect existing Arc
|
||||
/// holders — they keep their old Rag until they drop it.
|
||||
pub fn invalidate(&self, key: &RagKey);
|
||||
}
|
||||
```
|
||||
|
||||
Why the enum: agent RAGs and standalone RAGs live at different paths on disk and could theoretically have overlapping names (an agent called "docs" and a standalone rag called "docs"). Keeping them in distinct namespaces avoids collisions and keeps the cache lookups unambiguous.
|
||||
|
||||
Why `Weak`: we don't want the cache to pin RAGs in memory forever. If no scope holds an `Arc<Rag>` for key X, the `Weak` becomes dangling, and the next `load()` reads fresh. "Share while in use, drop when nobody needs it" without a manual reaper.
|
||||
|
||||
**Concurrency wrinkle:** if two consumers request the same key at exactly the same time and neither finds a live entry, both will race to load from disk. Fix with per-key `tokio::sync::Mutex` or `once_cell::sync::OnceCell<Arc<Rag>>` — the second caller blocks briefly and receives the shared Arc.
|
||||
|
||||
**Invalidation:** both `rebuild_rag` and `edit_rag_docs` call `invalidate()` with the key corresponding to whichever RAG was being operated on (standalone or agent-owned). Existing `Arc<Rag>` holders keep their old reference until they drop it — which is the correct behavior, since you don't want a running request to suddenly see a partially-rebuilt index mid-execution.
|
||||
|
||||
### Where RAG attaches in `RequestContext`
|
||||
|
||||
Two distinct slots, two distinct purposes, one shared cache:
|
||||
|
||||
```rust
|
||||
pub struct RequestContext {
|
||||
// ... other fields ...
|
||||
pub rag: Option<Arc<Rag>>, // standalone RAG from `.rag <name>` or API equivalent
|
||||
pub agent_runtime: Option<AgentRuntime>, // contains its own `rag: Option<Arc<Rag>>` when agent owns one
|
||||
}
|
||||
```
|
||||
|
||||
When resolving "what RAG should this request use", the engine checks `ctx.agent_runtime.rag` first (agent-owned takes precedence during an agent turn), then falls back to `ctx.rag` (the user's standalone selection). If neither is set, no RAG context is injected into the prompt.
|
||||
|
||||
**Behavior preservation:** today's code uses a single `Config.rag` slot that's overwritten by whichever action touched it most recently — `use_rag` and `use_agent` both clobber it. Exiting an agent leaves the overwrite in place; the user has to re-run `.rag <name>` to restore their standalone RAG. The new two-slot design gives us the opportunity to fix that (save `ctx.rag` into the `AgentRuntime` on activation, restore on exit) but **Phase 1 preserves today's clobber-and-forget behavior** to keep the refactor mechanical. The improvement is flagged as a Phase 2+ enhancement.
|
||||
|
||||
### Sub-agent spawning
|
||||
|
||||
Each child agent gets its **own** `RequestContext` forked from the parent's `Arc<AppState>`. That means each child gets:
|
||||
|
||||
- Its own `ToolScope` built from its agent.yaml's `mcp_servers` + `global_tools`, produced by `McpFactory`
|
||||
- Its own `AgentRuntime` with a fresh supervisor, a fresh inbox, depth = parent.depth + 1
|
||||
- A `parent_supervisor` reference pointing back at the parent's supervisor for escalation/messaging
|
||||
- A shared `root_escalation_queue` cloned by `Arc` from the parent's runtime (one queue, one human at the root)
|
||||
- A shared `rag: Option<Arc<Rag>>` via `AppState.rag_cache.load(RagKey::Agent(child_agent_name))` — if the parent already holds a strong ref, the cache returns the same Arc and no disk I/O happens
|
||||
|
||||
Because each child has its own `ToolScope`, **concurrent sub-agents can run with different MCP server sets simultaneously** — something today's singleton registry cannot do. The `McpFactory` pool handles overlap: if child A and child B both need `github` with matching keys, they share one `github` process via `Arc`.
|
||||
|
||||
Because sibling sub-agents of the same type share one `Arc<Rag>` through the unified cache, **at most one copy of a RAG's embeddings is resident per (standalone or agent) name per process**, regardless of how many siblings or concurrent API sessions reference the same name. The embeddings stay warm for as long as any holder keeps a reference and are dropped once the last holder releases it; the next `load()` after that reads from disk again.
|
||||
|
||||
### MCP Lifecycle Policy (pooling and idle timeout)
|
||||
|
||||
`McpFactory` needs an eviction policy so long-running server processes don't accumulate idle MCP subprocesses indefinitely. The design is a two-layer scheme:
|
||||
|
||||
```rust
|
||||
pub struct McpFactory {
|
||||
active: Mutex<HashMap<McpServerKey, Weak<McpServerHandle>>>,
|
||||
idle: Mutex<HashMap<McpServerKey, IdleEntry>>,
|
||||
config: McpFactoryConfig,
|
||||
}
|
||||
|
||||
struct IdleEntry {
|
||||
handle: Arc<McpServerHandle>,
|
||||
idle_since: Instant,
|
||||
}
|
||||
|
||||
pub struct McpFactoryConfig {
|
||||
pub idle_timeout: Duration, // how long idle servers stay warm
|
||||
pub cleanup_interval: Duration, // how often the reaper runs
|
||||
pub max_idle_servers: Option<usize>, // LRU cap (None = unbounded)
|
||||
}
|
||||
```
|
||||
|
||||
**Layer 1 — active references via Arc.** Scopes currently using a server hold `Arc<McpServerHandle>`. Standard Rust refcounting. Any live reference keeps the process running, regardless of timers.
|
||||
|
||||
**Layer 2 — idle grace period via LRU eviction.** When the last active scope drops its Arc, a custom `Drop` impl on the handle moves it into the idle pool with a timestamp instead of tearing it down immediately. A background reaper task wakes on `cleanup_interval` and evicts entries whose idle time exceeds `idle_timeout`, calling `cancel().await` on the actual MCP subprocess.
|
||||
|
||||
Acquisition order on every scope transition:
|
||||
|
||||
```rust
|
||||
impl McpFactory {
|
||||
pub async fn acquire(&self, key: &McpServerKey) -> Result<Arc<McpServerHandle>> {
|
||||
// 1. Someone else is actively using it — share.
|
||||
if let Some(arc) = self.try_reuse_active(key) { return Ok(arc); }
|
||||
// 2. Sitting in the idle pool — revive it, zero startup cost.
|
||||
if let Some(arc) = self.revive_from_idle(key) { return Ok(arc); }
|
||||
// 3. Neither — spawn fresh.
|
||||
self.spawn_new(key).await
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Sensible defaults by deployment mode:**
|
||||
|
||||
| Mode | `idle_timeout` default | Rationale |
|
||||
|---|---|---|
|
||||
| CLI one-shot | N/A (process exits, everything dies) | No pooling needed |
|
||||
| REPL | `0` (immediate drop) | Matches today's reactive reinit behavior |
|
||||
| API server | `5 minutes` | Absorbs burst traffic, caps stale resources |
|
||||
|
||||
These are defaults, not mandates. Users should be able to override globally and per-server:
|
||||
|
||||
```yaml
|
||||
# config.yaml
|
||||
mcp_pool:
|
||||
idle_timeout_seconds: 300
|
||||
cleanup_interval_seconds: 30
|
||||
max_idle_servers: 50
|
||||
```
|
||||
|
||||
```json
|
||||
// functions/mcp.json
|
||||
{
|
||||
"github": { "command": "...", "idle_timeout_seconds": 900 },
|
||||
"filesystem": { "command": "...", "idle_timeout_seconds": 60 }
|
||||
}
|
||||
```
|
||||
|
||||
**Optional health checks.** While a handle sits in the idle pool, the reaper can optionally ping it via `tools/list`. If a server has crashed or become unresponsive, it's evicted immediately. Without this, a stale idle entry would make the first real request after revival fail. Worth implementing, but not strictly required for v1.
|
||||
|
||||
**Graceful shutdown.** On server shutdown, drain active scopes (let in-flight LLM calls complete or cancel via token), then tear down the idle pool. Give it a bounded drain timeout before force-killing. Especially important for MCP servers holding external transactions or locks.
|
||||
|
||||
**Per-tenant isolation.** `McpServerKey` includes env vars in its hash, so two tenants with different `GITHUB_TOKEN`s get distinct keys and therefore distinct processes. Zero cross-tenant leakage by construction.
|
||||
|
||||
### Phasing
|
||||
|
||||
Phase 1 ships `McpFactory` without the pool — just `acquire()` that always spawns fresh, `Drop` that always tears down. This is correct but inefficient. Phase 5 adds the idle pool, reaper task, health checks, and configuration knobs. Splitting it this way keeps Phase 1 focused on the state split (its actual goal) and Phase 5 focused on the pooling optimization (where it has a clear performance target: warm-path MCP tool calls should have near-zero overhead).
|
||||
|
||||
### Lifecycle summary
|
||||
|
||||
| Frontend | ToolScope lifetime | AgentRuntime lifetime | RAG lifetime |
|
||||
|---|---|---|---|
|
||||
| **CLI one-shot** | One invocation | One invocation (if `--agent`) | One invocation |
|
||||
| **REPL** | Long-lived, rebuilt on `.role` / `.session` / `.agent` / `.set enabled_mcp_servers` | Lives from `.agent X` until `.exit agent` | Standalone RAG set via `.rag <name>` persists across role/session switches; agent RAG lives as long as the `AgentRuntime`; both come from the shared `RagCache` |
|
||||
| **API session** | Lives while session is "warm"; rebuilt when client changes role/session/agent | Lives while session is "warm" | Same as REPL; `RagCache` shares `Arc<Rag>`s across concurrent sessions using the same RAG name |
|
||||
| **Sub-agent (any frontend)** | Lives for the sub-agent task | Lives for the sub-agent task | Shared via `Arc` with parent and siblings through `RagCache` |
|
||||
|
||||
---
|
||||
|
||||
## 6. Cross-Cutting Concerns
|
||||
|
||||
| Concern | Pattern | CLI | REPL | API |
|
||||
|---|---|---|---|---|
|
||||
| **Errors** | Core returns `CoreError` enum; frontends map | `render_error()` to stderr | `render_error()` to terminal | `{ "error": { "code": "...", "message": "..." } }` JSON |
|
||||
| **Cancellation** | `CancellationToken` in `RequestContext` | Ctrl-C handler triggers token | Ctrl-C triggers token | Client disconnect / request timeout triggers token |
|
||||
| **Auth** | Middleware sets `AuthContext` on `RequestContext` | None (local user) | None (local user) | Bearer token / API key validated by axum middleware |
|
||||
| **Tracing** | `tracing::Span` per request with request_id, session_id, mode | Log to file | Log to file | Log to file + structured JSON logs |
|
||||
|
||||
### Error type
|
||||
|
||||
```rust
|
||||
pub enum CoreError {
|
||||
InvalidRequest { msg: String },
|
||||
NotFound { msg: String },
|
||||
Unauthorized { msg: String },
|
||||
Forbidden { msg: String },
|
||||
Timeout { msg: String },
|
||||
Cancelled,
|
||||
Provider { msg: String },
|
||||
Tool { msg: String },
|
||||
Io { msg: String },
|
||||
}
|
||||
```
|
||||
|
||||
### Cancellation
|
||||
|
||||
Use a `CancellationToken` in `RequestContext`. The core checks it via `tokio::select!` around long awaits (LLM stream, tool execution, MCP IO).
|
||||
|
||||
- CLI/REPL: Ctrl-C handler triggers token.
|
||||
- API: axum provides disconnect detection for SSE/streaming; when the client drops, cancel the token.
|
||||
- Timeouts: set deadline and translate to token cancellation.
|
||||
|
||||
### Auth (API-only initially)
|
||||
|
||||
axum middleware authenticates (API key / bearer token), builds `AuthContext`, stores in request extensions, then the handler copies it into `RequestContext`. Core enforces policy only when executing sensitive operations (tools, filesystem, vault).
|
||||
|
||||
```rust
|
||||
pub struct AuthContext {
|
||||
pub subject: String,
|
||||
pub scopes: Vec<String>,
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. API Endpoint Design
|
||||
|
||||
```
|
||||
POST /v1/completions # one-shot prompt (no session)
|
||||
POST /v1/sessions # create session
|
||||
POST /v1/sessions/:id/completions # prompt within session
|
||||
DELETE /v1/sessions/:id # close session
|
||||
POST /v1/sessions/:id/agent # activate agent on session
|
||||
DELETE /v1/sessions/:id/agent # deactivate agent
|
||||
POST /v1/sessions/:id/role # set role on session
|
||||
POST /v1/sessions/:id/rag # attach RAG to session
|
||||
GET /v1/models # list available models
|
||||
GET /v1/agents # list available agents
|
||||
GET /v1/roles # list available roles
|
||||
```
|
||||
|
||||
### Request body for completions
|
||||
|
||||
```json
|
||||
{
|
||||
"prompt": "Explain TCP handshake",
|
||||
"model": "openai:gpt-4o",
|
||||
"stream": true,
|
||||
"files": ["path/to/doc.pdf"],
|
||||
"role": "explain"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Implementation Phases
|
||||
|
||||
| Phase | Scope | Effort | Risk |
|
||||
|---|---|---|---|
|
||||
| **Phase 1: Extract AppState** | Split Config into AppState (global) + per-request state. Keep CLI/REPL working exactly as before. No API yet. | ~1-2 weeks | Medium — touching every file that uses GlobalConfig |
|
||||
| **Phase 2: Introduce Engine + Emitter** | Unify `start_directive()` and `ask()` behind `Engine::run()`. Create `TerminalEmitter`. CLI/REPL now call Engine. | ~1 week | Low — refactoring existing paths |
|
||||
| **Phase 3: SessionStore abstraction** | Extract session persistence behind trait. Add UUID-based sessions. CLI/REPL still use name-based aliases. | ~3-5 days | Low |
|
||||
| **Phase 4: REST API server** | Add `--serve` flag. axum handlers that create `RequestContext`, call `Engine::run()`, return JSON/SSE. Basic auth middleware. | ~1-2 weeks | Low — clean layer on top of Engine |
|
||||
| **Phase 5: Agent isolation** | Move agent runtime into `RequestContext`. `AgentFactory` creates isolated runtimes per session. | ~1 week | Medium — MCP server lifecycle mgmt |
|
||||
| **Phase 6: Production hardening** | Rate limiting, proper auth, request validation, health checks, graceful shutdown, deployment configs. | ~1 week | Low |
|
||||
|
||||
**Total estimate: ~5-7 weeks** for a production-ready v1.
|
||||
|
||||
### Key Risk: Phase 1
|
||||
|
||||
Phase 1 is the hardest and riskiest — it touches nearly every module. The mitigation is to do it incrementally: first add `AppState` alongside existing `Config`, then migrate callers module by module, then remove the old `GlobalConfig` type alias. Tests should pass at every intermediate step.
|
||||
|
||||
---
|
||||
|
||||
## Key Design Decisions & Trade-offs
|
||||
|
||||
1. **Eliminates the singleton mutation bottleneck**: concurrency becomes "multiple `RequestContext`s" rather than fighting over `RwLock<Config>`.
|
||||
2. **Preserves current behavior**: REPL can keep "state-changing commands" by mutating its own long-lived `RequestContext` + persisted `SessionState`.
|
||||
3. **Streaming becomes portable**: terminal rendering, JSON, and SSE are just different `Emitter`s over the same event stream.
|
||||
4. **Agent/MCP isolation is explicit**: prevents cross-session conflicts by construction.
|
||||
|
||||
## Watch Out For
|
||||
|
||||
1. **Persisted vs in-memory drift**: decide which fields live in `SessionState` vs `ConversationState`; persist only what must survive process restarts.
|
||||
2. **Per-session concurrency semantics**: either serialize requests per session (simplest) or carefully merge message histories; v1 should serialize.
|
||||
3. **MCP process lifecycle**: if you keep MCP servers alive across requests, tie them to a session runtime and clean them up on session close/TTL.
|
||||
|
||||
## Future Considerations
|
||||
|
||||
1. Swap file store behind `SessionStore` with sqlite without changing core.
|
||||
2. Add a stable public API schema for events so clients can render rich tool-call UIs.
|
||||
3. Actor model (one tokio task per session receiving commands via mpsc) for simplified session+agent lifetime management.
|
||||
@@ -1,266 +0,0 @@
|
||||
# Roles
|
||||
When customizing the behavior of LLMs, we use roles to "constrain" the responses or behavior of the LLM to whatever
|
||||
purpose we desire.
|
||||
|
||||
Think of them kind of like a baby: That baby can grow up to do anything! Be a resume builder, teacher, engineer, etc.
|
||||
|
||||
The only difference is that with roles, we're explicitly telling the LLM what we want it to be. Also: the LLM is already
|
||||
grown up so we don't have to wait!
|
||||
|
||||

|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Role Definition](#role-definition)
|
||||
- [Metadata Header](#metadata-header)
|
||||
- [Instructions](#instructions)
|
||||
- [Special Case: Metadata Header Only](#special-case-metadata-header-only)
|
||||
- [Prompt Types](#prompt-types)
|
||||
- [Embedded Prompts](#embedded-prompts)
|
||||
- [System Prompts](#system-prompts)
|
||||
- [Few-Shot Prompt](#few-shot-prompt)
|
||||
- [Built-In Roles](#built-in-roles)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Role Definition
|
||||
Roles in Loki are Markdown files that live in the `roles` directory of your Loki configuration. Loki configuration
|
||||
locations vary between systems, so you can use the following command to find the location of your roles configuration
|
||||
directory:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'roles_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
All role configuration files have two parts: The metadata header, and the instructions.
|
||||
|
||||
**Example:** An expert resume builder role that specializes in helping users build and refine their resumes.
|
||||
```markdown
|
||||
---
|
||||
# This is the metadata header
|
||||
name: resume-builder
|
||||
model: openai:gpt-4o
|
||||
temperature: 0.2
|
||||
top_p: 0
|
||||
enabled_tools: fs_ls,fs_cat
|
||||
enabled_mcp_servers: github
|
||||
---
|
||||
<!-- This is the instructions -->
|
||||
You are an expert resume builder.
|
||||
```
|
||||
|
||||
To see a full example configuration for a role, refer to the [example role configuration](../config.role.example.md)
|
||||
file in the root of the repo.
|
||||
|
||||
### Metadata Header
|
||||
The metadata header in all role configuration files is completely optional. It lets you define role-specific settings
|
||||
for each role that make the model work the way you want for your role. This includes things like forcing your role to
|
||||
always use a specific model, set of tools, and tailoring the hyperparameters of the model for your role.
|
||||
|
||||
The header consists of a YAML-formatted list of settings that let you customize the model behavior for your role. These
|
||||
settings sit between `---` separators in your role configuration so Loki knows they're not part of the instructions you
|
||||
want to feed the model.
|
||||
|
||||
The following table lists the available configuration settings and their default values (if undefined):
|
||||
|
||||
| Setting | Default | Description |
|
||||
|-----------------------|----------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
|
||||
| `name` | The name of the role markdown file | The name of the role |
|
||||
| `model` | Default configured model or currently in-use model (REPL mode) | The preferred model to use with this role |
|
||||
| `temperature` | Default `temperature` for the preferred model | Controls the creativity and randomness of the model's responses |
|
||||
| `top_p` | Default `top_p` for the preferred model | Alternative way to control the model's output diversity, affecting the <br>probability distribution of tokens |
|
||||
| `enabled_tools` | Global setting for `enabled_tools` | The tools that this role utilizes |
|
||||
| `enabled_mcp_servers` | Global setting for `enabled_mcp_servers` | The MCP servers that this role utilizes |
|
||||
| `prompt` | `null` | See [Prompt Types](#prompt-types) for detailed usage |
|
||||
|
||||
### Instructions
|
||||
The instructions for a role are what you use to tell the model how you want it to behave. This typically consists of one
|
||||
or two sentences, but can be more. To see some examples, look at the [built-in roles](../assets/roles) to see how they are defined.
|
||||
|
||||
**Pro-Tip:** The struggle to create good instructions for a role (or any other kind of instructions for your model) is
|
||||
so common that Loki comes with a role to help you write instructions for roles! Simply invoke it to start
|
||||
creating a role with the `create-prompt` role:
|
||||
|
||||
```shell
|
||||
loki -r create-prompt
|
||||
```
|
||||
|
||||
### Special Case: Metadata Header Only
|
||||
When instructions are defined, the metadata header is optional. However, sometimes we want a way to enable specific
|
||||
functions or MCP servers when prompting different models. In this situation, you need only specify the metadata header
|
||||
to just enable these settings as you like.
|
||||
|
||||
**Example: Role that enables all filesystem tools**
|
||||
`roles/filesystem-functions.md`
|
||||
```markdown
|
||||
---
|
||||
enabled_tools: fs_ls,fs_cat,fs_mkdir,fs_patch,fs_write
|
||||
---
|
||||
```
|
||||
|
||||
**Example: Role that enables the GitHub MCP server with the ollama:deepseek-r1 model**
|
||||
`roles/github.md`
|
||||
```markdown
|
||||
---
|
||||
model: ollama:deepseek-r1
|
||||
enabled_mcp_servers: github
|
||||
---
|
||||
```
|
||||
|
||||
For more examples of this special use case of roles, you can look at the role configuration files for the following
|
||||
built-in roles:
|
||||
|
||||
* [explain-shell](../assets/roles/explain-shell.md) - Explains cryptic shell commands in natural language
|
||||
* [functions](../assets/roles/functions.md) - Enables all available functions (i.e. all globally `visible_functions`)
|
||||
* [mcp-servers](../assets/roles/mcp-servers.md) - Enables all available MCP servers
|
||||
|
||||
## Special Variables
|
||||
Loki has a set of built-in special variables that it will inject into your role's instructions if it finds them in the
|
||||
`{{variable_name}}` syntax. The available special variables are listed below:
|
||||
|
||||
| Name | Description | Example |
|
||||
|-----------------|-----------------------------------------------------------|----------------------------|
|
||||
| `__os__` | Operating system name | `linux` |
|
||||
| `__os_family__` | Operating system family | `unix` |
|
||||
| `__arch__` | System architecture | `x86_64` |
|
||||
| `__shell__` | The current user's default shell | `bash` |
|
||||
| `__locale__` | The current user's preferred language and region settings | `en-US` |
|
||||
| `__now__` | Current timestamp in ISO 8601 format | `2025-11-07T10:15:44.268Z` |
|
||||
| `__cwd__` | The current working directory | `/tmp` |
|
||||
|
||||
## Prompt Types
|
||||
In Loki, you can also create roles with pre-configured prompts so you can template prompts for your use cases. This is
|
||||
the purpose of the `prompt` field in the role's metadata header.
|
||||
|
||||
There are three types of prompts you can create:
|
||||
|
||||
### Embedded Prompts
|
||||
Embedded prompts let you create templated prompts for any input given to it. They are ideal for concise, input-driven
|
||||
replies from the model. The input that users pass to Loki is injected into your prompt via a `__INPUT__` placeholder in
|
||||
your prompt.
|
||||
|
||||
**Example: Role to convert the given input to TOML**
|
||||
`roles/convert-to-toml.md`
|
||||
```markdown
|
||||
---
|
||||
prompt: convert __INPUT__ to TOML
|
||||
---
|
||||
Convert the given input to TOML format. Exclude any markdown formatting or code blocks and only output code.
|
||||
```
|
||||
Usage:
|
||||
```shell
|
||||
$ loki -r convert-to-toml '{"test":"hi me"}'
|
||||
test = "hi me"
|
||||
```
|
||||
|
||||
Without the instructions (i.e. the prompt after the metadata header), this role would simply generate the following
|
||||
message for the model:
|
||||
|
||||
```json
|
||||
[
|
||||
{"role": "user", "content": "convert {\"test\":\"hi me\"} to TOML"}
|
||||
]
|
||||
```
|
||||
|
||||
### System Prompts
|
||||
System prompts let you set the general context of the LLM's behavior. This is no different than other system prompts you
|
||||
define in ChatGPT, Claude, Open WebUI, etc.
|
||||
|
||||
They are essentially Embedded Prompts without an `__INPUT__` placeholder.
|
||||
|
||||
**Example: Role to convert all input words to emoji**
|
||||
`roles/emoji.md`
|
||||
```markdown
|
||||
---
|
||||
prompt: convert my words to emojis
|
||||
---
|
||||
Convert all given input words into emojis
|
||||
```
|
||||
Usage:
|
||||
```shell
|
||||
$ loki -r emoji music joy
|
||||
🎵 😊
|
||||
```
|
||||
|
||||
Without the instructions (i.e. the prompt after the metadata header), this role would simply generate the following
|
||||
messages for the model:
|
||||
|
||||
```json
|
||||
[
|
||||
{"role": "system", "content": "convert my words to emojis"},
|
||||
{"role": "user", "content": "music joy"}
|
||||
]
|
||||
```
|
||||
|
||||
### Few-Shot Prompt
|
||||
[Few-Shot prompting](https://www.promptingguide.ai/techniques/fewshot) is a technique to enable in-context learning for LLMs by providing examples in the prompt to steer
|
||||
the model to better performance. In Loki, this is done as an extension of System Prompts.
|
||||
|
||||
**Example: Role to output code only**
|
||||
`roles/code-generator.md`
|
||||
~~~markdown
|
||||
---
|
||||
prompt: |-
|
||||
Output code only without comments or explanations.
|
||||
### INPUT:
|
||||
async sleep in js
|
||||
### OUTPUT:
|
||||
```javascript
|
||||
async function timeout(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
```
|
||||
---
|
||||
Output code only in response to the user's request
|
||||
~~~
|
||||
Usage:
|
||||
~~~shell
|
||||
$ loki -r code-generator python add two numbers
|
||||
```python
|
||||
# Function to add two numbers
|
||||
def add_numbers(num1, num2):
|
||||
return num1 + num2
|
||||
|
||||
# Example usage
|
||||
number1 = 5
|
||||
number2 = 7
|
||||
|
||||
result = add_numbers(number1, number2)
|
||||
print(f"The sum of {number1} and {number2} is {result}.")
|
||||
```
|
||||
~~~
|
||||
|
||||
Without the instructions (i.e. the prompt after the metadata header), this role would simply generate the following
|
||||
messages for the model:
|
||||
|
||||
```json
|
||||
[
|
||||
{"role": "system", "content": "Output code only without comments or explanations."},
|
||||
{"role": "user", "content": "async sleep in js"},
|
||||
{"role": "assistant", "content": "```javascript\nasync function timeout(ms) {\n return new Promise(resolve => setTimeout(resolve, ms));\n}\n```"},
|
||||
{"role": "user", "content": "python add two numbers"}
|
||||
]
|
||||
```
|
||||
|
||||
## Built-In Roles
|
||||
Loki comes packaged with some useful built-in roles. These also make good references if you're learning
|
||||
how to make your own roles, so be sure to check out the [built-in role definitions](../assets/roles) if you're looking
|
||||
for more examples.
|
||||
|
||||
* `code`: Generates code (used by `loki -c`)
|
||||
* `create-prompt`: Creates a prompt based on the user's input
|
||||
* `create-title`: Creates 3-6 word titles based on the user's input
|
||||
* `explain-shell`: Explains shell commands
|
||||
* `functions`: Enable all globally-visible functions
|
||||
* `github`: Interact with GitHub using natural language
|
||||
* `mcp-servers`: Enables all MCP servers
|
||||
* `repo-analyzer`: Ask questions about the code repository in the current working directory
|
||||
* `shell`: Convert natural language into shell commands (used by `loki -e`)
|
||||
* `slack`: Interact with Slack using natural language
|
||||
|
||||
## Temporary Roles
|
||||
Loki also enables you to create temporary roles that will be discarded once you're finished with them. This is done via
|
||||
the `.prompt/--prompt` command:
|
||||
|
||||

|
||||
@@ -1,44 +0,0 @@
|
||||
# Sessions
|
||||
By default, Loki does not send back all previous messages in a conversation to the model. This means that each time you
|
||||
query a model, it's essentially a one-off. However, Loki does support chat-like conversations with LLMs via its
|
||||
`sessions` mechanism.
|
||||
|
||||
Sessions in Loki enable the familiar conversational interactions with LLMs. This means you can reference previous
|
||||
answers and ask follow-up questions and the model will know what you're referring to.
|
||||
|
||||
Sessions can be temporary, or can be saved so you can continue conversations at a later time.
|
||||
|
||||
Saved sessions are stored in the `sessions` subdirectory of the Loki configuration directory. The location of the
|
||||
`sessions` directory varies by system, so you can use the following command to find the `sessions` directory if you need
|
||||
it:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'sessions_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Usage
|
||||
When you use a session in Loki, you can either persist it or discard it once you're done. Sessions you discard are then
|
||||
just considered 'temporary' sessions.
|
||||
|
||||

|
||||
|
||||
Sessions you persist and then load again later will inherit the same configuration as was used during the last usage of
|
||||
that session. That is to say, if you had certain tools or MCP servers enabled when you were last in that session, they
|
||||
will be available again when you continue that session.
|
||||
|
||||
## Configuration
|
||||
Session behavior can be configured from the global Loki configuration file. The location of this file varies between
|
||||
systems so you can use the following command to locate it on your system:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_file' | awk '{print $2}'
|
||||
```
|
||||
|
||||
The following settings are available to customize the default behavior of sessions globally:
|
||||
|
||||
| Setting | Description |
|
||||
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `save_session` | Controls the persistence of the session. <br><ul><li>If `true`, then any time you're in a session, changes will auto-save unless explicitly defined otherwise.</li> <li>If `false`, then any time you're in a session, changes will not auto-save unless explicitly specified otherwise.</li><li>If `null`, Loki will always prompt the user for what to do.</li></ul> |
|
||||
| `compression_threshold` | Defines the token count threshold at which Loki will compress the session to save on the context length |
|
||||
| `summarization_prompt` | This is the prompt that is used to compress the session up to a given point when compression is triggered |
|
||||
| `summary_context_prompt` | This is the prompt that's used to add the summarized conversation generated by the `summarization_prompt` as context to the model |
|
||||
@@ -1,104 +0,0 @@
|
||||
# Loki Shell Integrations
|
||||
Loki supports the following integrations with a handful of shell environments to enhance user experience and streamline workflows.
|
||||
|
||||
## Tab Completions
|
||||
### Dynamic
|
||||
Dynamic tab completions are supported by Loki to assist users in quickly completing commands, options, and arguments.
|
||||
You can enable it by using the corresponding command for your shell. To enable dynamic tab completions for every
|
||||
shell session (i.e. persistently), add the corresponding command to your shell's configuration file as indicated:
|
||||
|
||||
```shell
|
||||
# Bash
|
||||
# (add to: `~/.bashrc`)
|
||||
source <(COMPLETE=bash loki)
|
||||
|
||||
# Zsh
|
||||
# (add to: `~/.zshrc`)
|
||||
source <(COMPLETE=zsh loki)
|
||||
|
||||
# Fish
|
||||
# (add to: `~/.config/fish/config.fish`)
|
||||
source <(COMPLETE=fish loki | psub)
|
||||
|
||||
# Elvish
|
||||
# (add to: `~/.elvish/rc.elv`)
|
||||
eval (E:COMPLETE=elvish loki | slurp)
|
||||
|
||||
# PowerShell
|
||||
# (add to: `$PROFILE`)
|
||||
$env:COMPLETE = "powershell"
|
||||
loki | Out-String | Invoke-Expression
|
||||
```
|
||||
|
||||
At the time of writing, `nushell` is not yet fully supported for dynamic tab completions due to limitations
|
||||
in the [`clap`](https://crates.io/crates/clap) crate. However, `nushell` support is being actively developed, and will
|
||||
be added in a future release.
|
||||
|
||||
Progress on this feature can be tracked in the following issue: [Clap Issue #5840](https://github.com/clap-rs/clap/issues/5840).
|
||||
|
||||
### Static
|
||||
Static tab completions (i.e. pre-generated completion scripts that are not context aware) can also be generated using the
|
||||
`--completions` flag. You can enable static tab completions by using the corresponding commands for your shell. These commands
|
||||
will enable them for every shell session (i.e. persistently):
|
||||
|
||||
```shell
|
||||
# Bash
|
||||
echo 'source <(loki --completions bash)' >> ~/.bashrc
|
||||
|
||||
# Zsh
|
||||
echo 'source <(loki --completions zsh)' >> ~/.zshrc
|
||||
|
||||
# Fish
|
||||
echo 'loki --completions fish | source' >> ~/.config/fish/config.fish
|
||||
|
||||
# Elvish
|
||||
echo 'eval (loki --completions elvish | slurp)' >> ~/.elvish/rc.elv
|
||||
|
||||
# Nushell
|
||||
[[ -d ~/.config/nushell/completions ]] || mkdir -p ~/.config/nushell/completions
|
||||
loki --completions nushell | save -f ~/.config/nushell/completions/loki.nu
|
||||
echo 'use ~/.config/nushell/completions/loki.nu *' >> ~/.config/nushell/config.nu
|
||||
|
||||
# PowerShell
|
||||
Add-content $PROFILE "loki --completions powershell | Out-String | Invoke-Expression"
|
||||
```
|
||||
|
||||
## Shell Assistant
|
||||
Loki has an `-e,--execute` flag that allows users to run natural language commands directly from the CLI. It accepts
|
||||
natural language input and translates it into executable shell commands.
|
||||
|
||||

|
||||
|
||||
## Intelligent Command Completions
|
||||
Loki also provides shell scripts that bind `Alt-e` to `loki -e "<current command line>"`, allowing users to generate
|
||||
commands from natural text directly without invoking the CLI.
|
||||
|
||||
For example:
|
||||
|
||||
```shell
|
||||
$ find all typescript files with more than 100 lines<Alt-e>
|
||||
# Gets replaced with
|
||||
$ find . -name '*.ts' -type f -exec awk 'NR>100{exit 1}' {} \; -print
|
||||
```
|
||||
|
||||
To use the CLI helper, add the content of the appropriate integration script for your shell to your shell configuration file:
|
||||
* [Bash Integration](../scripts/shell-integration/integration.bash) (add to: `~/.bashrc`)
|
||||
* [Zsh Integration](../scripts/shell-integration/integration.zsh) (add to: `~/.zshrc`)
|
||||
* [Elvish Integration](../scripts/shell-integration/integration.elv) (add to: `~/.elvish/rc.elv`)
|
||||
* [Fish Integration](../scripts/shell-integration/integration.fish) (add to: `~/.config/fish/config.fish`)
|
||||
* [Nushell Integration](../scripts/shell-integration/integration.nu) (add to: `~/.config/nushell/config.nu`)
|
||||
* [PowerShell Integration](../scripts/shell-integration/integration.ps1) (add to: `$PROFILE`)
|
||||
|
||||
## Explain Commands
|
||||
In addition to the Shell Assistant, Loki has a built-in role that explains shell commands to you to decipher their
|
||||
language. So if Loki generates a command and you're unsure what it does, simply pass it to the `explain-shell` role:
|
||||
|
||||

|
||||
|
||||
## Code Generation
|
||||
Users can also directly generate code snippets from natural language prompts using the `-c,--code` flag.
|
||||
|
||||

|
||||
|
||||
**Pro Tip:** Pipe the output of the code generation directly into `tee` to ensure the generated code is properly extracted
|
||||
from any generated Markdown (i.e. remove any triple backticks).
|
||||
@@ -1,71 +0,0 @@
|
||||
# Theming Loki
|
||||
Loki supports customizing the theme via a `.tmTheme` file.
|
||||
|
||||
## Setup
|
||||
To install a custom theme, download the `.tmTheme` file to the Loki configuration directory and name it `dark.tmTheme`
|
||||
or `light.tmTheme`. The location of the Loki configuration directory varies between systems, so you can use the
|
||||
following command to locate it on your system:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_dir' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Themes
|
||||
### 1337-Scheme
|
||||
https://raw.githubusercontent.com/MarkMichos/1337-Scheme/ca6a329cfda8307449d405b70f8fab34b8fd23b5/1337.tmTheme
|
||||

|
||||
|
||||
### Coldark
|
||||
https://raw.githubusercontent.com/ArmandPhilippot/coldark-bat/e44750b2a9629dd12d8ed3ad9fd50c77232170b9/Coldark-Dark.tmTheme
|
||||

|
||||
|
||||
### Dracula
|
||||
https://raw.githubusercontent.com/dracula/sublime/c2de0acf5af67042393cf70de68013153c043656/Dracula.tmTheme
|
||||

|
||||
|
||||
### GitHub
|
||||
https://raw.githubusercontent.com/AlexanderEkdahl/github-sublime-theme/508740b2430c3c3a9e785fc93ee1d7c6f233af53/GitHub.tmTheme
|
||||

|
||||
|
||||
### gruvbox
|
||||
#### Dark
|
||||
https://raw.githubusercontent.com/subnut/gruvbox-tmTheme/64c47250e54298b91e2cf8d401320009aba9f991/gruvbox-dark.tmTheme
|
||||

|
||||
|
||||
#### Light
|
||||
https://raw.githubusercontent.com/subnut/gruvbox-tmTheme/64c47250e54298b91e2cf8d401320009aba9f991/gruvbox-light.tmTheme
|
||||

|
||||
|
||||
### OneHalf
|
||||
#### Dark
|
||||
https://raw.githubusercontent.com/sonph/onehalf/141c775ace6b71992305f144a8ab68e9a8ca4a25/sublimetext/OneHalfDark.tmTheme
|
||||

|
||||
|
||||
#### Light
|
||||
https://raw.githubusercontent.com/sonph/onehalf/141c775ace6b71992305f144a8ab68e9a8ca4a25/sublimetext/OneHalfLight.tmTheme
|
||||

|
||||
|
||||
### Solarized
|
||||
#### Dark
|
||||
https://raw.githubusercontent.com/braver/Solarized/87e01090cf5fb821a234265b3138426ae84900e7/Solarized%20(dark).tmTheme
|
||||

|
||||
|
||||
#### Light
|
||||
https://raw.githubusercontent.com/braver/Solarized/87e01090cf5fb821a234265b3138426ae84900e7/Solarized%20(light).tmTheme
|
||||

|
||||
|
||||
### Sublime Snazzy
|
||||
https://raw.githubusercontent.com/greggb/sublime-snazzy/70343201f1d7539adbba3c79e2fe81c2559a0431/Sublime%20Snazzy.tmTheme
|
||||

|
||||
|
||||
### TwoDark
|
||||
https://raw.githubusercontent.com/erremauro/TwoDark/8e0f6fa5b59d196658a22288f519fd8320de4c87/TwoDark.tmTheme
|
||||

|
||||
|
||||
### Visual Studio Dark+
|
||||
https://raw.githubusercontent.com/vidann1/visual-studio-dark-plus/01ee1e8e0dc578f3b4e8c0dbb6aa0279b4a26a40/Visual%20Studio%20Dark%2B.tmTheme
|
||||

|
||||
|
||||
### Zenburn
|
||||
https://raw.githubusercontent.com/colinta/zenburn/86d4ee7a1f884851a1d21d66249687f527fced32/zenburn.tmTheme
|
||||

|
||||
@@ -1,250 +0,0 @@
|
||||
# Todo System
|
||||
|
||||
Loki's Todo System is a built-in task tracking feature designed to improve the reliability and effectiveness of LLM agents,
|
||||
especially smaller models. It provides structured task management that helps models:
|
||||
|
||||
- Break complex tasks into manageable steps
|
||||
- Track progress through multistep workflows
|
||||
- Automatically continue work until all tasks are complete
|
||||
- Avoid forgetting steps or losing context
|
||||
|
||||

|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Why Use the Todo System?](#why-use-the-todo-system)
|
||||
- [How It Works](#how-it-works)
|
||||
- [Configuration Options](#configuration-options)
|
||||
- [Available Tools](#available-tools)
|
||||
- [Auto-Continuation](#auto-continuation)
|
||||
- [Best Practices](#best-practices)
|
||||
- [Example Workflow](#example-workflow)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
<!--toc:end-->
|
||||
|
||||
## Why Use the Todo System?
|
||||
Smaller language models often struggle with:
|
||||
- **Context drift**: Forgetting earlier steps in a multi-step task
|
||||
- **Incomplete execution**: Stopping before all work is done
|
||||
- **Lack of structure**: Jumping between tasks without clear organization
|
||||
|
||||
The Loki Todo System addresses these issues by giving the model explicit tools to plan, track, and verify task completion.
|
||||
The system automatically prompts the model to continue when incomplete tasks remain, ensuring work gets finished.
|
||||
|
||||
## How It Works
|
||||
1. **Planning Phase**: The model initializes a todo list with a goal and adds individual tasks
|
||||
2. **Execution Phase**: The model works through tasks, marking each done immediately after completion
|
||||
3. **Continuation Phase**: If incomplete tasks remain, the system automatically prompts the model to continue
|
||||
4. **Completion**: When all tasks are marked done, the workflow ends naturally
|
||||
|
||||
The todo state is preserved across the conversation (and any compressions), and injected into continuation prompts,
|
||||
keeping the model focused on remaining work.
|
||||
|
||||
## Configuration Options
|
||||
The Todo System is configured per-agent in `<loki-config-dir>/agents/<agent-name>/config.yaml`:
|
||||
|
||||
| Setting | Type | Default | Description |
|
||||
|----------------------------|---------|-------------|---------------------------------------------------------------------------------|
|
||||
| `auto_continue`            | boolean | `false`     | Enable the Todo System for automatic continuation when incomplete todos remain  |
|
||||
| `max_auto_continues` | integer | `10` | Maximum number of automatic continuations before stopping |
|
||||
| `inject_todo_instructions` | boolean | `true` | Inject the default todo tool usage instructions into the agent's system prompt |
|
||||
| `continuation_prompt` | string | (see below) | Custom prompt used when auto-continuing |
|
||||
|
||||
### Example Configuration
|
||||
```yaml
|
||||
# agents/my-agent/config.yaml
|
||||
model: openai:gpt-4o
|
||||
auto_continue: true # Enable auto-continuation
|
||||
max_auto_continues: 15 # Allow up to 15 automatic continuations
|
||||
inject_todo_instructions: true # Include todo instructions in system prompt
|
||||
continuation_prompt: | # Optional: customize the continuation prompt
|
||||
[CONTINUE]
|
||||
You have unfinished tasks. Proceed with the next pending item.
|
||||
Do not explain; just execute.
|
||||
```
|
||||
|
||||
### Default Continuation Prompt
|
||||
If `continuation_prompt` is not specified, the following default is used:
|
||||
|
||||
```
|
||||
[SYSTEM REMINDER - TODO CONTINUATION]
|
||||
You have incomplete tasks in your todo list. Continue with the next pending item.
|
||||
Call tools immediately. Do not explain what you will do.
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
When `inject_todo_instructions` is enabled (the default), agents have access to five built-in todo management tools:
|
||||
|
||||
### `todo__init`
|
||||
Initialize a new todo list with a goal. Clears any existing todos.
|
||||
|
||||
**Parameters:**
|
||||
- `goal` (string, required): The overall goal to achieve when all todos are completed
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{"goal": "Refactor the authentication module"}
|
||||
```
|
||||
|
||||
### `todo__add`
|
||||
Add a new todo item to the list.
|
||||
|
||||
**Parameters:**
|
||||
- `task` (string, required): Description of the todo task
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{"task": "Extract password validation into separate function"}
|
||||
```
|
||||
|
||||
**Returns:** The assigned task ID
|
||||
|
||||
### `todo__done`
|
||||
Mark a todo item as done by its ID.
|
||||
|
||||
**Parameters:**
|
||||
- `id` (integer, required): The ID of the todo item to mark as done
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{"id": 1}
|
||||
```
|
||||
|
||||
### `todo__list`
|
||||
Display the current todo list with status of each item.
|
||||
|
||||
**Parameters:** None
|
||||
|
||||
**Returns:** The full todo list with goal, progress, and item statuses
|
||||
|
||||
### `todo__clear`
|
||||
Clear the entire todo list and reset the goal. Use when the current task has been canceled or invalidated.
|
||||
|
||||
**Parameters:** None
|
||||
|
||||
**Returns:** Confirmation that the todo list was cleared
|
||||
|
||||
### REPL Command: `.clear todo`
|
||||
You can also clear the todo list manually from the REPL by typing `.clear todo`. This is useful when:
|
||||
- You gave a custom response that changes or cancels the current task
|
||||
- The agent is stuck in auto-continuation with stale todos
|
||||
- You want to start fresh without leaving and re-entering the agent
|
||||
|
||||
**Note:** This command is only available when an agent with `auto_continue: true` is active. If the todo
|
||||
system isn't enabled for the current agent, the command will display an error message.
|
||||
|
||||
## Auto-Continuation
|
||||
When `auto_continue` is enabled, Loki automatically sends a continuation prompt if:
|
||||
|
||||
1. The agent's response completes (model stops generating)
|
||||
2. There are incomplete tasks in the todo list
|
||||
3. The continuation count hasn't exceeded `max_auto_continues`
|
||||
4. The response isn't identical to the previous continuation (prevents loops)
|
||||
|
||||
### What Gets Injected
|
||||
Each continuation prompt includes:
|
||||
- The continuation prompt text (default or custom)
|
||||
- The current todo list state showing:
|
||||
- The goal
|
||||
- Progress (e.g., "3/5 completed")
|
||||
- Each task with status (✓ done, ○ pending)
|
||||
|
||||
**Example continuation context:**
|
||||
```
|
||||
[SYSTEM REMINDER - TODO CONTINUATION]
|
||||
You have incomplete tasks in your todo list. Continue with the next pending item.
|
||||
Call tools immediately. Do not explain what you will do.
|
||||
|
||||
Goal: Refactor the authentication module
|
||||
Progress: 2/4 completed
|
||||
✓ 1. Extract password validation into separate function
|
||||
✓ 2. Add unit tests for password validation
|
||||
○ 3. Update login handler to use new validation
|
||||
○ 4. Update registration handler to use new validation
|
||||
```
|
||||
|
||||
### Visual Feedback
|
||||
During auto-continuation, you'll see a message in your terminal:
|
||||
```
|
||||
📋 Auto-continuing (3/10): 2 incomplete todo(s) remain
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### For Agent Developers
|
||||
1. **Enable for complex workflows**: Use `auto_continue: true` for agents that handle multistep tasks
|
||||
2. **Set reasonable limits**: Adjust `max_auto_continues` based on typical task complexity
|
||||
3. **Customize the prompt**: If your agent needs specific continuation behavior, set a custom `continuation_prompt`
|
||||
|
||||
### For Model Behavior
|
||||
The injected instructions tell models to:
|
||||
|
||||
1. **Always create a todo list before starting work**: This ensures planning happens upfront
|
||||
2. **Mark each task done immediately after finishing**: Don't batch completions
|
||||
3. **Add all planned steps before starting**: Complete planning before execution
|
||||
|
||||
### When to Use vs. Skip
|
||||
**Use the Todo System when:**
|
||||
- Tasks have 3+ distinct steps
|
||||
- The model might lose track of progress
|
||||
- You want guaranteed completion of all steps
|
||||
- Working with smaller/less capable models
|
||||
|
||||
**Skip the Todo System when:**
|
||||
- Single-step, simple tasks
|
||||
- Interactive Q&A sessions
|
||||
- The overhead of task tracking isn't worth it
|
||||
|
||||
## Example Workflow
|
||||
Here's how a typical todo-driven workflow looks:
|
||||
|
||||
**User prompt:** "Add input validation to the user registration form"
|
||||
|
||||
**Model actions:**
|
||||
```
|
||||
1. todo__init(goal="Add input validation to user registration form")
|
||||
2. todo__add(task="Analyze current registration form fields")
|
||||
3. todo__add(task="Create validation rules for email field")
|
||||
4. todo__add(task="Create validation rules for password field")
|
||||
5. todo__add(task="Implement client-side validation")
|
||||
6. todo__add(task="Add server-side validation")
|
||||
7. todo__add(task="Write tests for validation logic")
|
||||
```
|
||||
|
||||
**Model executes first task, then:**
|
||||
```
|
||||
8. todo__done(id=1)
|
||||
9. [Proceeds with task 2...]
|
||||
10. todo__done(id=2)
|
||||
...
|
||||
```
|
||||
|
||||
**If model stops with incomplete tasks:**
|
||||
- System automatically sends continuation prompt
|
||||
- Model sees remaining tasks and continues
|
||||
- Repeats until all tasks are done or max continuations reached
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Model Not Using Todo Tools
|
||||
- Verify `inject_todo_instructions: true` in your agent config
|
||||
- Check that the agent is properly loaded (not just a role)
|
||||
- Some models may need explicit prompting to use the tools
|
||||
|
||||
### Too Many Continuations
|
||||
- Lower `max_auto_continues` to a reasonable limit
|
||||
- Check if the model is creating new tasks without completing old ones
|
||||
- Ensure tasks are appropriately scoped (not too granular)
|
||||
|
||||
### Continuation Loop
|
||||
The system detects when a model's response is identical to its previous continuation response and stops
|
||||
automatically. If you're seeing loops:
|
||||
- The model may be stuck; check if a task is impossible to complete
|
||||
- Consider adjusting the `continuation_prompt` to be more directive
|
||||
|
||||
---
|
||||
|
||||
## Additional Docs
|
||||
- [Agents](./AGENTS.md) - Full agent configuration guide
|
||||
- [Function Calling](./function-calling/TOOLS.md) - How tools work in Loki
|
||||
- [Sessions](./SESSIONS.md) - How conversation state is managed
|
||||
@@ -1,161 +0,0 @@
|
||||
# The Loki Vault
|
||||
The Loki vault lets users store sensitive secrets and credentials securely so that there are no plaintext secrets
|
||||
anywhere in your configurations.
|
||||
|
||||
It's based on the [G-Man library](https://github.com/Dark-Alex-17/gman) (which also comes in a binary format), which
|
||||
functions as a universal secret management tool.
|
||||
|
||||

|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Usage](#usage)
|
||||
- [CLI Usage](#cli-usage)
|
||||
- [REPL Usage](#repl-usage)
|
||||
- [Motivation](#motivation)
|
||||
- [How it works](#how-it-works)
|
||||
- [Supported Files](#supported-files)
|
||||
- [Environment Variable Secret Injection in Agents](#environment-variable-secret-injection-in-agents)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
The Loki vault can be used in one of two ways: via the CLI or via the REPL for interactive usage.
|
||||
|
||||
### CLI Usage
|
||||
The vault is utilized from the CLI with the following flags:
|
||||
|
||||
```bash
|
||||
--add-secret <SECRET_NAME> Add a secret to the Loki vault
|
||||
--get-secret <SECRET_NAME> Decrypt a secret from the Loki vault and print the plaintext
|
||||
--update-secret <SECRET_NAME> Update an existing secret in the Loki vault
|
||||
--delete-secret <SECRET_NAME> Delete a secret from the Loki vault
|
||||
--list-secrets List all secrets stored in the Loki vault
|
||||
```
|
||||
(The above is also documented in `loki --help`)
|
||||
|
||||
Loki will guide you through manipulating your secrets to make usage easier.
|
||||
|
||||
### REPL Usage
|
||||
The vault can be accessed from within the Loki REPL using the `.vault` commands:
|
||||
|
||||

|
||||

|
||||
|
||||
The manipulation of your vault is guided in the same way as the CLI usage, ensuring ease of use.
|
||||
|
||||
## Motivation
|
||||
Loki is intended to be highly configurable and adaptable to many different use cases. This means that users of Loki
|
||||
should be able to share configurations for agents, tools, roles, etc. with other users or even entire teams.
|
||||
|
||||
My objective is to encourage this, and to make it so that users can easily version their configurations using version
|
||||
control. Good VCS hygiene dictates that one *never* commits secrets or sensitive information to a repository.
|
||||
|
||||
Since a number of files and configurations in Loki may contain sensitive information, the vault exists to solve this problem.
|
||||
|
||||
Users can share the vault password with a team, making it so a single configuration can be pulled from VCS and used
|
||||
by said team. Alternatively, each user can maintain their own vault password and expect other users to replace secret values
|
||||
with their user-specific secrets.
|
||||
|
||||
## How it works
|
||||
When you first start Loki, if you don't already have a vault password file, it will prompt you to create one. This file
|
||||
houses the password that is used to encrypt and decrypt secrets within Loki. This file exists so that you are not prompted
|
||||
for a password every time Loki attempts to decrypt a secret.
|
||||
|
||||
When you encrypt a secret, it uses the local provider for `gman` to securely store those secrets in the Loki vault file.
|
||||
This file is typically located at your Loki configuration directory under `vault.yml`. If you open this file, you'll see a
|
||||
bunch of gibberish. This is because all secrets are encrypted using the password you provided, meaning only you can decrypt them.
|
||||
|
||||
Secrets are specified in Loki configurations using the same variable templating as the [Jinja templating engine](https://jinja.palletsprojects.com/en/stable/):
|
||||
|
||||
```
|
||||
{{some_variable}}
|
||||
```
|
||||
|
||||
So whenever you want Loki to use a secret from the vault, you simply specify the secret name in this format in the applicable
|
||||
file.
|
||||
|
||||
**Example:**
|
||||
Suppose my vault has a secret called `GITHUB_TOKEN` in it, and I want to use that in the MCP configuration. Then, I simply replace
|
||||
the expected value in my `mcp.json` with the templated secret:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"atlassian": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "mcp-remote", "https://mcp.atlassian.com/v1/sse"]
|
||||
},
|
||||
"github": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"-i",
|
||||
"--rm",
|
||||
"-e",
|
||||
"GITHUB_PERSONAL_ACCESS_TOKEN",
|
||||
"ghcr.io/github/github-mcp-server"
|
||||
],
|
||||
"env": {
|
||||
"GITHUB_PERSONAL_ACCESS_TOKEN": "{{GITHUB_TOKEN}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
At runtime, Loki will detect the templated secret and replace it with the decrypted value from the vault before executing.
|
||||
|
||||
## Supported Files
|
||||
At the time of writing, the following files support Loki secret injection:
|
||||
|
||||
| File Type | Description | Limitations |
|
||||
|-------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `config.yaml` | The main Loki configuration file | Cannot use secret injection on the `vault_password_file` field |
|
||||
| `functions/mcp.json` | The MCP server configuration file | |
|
||||
| `<agent>/tools.<py/sh>` | Tool files for agents | Specific configuration and only supported for Agents, not all global tools ([see below](#environment-variable-secret-injection-in-agents)) |
|
||||
|
||||
|
||||
Note that all paths are relative to the Loki configuration directory. The directory varies by system, so you can find yours by
|
||||
running
|
||||
|
||||
```shell
|
||||
loki --info | grep config_dir | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Environment Variable Secret Injection in Agents
|
||||
Secrets from the Loki vault can be injected into agent `tools.sh/tools.py` as environment variables. This is done as
|
||||
follows:
|
||||
|
||||
1. Ensure a secret named `MY_USERNAME` is in your Loki vault.
|
||||
2. Set the name of the secret as the default value for a variable
|
||||
`<agent>/config.yaml`
|
||||
```yaml
|
||||
name: Username
|
||||
description: An AI agent that demonstrates agent capabilities
|
||||
instructions: |
|
||||
You are an AI agent designed to demonstrate agent capabilities.
|
||||
variables:
|
||||
- name: username
|
||||
description: Your user name
|
||||
# Configure the secret you want to inject using the same templating mentioned above; i.e. wrap the
|
||||
# case-sensitive name in '{{}}'
|
||||
default: '{{MY_USERNAME}}'
|
||||
```
|
||||
3. Reference the variable in your `<agent>/tools.<py/sh>` file using the familiar variable injection name; that is,
|
||||
since the name of the variable is `username`, the environment variable that will be provided to the tool call will
|
||||
be named `LLM_AGENT_VAR_USERNAME`
|
||||
`tools.sh`
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path
|
||||
|
||||
# @cmd Get my username
|
||||
get_my_username() {
|
||||
echo "$LLM_AGENT_VAR_USERNAME" >> "$LLM_OUTPUT"
|
||||
}
|
||||
```
|
||||
|
||||
For more information about variable usage within agents, refer to the [Variables section](./AGENTS.md#user-defined-variables) of the [Agents README](./AGENTS.md)
|
||||
|
||||
@@ -1,185 +0,0 @@
|
||||
# Model Clients
|
||||
|
||||
Loki supports a large number of model providers (referred to as `clients` since Loki is a client of these providers). In
|
||||
order to use them, you must configure each one in the `clients` array in the global Loki configuration file.
|
||||
|
||||
The location of the global Loki configuration file varies between systems, so you can use the following command to
|
||||
locate your configuration file:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_file' | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Supported Clients](#supported-clients)
|
||||
- [Client Configuration](#client-configuration)
|
||||
- [Authentication](#authentication)
|
||||
- [Extra Settings](#extra-settings)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Supported Clients
|
||||
Loki supports the following model client types:
|
||||
|
||||
* Azure AI Foundry
|
||||
* AWS Bedrock
|
||||
* Anthropic Claude
|
||||
* Cohere
|
||||
* Google Gemini
|
||||
* OpenAI
|
||||
* OpenAI-Compatible
|
||||
* GCP Vertex AI
|
||||
|
||||
In addition to the settings detailed below, each client may have additional settings specific to the provider. Check the
|
||||
[example global configuration file](../../config.example.yaml) to verify that your client has all the necessary fields
|
||||
defined.
|
||||
|
||||
## Client Configuration
|
||||
Each client in Loki has the same configuration settings available to them, with only special authentication fields added
|
||||
for specific clients as necessary. They are each placed under the `clients` array in your global configuration file:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- name: client1
|
||||
# ... client configuration ...
|
||||
- name: client2
|
||||
# ... client configuration ...
|
||||
```
|
||||
|
||||
### Metadata
|
||||
The client metadata uniquely identifies the client in Loki so you can reference it across your configurations. The
|
||||
available settings are listed below:
|
||||
|
||||
| Setting | Description |
|
||||
|----------|------------------------------------------------------------------------------------------------------------|
|
||||
| `name` | The name of the client (e.g. `openai`, `gemini`, etc.) |
|
||||
| `auth` | Authentication method: `oauth` for OAuth, or omit to use `api_key` (see [Authentication](#authentication)) |
|
||||
| `models` | See the [model settings](#model-settings) documentation below |
|
||||
| `patch` | See the [client patch configuration](./PATCHES.md#client-configuration-patches) documentation |
|
||||
| `extra` | See the [extra settings](#extra-settings) documentation below |
|
||||
|
||||
Be sure to also check provider-specific configurations for any extra fields that are added for authentication purposes.
|
||||
|
||||
### Model Settings
|
||||
The `models` array lists the available models from the model client. Each one has the following settings:
|
||||
|
||||
| Setting | Required | Model Type | Description |
|
||||
|-----------------------------|----------|-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `name` | * | `all` | The name of the model |
|
||||
| `real_name` | | `all` | You can define model aliases via the `name` field. However, Loki still needs to know the real name <br>of the model so it can query it. For example: If you have `name: gpt-alias`, then you must <br>also define `real_name: gpt-oss:latest` |
|
||||
| `type` | * | `all` | The type of model. Loki supports only 3 types of models: <ul><li>`chat`</li><li>`embedding`</li><li>`reranker`</li></ul> |
|
||||
| `input_price` | | `all` | The cost in USD per 1M tokens for each input sequence; Loki will keep track of usage costs if this is defined |
|
||||
| `output_price` | | `all` | The cost in USD per 1M tokens of the model output; Loki will keep track of usage costs if this is defined |
|
||||
| `patch` | | `all` | See the [model-specific patch configuration](./PATCHES.md#model-specific-patches) documentation |
|
||||
| `max_input_tokens` | | `all` | The maximum number of input tokens for the model |
|
||||
| `max_output_tokens` | | `chat` | The maximum number of output tokens for the model |
|
||||
| `require_max_tokens` | | `chat` | Whether to enforce the `max_output_tokens` constraint. |
|
||||
| `supports_vision` | | `chat` | Indicates if the model supports multimodal queries that would require vision (i.e. image recognition) |
|
||||
| `supports_function_calling` | | `chat` | Indicates if the model supports function calling |
|
||||
| `no_stream` | | `chat` | Enable or disable streaming API responses |
|
||||
| `no_system_message` | | `chat` | Controls whether the model supports system messages |
|
||||
| `system_prompt_prefix` | | `chat` | An additional prefix prompt to add to all system prompts to ensure consistent behavior across all interactions |
|
||||
| `max_tokens_per_chunk` | | `embedding` | The maximum chunk size supported by the embedding model |
|
||||
| `default_chunk_size` | | `embedding` | The default chunk size to use with the given model |
|
||||
| `max_batch_size` | | `embedding` | The maximum batch size that the given embedding model supports |
|
||||
|
||||
## Authentication
|
||||
|
||||
Loki clients support two authentication methods: **API keys** and **OAuth**. Each client entry in your configuration
|
||||
must use one or the other.
|
||||
|
||||
### API Key Authentication
|
||||
|
||||
Most clients authenticate using an API key. Simply set the `api_key` field directly or inject it from the
|
||||
[Loki vault](../VAULT.md):
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: claude
|
||||
api_key: '{{ANTHROPIC_API_KEY}}'
|
||||
```
|
||||
|
||||
API keys can also be provided via environment variables named `{CLIENT_NAME}_API_KEY` (e.g. `OPENAI_API_KEY`,
|
||||
`GEMINI_API_KEY`). See the [environment variables documentation](../ENVIRONMENT-VARIABLES.md#client-related-variables)
|
||||
for details.
|
||||
|
||||
### OAuth Authentication
|
||||
|
||||
For [providers that support OAuth](#providers-that-support-oauth), you can authenticate using your existing subscription instead of an API key. This uses
|
||||
the OAuth 2.0 PKCE flow.
|
||||
|
||||
**Step 1: Configure the client**
|
||||
|
||||
Add a client entry with `auth: oauth` and no `api_key`:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: claude
|
||||
name: my-claude-oauth
|
||||
auth: oauth
|
||||
```
|
||||
|
||||
**Step 2: Authenticate**
|
||||
|
||||
Run the `--authenticate` flag with the client name:
|
||||
|
||||
```sh
|
||||
loki --authenticate my-claude-oauth
|
||||
```
|
||||
|
||||
Or if you have only one OAuth-configured client, you can omit the name:
|
||||
|
||||
```sh
|
||||
loki --authenticate
|
||||
```
|
||||
|
||||
Alternatively, you can use the REPL command `.authenticate`.
|
||||
|
||||
This opens your browser for the OAuth authorization flow. Depending on the provider, Loki will either start a
|
||||
temporary localhost server to capture the callback automatically (e.g. Gemini) or ask you to paste the authorization
|
||||
code back into the terminal (e.g. Claude). Loki stores the tokens in `~/.cache/loki/oauth` and automatically refreshes
|
||||
them when they expire.
|
||||
|
||||
#### Gemini OAuth Note
|
||||
Loki uses the following scopes for OAuth with Gemini:
|
||||
* https://www.googleapis.com/auth/generative-language.peruserquota
|
||||
* https://www.googleapis.com/auth/userinfo.email
|
||||
* https://www.googleapis.com/auth/generative-language.retriever (Sensitive)
|
||||
|
||||
Since the `generative-language.retriever` scope is a sensitive scope, Google needs to verify Loki, which requires full
|
||||
branding (logo, official website, privacy policy, terms of service, etc.). The Loki app is open-source and is designed
|
||||
to be used as a simple CLI. As such, there are no terms of service or privacy policy associated with it, and thus Google
|
||||
cannot verify Loki.
|
||||
|
||||
So, when you kick off OAuth with Gemini, you may see a page similar to the following:
|
||||

|
||||
|
||||
Simply click the `Advanced` link and click `Go to Loki (unsafe)` to continue the OAuth flow.
|
||||
|
||||

|
||||

|
||||
|
||||
**Step 3: Use normally**
|
||||
|
||||
Once authenticated, the client works like any other. Loki uses the stored OAuth tokens automatically:
|
||||
|
||||
```sh
|
||||
loki -m my-claude-oauth:claude-sonnet-4-20250514 "Hello!"
|
||||
```
|
||||
|
||||
> **Note:** You can have multiple clients for the same provider. For example: you can have one with an API key and
|
||||
> another with OAuth. Use the `name` field to distinguish them.
|
||||
|
||||
### Providers That Support OAuth
|
||||
* Claude
|
||||
* Gemini
|
||||
|
||||
## Extra Settings
|
||||
Loki also lets you customize some extra settings for interacting with APIs:
|
||||
|
||||
| Setting | Description |
|
||||
|-------------------|-------------------------------------------------------|
|
||||
| `proxy` | Set a proxy to use |
|
||||
| `connect_timeout` | Set the timeout in seconds for connections to the API |
|
||||
@@ -1,368 +0,0 @@
|
||||
# Request Patching in Loki
|
||||
Loki provides two mechanisms for modifying API requests sent to LLM providers: **Model-Specific Patches** and
|
||||
**Client Configuration Patches**. These allow you to customize request parameters, headers, and URLs to work around
|
||||
provider quirks or add custom behavior.
|
||||
|
||||
## Quick Links
|
||||
- [Model-Specific Patches](#model-specific-patches)
|
||||
- [Client Configuration Patches](#client-configuration-patches)
|
||||
- [Comparison](#comparison)
|
||||
- [Common Use Cases](#common-use-cases)
|
||||
- [Environment Variable Patches](#environment-variable-patches)
|
||||
- [Tips](#tips)
|
||||
- [Debugging Patches](#debugging-patches)
|
||||
|
||||
---
|
||||
|
||||
## Model-Specific Patches
|
||||
|
||||
### Overview
|
||||
Model-specific patches are applied **unconditionally** to a single model. They are useful for handling model-specific
|
||||
quirks or requirements.
|
||||
|
||||
### When to Use
|
||||
- A specific model requires certain parameters to be set or removed
|
||||
- A model needs different default values than other models from the same provider
|
||||
- You need to add special configuration for one model only
|
||||
|
||||
### Structure
|
||||
|
||||
```yaml
|
||||
models:
|
||||
- name: model-name
|
||||
type: chat
|
||||
# ... other model properties ...
|
||||
patch:
|
||||
url: "https://custom-endpoint.com" # Optional: override the API endpoint
|
||||
body: # Optional: modify request body
|
||||
<parameter>: <value> # Add or modify parameters
|
||||
<parameter>: null # Remove parameters (set to null)
|
||||
headers: # Optional: modify request headers
|
||||
<header-name>: <value> # Add or modify headers
|
||||
<header-name>: null # Remove headers (set to null)
|
||||
```
|
||||
|
||||
### Examples
|
||||
|
||||
#### Example 1: Removing Parameters
|
||||
OpenAI's o-series reasoning models (e.g. `o4-mini`) don't support the `temperature`, `top_p`, or `max_tokens` parameters. The `patch` removes them:
|
||||
|
||||
```yaml
|
||||
- name: o4-mini
|
||||
type: chat
|
||||
max_input_tokens: 200000
|
||||
max_output_tokens: 100000
|
||||
supports_function_calling: true
|
||||
patch:
|
||||
body:
|
||||
max_tokens: null # Remove max_tokens from request
|
||||
temperature: null # Remove temperature from request
|
||||
top_p: null # Remove top_p from request
|
||||
```
|
||||
|
||||
#### Example 2: Setting Required Parameters
|
||||
Some models require specific parameters to be set:
|
||||
|
||||
```yaml
|
||||
- name: o4-mini-high
|
||||
type: chat
|
||||
patch:
|
||||
body:
|
||||
reasoning_effort: high # Always set reasoning_effort to "high"
|
||||
max_tokens: null
|
||||
temperature: null
|
||||
```
|
||||
|
||||
#### Example 3: Custom Endpoint
|
||||
If a model needs a different API endpoint:
|
||||
|
||||
```yaml
|
||||
- name: custom-model
|
||||
type: chat
|
||||
patch:
|
||||
url: "https://special-endpoint.example.com/v1/chat"
|
||||
```
|
||||
|
||||
#### Example 4: Adding Headers
|
||||
Add authentication or custom headers:
|
||||
|
||||
```yaml
|
||||
- name: special-model
|
||||
type: chat
|
||||
patch:
|
||||
headers:
|
||||
X-Custom-Header: "special-value"
|
||||
X-API-Version: "2024-01"
|
||||
```
|
||||
|
||||
### How It Works
|
||||
1. When you use a model, Loki loads its configuration
|
||||
2. If the model has a `patch` field, it's **always applied** to every request
|
||||
3. The patch modifies the request URL, body, or headers before sending to the API
|
||||
4. Parameters set to `null` are **removed** from the request
|
||||
|
||||
---
|
||||
|
||||
## Client Configuration Patches
|
||||
|
||||
### Overview
|
||||
Client configuration patches allow you to apply customizations to **multiple models** based on
|
||||
**regex pattern matching**. They're defined in your `config.yaml` file and can target specific API types (`chat`,
|
||||
`embeddings`, or `rerank`).
|
||||
|
||||
### When to Use
|
||||
- You want to apply the same settings to multiple models from a provider
|
||||
- You need different configurations for different groups of models
|
||||
- You want to override the default client model settings
|
||||
- You need environment-specific customizations
|
||||
|
||||
### Structure
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: <client> # e.g., gemini, openai, claude
|
||||
# ... client configuration ...
|
||||
patch:
|
||||
chat_completions: # For chat models
|
||||
'<regex-pattern>': # Regex to match model names
|
||||
url: "..." # Optional: override endpoint
|
||||
body: # Optional: modify request body
|
||||
<parameter>: <value>
|
||||
headers: # Optional: modify headers
|
||||
<header>: <value>
|
||||
embeddings: # For embedding models
|
||||
'<regex-pattern>':
|
||||
# ... same structure ...
|
||||
rerank: # For reranker models
|
||||
'<regex-pattern>':
|
||||
# ... same structure ...
|
||||
```
|
||||
|
||||
### Pattern Matching
|
||||
- Patterns are **regular expressions** that match against the model name
|
||||
- Use `.*` to match all models
|
||||
- Use specific patterns like `gpt-4.*` to match model families
|
||||
- Use `model1|model2` to match multiple specific models
|
||||
|
||||
### Examples
|
||||
|
||||
#### Example 1: Disable Safety Filters for Gemini Models
|
||||
Apply to all Gemini chat models:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: gemini
|
||||
api_key: "{{GEMINI_API_KEY}}"
|
||||
patch:
|
||||
chat_completions:
|
||||
'.*': # Matches all Gemini models
|
||||
body:
|
||||
safetySettings:
|
||||
- category: HARM_CATEGORY_HARASSMENT
|
||||
threshold: BLOCK_NONE
|
||||
- category: HARM_CATEGORY_HATE_SPEECH
|
||||
threshold: BLOCK_NONE
|
||||
- category: HARM_CATEGORY_SEXUALLY_EXPLICIT
|
||||
threshold: BLOCK_NONE
|
||||
- category: HARM_CATEGORY_DANGEROUS_CONTENT
|
||||
threshold: BLOCK_NONE
|
||||
```
|
||||
|
||||
#### Example 2: Apply Settings to Specific Model Family
|
||||
Only apply to GPT-4 models (not GPT-3.5):
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: openai
|
||||
api_key: "{{OPENAI_API_KEY}}"
|
||||
patch:
|
||||
chat_completions:
|
||||
'gpt-4.*': # Matches gpt-4, gpt-4-turbo, gpt-4o, etc.
|
||||
body:
|
||||
frequency_penalty: 0.2
|
||||
presence_penalty: 0.1
|
||||
```
|
||||
|
||||
#### Example 3: Different Settings for Different Models
|
||||
Apply different patches based on model name:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: openai
|
||||
api_key: "{{OPENAI_API_KEY}}"
|
||||
patch:
|
||||
chat_completions:
|
||||
'gpt-4o': # Specific model
|
||||
body:
|
||||
temperature: 0.7
|
||||
'gpt-3.5.*': # Model family
|
||||
body:
|
||||
temperature: 0.9
|
||||
max_tokens: 2000
|
||||
```
|
||||
|
||||
#### Example 4: Modify Embedding Requests
|
||||
Apply to embedding models:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: openai
|
||||
api_key: "{{OPENAI_API_KEY}}"
|
||||
patch:
|
||||
embeddings:
|
||||
'text-embedding-.*': # All text-embedding models
|
||||
body:
|
||||
dimensions: 1536
|
||||
encoding_format: "float"
|
||||
```
|
||||
|
||||
#### Example 5: Custom Headers for Specific Models
|
||||
Add headers only for certain models:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: openai-compatible
|
||||
api_base: "https://api.example.com/v1"
|
||||
patch:
|
||||
chat_completions:
|
||||
'custom-model-.*':
|
||||
headers:
|
||||
X-Custom-Auth: "bearer-token"
|
||||
X-Model-Version: "latest"
|
||||
```
|
||||
|
||||
#### Example 6: Override Endpoint for Specific Models
|
||||
Use different endpoints for different model groups:
|
||||
|
||||
```yaml
|
||||
clients:
|
||||
- type: openai-compatible
|
||||
api_base: "https://default-endpoint.com/v1"
|
||||
patch:
|
||||
chat_completions:
|
||||
'premium-.*': # Premium models use different endpoint
|
||||
url: "https://premium-endpoint.com/v1/chat/completions"
|
||||
```
|
||||
|
||||
### How It Works
|
||||
1. When making a request, Loki checks if the client has a `patch` configuration
|
||||
2. It looks at the appropriate API type (`chat_completions`, `embeddings`, or `rerank`)
|
||||
3. For each pattern in that section, it checks if the regex matches the model name
|
||||
4. If a match is found, that patch is applied to the request
|
||||
5. Only the **first matching pattern** is applied (patterns are processed in order)
|
||||
|
||||
---
|
||||
|
||||
## Comparison
|
||||
|
||||
| Feature | Model-Specific Patch | Client Configuration Patch |
|
||||
|-----------------------|-----------------------|-------------------------------------|
|
||||
| **Scope** | Single model only | Multiple models via regex |
|
||||
| **Matching** | Exact model name | Regular expression pattern |
|
||||
| **Application** | Always applied | Only if pattern matches |
|
||||
| **API Type** | All APIs | Separate for chat/embeddings/rerank |
|
||||
| **Override** | Overridden by client patch | Can override model patch |
|
||||
| **Use Case** | Model-specific quirks | User preferences & customization |
|
||||
| **Application Order** | Applied first | Applied second (can override) |
|
||||
|
||||
### Patch Application Order
|
||||
When both patches are present, they're applied in this order:
|
||||
|
||||
1. **Model-Specific Patch**
|
||||
2. **Client Configuration Patch**
|
||||
|
||||
This means client configuration patches can override model-specific patches if they modify the same parameters.
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### Removing Unsupported Parameters
|
||||
Some models don't support standard parameters like `temperature` or `max_tokens`:
|
||||
|
||||
**Model Patch**:
|
||||
```yaml
|
||||
patch:
|
||||
body:
|
||||
temperature: null
|
||||
max_tokens: null
|
||||
```
|
||||
|
||||
### Adding Provider-Specific Parameters
|
||||
Providers often have unique parameters:
|
||||
|
||||
**Client Patch**:
|
||||
```yaml
|
||||
patch:
|
||||
chat_completions:
|
||||
'.*':
|
||||
body:
|
||||
safetySettings: [...] # Gemini
|
||||
thinking_budget: 10000 # DeepSeek
|
||||
response_format: # OpenAI
|
||||
type: json_object
|
||||
```
|
||||
|
||||
### Changing Endpoints
|
||||
Use custom or regional endpoints:
|
||||
|
||||
**Client Patch**:
|
||||
```yaml
|
||||
patch:
|
||||
chat_completions:
|
||||
'.*':
|
||||
url: "https://eu-endpoint.example.com/v1/chat"
|
||||
```
|
||||
|
||||
### Setting Default Values
|
||||
Provide defaults for specific models or model families:
|
||||
|
||||
**Client Patch**:
|
||||
```yaml
|
||||
patch:
|
||||
chat_completions:
|
||||
'claude-3-.*':
|
||||
body:
|
||||
max_tokens: 4096
|
||||
temperature: 0.7
|
||||
```
|
||||
|
||||
### Custom Authentication
|
||||
Add special authentication headers:
|
||||
|
||||
**Client Patch**:
|
||||
```yaml
|
||||
patch:
|
||||
chat_completions:
|
||||
'.*':
|
||||
headers:
|
||||
Authorization: "Bearer {{custom_token}}"
|
||||
X-Organization-ID: "org-123"
|
||||
```
|
||||
|
||||
## Environment Variable Patches
|
||||
You can also apply patches via environment variables for temporary overrides:
|
||||
|
||||
```bash
|
||||
export LLM_PATCH_OPENAI_CHAT_COMPLETIONS='{"gpt-4.*":{"body":{"temperature":0.5}}}'
|
||||
```
|
||||
|
||||
This takes precedence over client configuration patches but not model-specific patches.
|
||||
|
||||
## Tips
|
||||
1. **Use model patches** for permanent, model-specific requirements
|
||||
2. **Use client patches** for personal preferences or environment-specific settings
|
||||
3. **Test regex patterns** carefully
|
||||
4. **Set to `null`** to remove parameters, don't just omit them
|
||||
5. **Check each model provider's docs** for available parameters and their formats
|
||||
6. **Be specific** with patterns to avoid unintended matches
|
||||
7. **Remember order matters** - first matching pattern wins for client patches
|
||||
8. **Patches merge** - both types can be applied, with client patches overriding model patches
|
||||
|
||||
## Debugging Patches
|
||||
To see what request is actually being sent, enable debug logging:
|
||||
|
||||
```bash
|
||||
export RUST_LOG=loki=debug
|
||||
loki "your prompt here"
|
||||
```
|
||||
|
||||
This will show the final request body after all patches are applied.
|
||||
@@ -1,279 +0,0 @@
|
||||
# Bash Prompt Helpers
|
||||
|
||||
When creating bash based tools, it's often helpful to prompt the user for input or confirmation.
|
||||
|
||||
Loki comes pre-packaged with a handful of prompt helpers for your bash-based tools. These helpers
|
||||
can be used to prompt the user for various types of input, such as yes/no confirmations,
|
||||
text input, and selections from a list.
|
||||
|
||||
The utility script is located at `functions/utils/prompt-utils.sh` within your Loki `functions` directory.
|
||||
|
||||
The Loki `functions` directory varies between machines, so you can find its location on your system by running the following command in your terminal:
|
||||
|
||||
```shell
|
||||
loki --info | grep functions_dir | awk '{print $2}'
|
||||
```
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Import The Prompt Utils Into Your Tools Script](#import-the-prompt-utils-into-your-tools-script)
|
||||
- [Included Utility Functions](#included-utility-functions)
|
||||
- [input](#input)
|
||||
- [confirm](#confirm)
|
||||
- [list](#list)
|
||||
- [checkbox](#checkbox)
|
||||
- [password](#password)
|
||||
- [editor](#editor)
|
||||
- [with_validate](#with_validate)
|
||||
- [validate_present](#validate_present)
|
||||
- [detect_os](#detect_os)
|
||||
- [get_opener](#get_opener)
|
||||
- [open_link](#open_link)
|
||||
- [guard_operation](#guard_operation)
|
||||
- [guard_path](#guard_path)
|
||||
- [patch_file](#patch_file)
|
||||
- [error](#error)
|
||||
- [warn](#warn)
|
||||
- [info](#info)
|
||||
- [debug](#debug)
|
||||
- [trace](#trace)
|
||||
- [Colored Output](#colored-output)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Import The Prompt Utils Into Your Tools Script
|
||||
In order to use the bash prompt helpers in your bash scripts, you need to source the provided `prompt-utils.sh` script.
|
||||
This script is pre-packaged with Loki and is located [here](../../assets/functions/utils/prompt-utils.sh).
|
||||
|
||||
When sourcing the file in your bash script, you use the `LLM_PROMPT_UTILS_FILE` environment variable that automatically
|
||||
populates the `functions/utils/prompt-utils.sh` path for you.
|
||||
|
||||
Thus, to properly source and enable all the bash prompt helpers in your Bash tools, add the following prelude to your
|
||||
scripts:
|
||||
|
||||
```bash
|
||||
source "$LLM_PROMPT_UTILS_FILE"
|
||||
```
|
||||
|
||||
## Included Utility Functions
|
||||
Below are the built-in bash prompt helpers that can be used to enhance user interaction with your tool scripts.
|
||||
|
||||
### input
|
||||
Prompt for text input
|
||||
|
||||

|
||||
|
||||
**Example With Validation:**
|
||||
```bash
|
||||
text=$(with_validate 'input "Please enter something:"' validate_present 2>/dev/tty)
|
||||
```
|
||||
|
||||
**Example Without Validation:**
|
||||
```bash
|
||||
text=$(input "Please enter something:" 2>/dev/tty)
|
||||
```
|
||||
|
||||
### confirm
|
||||
Show a confirm dialog with options for yes/no
|
||||
|
||||

|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
confirmed=$(confirm "Do the thing?" 2>/dev/tty)
|
||||
if [[ $confirmed == "0" ]]; then echo "No"; else echo "Yes"; fi
|
||||
```
|
||||
|
||||
### list
|
||||
Renders a text based list of options that can be selected by the user using up, down, and enter
|
||||
keys that then returns the chosen option.
|
||||
|
||||

|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
options=("one" "two" "three" "four")
|
||||
choice=$(list "Select an item" "${options[@]}" 2>/dev/tty)
|
||||
echo "Your choice: ${options[$choice]}"
|
||||
```
|
||||
|
||||
### checkbox
|
||||
Render a text based list of options, where multiple options can be selected by the user using down, up,
|
||||
and enter keys that then returns the chosen options.
|
||||
|
||||

|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
options=("one" "two" "three" "four")
|
||||
checked=$(checkbox "Select one or more items" "${options[@]}" 2>/dev/tty)
|
||||
echo "Your choices: ${checked}"
|
||||
```
|
||||
|
||||
### password
|
||||
Show a password prompt displaying stars for each character typed.
|
||||
|
||||

|
||||
|
||||
**Example With Validation:**
|
||||
```bash
|
||||
validate_password() {
|
||||
if [[ ${#1} -lt 10 ]]; then
|
||||
echo "Password must be at least 10 characters"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
pass=$(with_validate 'password "Enter your password"' validate_password 2>/dev/tty)
|
||||
```
|
||||
|
||||
**Example Without Validation:**
|
||||
```bash
|
||||
pass="$(password "Enter your password:" 2>/dev/tty)"
|
||||
```
|
||||
|
||||
### editor
|
||||
Open the default editor (`$EDITOR`); if none is set, fall back to `vi`
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
text=$(editor "Please enter something in the editor" 2>/dev/tty)
|
||||
echo -e "You wrote:\n${text}"
|
||||
```
|
||||
|
||||
### with_validate
|
||||
Evaluate the given prompt command with validation. This prompts the user for input until the
|
||||
validation function returns 0.
|
||||
|
||||

|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Using the built-in 'validate_present' validator
|
||||
text=$(with_validate 'input "Please enter something and confirm with enter"' validate_present 2>/dev/tty)
|
||||
|
||||
# Using a custom validator; e.g. for password
|
||||
validate_password() {
|
||||
if [[ ${#1} -lt 10 ]]; then
|
||||
echo "Password needs to be at least 10 characters"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
pass=$(with_validate 'password "Enter random password"' validate_password 2>/dev/tty)
|
||||
```
|
||||
|
||||
### validate_present
|
||||
Validate that the prompt returned a value.
|
||||
|
||||

|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
text=$(with_validate 'input "Please enter something and confirm with enter"' validate_present 2>/dev/tty)
|
||||
```
|
||||
|
||||
### detect_os
|
||||
Detect the current OS.
|
||||
|
||||
Returns one of the following:
|
||||
|
||||
* `solaris`
|
||||
* `macos`
|
||||
* `linux`
|
||||
* `bsd`
|
||||
* `windows`
|
||||
* `unknown`
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
detect_os
|
||||
```
|
||||
|
||||
### get_opener
|
||||
Determines the OS-specific file opening command (i.e. the command to open anything)
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Returns 'xdg-open'
|
||||
get_opener
|
||||
```
|
||||
|
||||
### open_link
|
||||
Opens the given link in the default browser
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
open_link https://www.google.com
|
||||
```
|
||||
|
||||
### guard_operation
|
||||
Prompt for permission to run an operation.
|
||||
|
||||
Can be disabled by setting the environment variable `AUTO_CONFIRM`.
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
guard_operation "Execute SQL?"
|
||||
_run_sql
|
||||
```
|
||||
|
||||
### guard_path
|
||||
Prompt for permission to perform path operations.
|
||||
|
||||
Can be disabled by setting the environment variable `AUTO_CONFIRM`.
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
guard_path "$target_path" "Remove '$target_path'?"
|
||||
rm -rf "$target_path"
|
||||
```
|
||||
|
||||
### patch_file
|
||||
Patch a file and show a diff using the default diff viewer. Uses git diff syntax.
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
new_contents="$(patch_file "$path" file.patch)"
|
||||
```
|
||||
|
||||
### error
|
||||
Log an error
|
||||
|
||||

|
||||
|
||||
### warn
|
||||
Log a warning
|
||||
|
||||

|
||||
|
||||
### info
|
||||
Log info
|
||||
|
||||

|
||||
|
||||
### debug
|
||||
Log a debug message
|
||||
|
||||

|
||||
|
||||
### trace
|
||||
Log a trace message
|
||||
|
||||

|
||||
|
||||
### Colored Output
|
||||
The following commands allow users to output text in specific colors.
|
||||
|
||||
* `red`
|
||||
* `green`
|
||||
* `gold`
|
||||
* `blue`
|
||||
* `magenta`
|
||||
* `cyan`
|
||||
* `white`
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
red "This will be red"
|
||||
gold "This will be gold"
|
||||
```
|
||||
@@ -1,309 +0,0 @@
|
||||
# Custom Bash-Based Tools
|
||||
Loki supports tools written in Bash. However, they must be written in a special format with special annotations in order
|
||||
for Loki to be able to properly parse and utilize them. This formatting ensures that each Bash script is
|
||||
self-describing, and formatted in such a way that Loki can anticipate how to execute it and what parameters to pass to
|
||||
it. This standardization also lets Loki compile the script into a JSON schema that can be used to inform the LLM about
|
||||
how to use the tool.
|
||||
|
||||
Each Bash-based tool must follow a specific structure in order for Loki to be able to properly compile and execute it:
|
||||
|
||||
* The tool must be a Bash script with a `.sh` file extension.
|
||||
* The script must have the following comments:
|
||||
* `# @describe ...` comment at the top that describes the tool.
|
||||
* `# @env LLM_OUTPUT=/dev/stdout The output path` comment to describe the `LLM_OUTPUT` environment variable. This
|
||||
syntax in particular assigns `/dev/stdout` as the default value for `LLM_OUTPUT`, so that if it's not set by Loki,
|
||||
the script will still function properly.
|
||||
* `# @option --option <value> An example option` comments to define each option that the tool accepts.
|
||||
* Use `--flag` syntax for boolean flags.
|
||||
* Use `--option <value>` syntax for options that accept a value.
|
||||
* Use `--option <value1,value2>` syntax for options that accept multiple values (i.e. arrays).
|
||||
* The script must have a `main` function
|
||||
* The `main` function must append its output to the file referenced by the `$LLM_OUTPUT` environment variable (i.e. `>> "$LLM_OUTPUT"`).
|
||||
* This is necessary because Loki relies on the `$LLM_OUTPUT` environment variable to capture the output of the tool.
|
||||
|
||||
Essentially, you can think of the Bash-based tool script as just a normal Bash script that uses special comments to
|
||||
define a CLI.
|
||||
* The `# @env LLM_OUTPUT=/dev/stdout` comment to define the `$LLM_OUTPUT` environment variable (good practice)
|
||||
* A `# @describe`
|
||||
* And a `main` function that writes to `$LLM_OUTPUT`
|
||||
|
||||
The following section explains how you can add parameters to your bash functions and how to test out your scripts.
|
||||
|
||||
## Quick Links:
|
||||
<!--toc:start-->
|
||||
- [Loki Bash Tools Syntax](#loki-bash-tools-syntax)
|
||||
- [Metadata](#metadata)
|
||||
- [Environment Variables](#environment-variables)
|
||||
- [Arguments](#arguments)
|
||||
- [Flags](#flags)
|
||||
- [Options](#options)
|
||||
- [Subcommands (Agents only)](#subcommands-agents-only)
|
||||
- [Execute and Test Your Bash Tools](#execute-and-test-your-bash-tools)
|
||||
- [Example](#example)
|
||||
- [Prompt Helpers](#prompt-helpers)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Loki Bash Tools Syntax
|
||||
Loki Bash tools work via `@___` annotations that describe specific functionality of a script. The following reference
|
||||
explains the general syntax of these annotations and how to use them to create a CLI that Loki can recognize.
|
||||
|
||||
Refer to the [Execute and Test Your Bash Tools](#execute-and-test-your-bash-tools) section to learn how to test out your Bash tools
|
||||
without needing to go through Loki itself.
|
||||
|
||||
It's important to note that any functions prefixed with `_` are not sent to the LLM, so they will be invisible to the
|
||||
LLM at runtime.
|
||||
|
||||
### Metadata:
|
||||
You can define different metadata about your script to help Loki understand its dependencies and purpose.
|
||||
|
||||
```bash
|
||||
# Use the `@meta require-tools` annotation to specify any external tools that your script depends on.
|
||||
# @meta require-tools jq,yq
|
||||
|
||||
# Use the `@describe` annotation to describe the purpose of the script.
|
||||
# @describe A tool to interact with things
|
||||
```
|
||||
|
||||
### Environment Variables:
|
||||
```bash
|
||||
###########################
|
||||
## Environment Variables ##
|
||||
###########################
|
||||
|
||||
# Use `@env` to define environment variables that the script uses.
|
||||
# @env LLM_OUTPUT=/dev/stdout The output path, with a default value of '/dev/stdout' if not set.
|
||||
# @env OPTIONAL An optional environment variable
|
||||
# @env REQUIRED! A required environment variable
|
||||
# @env DEFAULT_VALUE=default An environment variable with a default value if unset.
|
||||
# @env DEFAULT_FROM_FN=`_default_env_fn` An environment variable with a default value calculated from a function if unset.
|
||||
# @env CHOICE[even|odd] An environment variable that, if set, must be set to either `even` or `odd`
|
||||
# @env CHOICE_WITH_DEFAULT[=even|odd] An environment variable that, if set, must be set to either `even` or `odd`, and defaults to `even` when unset
|
||||
# @env CHOICE_FROM_FN[`_choice_env_fn`] An environment variable that, if set, must be set to one of the values returned by the `_choice_env_fn` function.
|
||||
|
||||
# Example variable usage:
|
||||
export CHOICE=even
|
||||
# ./script.sh
|
||||
main() {
|
||||
[[ $CHOICE == "even" ]] || { echo "The value of the 'CHOICE' env var is not 'even'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_default_env_fn() {
|
||||
echo "calculated default env value"
|
||||
}
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_choice_env_fn() {
|
||||
echo even
|
||||
echo odd
|
||||
}
|
||||
```
|
||||
|
||||
### Arguments:
|
||||
When referencing an argument defined via the `@arg` annotation, you can access its value using the `argc_<argument_name>` variable that
|
||||
is created at runtime.
|
||||
|
||||
```bash
|
||||
###############
|
||||
## Arguments ##
|
||||
###############
|
||||
|
||||
# Use `@arg` To define positional arguments for your script.
|
||||
# To reference an argument within your script, use the `argc_<argument_name>` variable.
|
||||
# @arg optional Optional argument
|
||||
# @arg required! Required argument
|
||||
# @arg multi_value* An argument that accepts multiple values (e.g. './script.sh one two three')
|
||||
# @arg multi_value_required+ An argument that is required and accepts multiple values
|
||||
# @arg value_notated <VALUE> An argument that explicitly specifies the name for documentation (e.g. Usage: ./script.sh [VALUE])
|
||||
# @arg default=default An argument with a default value if unset
|
||||
# @arg default_from_fn=`_default_arg_fn` An argument with a default value calculated from a function if unset
|
||||
# @arg choice[even|odd] An argument that, if set, must be set to either `even` or `odd`
|
||||
# @arg required_choice+[even|odd] A required argument that must be set to either `even` or `odd`
|
||||
# @arg default_choice[=even|odd] An argument that if unset defaults to 'even', but if set must be either `even` or `odd`
|
||||
# @arg multi_value_choice*[even|odd] An argument that, if set, must be set to either `even` or `odd`, and accepts multiple values
|
||||
# @arg choice_fn[`_choice_arg_fn`] An argument that, if set, must be set to one of the values returned by the `_choice_arg_fn` function.
|
||||
# @arg choice_fn_no_valid[?`_choice_arg_fn`] An argument that, if set, can be set to one of the values returned by the `_choice_arg_fn` function,
|
||||
# but does not validate the value.
|
||||
# @arg multi_choice_fn*[`_choice_arg_fn`] An argument that, if set, must be set to one of the values returned by the `_choice_arg_fn` function,
|
||||
# and accepts multiple values.
|
||||
# @arg multi_choice_comma_fn*,[`_choice_arg_fn`] An argument that, if set, must be set to one of the values returned by the `_choice_arg_fn` function,
|
||||
# and accepts multiple values in the form of a comma-separated list
|
||||
# @arg capture_arg~ An argument that captures all remaining args passed to the script
|
||||
|
||||
# Example usage 1: ./script.sh something_required
|
||||
main() {
|
||||
[[ $argc_required == "something_required" ]] || { echo "The value of the 'required' arg is not 'something_required'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
# Example usage 2: ./script.sh this is a test
|
||||
main() {
|
||||
[[ "${argc_multi_value[*]}" == "this is a test" ]] || { echo "The value of the 'multi_value' arg is not 'this is a test'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_default_arg_fn() {
|
||||
echo "default arg value"
|
||||
}
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_choice_arg_fn() {
|
||||
echo even
|
||||
echo odd
|
||||
}
|
||||
```
|
||||
|
||||
### Flags:
|
||||
To access the value of a flag defined via the `@flag` annotation, you can check the value of the `argc_<flag_name>` variable.
|
||||
|
||||
```bash
|
||||
###########
|
||||
## Flags ##
|
||||
###########
|
||||
|
||||
# Use `@flag` to define boolean flags for your script
|
||||
# To reference a flag within your script, use the `argc_<flag_name>` variable.
|
||||
# @flag --bool A boolean flag with only a long option
|
||||
# @flag -b --bool A boolean flag with a short and long option
|
||||
# @flag -b A boolean flag with only a short option
|
||||
# @flag --multi* A boolean flag that can be used multiple times (e.g. '--multi --multi' will return '2')
|
||||
|
||||
# Example usage 1: ./script.sh --bool
|
||||
main() {
|
||||
[[ $argc_bool == "1" ]] || { echo "The value of the 'bool' flag is not '1'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
# Example usage 2: ./script.sh --multi --multi
|
||||
main() {
|
||||
[[ $argc_multi == "2" ]] || { echo "The value of the 'multi' flag is not 2" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
```
|
||||
|
||||
### Options:
|
||||
To access the value of an option defined via the `@option` annotation, you can check the value of the `argc_<option_name>` variable.
|
||||
|
||||
```bash
|
||||
#############
|
||||
## Options ##
|
||||
#############
|
||||
|
||||
# Use `@option` to define flags that accept values
|
||||
# To reference an option within your script, use the `argc_<option_name>` variable.
|
||||
# @option --option An option that accepts a value with only a long flag
|
||||
# @option -o --option An option that accepts a value with both a short and long flag
|
||||
# @option -o An option that accepts a value with only a short flag
|
||||
# @option --required! A required option that accepts a value
|
||||
# @option --multi* An option that accepts multiple values
|
||||
# @option --required-multi+ An option that accepts multiple values and is required
|
||||
# @option --multi-comma*, An option that accepts multiple values in the form of a comma-separated list
|
||||
# @option --value <VALUE> An option that explicitly specifies the name for documentation (e.g. Usage: ./script.sh --value [VALUE])
|
||||
# @option --two-args <SRC> <DEST> An option that accepts two arguments and explicitly names them for documentation
|
||||
# (e.g. Usage: ./script.sh --two-args [SRC] [DEST])
|
||||
# @option --unlimited-args <SRC> <DEST+> An option that accepts an unlimited number of arguments and explicitly names them for documentation
|
||||
# (e.g. Usage: ./script.sh --unlimited-args [SRC] [DEST ...])
|
||||
# @option --default=default An option that has a default value if unset
|
||||
# @option --default-from-fn=`_default_opt_fn` An option that has a default value calculated from a function if unset
|
||||
# @option --choice[even|odd] An option that, if set, must be set to either `even` or `odd`
|
||||
# @option --choice-default[=even|odd] An option that, if unset, defaults to `even`, but if set must be either `even` or `odd`
|
||||
# @option --choice-multi*[even|odd] An option that, if set, must be set to either `even` or `odd`, and can be specified multiple times
|
||||
# (e.g. ./script.sh --choice-multi even --choice-multi odd)
|
||||
# @option --required-choice-multi+[even|odd] A required option that must be set to either `even` or `odd`, and can be specified multiple times
|
||||
# @option --choice-fn[`_choice_opt_fn`] An option that, if set, must be set to one of the values returned by the `_choice_opt_fn` function.
|
||||
# @option --choice-fn-no-valid[?`_choice_opt_fn`] An option that, if set, can be set to one of the values returned by the `_choice_opt_fn` function, with no validation
|
||||
# @option --choice-multi-fn*[`_choice_opt_fn`] An option that, if set, must be set to one of the values returned by the `_choice_opt_fn` function,
|
||||
# and can be specified multiple times
|
||||
# @option --choice-multi-comma*,[`_choice_opt_fn`] An option that, if set, must be set to one of the values returned by the `_choice_opt_fn` function,
|
||||
# and is specified as a comma-separated list
|
||||
# @option --capture~ An option that captures all remaining arguments passed to the script
|
||||
|
||||
# Example usage 1: ./script.sh --option some_value
|
||||
main() {
|
||||
[[ $argc_option == "some_value" ]] || { echo "The value of the 'option' option is not 'some_value'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
# Example usage 2: ./script.sh --multi value1 --multi value2
|
||||
main() {
|
||||
[[ "${argc_multi[*]}" == "value1 value2" ]] || { echo "The value of the 'multi' option is not 'value1 value2'" >> "$LLM_OUTPUT" && exit 1; }
|
||||
}
|
||||
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_default_opt_fn() {
|
||||
echo "calculated default option value"
|
||||
}
|
||||
|
||||
# Loki does not pass functions prefixed with `_` to the LLM, so these are essentially `private` functions
|
||||
_choice_opt_fn() {
|
||||
echo even
|
||||
echo odd
|
||||
}
|
||||
```
|
||||
|
||||
### Subcommands (Agents only):
|
||||
By default, if no `@cmd` annotations are defined, the script's `main` function is treated as the default command.
|
||||
However, for agents, there can be many functions defined in one file, and thus it is useful to create subcommands
|
||||
to organize your agent's tools.
|
||||
|
||||
```bash
|
||||
#################
|
||||
## Subcommands ##
|
||||
#################
|
||||
|
||||
# Use the `@cmd` annotation to define subcommands for your script.
|
||||
# @cmd List all files
|
||||
list() {
|
||||
ls -la >> "$LLM_OUTPUT"
|
||||
}
|
||||
|
||||
# @cmd Output the contents of the specified file
|
||||
# @arg file! The file to output
|
||||
cat() {
|
||||
command cat "$argc_file" >> "$LLM_OUTPUT"
|
||||
}
|
||||
|
||||
# Example usage 1: ./script.sh cat myfile.txt
|
||||
```
|
||||
|
||||
## Execute and Test Your Bash Tools
|
||||
Your bash tools are just normal bash scripts stored in the `functions/tools` directory. So you can execute and test them
|
||||
directly by first having Loki compile them so all this syntactic sugar means something.
|
||||
|
||||
This is achieved via the `loki --build-tools` command.
|
||||
|
||||
### Example
|
||||
Suppose we want to execute the `functions/tools/get_current_time.sh` script for testing.
|
||||
|
||||
We'd first make sure the script is visible in all contexts by ensuring it's in the `visible_tools` array in your global
|
||||
`config.yaml` file. This ensures Loki builds the tool so it's ready to use in any context.
|
||||
|
||||
You can find the location of your global `config.yaml` file with the following command:
|
||||
|
||||
```shell
|
||||
loki --info | grep 'config_file' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Then, we can instruct Loki to build the script so we can test it out:
|
||||
|
||||
```shell
|
||||
loki --build-tools
|
||||
```
|
||||
|
||||
This will add additional boilerplate to the top of the script so that it can be executed directly.
|
||||
|
||||
Finally, we can now execute the script:
|
||||
|
||||
```bash
|
||||
$ ./get_current_time.sh
|
||||
Fri Oct 24 05:55:04 PM MDT 2025
|
||||
```
|
||||
|
||||
## Prompt Helpers
|
||||
It's often useful to create interactive prompts for our bash tools so that our tools can get input from
|
||||
users.
|
||||
|
||||
To accommodate this, Loki provides a set of prompt helper functions that can be referenced and used within your Bash
|
||||
tools.
|
||||
|
||||
For more information, refer to the [Bash Prompt Helpers documentation](BASH-PROMPT-HELPERS.md).
|
||||
@@ -1,282 +0,0 @@
|
||||
# Custom Tools
|
||||
Loki is designed to be as flexible and as customizable as possible. One of the key
|
||||
features that enables this flexibility is the ability to create and integrate custom tools
|
||||
into your Loki setup. This document provides a guide on how to create and use custom tools within Loki.
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Supported Languages](#supported-languages)
|
||||
- [Creating a Custom Tool](#creating-a-custom-tool)
|
||||
- [Environment Variables](#environment-variables)
|
||||
- [Custom Bash-Based Tools](#custom-bash-based-tools)
|
||||
- [Custom Python-Based Tools](#custom-python-based-tools)
|
||||
- [Custom TypeScript-Based Tools](#custom-typescript-based-tools)
|
||||
- [Custom Runtime](#custom-runtime)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Supported Languages
|
||||
Loki supports custom tools written in the following programming languages:
|
||||
|
||||
* Python
|
||||
* Bash
|
||||
* TypeScript
|
||||
|
||||
## Creating a Custom Tool
|
||||
All tools are created as scripts in either Python, Bash, or TypeScript. They should be placed in the `functions/tools` directory.
|
||||
The location of the `functions` directory varies between systems, so you can use the following command to locate
|
||||
your `functions` directory:
|
||||
|
||||
```shell
|
||||
loki --info | grep functions_dir | awk '{print $2}'
|
||||
```
|
||||
|
||||
Once you've created your custom tool, remember to add it to the `visible_tools` array in your global `config.yaml` file
|
||||
to enable it globally. See the [Tools](TOOLS.md#enablingdisabling-global-tools) documentation for more information on how Loki utilizes the
|
||||
`visible_tools` array.
|
||||
|
||||
### Environment Variables
|
||||
All tools have access to the following environment variables that provide context about the current execution environment:
|
||||
|
||||
| Variable | Description |
|
||||
|----------------------|--------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `LLM_OUTPUT` | Indicates where the output of the tool should go. <br>In certain situations, this may be set to a temporary file instead of `/dev/stdout`. |
|
||||
| `LLM_ROOT_DIR` | The root `config_dir` directory for Loki <br>(i.e. `dirname $(loki --info \| grep config_file \| awk '{print $2}')`) |
|
||||
| `LLM_TOOL_NAME` | The name of the tool being executed |
|
||||
| `LLM_TOOL_CACHE_DIR` | A directory specific to the tool for storing cache or temporary files |
|
||||
|
||||
Loki also searches the tools directory on startup for a `.env` file. If found, all tools in `functions/tools/` will have
|
||||
the environment variables defined in the `.env` file available to them.
|
||||
|
||||
### Custom Bash-Based Tools
|
||||
To create a Bash-based tool, refer to the [custom bash tools documentation](CUSTOM-BASH-TOOLS.md).
|
||||
|
||||
### Custom Python-Based Tools
|
||||
Loki supports tools written in Python.
|
||||
|
||||
Each Python-based tool must follow a specific structure in order for Loki to be able to properly compile and
|
||||
execute it:
|
||||
|
||||
* The tool must be a Python script with a `.py` file extension.
|
||||
* The tool must have a `def run` function that serves as the entry point for the tool.
|
||||
* The `run` function must accept parameters that define the inputs for the tool.
|
||||
* Always use type hints to specify the data type of each parameter.
|
||||
* Use `Optional[...]` to indicate optional parameters
|
||||
* The `run` function must return a `str`.
|
||||
* For Python, this is automatically written to the `LLM_OUTPUT` environment variable, so there's no need to explicitly
|
||||
write to the environment variable within the function.
|
||||
* The function must also have a docstring that describes the tool and its parameters.
|
||||
* Each parameter in the `run` function should be documented in the docstring using the `Args:` section. They should use the following format:
|
||||
* `<parameter_name>: <description>` Where
|
||||
* `<parameter_name>`: The name of the parameter
|
||||
* `<description>`: The description of the parameter
|
||||
* These are *very* important because these descriptions are what's passed to the LLM as the description of the tool,
|
||||
letting the LLM know what the tool does and how to use it.
|
||||
|
||||
It's important to note that any functions prefixed with `_` are not sent to the LLM, so they will be invisible to the LLM
|
||||
at runtime.
|
||||
|
||||
Below is the [`demo_py.py`](../../assets/functions/tools/demo_py.py) tool definition that comes pre-packaged with
|
||||
Loki and demonstrates how to create a Python-based tool:
|
||||
|
||||
```python
|
||||
import os
|
||||
from typing import List, Literal, Optional
|
||||
|
||||
|
||||
def run(
|
||||
string: str,
|
||||
string_enum: Literal["foo", "bar"],
|
||||
boolean: bool,
|
||||
integer: int,
|
||||
number: float,
|
||||
array: List[str],
|
||||
string_optional: Optional[str] = None,
|
||||
integer_with_default: int = 42,
|
||||
boolean_with_default: bool = True,
|
||||
number_with_default: float = 3.14,
|
||||
string_with_default: str = "hello",
|
||||
array_optional: Optional[List[str]] = None,
|
||||
):
|
||||
"""Demonstrates all supported Python parameter types and variations.
|
||||
Args:
|
||||
string: A required string property
|
||||
string_enum: A required string property constrained to specific values
|
||||
boolean: A required boolean property
|
||||
integer: A required integer property
|
||||
number: A required number (float) property
|
||||
array: A required string array property
|
||||
string_optional: An optional string property (Optional[str] with None default)
|
||||
integer_with_default: An optional integer with a non-None default value
|
||||
boolean_with_default: An optional boolean with a default value
|
||||
number_with_default: An optional number with a default value
|
||||
string_with_default: An optional string with a default value
|
||||
array_optional: An optional string array property
|
||||
"""
|
||||
output = f"""string: {string}
|
||||
string_enum: {string_enum}
|
||||
boolean: {boolean}
|
||||
integer: {integer}
|
||||
number: {number}
|
||||
array: {array}
|
||||
string_optional: {string_optional}
|
||||
integer_with_default: {integer_with_default}
|
||||
boolean_with_default: {boolean_with_default}
|
||||
number_with_default: {number_with_default}
|
||||
string_with_default: {string_with_default}
|
||||
array_optional: {array_optional}"""
|
||||
|
||||
for key, value in os.environ.items():
|
||||
if key.startswith("LLM_"):
|
||||
output = f"{output}\n{key}: {value}"
|
||||
|
||||
return output
|
||||
```
|
||||
|
||||
### Custom TypeScript-Based Tools
|
||||
Loki supports tools written in TypeScript. TypeScript tools require [Node.js](https://nodejs.org/) and
|
||||
[tsx](https://tsx.is/) (`npx tsx` is used as the default runtime).
|
||||
|
||||
Each TypeScript-based tool must follow a specific structure in order for Loki to properly compile and execute it:
|
||||
|
||||
* The tool must be a TypeScript file with a `.ts` file extension.
|
||||
* The tool must have an `export function run(...)` that serves as the entry point for the tool.
|
||||
* Non-exported functions are ignored by the compiler and can be used as private helpers.
|
||||
* The `run` function must accept flat parameters that define the inputs for the tool.
|
||||
* Always use type annotations to specify the data type of each parameter.
|
||||
* Use `param?: type` or `type | null` to indicate optional parameters.
|
||||
* Use `param: type = value` for parameters with default values.
|
||||
* The `run` function must return a `string` (or `Promise<string>` for async functions).
|
||||
* For TypeScript, the return value is automatically written to the `LLM_OUTPUT` environment variable, so there's
|
||||
no need to explicitly write to the environment variable within the function.
|
||||
* The function must have a JSDoc comment that describes the tool and its parameters.
|
||||
* Each parameter should be documented using `@param name - description` tags.
|
||||
* These descriptions are passed to the LLM as the tool description, letting the LLM know what the tool does and
|
||||
how to use it.
|
||||
* Async functions (`export async function run(...)`) are fully supported and handled transparently.
|
||||
|
||||
**Supported Parameter Types:**
|
||||
|
||||
| TypeScript Type | JSON Schema | Notes |
|
||||
|-------------------|--------------------------------------------------|-----------------------------|
|
||||
| `string` | `{"type": "string"}` | Required string |
|
||||
| `number` | `{"type": "number"}` | Required number |
|
||||
| `boolean` | `{"type": "boolean"}` | Required boolean |
|
||||
| `string[]` | `{"type": "array", "items": {"type": "string"}}` | Array (bracket syntax) |
|
||||
| `Array<string>` | `{"type": "array", "items": {"type": "string"}}` | Array (generic syntax) |
|
||||
| `"foo" \| "bar"` | `{"type": "string", "enum": ["foo", "bar"]}` | String enum (literal union) |
|
||||
| `param?: string` | `{"type": "string"}` (not required) | Optional via question mark |
|
||||
| `string \| null` | `{"type": "string"}` (not required) | Optional via null union |
|
||||
| `param = "value"` | `{"type": "string"}` (not required) | Optional via default value |
|
||||
|
||||
**Unsupported Patterns (will produce a compile error):**
|
||||
|
||||
* Rest parameters (`...args: string[]`)
|
||||
* Destructured object parameters (`{ a, b }: { a: string, b: string }`)
|
||||
* Arrow functions (`const run = (x: string) => ...`)
|
||||
* Function expressions (`const run = function(x: string) { ... }`)
|
||||
|
||||
Only `export function` declarations are recognized. Non-exported functions are invisible to the compiler.
|
||||
|
||||
Below is the [`demo_ts.ts`](../../assets/functions/tools/demo_ts.ts) tool definition that comes pre-packaged with
|
||||
Loki and demonstrates how to create a TypeScript-based tool:
|
||||
|
||||
```typescript
|
||||
/**
|
||||
* Demonstrates all supported TypeScript parameter types and variations.
|
||||
*
|
||||
* @param string - A required string property
|
||||
* @param string_enum - A required string property constrained to specific values
|
||||
* @param boolean - A required boolean property
|
||||
* @param number - A required number property
|
||||
* @param array_bracket - A required string array using bracket syntax
|
||||
* @param array_generic - A required string array using generic syntax
|
||||
* @param string_optional - An optional string using the question mark syntax
|
||||
* @param string_nullable - An optional string using the union-with-null syntax
|
||||
* @param number_with_default - An optional number with a default value
|
||||
* @param boolean_with_default - An optional boolean with a default value
|
||||
* @param string_with_default - An optional string with a default value
|
||||
* @param array_optional - An optional string array using the question mark syntax
|
||||
*/
|
||||
export function run(
|
||||
string: string,
|
||||
string_enum: "foo" | "bar",
|
||||
boolean: boolean,
|
||||
number: number,
|
||||
array_bracket: string[],
|
||||
array_generic: Array<string>,
|
||||
string_optional?: string,
|
||||
string_nullable: string | null = null,
|
||||
number_with_default: number = 42,
|
||||
boolean_with_default: boolean = true,
|
||||
string_with_default: string = "hello",
|
||||
array_optional?: string[],
|
||||
): string {
|
||||
const parts = [
|
||||
`string: ${string}`,
|
||||
`string_enum: ${string_enum}`,
|
||||
`boolean: ${boolean}`,
|
||||
`number: ${number}`,
|
||||
`array_bracket: ${JSON.stringify(array_bracket)}`,
|
||||
`array_generic: ${JSON.stringify(array_generic)}`,
|
||||
`string_optional: ${string_optional}`,
|
||||
`string_nullable: ${string_nullable}`,
|
||||
`number_with_default: ${number_with_default}`,
|
||||
`boolean_with_default: ${boolean_with_default}`,
|
||||
`string_with_default: ${string_with_default}`,
|
||||
`array_optional: ${JSON.stringify(array_optional)}`,
|
||||
];
|
||||
|
||||
for (const [key, value] of Object.entries(process.env)) {
|
||||
if (key.startsWith("LLM_")) {
|
||||
parts.push(`${key}: ${value}`);
|
||||
}
|
||||
}
|
||||
|
||||
return parts.join("\n");
|
||||
}
|
||||
```
|
||||
|
||||
## Custom Runtime
|
||||
By default, Loki uses the following runtimes to execute tools:
|
||||
|
||||
| Language | Default Runtime | Requirement |
|
||||
|------------|-----------------|--------------------------------|
|
||||
| Python | `python` | Python 3 on `$PATH` |
|
||||
| TypeScript | `npx tsx` | Node.js + tsx (`npm i -g tsx`) |
|
||||
| Bash | `bash` | Bash on `$PATH` |
|
||||
|
||||
You can override the runtime for Python and TypeScript tools using a **shebang line** (`#!`) at the top of your
|
||||
script. Loki reads the first line of each tool file; if it starts with `#!`, the specified interpreter is used instead
|
||||
of the default.
|
||||
|
||||
**Examples:**
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3.11
|
||||
# This Python tool will be executed with python3.11 instead of the default `python`
|
||||
|
||||
def run(name: str):
|
||||
"""Greet someone.
|
||||
Args:
|
||||
name: The name to greet
|
||||
"""
|
||||
return f"Hello, {name}!"
|
||||
```
|
||||
|
||||
```typescript
|
||||
#!/usr/bin/env bun
|
||||
// This TypeScript tool will be executed with Bun instead of the default `npx tsx`
|
||||
|
||||
/**
|
||||
* Greet someone.
|
||||
* @param name - The name to greet
|
||||
*/
|
||||
export function run(name: string): string {
|
||||
return `Hello, ${name}!`;
|
||||
}
|
||||
```
|
||||
|
||||
This is useful for pinning a specific Python version, using an alternative TypeScript runtime like
|
||||
[Bun](https://bun.sh/) or [Deno](https://deno.com/), or working with virtual environments.
|
||||
@@ -1,209 +0,0 @@
|
||||
# MCP Servers
|
||||
[MCP servers](https://modelcontextprotocol.io/docs/getting-started/intro) are essentially APIs designed specifically for LLMs that work like a remote repository of
|
||||
tools for the model to access and extend its capabilities.
|
||||
|
||||
So think of it like this: Instead of having to write all your own custom tools to interact with different
|
||||
services, those services can expose their functionality through an MCP server.
|
||||
|
||||
Loki has first-class support for MCP servers.
|
||||
|
||||
As mentioned in the [Loki Vault documentation](../VAULT.md), Loki can inject sensitive
|
||||
configuration data into your MCP configuration file to ensure that secrets are not hard-coded.
|
||||
|
||||
## Quick Links
|
||||
<!--toc:start-->
|
||||
- [Important Note](#important-note)
|
||||
- [MCP Server Configuration](#mcp-server-configuration)
|
||||
- [Secret Injection](#secret-injection)
|
||||
- [Default MCP Servers](#default-mcp-servers)
|
||||
- [Loki Configuration](#loki-configuration)
|
||||
- [Global Configuration](#global-configuration)
|
||||
- [Role Configuration](#role-configuration)
|
||||
- [Agent Configuration](#agent-configuration)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Important Note
|
||||
Be careful how many MCP servers you enable at one time, regardless of the context. When there is a significant
|
||||
number of configured MCP servers, enabling too many MCP servers may overwhelm the context length of a model,
|
||||
and quickly exceed token limits.
|
||||
|
||||
## MCP Server Configuration
|
||||
Loki stores the MCP server configuration file, `functions/mcp.json`, in the `functions` directory. You can find
|
||||
this directory using the following command:
|
||||
|
||||
```shell
|
||||
loki --info | grep functions_dir | awk '{print $2}'
|
||||
```
|
||||
|
||||
The syntax for the `functions/mcp.json` file is compatible with MCP server configurations for Claude Desktop.
|
||||
So any time you're looking to add a new server, look at the docs for it and find the configuration example for
|
||||
Claude Desktop. You should be able to use the exact same configuration in your `functions/mcp.json` file.
|
||||
|
||||
Every server entry **must** include a `"type"` field set to one of: `"stdio"`, `"http"`, or `"sse"`.
|
||||
|
||||
### Transport Types
|
||||
|
||||
Loki supports three MCP transport types:
|
||||
|
||||
| Type | Use Case |
|
||||
|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `stdio` | Spawns a local subprocess and communicates over stdin/stdout |
|
||||
| `http` | Connects to a remote server via [Streamable HTTP](https://modelcontextprotocol.io/docs/concepts/transports#streamable-http) |
|
||||
| `sse` | Connects to a remote server via the legacy [HTTP+SSE](https://modelcontextprotocol.io/docs/concepts/transports#http-with-sse) transport (Claude Desktop format) |
|
||||
|
||||
### Stdio Servers
|
||||
|
||||
Stdio is the standard transport for locally-installed MCP servers. Loki spawns the process and communicates
|
||||
over stdin/stdout:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
"type": "stdio",
|
||||
"command": "docker",
|
||||
"args": ["run", "-i", "--rm", "ghcr.io/github/github-mcp-server"],
|
||||
"env": {
|
||||
"GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Required | Description |
|
||||
|-----------|----------|------------------------------------------|
|
||||
| `type` | yes | Must be `"stdio"` |
|
||||
| `command` | yes | The executable to spawn |
|
||||
| `args` | no | Arguments passed to the command |
|
||||
| `env` | no | Environment variables for the subprocess |
|
||||
| `cwd` | no | Working directory for the subprocess |
|
||||
|
||||
### HTTP (Streamable HTTP) Servers
|
||||
|
||||
For remote MCP servers that support the Streamable HTTP transport:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"datadog": {
|
||||
"type": "http",
|
||||
"url": "https://mcp.datadoghq.com/api/unstable/mcp-server/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Required | Description |
|
||||
|-----------|----------|--------------------------------------------------------|
|
||||
| `type` | yes | Must be `"http"` |
|
||||
| `url` | yes | The server endpoint URL |
|
||||
| `headers` | no | Custom HTTP headers to include with every request |
|
||||
|
||||
### SSE Servers
|
||||
|
||||
For remote MCP servers that use the legacy HTTP+SSE transport (the format used by Claude Desktop):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"my-sse-server": {
|
||||
"type": "sse",
|
||||
"url": "http://127.0.0.1:64342/sse",
|
||||
"headers": {
|
||||
"Authorization": "Bearer my-token"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Required | Description |
|
||||
|-----------|----------|--------------------------------------------------------|
|
||||
| `type` | yes | Must be `"sse"` |
|
||||
| `url` | yes | The server SSE endpoint URL |
|
||||
| `headers` | no | Custom HTTP headers to include with every request |
|
||||
|
||||
**Note:** Both `http` and `sse` types use the same underlying transport, which auto-negotiates the
|
||||
protocol with the server. The `type` field primarily serves as documentation of which protocol the
|
||||
server speaks. Neither type supports `command`, `args`, or `cwd` fields.
|
||||
|
||||
### Secret Injection
|
||||
As mentioned in the [Loki Vault documentation](../VAULT.md), you can use Loki Vault to inject secrets into your MCP configuration file.
|
||||
|
||||
In fact, this is why you need to set up your vault before using Loki at all: the built-in MCP configuration
|
||||
requires you to set up some secrets to use it.
|
||||
|
||||
For more information about how to set up your vault and inject secrets, please refer to the [Loki Vault documentation](../VAULT.md).
|
||||
|
||||
## Default MCP Servers
|
||||
Loki ships with a `functions/mcp.json` file that includes some useful MCP servers:
|
||||
|
||||
* [github](https://github.com/github/github-mcp-server) - Interact with GitHub repositories, issues, pull requests, and more.
|
||||
* [docker](https://github.com/ckreiling/mcp-server-docker) - Manage your local Docker containers with natural language
|
||||
* [slack](https://github.com/korotovsky/slack-mcp-server) - Interact with Slack
|
||||
* [ddg-search](https://github.com/nickclyde/duckduckgo-mcp-server) - Perform web searches with the DuckDuckGo search engine
|
||||
|
||||
## Loki Configuration
|
||||
MCP servers, like tools, can be used in a handful of contexts:
|
||||
* Inside a session
|
||||
* Inside a role
|
||||
* Inside an agent
|
||||
* Globally (i.e. outside a session, role, or agent)
|
||||
|
||||
Each of these has a different configuration and interaction with the global configuration.
|
||||
|
||||
**Note:** The names of the MCP servers referenced in the configuration properties below correspond directly
|
||||
to the names given in the `functions/mcp.json` configuration file. So if you change the name of an MCP server
|
||||
from `slack` to `lucem-slack`, then you need to also update your Loki configuration accordingly.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration is essentially what settings you want to have on by default when
|
||||
you just invoke `loki`. (Don't worry about agents, roles, or sessions yet. We'll get to them in a bit).
|
||||
|
||||
The following settings are available in the global configuration for MCP servers:
|
||||
|
||||
```yaml
|
||||
mcp_server_support: true # Enables or disables MCP server support (globally).
|
||||
mapping_mcp_servers: # Alias for an MCP server or set of servers
|
||||
git: github,gitmcp
|
||||
enabled_mcp_servers: null # Which MCP servers to enable by default (e.g. 'github,slack')
|
||||
```
|
||||
|
||||
A special note about `enabled_mcp_servers`: a user can set this to `all` to enable all configured MCP servers in the
|
||||
`functions/mcp.json` configuration.
|
||||
|
||||
(See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.)
|
||||
|
||||
When running in REPL-mode, the `mcp_server_support` and `enabled_mcp_servers` settings can be overridden using the
|
||||
`.set` command:
|
||||
|
||||

|
||||
|
||||
### Role Configuration
|
||||
When you create a role, you have the following MCP-related configuration options available to you:
|
||||
|
||||
```yaml
|
||||
enabled_mcp_servers: github # Which MCP servers the role uses.
|
||||
```
|
||||
|
||||
The values for `mapping_mcp_servers` are inherited from the [global configuration](#global-configuration).
|
||||
|
||||
For more information about roles, refer to the [Roles](../ROLES.md) documentation.
|
||||
|
||||
### Agent Configuration
|
||||
When you create an agent, you have the following MCP-related configuration options available to you:
|
||||
|
||||
```yaml
|
||||
mcp_servers: # Which MCP servers the agent uses
|
||||
- github
|
||||
- docker
|
||||
```
|
||||
|
||||
The values for `mapping_mcp_servers` are inherited from the [global configuration](#global-configuration).
|
||||
|
||||
For more information about agents, refer to the [Agents](../AGENTS.md) documentation.
|
||||
|
||||
For a full example configuration for an agent, see the [Agent Configuration Example](../../config.agent.example.yaml) file.
|
||||
@@ -1,192 +0,0 @@
|
||||
# Tools
|
||||
Loki supports function calling with various tools built-in to enhance LLM capabilities. All built-in tools for Loki
|
||||
are located in the [`functions/tools`](../../assets/functions/tools) directory. These tools are also stored in your Loki `functions`
|
||||
directory, which is also where you'd go to add more tools.
|
||||
|
||||
**Pro Tip:** The Loki functions directory can be found by running the following command:
|
||||
```bash
|
||||
loki --info | grep functions_dir | awk '{print $2}'
|
||||
```
|
||||
|
||||
# Quick Links
|
||||
<!--toc:start-->
|
||||
- [Built-In Tools](#built-in-tools)
|
||||
- [Configuration](#configuration)
|
||||
- [Global Configuration](#global-configuration)
|
||||
- [Enabling/Disabling Global Tools](#enablingdisabling-global-tools)
|
||||
- [Role Configuration](#role-configuration)
|
||||
- [Agent Configuration](#agent-configuration)
|
||||
- [Tool Error Handling](#tool-error-handling)
|
||||
- [Native/Shell Tool Errors](#nativeshell-tool-errors)
|
||||
- [MCP Errors](#mcp-tool-errors)
|
||||
- [Why Tool Error Handling Is Important](#why-this-matters)
|
||||
<!--toc:end-->
|
||||
|
||||
---
|
||||
|
||||
## Built-In Tools
|
||||
The following tools are built-in to Loki by default, and their default enabled/disabled status is indicated. More about how tools can
|
||||
be enabled/disabled can be found in the [Configuration](#configuration) section below.
|
||||
|
||||
| Tool | Description | Enabled/Disabled |
|
||||
|-------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|
|
||||
| [`demo_py.py`](../../assets/functions/tools/demo_py.py) | Demonstrates how to create a tool using Python and how to use comments. | 🔴 |
|
||||
| [`demo_sh.sh`](../../assets/functions/tools/demo_sh.sh) | Demonstrates how to create a tool using Bash and how to use comment tags. | 🔴 |
|
||||
| [`demo_ts.ts`](../../assets/functions/tools/demo_ts.ts) | Demonstrates how to create a tool using TypeScript and how to use JSDoc comments. | 🔴 |
|
||||
| [`execute_command.sh`](../../assets/functions/tools/execute_command.sh) | Execute the shell command. | 🟢 |
|
||||
| [`execute_py_code.py`](../../assets/functions/tools/execute_py_code.py) | Execute the given Python code. | 🔴 |
|
||||
| [`execute_sql_code.sh`](../../assets/functions/tools/execute_sql_code.sh) | Execute SQL code. | 🔴 |
|
||||
| [`fetch_url_via_curl.sh`](../../assets/functions/tools/fetch_url_via_curl.sh) | Extract the content from a given URL using cURL. | 🔴 |
|
||||
| [`fetch_url_via_jina.sh`](../../assets/functions/tools/fetch_url_via_jina.sh) | Extract the content from a given URL using Jina. | 🔴 |
|
||||
| [`fs_cat.sh`](../../assets/functions/tools/fs_cat.sh) | Read the contents of a file at the specified path. | 🟢 |
|
||||
| [`fs_read.sh`](../../assets/functions/tools/fs_read.sh) | Controlled reading of the contents of a file at the specified path with line numbers, offset, and limit to read specific sections. | 🟢 |
|
||||
| [`fs_glob.sh`](../../assets/functions/tools/fs_glob.sh) | Find files by glob pattern. Returns matching file paths sorted by modification time. | 🟢 |
|
||||
| [`fs_grep.sh`](../../assets/functions/tools/fs_grep.sh) | Search file contents using regular expressions. Returns matching file paths and lines. | 🟢 |
|
||||
| [`fs_ls.sh`](../../assets/functions/tools/fs_ls.sh) | List all files and directories at the specified path. | 🟢 |
|
||||
| [`fs_mkdir.sh`](../../assets/functions/tools/fs_mkdir.sh) | Create a new directory at the specified path. | 🔴 |
|
||||
| [`fs_patch.sh`](../../assets/functions/tools/fs_patch.sh) | Apply a patch to a file at the specified path. <br>This can be used to edit a file without having to rewrite the whole file. | 🔴 |
|
||||
| [`fs_rm.sh`](../../assets/functions/tools/fs_rm.sh) | Remove a file or directory at the specified path. | 🔴 |
|
||||
| [`fs_write.sh`](../../assets/functions/tools/fs_write.sh) | Write the full file contents to a file at the specified path. | 🟢 |
|
||||
| [`get_current_time.sh`](../../assets/functions/tools/get_current_time.sh) | Get the current time. | 🟢 |
|
||||
| [`get_current_weather.py`](../../assets/functions/tools/get_current_weather.py) | Get the current weather in a given location (Python implementation) | 🔴 |
|
||||
| [`get_current_weather.sh`](../../assets/functions/tools/get_current_weather.sh) | Get the current weather in a given location. | 🟢 |
|
||||
| [`get_current_weather.ts`](../../assets/functions/tools/get_current_weather.ts) | Get the current weather in a given location (TypeScript implementation) | 🔴 |
|
||||
| [`query_jira_issues.sh`](../../assets/functions/tools/query_jira_issues.sh) | Query for Jira issues using a Jira Query Language (JQL) query. | 🟢 |
|
||||
| [`search_arxiv.sh`](../../assets/functions/tools/search_arxiv.sh) | Search arXiv using the given search query and return the top papers. | 🔴 |
|
||||
| [`search_wikipedia.sh`](../../assets/functions/tools/search_wikipedia.sh) | Search Wikipedia using the given search query. <br>Use it to get detailed information about a public figure, interpretation of a <br>complex scientific concept or in-depth connectivity of a significant historical <br>event, etc. | 🔴 |
|
||||
| [`search_wolframalpha.sh`](../../assets/functions/tools/search_wolframalpha.sh) | Get an answer to a question using Wolfram Alpha. The input query should be <br>in English. Use it to answer user questions that require computation, detailed <br>facts, data analysis, or complex queries. | 🔴 |
|
||||
| [`send_mail.sh`](../../assets/functions/tools/send_mail.sh) | Send an email. | 🔴 |
|
||||
| [`send_twilio.sh`](../../assets/functions/tools/send_twilio.sh) | Send SMS or Twilio Messaging Channels messages using the Twilio API. | 🔴 |
|
||||
| [`web_search_loki.sh`](../../assets/functions/tools/web_search_loki.sh) | Perform a web search to get up-to-date information or additional context. <br>Use this when you need current information or feel a search could provide <br>a better answer. | 🔴 |
|
||||
| [`web_search_perplexity.sh`](../../assets/functions/tools/web_search_perplexity.sh) | Perform a web search using the Perplexity API to get up-to-date <br>information or additional context. Use this when you need current <br>information or feel a search could provide a better answer. | 🔴 |
|
||||
| [`web_search_tavily.sh`](../../assets/functions/tools/web_search_tavily.sh) | Perform a web search using the Tavily API to get up-to-date <br>information or additional context. Use this when you need current <br>information or feel a search could provide a better answer. | 🔴 |
|
||||
|
||||
Details on what configuration, if any, is necessary for each tool can be found inside the tool file definition itself.
|
||||
|
||||
## Configuration
|
||||
Tools can be used in a handful of contexts:
|
||||
* Inside a session
|
||||
* Inside a role
|
||||
* Inside an agent
|
||||
* Globally (i.e. outside a session, role, or agent)
|
||||
|
||||
Each of these has a different configuration and interaction with the global configuration.
|
||||
|
||||
**Note:** For each configuration property listed below, the functions that are mentioned *only*
|
||||
correspond to the tool scripts located in your Loki `functions/tools` directory.
|
||||
|
||||
### Global Configuration
|
||||
The global configuration is essentially what settings you want to have on by default when
|
||||
you just invoke `loki`. (Don't worry about agents, roles, or sessions yet. We'll get to them in a bit).
|
||||
|
||||
The following settings are available in the global configuration for tools:
|
||||
|
||||
```yaml
|
||||
function_calling_support: true # Enables or disables function calling in any context
|
||||
mapping_tools: # Alias for a tool or toolset
|
||||
fs: 'fs_cat,fs_ls,fs_mkdir,fs_rm,fs_write'
|
||||
enabled_tools: null # Which tools to use by default. (e.g. 'fs,web_search_loki')
|
||||
visible_tools: # Which tools are visible to be compiled (and are thus able to be defined in 'enabled_tools')
|
||||
# - demo_py.py
|
||||
- execute_command.sh
|
||||
```
|
||||
|
||||
A special note about `enabled_tools`: a user can set this to `all` to enable all available tools listed in the
|
||||
`visible_tools` section of your Loki `config.yaml` file.
|
||||
See the [Enabling/Disabling Global Tools](#enablingdisabling-global-tools) section below for more information on how tools
|
||||
are enabled/disabled globally.
|
||||
|
||||
(See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.)
|
||||
|
||||
When running in REPL-mode, the `function_calling_support` and `enabled_tools` settings can be overridden using the
|
||||
`.set` command:
|
||||
|
||||

|
||||
|
||||
You'll notice that, as mentioned above, some tools are disabled while others are enabled. How is that determined?
|
||||
|
||||
### Enabling/Disabling Global Tools
|
||||
The configured tools are enabled/disabled by looking at the values in the `visible_tools` array in your `config.yaml`
|
||||
file. This file is located in the root of the Loki `config` directory. The location of the Loki config varies by system,
|
||||
so your config file can be found using the following command:
|
||||
|
||||
```bash
|
||||
loki --info | grep 'config_file' | awk '{print $2}'
|
||||
```
|
||||
|
||||
Each line in the `visible_tools` array lists a tool.
|
||||
|
||||
If that line is commented out, then that tool is not included in the global tool set, and cannot be used in any context.
|
||||
This means it will not be built, and even if enabled under `enabled_tools`, it still will not be available in any
|
||||
context.
|
||||
|
||||
### Role Configuration
|
||||
When you create a role, you have the following global tool-related configuration options available to you:
|
||||
|
||||
```yaml
|
||||
enabled_tools: query_jira_issues # Which tools the role uses.
|
||||
```
|
||||
|
||||
The values for `mapping_tools` are inherited from the [global configuration](#global-configuration).
|
||||
|
||||
For more information about roles, refer to the [Roles](../ROLES.md) documentation.
|
||||
|
||||
### Agent Configuration
|
||||
When you create an agent, you have the following global tool-related configuration options available to you:
|
||||
|
||||
```yaml
|
||||
global_tools: # Which global tools the agent uses
|
||||
- query_jira_issues.sh
|
||||
- fs_cat.sh
|
||||
- fs_ls.sh
|
||||
```
|
||||
|
||||
The values for `mapping_tools` are inherited from the [global configuration](#global-configuration).
|
||||
|
||||
For more information about agents, refer to the [Agents](../AGENTS.md) documentation.
|
||||
|
||||
For a full example configuration for an agent, see the [Agent Configuration Example](../../config.agent.example.yaml) file.
|
||||
|
||||
---
|
||||
|
||||
## Tool Error Handling
|
||||
When tools fail, Loki captures error information and passes it back to the model so it can diagnose issues and
|
||||
potentially retry or adjust its approach.
|
||||
|
||||
### Native/Shell Tool Errors
|
||||
When a shell-based tool exits with a non-zero exit code, the model receives:
|
||||
|
||||
```json
|
||||
{
|
||||
"tool_call_error": "Tool call 'my_tool' exited with code 1",
|
||||
"stderr": "Error: file not found: config.json"
|
||||
}
|
||||
```
|
||||
|
||||
The `stderr` field contains the actual error output from the tool, giving the model context about what went wrong.
|
||||
If the tool produces no stderr output, only the `tool_call_error` field is included.
|
||||
|
||||
**Note:** Tool stdout streams to your terminal in real-time so you can see progress. Only stderr is captured for
|
||||
error reporting.
|
||||
|
||||
### MCP Tool Errors
|
||||
When an MCP (Model Context Protocol) tool invocation fails due to connection issues, timeouts, or server errors,
|
||||
the model receives:
|
||||
|
||||
```json
|
||||
{
|
||||
"tool_call_error": "MCP tool invocation failed: connection refused"
|
||||
}
|
||||
```
|
||||
|
||||
This allows the model to understand that an external service failed and take appropriate action (retry, use an
|
||||
alternative approach, or inform the user).
|
||||
|
||||
### Why This Matters
|
||||
Without proper error propagation, models would only know that "something went wrong" without understanding *what*
|
||||
went wrong. By including stderr output and detailed error messages, models can:
|
||||
|
||||
- Diagnose the root cause of failures
|
||||
- Suggest fixes (e.g., "the file doesn't exist, should I create it?")
|
||||
- Retry with corrected parameters
|
||||
- Fall back to alternative approaches when appropriate
|
||||
|
Before Width: | Height: | Size: 370 KiB |
|
Before Width: | Height: | Size: 587 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 446 KiB |
|
Before Width: | Height: | Size: 8.1 KiB |
|
Before Width: | Height: | Size: 1.0 MiB |
|
Before Width: | Height: | Size: 21 KiB |
|
Before Width: | Height: | Size: 878 KiB |
|
Before Width: | Height: | Size: 170 KiB |
|
Before Width: | Height: | Size: 225 KiB |
|
Before Width: | Height: | Size: 189 KiB |
|
Before Width: | Height: | Size: 67 KiB |
|
Before Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 303 KiB |
|
Before Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 151 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 78 KiB |
|
Before Width: | Height: | Size: 43 KiB |
|
Before Width: | Height: | Size: 58 KiB |
|
Before Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 58 KiB |
|
Before Width: | Height: | Size: 70 KiB |
|
Before Width: | Height: | Size: 68 KiB |
|
Before Width: | Height: | Size: 67 KiB |
|
Before Width: | Height: | Size: 66 KiB |
|
Before Width: | Height: | Size: 67 KiB |
|
Before Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 67 KiB |
|
Before Width: | Height: | Size: 57 KiB |
|
Before Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 9.1 KiB |
|
Before Width: | Height: | Size: 12 KiB |