30 Commits

Author SHA1 Message Date
d5e0728532 feat: Implemented retry logic for failed tool invocations so the LLM can learn from the result and try again; Also implemented chain loop detection to prevent loops
CI / All (ubuntu-latest) (push) Failing after 5m27s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-02-01 17:06:16 -07:00
25c0885dcc fix: Claude tool calls work incorrectly when tool doesn't require any arguments or flags; would provide an empty JSON object or error on no args 2026-02-01 17:05:36 -07:00
f56ed7d005 feat: Added gemini-3-pro to the supported vertexai models 2026-01-30 19:03:41 -07:00
d79e4b9dff Fixed some typos in tool call error messages 2026-01-30 12:25:57 -07:00
cdd829199f build: Created justfile to make life easier
CI / All (ubuntu-latest) (push) Failing after 5m26s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-01-27 13:49:36 -07:00
e3c644b8ca docs: Created a CREDITS file to document the history and origins of Loki from the original AIChat project
CI / All (ubuntu-latest) (push) Failing after 5m28s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-01-27 13:15:20 -07:00
5cb8070da1 build: Support Claude Opus 4.5
CI / All (ubuntu-latest) (push) Failing after 5m26s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-01-26 12:40:06 -07:00
66801b5d07 feat: Added an environment variable that lets users bypass guard operations in bash scripts. This is useful for agent routing
CI / All (ubuntu-latest) (push) Failing after 5m29s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-01-23 14:18:52 -07:00
f2de196e22 fix: Fixed a bug where --agent-variable values were not being passed to the agents 2026-01-23 14:15:59 -07:00
2eba530895 feat: Added support for thought-signatures for Gemini 3+ models
CI / All (ubuntu-latest) (push) Failing after 5m25s
CI / All (macos-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2026-01-21 15:11:55 -07:00
3baa3102a3 style: Cleaned up an anyhow error
CI / All (macos-latest) (push) Has been cancelled
CI / All (ubuntu-latest) (push) Has been cancelled
CI / All (windows-latest) (push) Has been cancelled
2025-12-16 14:51:35 -07:00
github-actions[bot]
2d4fad596c bump: version 0.1.2 → 0.1.3 [skip ci] 2025-12-13 20:57:37 +00:00
7259e59d2a ci: Prep for 0.1.3 release 2025-12-13 13:38:09 -07:00
cec04c4597 style: Improved error message for un-fully configured MCP configuration 2025-12-13 13:37:01 -07:00
github-actions[bot]
a7f5677195 chore: bump Cargo.toml to 0.1.3 2025-12-13 20:28:10 +00:00
github-actions[bot]
6075f0a190 bump: version 0.1.2 → 0.1.3 [skip ci] 2025-12-13 20:27:58 +00:00
15310a9e2c chore: Updated the models 2025-12-11 09:05:41 -07:00
f7df54f2f7 docs: Removed the warning about MCP token usage since that has been fixed 2025-12-05 12:38:15 -07:00
212d4bace4 docs: Fixed an unclosed backtick typo in the Environment Variables docs 2025-12-05 12:37:59 -07:00
f4b3267c89 docs: Fixed typo in vault readme 2025-12-05 11:05:14 -07:00
9eeeb11871 style: Applied formatting 2025-12-03 15:06:50 -07:00
b8db3f689d Merge branch 'main' of github.com:Dark-Alex-17/loki 2025-12-03 14:57:03 -07:00
3b21ce2aa5 feat: Improved MCP implementation to minimize the tokens needed to utilize it so it doesn't quickly overwhelm the token space for a given model 2025-12-03 12:12:51 -07:00
Alex Clarke
9bf4fcd943 ci: Updated the README to be a bit more clear in some sections 2025-11-26 15:53:54 -07:00
github-actions[bot]
c1f5cfbbda bump: version 0.1.1 → 0.1.2 [skip ci] 2025-11-08 23:13:34 +00:00
46517a4e15 refactor: Gave the GitHub MCP server a default placeholder value that doesn't require the vault 2025-11-08 16:09:32 -07:00
github-actions[bot]
efbe76e1fc bump: version 0.1.1 → 0.1.2 [skip ci] 2025-11-08 23:02:40 +00:00
245c567d30 bug: Removed the github MCP server and slack MCP server from mcp.json so users can just use Loki without any other setup and add more later 2025-11-08 15:59:05 -07:00
Alex Clarke
cbb3d2c34a build: Removed the remaining IDE metadata directories 2025-11-07 18:21:58 -07:00
bddec85fa5 build: Added forgotten IDE configuration directories into my .gitignore 2025-11-07 18:18:32 -07:00
29 changed files with 1370 additions and 693 deletions
+2 -1
View File
@@ -2,5 +2,6 @@
/tmp /tmp
/.env /.env
!cli/** !cli/**
/.idea/ .idea/
/loki.iml /loki.iml
/.idea/
-10
View File
@@ -1,10 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Zeppelin ignored files
/ZeppelinRemoteNotebooks/
+12
View File
@@ -1,3 +1,15 @@
## v0.1.3 (2025-12-13)
### Feat
- Improved MCP implementation to minimize the tokens needed to utilize it so it doesn't quickly overwhelm the token space for a given model
## v0.1.2 (2025-11-08)
### Refactor
- Gave the GitHub MCP server a default placeholder value that doesn't require the vault
## v0.1.1 (2025-11-08) ## v0.1.1 (2025-11-08)
## v0.1.0 (2025-11-07) ## v0.1.0 (2025-11-07)
+2 -1
View File
@@ -48,7 +48,8 @@ cz commit
1. Clone this repo 1. Clone this repo
2. Run `cargo test` to set up hooks 2. Run `cargo test` to set up hooks
3. Make changes 3. Make changes
4. Run the application using `make run` or `cargo run` 4. Run the application using `just run` or `cargo run`
- Install `just` (`cargo install just`) if you haven't already to use the [justfile](./justfile) in this project.
5. Commit changes. This will trigger pre-commit hooks that will run format, test and lint. If there are errors or 5. Commit changes. This will trigger pre-commit hooks that will run format, test and lint. If there are errors or
warnings from Clippy, please fix them. warnings from Clippy, please fix them.
6. Push your code to a new branch named after the feature/bug/etc. you're adding. This will trigger pre-push hooks that 6. Push your code to a new branch named after the feature/bug/etc. you're adding. This will trigger pre-push hooks that
+31
View File
@@ -0,0 +1,31 @@
# Credits
## AIChat
Loki originally started as a fork of the fantastic
[AIChat CLI](https://github.com/sigoden/aichat). The initial goal was simply
to fix a bug in how MCP servers worked with AIChat, allowing different MCP
servers to be specified per agent. Since then, Loki has evolved far beyond
its original scope and grown into a passion project with a life of its own.
Today, Loki includes first-class MCP server support (for both local and remote
servers), a built-in vault for interpolating secrets in configuration files,
built-in agents and macros, dynamic tab completions, integrated custom
functions (no external `argc` dependency), improved documentation, and much
more with many more ideas planned for the future.
Loki is now developed and maintained as an independent project. Full credit
for the original foundation goes to the developers of the wonderful
AIChat project.
This project is not affiliated with or endorsed by the AIChat maintainers.
## AIChat
Loki originally began as a fork of [AIChat CLI](https://github.com/sigoden/aichat),
created and maintained by the AIChat contributors.
While Loki has since diverged significantly and is now developed as an
independent project, its early foundation and inspiration came from the
AIChat project.
AIChat is licensed under the MIT License.
Generated
+364 -279
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,6 +1,6 @@
[package] [package]
name = "loki-ai" name = "loki-ai"
version = "0.1.1" version = "0.1.3"
edition = "2024" edition = "2024"
authors = ["Alex Clarke <alex.j.tusa@gmail.com>"] authors = ["Alex Clarke <alex.j.tusa@gmail.com>"]
description = "An all-in-one, batteries included LLM CLI Tool" description = "An all-in-one, batteries included LLM CLI Tool"
+11 -31
View File
@@ -19,7 +19,6 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
## Quick Links ## Quick Links
* [AIChat Migration Guide](./docs/AICHAT-MIGRATION.md): Coming from AIChat? Follow the migration guide to get started. * [AIChat Migration Guide](./docs/AICHAT-MIGRATION.md): Coming from AIChat? Follow the migration guide to get started.
* [History](#history): A history of how Loki came to be.
* [Installation](#install): Install Loki * [Installation](#install): Install Loki
* [Getting Started](#getting-started): Get started with Loki by doing first-run setup steps. * [Getting Started](#getting-started): Get started with Loki by doing first-run setup steps.
* [REPL](./docs/REPL.md): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki. * [REPL](./docs/REPL.md): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki.
@@ -41,21 +40,7 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
* [Client Configurations](./docs/clients/CLIENTS.md): Configuration instructions for various LLM providers. * [Client Configurations](./docs/clients/CLIENTS.md): Configuration instructions for various LLM providers.
* [Patching API Requests](./docs/clients/PATCHES.md): Learn how to patch API requests for advanced customization. * [Patching API Requests](./docs/clients/PATCHES.md): Learn how to patch API requests for advanced customization.
* [Custom Themes](./docs/THEMES.md): Change the look and feel of Loki to your preferences with custom themes. * [Custom Themes](./docs/THEMES.md): Change the look and feel of Loki to your preferences with custom themes.
* [History](#history): A history of how Loki came to be.
---
## History
Loki originally started as a fork of the fantastic [AIChat CLI](https://github.com/sigoden/aichat). The purpose was to
simply fix a bug in how MCP servers worked with AIChat so that I could specify different ones for agents. However, it
has since evolved far beyond that and become a passion project with a life of its own!
Loki now has first class MCP server support (with support for local and remote servers alike), a built-in vault for
interpolating secrets in configuration files, built-in agents, built-in macros, dynamic tab completions, integrated
custom functions (no `argc` dependency), improved documentation, and much more with many more plans for the future!
The original kudos goes out to all the developers of the wonderful AIChat project!
---
## Prerequisites ## Prerequisites
Loki requires the following tools to be installed on your system: Loki requires the following tools to be installed on your system:
@@ -164,21 +149,6 @@ guide you through the process when you first attempt to access the vault. So, to
loki --list-secrets loki --list-secrets
``` ```
### First Time Setup
In order for Loki to function correctly, you'll need to add a few secrets to the Loki vault so the MCP servers can
function.
**GitHub MCP Server:**
* `GITHUB_PERSONAL_ACCESS_TOKEN` - A GitHub Personal Access Token with `repo` and `workflow` scopes.
See [Creating a GitHub Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens)
#### Add the secrets to the Loki vault
You can add the secrets to the Loki vault using the following commands (First time use will prompt you to create a vault
password file):
```sh
loki --add-secret GITHUB_PERSONAL_ACCESS_TOKEN
```
### Tab-Completions ### Tab-Completions
You can also enable tab completions to make using Loki easier. To do so, add the following to your shell profile: You can also enable tab completions to make using Loki easier. To do so, add the following to your shell profile:
```shell ```shell
@@ -272,5 +242,15 @@ The appearance of Loki can be modified using the following settings:
| `user_agent` | `null` | The name of the `User-Agent` that should be passed in the `User-Agent` header on all requests to model providers | | `user_agent` | `null` | The name of the `User-Agent` that should be passed in the `User-Agent` header on all requests to model providers |
| `save_shell_history` | `true` | Enables or disables REPL command history | | `save_shell_history` | `true` | Enables or disables REPL command history |
---
## History
Loki began as a fork of [AIChat CLI](https://github.com/sigoden/aichat) and has since evolved into an independent project.
See [CREDITS.md](./CREDITS.md) for full attribution and background.
---
## Creator ## Creator
* [Alex Clarke](https://github.com/Dark-Alex-17) * [Alex Clarke](https://github.com/Dark-Alex-17)
+1 -10
View File
@@ -11,21 +11,12 @@
"ghcr.io/github/github-mcp-server" "ghcr.io/github/github-mcp-server"
], ],
"env": { "env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "{{GITHUB_PERSONAL_ACCESS_TOKEN}}" "GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
} }
}, },
"docker": { "docker": {
"command": "uvx", "command": "uvx",
"args": ["mcp-server-docker"] "args": ["mcp-server-docker"]
},
"slack": {
"command": "npx",
"args": ["-y", "slack-mcp-server@latest", "--transport", "stdio"],
"env": {
"SLACK_MCP_XOXC_TOKEN": "{{SLACK_MCP_XOXC_TOKEN}}",
"SLACK_MCP_XOXD_TOKEN": "{{SLACK_MCP_XOXD_TOKEN}}",
"SLACK_MCP_ADD_MESSAGE_TOOL": true
}
} }
} }
} }
+13 -11
View File
@@ -507,12 +507,14 @@ open_link() {
guard_operation() { guard_operation() {
if [[ -t 1 ]]; then if [[ -t 1 ]]; then
ans="$(confirm "${1:-Are you sure you want to continue?}")" if [[ -z "$AUTO_CONFIRM" ]]; then
ans="$(confirm "${1:-Are you sure you want to continue?}")"
if [[ "$ans" == 0 ]]; then if [[ "$ans" == 0 ]]; then
error "Operation aborted!" 2>&1 error "Operation aborted!" 2>&1
exit 1 exit 1
fi fi
fi
fi fi
} }
@@ -657,13 +659,13 @@ guard_path() {
path="$(_to_real_path "$1")" path="$(_to_real_path "$1")"
confirmation_prompt="$2" confirmation_prompt="$2"
if [[ ! "$path" == "$(pwd)"* ]]; then if [[ ! "$path" == "$(pwd)"* && -z "$AUTO_CONFIRM" ]]; then
ans="$(confirm "$confirmation_prompt")" ans="$(confirm "$confirmation_prompt")"
if [[ "$ans" == 0 ]]; then if [[ "$ans" == 0 ]]; then
error "Operation aborted!" >&2 error "Operation aborted!" >&2
exit 1 exit 1
fi fi
fi fi
fi fi
} }
+2 -2
View File
@@ -3,8 +3,8 @@ Loki originally started as a fork of AIChat but has since evolved into its own s
As a result, there are some changes you'll need to make to your AIChat configuration to be able to use Loki. As a result, there are some changes you'll need to make to your AIChat configuration to be able to use Loki.
Be sure you've followed the [first-time setup steps](../README.md#first-time-setup) so that the Loki configuration Be sure you've run `loki` at least once so that the Loki configuration directory and subdirectories exist and are
directory and subdirectories exist and are populated with the built-in defaults. populated with the built-in defaults.
## Global Configuration File ## Global Configuration File
You should be able to copy/paste your AIChat configuration file into your Loki configuration directory. Since the You should be able to copy/paste your AIChat configuration file into your Loki configuration directory. Since the
+7 -1
View File
@@ -17,6 +17,7 @@ loki --info | grep 'config_dir' | awk '{print $2}'
- [Files and Directory Related Variables](#files-and-directory-related-variables) - [Files and Directory Related Variables](#files-and-directory-related-variables)
- [Agent Related Variables](#agent-related-variables) - [Agent Related Variables](#agent-related-variables)
- [Logging Related Variables](#logging-related-variables) - [Logging Related Variables](#logging-related-variables)
- [Miscellaneous Variables](#miscellaneous-variables)
<!--toc:end--> <!--toc:end-->
--- ---
@@ -84,7 +85,7 @@ You can also customize the location of full agent configurations using the follo
| Environment Variable | Description | | Environment Variable | Description |
|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| |------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|
| `<AGENT_NAME>_CONFIG_FILE | Customize the location of the agent's configuration file; e.g. `SQL_CONFIG_FILE` | | `<AGENT_NAME>_CONFIG_FILE` | Customize the location of the agent's configuration file; e.g. `SQL_CONFIG_FILE` |
| `<AGENT_NAME>_MODEL` | Customize the `model` used for the agent; e.g. `SQL_MODEL` | | `<AGENT_NAME>_MODEL` | Customize the `model` used for the agent; e.g. `SQL_MODEL` |
| `<AGENT_NAME>_TEMPERATURE` | Customize the `temperature` used for the agent; e.g. `SQL_TEMPERATURE` | | `<AGENT_NAME>_TEMPERATURE` | Customize the `temperature` used for the agent; e.g. `SQL_TEMPERATURE` |
| `<AGENT_NAME>_TOP_P` | Customize the `top_p` used for the agent; e.g. `SQL_TOP_P` | | `<AGENT_NAME>_TOP_P` | Customize the `top_p` used for the agent; e.g. `SQL_TOP_P` |
@@ -104,3 +105,8 @@ The following variables can be used to change the log level of Loki or the locat
**Pro-Tip:** You can always tail the Loki logs using the `--tail-logs` flag. If you need to disable color output, you **Pro-Tip:** You can always tail the Loki logs using the `--tail-logs` flag. If you need to disable color output, you
can also pass the `--disable-log-colors` flag as well. can also pass the `--disable-log-colors` flag as well.
## Miscellaneous Variables
| Environment Variable | Description | Default Value |
|----------------------|--------------------------------------------------------------------------------------------------|---------------|
| `AUTO_CONFIRM` | Bypass all `guard_*` checks in the bash prompt helpers; useful for agent composition and routing | |
+1 -1
View File
@@ -114,7 +114,7 @@ At the time of writing, the following files support Loki secret injection:
|-------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------| |-------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
| `config.yaml` | The main Loki configuration file | Cannot use secret injection on the `vault_password_file` field | | `config.yaml` | The main Loki configuration file | Cannot use secret injection on the `vault_password_file` field |
| `functions/mcp.json` | The MCP server configuration file | | | `functions/mcp.json` | The MCP server configuration file | |
| `<agent>/tools.<py/sh>` | Tool files for agents | Specific configuration and only supported for Agents, not all global tools ([see below](#environment-variable-secret-injection-in-agents) | | `<agent>/tools.<py/sh>` | Tool files for agents | Specific configuration and only supported for Agents, not all global tools ([see below](#environment-variable-secret-injection-in-agents)) |
Note that all paths are relative to the Loki configuration directory. The directory varies by system, so you can find yours by Note that all paths are relative to the Loki configuration directory. The directory varies by system, so you can find yours by
+6 -2
View File
@@ -207,7 +207,9 @@ open_link https://www.google.com
``` ```
### guard_operation ### guard_operation
Prompt for permission to run an operation Prompt for permission to run an operation.
Can be disabled by setting the environment variable `AUTO_CONFIRM`.
**Example:** **Example:**
```bash ```bash
@@ -216,7 +218,9 @@ _run_sql
``` ```
### guard_path ### guard_path
Prompt for permission to perform path operations Prompt for permission to perform path operations.
Can be disabled by setting the environment variable `AUTO_CONFIRM`.
**Example:** **Example:**
```bash ```bash
+1 -3
View File
@@ -83,9 +83,7 @@ enabled_mcp_servers: null # Which MCP servers to enable by default (e.g.
``` ```
A special note about `enabled_mcp_servers`: a user can set this to `all` to enable all configured MCP servers in the A special note about `enabled_mcp_servers`: a user can set this to `all` to enable all configured MCP servers in the
`functions/mcp.json` configuration. However, **this should be used with caution**. When there is a significant number `functions/mcp.json` configuration.
of configured MCP servers, enabling all MCP servers may overwhelm the context length of a model, and quickly exceed
token limits.
(See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.) (See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.)
+25
View File
@@ -0,0 +1,25 @@
# List all recipes
default:
@just --list
# Run all tests
[group: 'test']
test:
cargo test --all
# See what linter errors and warnings are unaddressed
[group: 'style']
lint:
cargo clippy --all
# Run Rustfmt against all source files
[group: 'style']
fmt:
cargo fmt --all
# Build the project for the current system architecture
# (Gets stored at ./target/[debug|release]/loki)
[group: 'build']
[arg('build_type', pattern="debug|release")]
build build_type='debug':
@cargo build {{ if build_type == "release" { "--release" } else { "" } }}
+243 -216
View File
@@ -3,6 +3,20 @@
# - https://platform.openai.com/docs/api-reference/chat # - https://platform.openai.com/docs/api-reference/chat
- provider: openai - provider: openai
models: models:
- name: gpt-5.1
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-5.1-chat-latest
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-5 - name: gpt-5
max_input_tokens: 400000 max_input_tokens: 400000
max_output_tokens: 128000 max_output_tokens: 128000
@@ -31,13 +45,6 @@
output_price: 0.4 output_price: 0.4
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: gpt-5-codex
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-4.1 - name: gpt-4.1
max_input_tokens: 1047576 max_input_tokens: 1047576
max_output_tokens: 32768 max_output_tokens: 32768
@@ -259,6 +266,56 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: claude-haiku-4-5-20251001
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5-20251001:thinking
real_name: claude-haiku-4-5-20251001
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
- name: claude-opus-4-5-20251101
type: chat
max_input_tokens: 200000
input_price: 15.0
output_price: 75.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-opus-4-5-20251101:thinking
type: chat
real_name: claude-opus-4-5-20251101
max_input_tokens: 200000
input_price: 15.0
output_price: 75.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-opus-4-1-20250805 - name: claude-opus-4-1-20250805
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -602,6 +659,14 @@
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini # - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
- provider: vertexai - provider: vertexai
models: models:
- name: gemini-3-pro-preview
hipaa_safe: true
max_input_tokens: 1048576
max_output_tokens: 65536
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-2.5-flash - name: gemini-2.5-flash
max_input_tokens: 1048576 max_input_tokens: 1048576
max_output_tokens: 65536 max_output_tokens: 65536
@@ -660,6 +725,29 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: claude-haiku-4-5@20251001
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5@20251001:thinking
real_name: claude-haiku-4-5@20251001
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
- name: claude-opus-4-1@20250805 - name: claude-opus-4-1@20250805
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -817,6 +905,31 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0:thinking
real_name: us.anthropic.claude-haiku-4-5-20251001-v1:0
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
patch:
body:
inferenceConfig:
temperature: null
topP: null
additionalModelRequestFields:
thinking:
type: enabled
budget_tokens: 16000
- name: us.anthropic.claude-opus-4-1-20250805-v1:0 - name: us.anthropic.claude-opus-4-1-20250805-v1:0
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -1004,6 +1117,12 @@
require_max_tokens: true require_max_tokens: true
input_price: 0 input_price: 0
output_price: 0 output_price: 0
- name: '@cf/qwen/qwen3-30b-a3b-fp8'
max_input_tokens: 131072
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/qwen/qwen2.5-coder-32b-instruct' - name: '@cf/qwen/qwen2.5-coder-32b-instruct'
max_input_tokens: 131072 max_input_tokens: 131072
max_output_tokens: 2048 max_output_tokens: 2048
@@ -1030,8 +1149,8 @@
max_batch_size: 100 max_batch_size: 100
# Links: # Links:
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Wm9cvy6rl # - https://cloud.baidu.com/doc/qianfan/s/rmh4stp0j
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Qm9cw2s7m # - https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya
- provider: ernie - provider: ernie
models: models:
- name: ernie-4.5-turbo-128k - name: ernie-4.5-turbo-128k
@@ -1043,8 +1162,12 @@
input_price: 0.42 input_price: 0.42
output_price: 1.26 output_price: 1.26
supports_vision: true supports_vision: true
- name: ernie-x1-turbo-32k - name: ernie-5.0-thinking-preview
max_input_tokens: 32768 max_input_tokens: 131072
input_price: 1.4
output_price: 5.6
- name: ernie-x1.1-preview
max_input_tokens: 65536
input_price: 0.14 input_price: 0.14
output_price: 0.56 output_price: 0.56
- name: bge-large-zh - name: bge-large-zh
@@ -1064,75 +1187,31 @@
max_input_tokens: 1024 max_input_tokens: 1024
input_price: 0.07 input_price: 0.07
# Links: # Links:
# - https://help.aliyun.com/zh/model-studio/getting-started/models # - https://help.aliyun.com/zh/model-studio/getting-started/models
# - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api # - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api
- provider: qianwen - provider: qianwen
models: models:
- name: qwen-max-latest
max_input_tokens: 32678
max_output_tokens: 8192
input_price: 1.6
output_price: 6.4
supports_function_calling: true
- name: qwen-plus-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.112
output_price: 0.28
supports_function_calling: true
- name: qwen-turbo-latest
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.042
output_price: 0.084
supports_function_calling: true
- name: qwen-long
max_input_tokens: 1000000
input_price: 0.07
output_price: 0.28
- name: qwen-omni-turbo-latest
max_input_tokens: 32768
max_output_tokens: 2048
supports_vision: true
- name: qwen-coder-plus-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
- name: qwen-coder-turbo-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.28
output_price: 0.84
- name: qwen-vl-max-latest
max_input_tokens: 30720
max_output_tokens: 2048
input_price: 0.42
output_price: 1.26
supports_vision: true
- name: qwen-vl-plus-latest
max_input_tokens: 30000
max_output_tokens: 2048
input_price: 0.21
output_price: 0.63
supports_vision: true
- name: qwen3-max - name: qwen3-max
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 2.1 supports_function_calling: true
output_price: 8.4 - name: qwen-plus
max_input_tokens: 131072
supports_function_calling: true
- name: qwen-flash
max_input_tokens: 1000000
supports_function_calling: true supports_function_calling: true
- name: qwen3-vl-plus - name: qwen3-vl-plus
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.42
output_price: 4.2
supports_vision: true supports_vision: true
- name: qwen3-max-preview - name: qwen3-vl-flash
max_input_tokens: 262144 max_input_tokens: 262144
max_output_tokens: 32768 supports_vision: true
input_price: 1.4 - name: qwen-coder-plus
output_price: 5.6 max_input_tokens: 1000000
supports_function_calling: true - name: qwen-coder-flash
max_input_tokens: 1000000
- name: qwen3-next-80b-a3b-instruct - name: qwen3-next-80b-a3b-instruct
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.14 input_price: 0.14
@@ -1160,6 +1239,16 @@
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.105 input_price: 0.105
output_price: 1.05 output_price: 1.05
- name: qwen3-vl-32b-instruct
max_input_tokens: 131072
input_price: 0.28
output_price: 1.12
supports_vision: true
- name: qwen3-vl-8b-instruct
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
supports_vision: true
- name: qwen3-coder-480b-a35b-instruct - name: qwen3-coder-480b-a35b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 1.26 input_price: 1.26
@@ -1168,32 +1257,10 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.315 input_price: 0.315
output_price: 1.26 output_price: 1.26
- name: qwen2.5-72b-instruct - name: deepseek-v3.2-exp
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.56
output_price: 1.68
supports_function_calling: true
- name: qwen2.5-vl-72b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 2.24
output_price: 6.72
supports_vision: true
- name: qwen2.5-coder-32b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
supports_function_calling: true
- name: deepseek-v3.1
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.28 input_price: 0.28
output_price: 1.12 output_price: 0.42
- name: deepseek-r1-0528
max_input_tokens: 65536
input_price: 0.28
output_price: 1.12
- name: text-embedding-v4 - name: text-embedding-v4
type: embedding type: embedding
input_price: 0.1 input_price: 0.1
@@ -1247,10 +1314,10 @@
# - https://platform.moonshot.cn/docs/api/chat#%E5%85%AC%E5%BC%80%E7%9A%84%E6%9C%8D%E5%8A%A1%E5%9C%B0%E5%9D%80 # - https://platform.moonshot.cn/docs/api/chat#%E5%85%AC%E5%BC%80%E7%9A%84%E6%9C%8D%E5%8A%A1%E5%9C%B0%E5%9D%80
- provider: moonshot - provider: moonshot
models: models:
- name: kimi-latest - name: kimi-k2-turbo-preview
max_input_tokens: 131072 max_input_tokens: 262144
input_price: 1.4 input_price: 1.12
output_price: 4.2 output_price: 8.12
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: kimi-k2-0905-preview - name: kimi-k2-0905-preview
@@ -1259,16 +1326,15 @@
output_price: 2.24 output_price: 2.24
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: kimi-k2-turbo-preview - name: kimi-k2-thinking-turbo
max_input_tokens: 131072 max_input_tokens: 262144
input_price: 1.12 input_price: 1.12
output_price: 4.48 output_price: 8.12
supports_vision: true supports_vision: true
supports_function_calling: true - name: kimi-k2-thinking
- name: kimi-thinking-preview max_input_tokens: 262144
max_input_tokens: 131072 input_price: 0.56
input_price: 28 output_price: 2.24
output_price: 28
supports_vision: true supports_vision: true
# Links: # Links:
@@ -1293,7 +1359,7 @@
# - https://open.bigmodel.cn/dev/api#glm-4 # - https://open.bigmodel.cn/dev/api#glm-4
- provider: zhipuai - provider: zhipuai
models: models:
- name: glm-4.5 - name: glm-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.28 input_price: 0.28
output_price: 1.12 output_price: 1.12
@@ -1353,25 +1419,35 @@
input_price: 0.112 input_price: 0.112
# Links: # Links:
# - https://platform.minimaxi.com/document/pricing # - https://platform.minimaxi.com/docs/guides/pricing
# - https://platform.minimaxi.com/document/ChatCompletion%20v2 # - https://platform.minimaxi.com/document/ChatCompletion%20v2
- provider: minimax - provider: minimax
models: models:
- name: minimax-text-01 - name: minimax-m2
max_input_tokens: 1000192 max_input_tokens: 204800
input_price: 0.14 input_price: 0.294
output_price: 1.12 output_price: 1.176
supports_vision: true supports_function_calling: true
- name: minimax-m1
max_input_tokens: 131072
input_price: 0.112
output_price: 1.12
# Links: # Links:
# - https://openrouter.ai/models # - https://openrouter.ai/models
# - https://openrouter.ai/docs/api-reference/chat-completion # - https://openrouter.ai/docs/api-reference/chat-completion
- provider: openrouter - provider: openrouter
models: models:
- name: openai/gpt-5.1
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5.1-chat
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5 - name: openai/gpt-5
max_input_tokens: 400000 max_input_tokens: 400000
max_output_tokens: 128000 max_output_tokens: 128000
@@ -1400,13 +1476,6 @@
output_price: 0.4 output_price: 0.4
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: openai/gpt-5-codex
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4.1 - name: openai/gpt-4.1
max_input_tokens: 1047576 max_input_tokens: 1047576
max_output_tokens: 32768 max_output_tokens: 32768
@@ -1563,6 +1632,14 @@
output_price: 15 output_price: 15
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: anthropic/claude-haiku-4.5
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-opus-4.1 - name: anthropic/claude-opus-4.1
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -1696,11 +1773,10 @@
patch: patch:
body: body:
include_reasoning: true include_reasoning: true
- name: qwen/qwen-max - name: qwen/qwen3-max
max_input_tokens: 32768 max_input_tokens: 262144
max_output_tokens: 8192 input_price: 1.2
input_price: 1.6 output_price: 6
output_price: 6.4
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-plus - name: qwen/qwen-plus
max_input_tokens: 131072 max_input_tokens: 131072
@@ -1708,22 +1784,6 @@
input_price: 0.4 input_price: 0.4
output_price: 1.2 output_price: 1.2
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-turbo
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.05
output_price: 0.2
supports_function_calling: true
- name: qwen/qwen-vl-plus
max_input_tokens: 7500
input_price: 0.21
output_price: 0.63
supports_vision: true
- name: qwen/qwen3-max
max_input_tokens: 262144
input_price: 1.2
output_price: 6
supports_function_calling: true
- name: qwen/qwen3-next-80b-a3b-instruct - name: qwen/qwen3-next-80b-a3b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.1 input_price: 0.1
@@ -1733,7 +1793,7 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.1 input_price: 0.1
output_price: 0.8 output_price: 0.8
- name: qwen/qwen3-235b-a22b-2507 - name: qwen/qwen3-235b-a22b-2507 # Qwen3 235B A22B Instruct 2507
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.12 input_price: 0.12
output_price: 0.59 output_price: 0.59
@@ -1750,6 +1810,16 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.071 input_price: 0.071
output_price: 0.285 output_price: 0.285
- name: qwen/qwen3-vl-32b-instruct
max_input_tokens: 262144
input_price: 0.35
output_price: 1.1
supports_vision: true
- name: qwen/qwen3-vl-8b-instruct
max_input_tokens: 262144
input_price: 0.08
output_price: 0.50
supports_vision: true
- name: qwen/qwen3-coder-plus - name: qwen/qwen3-coder-plus
max_input_tokens: 128000 max_input_tokens: 128000
input_price: 1 input_price: 1
@@ -1760,30 +1830,26 @@
input_price: 0.3 input_price: 0.3
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen3-coder # Qwen3 Coder 480B A35B
max_input_tokens: 262144
input_price: 0.22
output_price: 0.95
supports_function_calling: true
- name: qwen/qwen3-coder-30b-a3b-instruct - name: qwen/qwen3-coder-30b-a3b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.052 input_price: 0.052
output_price: 0.207 output_price: 0.207
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-2.5-72b-instruct
max_input_tokens: 131072
input_price: 0.35
output_price: 0.4
supports_function_calling: true
- name: qwen/qwen2.5-vl-72b-instruct
max_input_tokens: 32000
input_price: 0.7
output_price: 0.7
supports_vision: true
- name: qwen/qwen-2.5-coder-32b-instruct
max_input_tokens: 32768
input_price: 0.18
output_price: 0.18
- name: moonshotai/kimi-k2-0905 - name: moonshotai/kimi-k2-0905
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.296 input_price: 0.296
output_price: 1.185 output_price: 1.185
supports_function_calling: true supports_function_calling: true
- name: moonshotai/kimi-k2-thinking
max_input_tokens: 262144
input_price: 0.45
output_price: 2.35
supports_function_calling: true
- name: moonshotai/kimi-dev-72b - name: moonshotai/kimi-dev-72b
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.29 input_price: 0.29
@@ -1804,6 +1870,11 @@
input_price: 0.2 input_price: 0.2
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: amazon/nova-premier-v1
max_input_tokens: 1000000
input_price: 2.5
output_price: 12.5
supports_vision: true
- name: amazon/nova-pro-v1 - name: amazon/nova-pro-v1
max_input_tokens: 300000 max_input_tokens: 300000
max_output_tokens: 5120 max_output_tokens: 5120
@@ -1850,29 +1921,15 @@
patch: patch:
body: body:
include_reasoning: true include_reasoning: true
- name: minimax/minimax-01 - name: minimax/minimax-m2
max_input_tokens: 1000192 max_input_tokens: 196608
input_price: 0.2 input_price: 0.15
output_price: 1.1 output_price: 0.45
- name: z-ai/glm-4.6 - name: z-ai/glm-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.5 input_price: 0.5
output_price: 1.75 output_price: 1.75
supports_function_calling: true supports_function_calling: true
- name: z-ai/glm-4.5
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
supports_function_calling: true
- name: z-ai/glm-4.5-air
max_input_tokens: 131072
input_price: 0.2
output_price: 1.1
- name: z-ai/glm-4.5v
max_input_tokens: 65536
input_price: 0.5
output_price: 1.7
supports_vision: true
# Links: # Links:
# - https://github.com/marketplace?type=models # - https://github.com/marketplace?type=models
@@ -2068,10 +2125,6 @@
input_price: 0.08 input_price: 0.08
output_price: 0.3 output_price: 0.3
supports_vision: true supports_vision: true
- name: meta-llama/Llama-3.3-70B-Instruct
max_input_tokens: 131072
input_price: 0.23
output_price: 0.40
- name: Qwen/Qwen3-Next-80B-A3B-Instruct - name: Qwen/Qwen3-Next-80B-A3B-Instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.14 input_price: 0.14
@@ -2100,27 +2153,15 @@
input_price: 0.07 input_price: 0.07
output_price: 0.27 output_price: 0.27
supports_function_calling: true supports_function_calling: true
- name: Qwen/Qwen3-235B-A22B
max_input_tokens: 40960
input_price: 0.15
output_price: 0.6
- name: Qwen/Qwen3-30B-A3B - name: Qwen/Qwen3-30B-A3B
max_input_tokens: 40960 max_input_tokens: 40960
input_price: 0.1 input_price: 0.1
output_price: 0.3 output_price: 0.3
- name: Qwen/Qwen3-32B - name: Qwen/Qwen3-VL-8B-Instruct
max_input_tokens: 40960 max_input_tokens: 262144
input_price: 0.1 input_price: 0.18
output_price: 0.3 output_price: 0.69
- name: Qwen/Qwen2.5-72B-Instruct supports_vision: true
max_input_tokens: 32768
input_price: 0.23
output_price: 0.40
supports_function_calling: true
- name: Qwen/Qwen2.5-Coder-32B-Instruct
max_input_tokens: 32768
input_price: 0.07
output_price: 0.16
- name: deepseek-ai/DeepSeek-V3.2-Exp - name: deepseek-ai/DeepSeek-V3.2-Exp
max_input_tokens: 163840 max_input_tokens: 163840
input_price: 0.27 input_price: 0.27
@@ -2145,35 +2186,21 @@
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.06 input_price: 0.06
output_price: 0.12 output_price: 0.12
- name: mistralai/Devstral-Small-2507
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
- name: moonshotai/Kimi-K2-Instruct-0905 - name: moonshotai/Kimi-K2-Instruct-0905
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.5 input_price: 0.5
output_price: 2.0 output_price: 2.0
supports_function_calling: true supports_function_calling: true
- name: moonshotai/Kimi-K2-Thinking
max_input_tokens: 262144
input_price: 0.55
output_price: 2.5
supports_function_calling: true
- name: zai-org/GLM-4.6 - name: zai-org/GLM-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.6 input_price: 0.6
output_price: 1.9 output_price: 1.9
supports_function_calling: true supports_function_calling: true
- name: zai-org/GLM-4.5
max_input_tokens: 131072
input_price: 0.55
output_price: 2.0
supports_function_calling: true
- name: zai-org/GLM-4.5-Air
max_input_tokens: 131072
input_price: 0.2
output_price: 1.1
supports_function_calling: true
- name: zai-org/GLM-4.5V
max_input_tokens: 65536
input_price: 0.5
output_price: 1.7
supports_vision: true
- name: BAAI/bge-large-en-v1.5 - name: BAAI/bge-large-en-v1.5
type: embedding type: embedding
input_price: 0.01 input_price: 0.01
+2 -2
View File
@@ -234,7 +234,7 @@ async fn chat_completions_streaming(
} }
let arguments: Value = let arguments: Value =
function_arguments.parse().with_context(|| { function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})?; })?;
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
function_name.clone(), function_name.clone(),
@@ -272,7 +272,7 @@ async fn chat_completions_streaming(
function_arguments = String::from("{}"); function_arguments = String::from("{}");
} }
let arguments: Value = function_arguments.parse().with_context(|| { let arguments: Value = function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})?; })?;
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
function_name.clone(), function_name.clone(),
+8 -5
View File
@@ -93,10 +93,13 @@ pub async fn claude_chat_completions_streaming(
data["content_block"]["id"].as_str(), data["content_block"]["id"].as_str(),
) { ) {
if !function_name.is_empty() { if !function_name.is_empty() {
let arguments: Value = let arguments: Value = if function_arguments.is_empty() {
json!({})
} else {
function_arguments.parse().with_context(|| { function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})?; })?
};
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
function_name.clone(), function_name.clone(),
arguments, arguments,
@@ -134,7 +137,7 @@ pub async fn claude_chat_completions_streaming(
json!({}) json!({})
} else { } else {
function_arguments.parse().with_context(|| { function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})? })?
}; };
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
@@ -286,7 +289,7 @@ pub fn claude_build_chat_completions_body(
body["tools"] = functions body["tools"] = functions
.iter() .iter()
.map(|v| { .map(|v| {
if v.parameters.type_value.is_none() { if v.parameters.is_empty_properties() {
json!({ json!({
"name": v.name, "name": v.name,
"description": v.description, "description": v.description,
+2 -2
View File
@@ -167,7 +167,7 @@ async fn chat_completions_streaming(
"tool-call-end" => { "tool-call-end" => {
if !function_name.is_empty() { if !function_name.is_empty() {
let arguments: Value = function_arguments.parse().with_context(|| { let arguments: Value = function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})?; })?;
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
function_name.clone(), function_name.clone(),
@@ -230,7 +230,7 @@ fn extract_chat_completions(data: &Value) -> Result<ChatCompletionsOutput> {
call["id"].as_str(), call["id"].as_str(),
) { ) {
let arguments: Value = arguments.parse().with_context(|| { let arguments: Value = arguments.parse().with_context(|| {
format!("Tool call '{name}' have non-JSON arguments '{arguments}'") format!("Tool call '{name}' has non-JSON arguments '{arguments}'")
})?; })?;
tool_calls.push(ToolCall::new( tool_calls.push(ToolCall::new(
name.to_string(), name.to_string(),
+14 -8
View File
@@ -433,10 +433,13 @@ pub async fn call_chat_completions(
client.global_config().read().print_markdown(&text)?; client.global_config().read().print_markdown(&text)?;
} }
} }
Ok(( let tool_results = eval_tool_calls(client.global_config(), tool_calls).await?;
text, if let Some(tracker) = client.global_config().write().tool_call_tracker.as_mut() {
eval_tool_calls(client.global_config(), tool_calls).await?, tool_results
)) .iter()
.for_each(|res| tracker.record_call(res.call.clone()));
}
Ok((text, tool_results))
} }
Err(err) => Err(err), Err(err) => Err(err),
} }
@@ -467,10 +470,13 @@ pub async fn call_chat_completions_streaming(
if !text.is_empty() && !text.ends_with('\n') { if !text.is_empty() && !text.ends_with('\n') {
println!(); println!();
} }
Ok(( let tool_results = eval_tool_calls(client.global_config(), tool_calls).await?;
text, if let Some(tracker) = client.global_config().write().tool_call_tracker.as_mut() {
eval_tool_calls(client.global_config(), tool_calls).await?, tool_results
)) .iter()
.for_each(|res| tracker.record_call(res.call.clone()));
}
Ok((text, tool_results))
} }
Err(err) => { Err(err) => {
if !text.is_empty() { if !text.is_empty() {
+1 -1
View File
@@ -228,7 +228,7 @@ macro_rules! config_get_fn {
std::env::var(&env_name) std::env::var(&env_name)
.ok() .ok()
.or_else(|| self.config.$field_name.clone()) .or_else(|| self.config.$field_name.clone())
.ok_or_else(|| anyhow::anyhow!("Miss '{}'", stringify!($field_name))) .ok_or_else(|| anyhow::anyhow!("Missing '{}'", stringify!($field_name)))
} }
}; };
} }
+2 -2
View File
@@ -164,7 +164,7 @@ pub async fn openai_chat_completions_streaming(
function_arguments = String::from("{}"); function_arguments = String::from("{}");
} }
let arguments: Value = function_arguments.parse().with_context(|| { let arguments: Value = function_arguments.parse().with_context(|| {
format!("Tool call '{function_name}' have non-JSON arguments '{function_arguments}'") format!("Tool call '{function_name}' has non-JSON arguments '{function_arguments}'")
})?; })?;
handler.tool_call(ToolCall::new( handler.tool_call(ToolCall::new(
function_name.clone(), function_name.clone(),
@@ -370,7 +370,7 @@ pub fn openai_extract_chat_completions(data: &Value) -> Result<ChatCompletionsOu
call["id"].as_str(), call["id"].as_str(),
) { ) {
let arguments: Value = arguments.parse().with_context(|| { let arguments: Value = arguments.parse().with_context(|| {
format!("Tool call '{name}' have non-JSON arguments '{arguments}'") format!("Tool call '{name}' has non-JSON arguments '{arguments}'")
})?; })?;
tool_calls.push(ToolCall::new( tool_calls.push(ToolCall::new(
name.to_string(), name.to_string(),
+153 -2
View File
@@ -13,6 +13,9 @@ pub struct SseHandler {
abort_signal: AbortSignal, abort_signal: AbortSignal,
buffer: String, buffer: String,
tool_calls: Vec<ToolCall>, tool_calls: Vec<ToolCall>,
last_tool_calls: Vec<ToolCall>,
max_call_repeats: usize,
call_repeat_chain_len: usize,
} }
impl SseHandler { impl SseHandler {
@@ -22,11 +25,13 @@ impl SseHandler {
abort_signal, abort_signal,
buffer: String::new(), buffer: String::new(),
tool_calls: Vec::new(), tool_calls: Vec::new(),
last_tool_calls: Vec::new(),
max_call_repeats: 2,
call_repeat_chain_len: 3,
} }
} }
pub fn text(&mut self, text: &str) -> Result<()> { pub fn text(&mut self, text: &str) -> Result<()> {
// debug!("HandleText: {}", text);
if text.is_empty() { if text.is_empty() {
return Ok(()); return Ok(());
} }
@@ -45,7 +50,6 @@ impl SseHandler {
} }
pub fn done(&mut self) { pub fn done(&mut self) {
// debug!("HandleDone");
let ret = self.sender.send(SseEvent::Done); let ret = self.sender.send(SseEvent::Done);
if ret.is_err() { if ret.is_err() {
if self.abort_signal.aborted() { if self.abort_signal.aborted() {
@@ -56,14 +60,114 @@ impl SseHandler {
} }
pub fn tool_call(&mut self, call: ToolCall) -> Result<()> { pub fn tool_call(&mut self, call: ToolCall) -> Result<()> {
if self.is_call_loop(&call) {
let loop_message = self.create_loop_detection_message(&call);
return Err(anyhow!(loop_message));
}
if self.last_tool_calls.len() == self.call_repeat_chain_len * self.max_call_repeats {
self.last_tool_calls.remove(0);
}
self.last_tool_calls.push(call.clone());
self.tool_calls.push(call); self.tool_calls.push(call);
Ok(()) Ok(())
} }
fn is_call_loop(&self, new_call: &ToolCall) -> bool {
if self.last_tool_calls.len() < self.call_repeat_chain_len {
return false;
}
if let Some(last_call) = self.last_tool_calls.last()
&& self.calls_match(last_call, new_call)
{
let mut repeat_count = 1;
for i in (0..self.last_tool_calls.len()).rev() {
if i == 0 {
break;
}
if self.calls_match(&self.last_tool_calls[i - 1], &self.last_tool_calls[i]) {
repeat_count += 1;
if repeat_count >= self.max_call_repeats {
return true;
}
} else {
break;
}
}
}
let chain_start = self
.last_tool_calls
.len()
.saturating_sub(self.call_repeat_chain_len);
let chain = &self.last_tool_calls[chain_start..];
if chain.len() == self.call_repeat_chain_len {
let mut is_repeating = true;
for i in 0..chain.len() - 1 {
if !self.calls_match(&chain[i], &chain[i + 1]) {
is_repeating = false;
break;
}
}
if is_repeating && self.calls_match(&chain[chain.len() - 1], new_call) {
return true;
}
}
false
}
/// Two tool calls match when both their names and their arguments are equal.
fn calls_match(&self, call1: &ToolCall, call2: &ToolCall) -> bool {
    let same_name = call1.name == call2.name;
    same_name && call1.arguments == call2.arguments
}
/// Builds the error text returned when `new_call` is rejected as a loop.
///
/// The message names the repeating call and its arguments. When at least
/// `call_repeat_chain_len` calls are recorded, it also lists the most recent
/// `call_repeat_chain_len` of them, then closes with guidance on how to proceed.
fn create_loop_detection_message(&self, new_call: &ToolCall) -> String {
    // The header ends with a newline so the details do not run into the banner.
    let mut message = String::from("⚠️ Call loop detected! ⚠️\n");

    message.push_str(&format!(
        "The call '{}' with arguments '{}' is repeating.\n",
        new_call.name, new_call.arguments
    ));

    let recorded = self.last_tool_calls.len();
    if recorded >= self.call_repeat_chain_len {
        // Guarded by the check above, so the subtraction cannot underflow.
        let chain = &self.last_tool_calls[recorded - self.call_repeat_chain_len..];

        message.push_str("The following sequence of calls is repeating:\n");
        for (position, call) in chain.iter().enumerate() {
            message.push_str(&format!(
                "  {}. {} with arguments {}\n",
                position + 1,
                call.name,
                call.arguments
            ));
        }
    }

    message.push_str("\nPlease move on to the next task in your sequence using the last output you got from the call or chain you are trying to re-execute. ");
    message.push_str(
        "Consider using different parameters or a different approach to avoid this loop.",
    );

    message
}
pub fn abort(&self) -> AbortSignal { pub fn abort(&self) -> AbortSignal {
self.abort_signal.clone() self.abort_signal.clone()
} }
/// Test-only view of the recently recorded tool calls, in insertion order.
#[cfg(test)]
pub fn last_tool_calls(&self) -> &[ToolCall] {
    self.last_tool_calls.as_slice()
}
pub fn take(self) -> (String, Vec<ToolCall>) { pub fn take(self) -> (String, Vec<ToolCall>) {
let Self { let Self {
buffer, tool_calls, .. buffer, tool_calls, ..
@@ -239,6 +343,53 @@ mod tests {
use bytes::Bytes; use bytes::Bytes;
use futures_util::stream; use futures_util::stream;
use rand::Rng; use rand::Rng;
use serde_json::json;
/// After many distinct calls, the history keeps only the newest
/// `call_repeat_chain_len * max_call_repeats` entries, oldest first.
#[test]
fn test_last_tool_calls_ring_buffer() {
    let (sender, _) = tokio::sync::mpsc::unbounded_channel();
    let mut handler = SseHandler::new(sender, crate::utils::create_abort_signal());

    // Every call is distinct, so loop detection never rejects one.
    for i in 0..15 {
        let name = format!("test_function_{i}");
        handler
            .tool_call(ToolCall::new(name, json!({"param": i}), None))
            .unwrap();
    }

    let capacity = handler.call_repeat_chain_len * handler.max_call_repeats;
    let recent = handler.last_tool_calls();

    assert_eq!(recent.len(), capacity);
    assert_eq!(recent[capacity - 1].name, "test_function_14");
    assert_eq!(
        recent[0].name,
        format!("test_function_{}", 14 - capacity + 1)
    );
}
/// Three identical calls are accepted; the fourth is rejected with a loop error
/// that names the offending function.
#[test]
fn test_call_loop_detection() {
    let (sender, _) = tokio::sync::mpsc::unbounded_channel();
    let mut handler = SseHandler::new(sender, crate::utils::create_abort_signal());
    handler.max_call_repeats = 2;
    handler.call_repeat_chain_len = 3;

    let repeated = ToolCall::new("test_function_loop".to_string(), json!({"param": 1}), None);

    (0..3).for_each(|_| handler.tool_call(repeated.clone()).unwrap());

    let outcome = handler.tool_call(repeated.clone());
    assert!(outcome.is_err());

    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("Call loop detected!"));
    assert!(text.contains("test_function_loop"));
}
fn split_chunks(text: &str) -> Vec<Vec<u8>> { fn split_chunks(text: &str) -> Vec<Vec<u8>> {
let mut rng = rand::rng(); let mut rng = rand::rng();
+22 -4
View File
@@ -219,7 +219,14 @@ pub async fn gemini_chat_completions_streaming(
part["functionCall"]["name"].as_str(), part["functionCall"]["name"].as_str(),
part["functionCall"]["args"].as_object(), part["functionCall"]["args"].as_object(),
) { ) {
handler.tool_call(ToolCall::new(name.to_string(), json!(args), None))?; let thought_signature = part["thoughtSignature"]
.as_str()
.or_else(|| part["thought_signature"].as_str())
.map(|s| s.to_string());
handler.tool_call(
ToolCall::new(name.to_string(), json!(args), None)
.with_thought_signature(thought_signature),
)?;
} }
} }
} else if let Some("SAFETY") = data["promptFeedback"]["blockReason"] } else if let Some("SAFETY") = data["promptFeedback"]["blockReason"]
@@ -280,7 +287,14 @@ fn gemini_extract_chat_completions_text(data: &Value) -> Result<ChatCompletionsO
part["functionCall"]["name"].as_str(), part["functionCall"]["name"].as_str(),
part["functionCall"]["args"].as_object(), part["functionCall"]["args"].as_object(),
) { ) {
tool_calls.push(ToolCall::new(name.to_string(), json!(args), None)); let thought_signature = part["thoughtSignature"]
.as_str()
.or_else(|| part["thought_signature"].as_str())
.map(|s| s.to_string());
tool_calls.push(
ToolCall::new(name.to_string(), json!(args), None)
.with_thought_signature(thought_signature),
);
} }
} }
} }
@@ -347,12 +361,16 @@ pub fn gemini_build_chat_completions_body(
}, },
MessageContent::ToolCalls(MessageContentToolCalls { tool_results, .. }) => { MessageContent::ToolCalls(MessageContentToolCalls { tool_results, .. }) => {
let model_parts: Vec<Value> = tool_results.iter().map(|tool_result| { let model_parts: Vec<Value> = tool_results.iter().map(|tool_result| {
json!({ let mut part = json!({
"functionCall": { "functionCall": {
"name": tool_result.call.name, "name": tool_result.call.name,
"args": tool_result.call.arguments, "args": tool_result.call.arguments,
} }
}) });
if let Some(sig) = &tool_result.call.thought_signature {
part["thoughtSignature"] = json!(sig);
}
part
}).collect(); }).collect();
let function_parts: Vec<Value> = tool_results.into_iter().map(|tool_result| { let function_parts: Vec<Value> = tool_results.into_iter().map(|tool_result| {
json!({ json!({
+5
View File
@@ -204,6 +204,7 @@ impl Agent {
pub fn init_agent_variables( pub fn init_agent_variables(
agent_variables: &[AgentVariable], agent_variables: &[AgentVariable],
pre_set_variables: Option<&AgentVariables>,
no_interaction: bool, no_interaction: bool,
) -> Result<AgentVariables> { ) -> Result<AgentVariables> {
let mut output = IndexMap::new(); let mut output = IndexMap::new();
@@ -214,6 +215,10 @@ impl Agent {
let mut unset_variables = vec![]; let mut unset_variables = vec![];
for agent_variable in agent_variables { for agent_variable in agent_variables {
let key = agent_variable.name.clone(); let key = agent_variable.name.clone();
if let Some(value) = pre_set_variables.and_then(|v| v.get(&key)) {
output.insert(key, value.clone());
continue;
}
if let Some(value) = agent_variable.default.clone() { if let Some(value) = agent_variable.default.clone() {
output.insert(key, value); output.insert(key, value);
continue; continue;
+37 -15
View File
@@ -17,14 +17,15 @@ use crate::client::{
ClientConfig, MessageContentToolCalls, Model, ModelType, OPENAI_COMPATIBLE_PROVIDERS, ClientConfig, MessageContentToolCalls, Model, ModelType, OPENAI_COMPATIBLE_PROVIDERS,
ProviderModels, create_client_config, list_client_types, list_models, ProviderModels, create_client_config, list_client_types, list_models,
}; };
use crate::function::{FunctionDeclaration, Functions, ToolResult}; use crate::function::{FunctionDeclaration, Functions, ToolCallTracker, ToolResult};
use crate::rag::Rag; use crate::rag::Rag;
use crate::render::{MarkdownRender, RenderOptions}; use crate::render::{MarkdownRender, RenderOptions};
use crate::utils::*; use crate::utils::*;
use crate::config::macros::Macro; use crate::config::macros::Macro;
use crate::mcp::{ use crate::mcp::{
MCP_INVOKE_META_FUNCTION_NAME_PREFIX, MCP_LIST_META_FUNCTION_NAME_PREFIX, McpRegistry, MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, MCP_INVOKE_META_FUNCTION_NAME_PREFIX,
MCP_SEARCH_META_FUNCTION_NAME_PREFIX, McpRegistry,
}; };
use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets}; use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets};
use anyhow::{Context, Result, anyhow, bail}; use anyhow::{Context, Result, anyhow, bail};
@@ -198,6 +199,8 @@ pub struct Config {
pub rag: Option<Arc<Rag>>, pub rag: Option<Arc<Rag>>,
#[serde(skip)] #[serde(skip)]
pub agent: Option<Agent>, pub agent: Option<Agent>,
#[serde(skip)]
pub(crate) tool_call_tracker: Option<ToolCallTracker>,
} }
impl Default for Config { impl Default for Config {
@@ -270,6 +273,7 @@ impl Default for Config {
session: None, session: None,
rag: None, rag: None,
agent: None, agent: None,
tool_call_tracker: Some(ToolCallTracker::default()),
} }
} }
} }
@@ -799,7 +803,7 @@ impl Config {
|| s == "all" || s == "all"
}) { }) {
bail!( bail!(
"Some of the specified MCP servers in 'enabled_mcp_servers' are configured. Please check your MCP server configuration." "Some of the specified MCP servers in 'enabled_mcp_servers' are not fully configured. Please check your MCP server configuration."
); );
} }
} }
@@ -1972,7 +1976,8 @@ impl Config {
.iter() .iter()
.filter(|v| { .filter(|v| {
!v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.map(|v| v.name.to_string()) .map(|v| v.name.to_string())
.collect(); .collect();
@@ -2015,7 +2020,8 @@ impl Config {
.into_iter() .into_iter()
.filter(|v| { .filter(|v| {
!v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.collect(); .collect();
let tool_names: HashSet<String> = agent_functions let tool_names: HashSet<String> = agent_functions
@@ -2051,7 +2057,8 @@ impl Config {
.iter() .iter()
.filter(|v| { .filter(|v| {
v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) || v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.map(|v| v.name.to_string()) .map(|v| v.name.to_string())
.collect(); .collect();
@@ -2062,8 +2069,10 @@ impl Config {
let item = item.trim(); let item = item.trim();
let item_invoke_name = let item_invoke_name =
format!("{}_{item}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX); format!("{}_{item}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX);
let item_list_name = let item_search_name =
format!("{}_{item}", MCP_LIST_META_FUNCTION_NAME_PREFIX); format!("{}_{item}", MCP_SEARCH_META_FUNCTION_NAME_PREFIX);
let item_describe_name =
format!("{}_{item}", MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX);
if let Some(values) = self.mapping_mcp_servers.get(item) { if let Some(values) = self.mapping_mcp_servers.get(item) {
server_names.extend( server_names.extend(
values values
@@ -2077,7 +2086,12 @@ impl Config {
), ),
format!( format!(
"{}_{}", "{}_{}",
MCP_LIST_META_FUNCTION_NAME_PREFIX, MCP_SEARCH_META_FUNCTION_NAME_PREFIX,
v.to_string()
),
format!(
"{}_{}",
MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX,
v.to_string() v.to_string()
), ),
] ]
@@ -2086,7 +2100,8 @@ impl Config {
) )
} else if mcp_declaration_names.contains(&item_invoke_name) { } else if mcp_declaration_names.contains(&item_invoke_name) {
server_names.insert(item_invoke_name); server_names.insert(item_invoke_name);
server_names.insert(item_list_name); server_names.insert(item_search_name);
server_names.insert(item_describe_name);
} }
} }
} }
@@ -2112,7 +2127,8 @@ impl Config {
.into_iter() .into_iter()
.filter(|v| { .filter(|v| {
v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) || v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.collect(); .collect();
let tool_names: HashSet<String> = agent_functions let tool_names: HashSet<String> = agent_functions
@@ -2594,8 +2610,11 @@ impl Config {
None => return Ok(()), None => return Ok(()),
}; };
if !agent.defined_variables().is_empty() && agent.shared_variables().is_empty() { if !agent.defined_variables().is_empty() && agent.shared_variables().is_empty() {
let new_variables = let new_variables = Agent::init_agent_variables(
Agent::init_agent_variables(agent.defined_variables(), self.info_flag)?; agent.defined_variables(),
self.agent_variables.as_ref(),
self.info_flag,
)?;
agent.set_shared_variables(new_variables); agent.set_shared_variables(new_variables);
} }
if !self.info_flag { if !self.info_flag {
@@ -2613,8 +2632,11 @@ impl Config {
let shared_variables = agent.shared_variables().clone(); let shared_variables = agent.shared_variables().clone();
let session_variables = let session_variables =
if !agent.defined_variables().is_empty() && shared_variables.is_empty() { if !agent.defined_variables().is_empty() && shared_variables.is_empty() {
let new_variables = let new_variables = Agent::init_agent_variables(
Agent::init_agent_variables(agent.defined_variables(), self.info_flag)?; agent.defined_variables(),
self.agent_variables.as_ref(),
self.info_flag,
)?;
agent.set_shared_variables(new_variables.clone()); agent.set_shared_variables(new_variables.clone());
new_variables new_variables
} else { } else {
+282 -53
View File
@@ -4,7 +4,10 @@ use crate::{
}; };
use crate::config::ensure_parent_exists; use crate::config::ensure_parent_exists;
use crate::mcp::{MCP_INVOKE_META_FUNCTION_NAME_PREFIX, MCP_LIST_META_FUNCTION_NAME_PREFIX}; use crate::mcp::{
MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, MCP_INVOKE_META_FUNCTION_NAME_PREFIX,
MCP_SEARCH_META_FUNCTION_NAME_PREFIX,
};
use crate::parsers::{bash, python}; use crate::parsers::{bash, python};
use anyhow::{Context, Result, anyhow, bail}; use anyhow::{Context, Result, anyhow, bail};
use indexmap::IndexMap; use indexmap::IndexMap;
@@ -12,6 +15,7 @@ use indoc::formatdoc;
use rust_embed::Embed; use rust_embed::Embed;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{Value, json}; use serde_json::{Value, json};
use std::collections::VecDeque;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fs::File; use std::fs::File;
use std::io::Write; use std::io::Write;
@@ -87,6 +91,19 @@ pub async fn eval_tool_calls(
} }
let mut is_all_null = true; let mut is_all_null = true;
for call in calls { for call in calls {
if let Some(checker) = &config.read().tool_call_tracker
&& let Some(msg) = checker.check_loop(&call.clone())
{
let dup_msg = format!("{{\"tool_call_loop_alert\":{}}}", &msg.trim());
println!(
"{}",
warning_text(format!("{}: ⚠️ Tool-call loop detected! ⚠️", &call.name).as_str())
);
let val = json!(dup_msg);
output.push(ToolResult::new(call, val));
is_all_null = false;
continue;
}
let mut result = call.eval(config).await?; let mut result = call.eval(config).await?;
if result.is_null() { if result.is_null() {
result = json!("DONE"); result = json!("DONE");
@@ -247,19 +264,13 @@ impl Functions {
pub fn clear_mcp_meta_functions(&mut self) { pub fn clear_mcp_meta_functions(&mut self) {
self.declarations.retain(|d| { self.declarations.retain(|d| {
!d.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !d.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !d.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !d.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !d.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}); });
} }
pub fn append_mcp_meta_functions(&mut self, mcp_servers: Vec<String>) { pub fn append_mcp_meta_functions(&mut self, mcp_servers: Vec<String>) {
let mut invoke_function_properties = IndexMap::new(); let mut invoke_function_properties = IndexMap::new();
invoke_function_properties.insert(
"server".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
..Default::default()
},
);
invoke_function_properties.insert( invoke_function_properties.insert(
"tool".to_string(), "tool".to_string(),
JsonSchema { JsonSchema {
@@ -275,32 +286,86 @@ impl Functions {
}, },
); );
let mut search_function_properties = IndexMap::new();
search_function_properties.insert(
"query".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
description: Some("Generalized explanation of what you want to do".into()),
..Default::default()
},
);
search_function_properties.insert(
"top_k".to_string(),
JsonSchema {
type_value: Some("integer".to_string()),
description: Some("How many results to return, between 1 and 20".into()),
default: Some(Value::from(8usize)),
..Default::default()
},
);
let mut describe_function_properties = IndexMap::new();
describe_function_properties.insert(
"tool".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
description: Some("The name of the tool; e.g., search_issues".into()),
..Default::default()
},
);
for server in mcp_servers { for server in mcp_servers {
let search_function_name = format!("{}_{server}", MCP_SEARCH_META_FUNCTION_NAME_PREFIX);
let describe_function_name =
format!("{}_{server}", MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX);
let invoke_function_name = format!("{}_{server}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX); let invoke_function_name = format!("{}_{server}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX);
let invoke_function_declaration = FunctionDeclaration { let invoke_function_declaration = FunctionDeclaration {
name: invoke_function_name.clone(), name: invoke_function_name.clone(),
description: formatdoc!( description: formatdoc!(
r#" r#"
Invoke the specified tool on the {server} MCP server. Always call {invoke_function_name} first to find the Invoke the specified tool on the {server} MCP server. Always call {describe_function_name} first to
correct names of tools before calling '{invoke_function_name}'. find the correct invocation schema for the given tool.
"# "#
), ),
parameters: JsonSchema { parameters: JsonSchema {
type_value: Some("object".to_string()), type_value: Some("object".to_string()),
properties: Some(invoke_function_properties.clone()), properties: Some(invoke_function_properties.clone()),
required: Some(vec!["server".to_string(), "tool".to_string()]), required: Some(vec!["tool".to_string()]),
..Default::default() ..Default::default()
}, },
agent: false, agent: false,
}; };
let list_functions_declaration = FunctionDeclaration { let search_functions_declaration = FunctionDeclaration {
name: format!("{}_{}", MCP_LIST_META_FUNCTION_NAME_PREFIX, server), name: search_function_name.clone(),
description: format!("List all the available tools for the {server} MCP server"), description: formatdoc!(
parameters: JsonSchema::default(), r#"
Find candidate tools by keywords for the {server} MCP server. Returns small suggestions; fetch
schemas with {describe_function_name}.
"#
),
parameters: JsonSchema {
type_value: Some("object".to_string()),
properties: Some(search_function_properties.clone()),
required: Some(vec!["query".to_string()]),
..Default::default()
},
agent: false,
};
let describe_functions_declaration = FunctionDeclaration {
name: describe_function_name.clone(),
description: "Get the full JSON schema for exactly one MCP tool.".to_string(),
parameters: JsonSchema {
type_value: Some("object".to_string()),
properties: Some(describe_function_properties.clone()),
required: Some(vec!["tool".to_string()]),
..Default::default()
},
agent: false, agent: false,
}; };
self.declarations.push(invoke_function_declaration); self.declarations.push(invoke_function_declaration);
self.declarations.push(list_functions_declaration); self.declarations.push(search_functions_declaration);
self.declarations.push(describe_functions_declaration);
} }
} }
@@ -705,6 +770,10 @@ pub struct ToolCall {
pub name: String, pub name: String,
pub arguments: Value, pub arguments: Value,
pub id: Option<String>, pub id: Option<String>,
/// Gemini 3's thought signature for stateful reasoning in function calling.
/// Must be preserved and sent back when submitting function responses.
#[serde(skip_serializing_if = "Option::is_none")]
pub thought_signature: Option<String>,
} }
type CallConfig = (String, String, Vec<String>, HashMap<String, String>); type CallConfig = (String, String, Vec<String>, HashMap<String, String>);
@@ -734,9 +803,15 @@ impl ToolCall {
name, name,
arguments, arguments,
id, id,
thought_signature: None,
} }
} }
pub fn with_thought_signature(mut self, thought_signature: Option<String>) -> Self {
self.thought_signature = thought_signature;
self
}
pub async fn eval(&self, config: &GlobalConfig) -> Result<Value> { pub async fn eval(&self, config: &GlobalConfig) -> Result<Value> {
let (call_name, cmd_name, mut cmd_args, envs) = match &config.read().agent { let (call_name, cmd_name, mut cmd_args, envs) = match &config.read().agent {
Some(agent) => self.extract_call_config_from_agent(config, agent)?, Some(agent) => self.extract_call_config_from_agent(config, agent)?,
@@ -771,51 +846,105 @@ impl ToolCall {
} }
let output = match cmd_name.as_str() { let output = match cmd_name.as_str() {
_ if cmd_name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) => { _ if cmd_name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX) => {
let registry_arc = { Self::search_mcp_tools(config, &cmd_name, &json_data)?
let cfg = config.read(); }
cfg.mcp_registry _ if cmd_name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX) => {
.clone() Self::describe_mcp_tool(config, &cmd_name, json_data).await?
.with_context(|| "MCP is not configured")?
};
registry_arc.catalog().await?
} }
_ if cmd_name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) => { _ if cmd_name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) => {
let server = json_data Self::invoke_mcp_tool(config, &cmd_name, &json_data).await?
.get("server")
.ok_or_else(|| anyhow!("Missing 'server' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'server' in arguments"))?;
let tool = json_data
.get("tool")
.ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;
let arguments = json_data
.get("arguments")
.cloned()
.unwrap_or_else(|| json!({}));
let registry_arc = {
let cfg = config.read();
cfg.mcp_registry
.clone()
.with_context(|| "MCP is not configured")?
};
let result = registry_arc.invoke(server, tool, arguments).await?;
serde_json::to_value(result)?
} }
_ => match run_llm_function(cmd_name, cmd_args, envs, agent_name)? { _ => match run_llm_function(cmd_name, cmd_args, envs, agent_name) {
Some(contents) => serde_json::from_str(&contents) Ok(Some(contents)) => serde_json::from_str(&contents)
.ok() .ok()
.unwrap_or_else(|| json!({"output": contents})), .unwrap_or_else(|| json!({"output": contents})),
None => Value::Null, Ok(None) => Value::Null,
Err(e) => serde_json::from_str(&e.to_string())
.ok()
.unwrap_or_else(|| json!({"output": e.to_string()})),
}, },
}; };
Ok(output) Ok(output)
} }
/// Handles a call to the per-server "describe" meta function.
///
/// The MCP server id is encoded in the function name as
/// `<MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX>_<server>`; the tool to describe is
/// read from the `tool` string argument.
///
/// # Errors
///
/// Fails when `tool` is missing or is not a string, when no MCP registry is
/// configured, or when the registry's `describe` call or the conversion of its
/// result to JSON fails.
async fn describe_mcp_tool(
    config: &GlobalConfig,
    cmd_name: &str,
    json_data: Value,
) -> Result<Value> {
    // Only strip the prefix from the front of the name. A blanket `replace`
    // would also delete the same text if it appeared inside the server id.
    let prefix = format!("{MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX}_");
    let server_id = cmd_name.strip_prefix(prefix.as_str()).unwrap_or(cmd_name);

    let tool = json_data
        .get("tool")
        .ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
        .as_str()
        .ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;

    // The config read handle is confined to this block so it is gone before
    // the `.await` below.
    let registry_arc = {
        let cfg = config.read();
        cfg.mcp_registry
            .clone()
            .with_context(|| "MCP is not configured")?
    };

    let result = registry_arc.describe(server_id, tool).await?;
    Ok(serde_json::to_value(result)?)
}
/// Handles a call to the per-server "search" meta function.
///
/// The MCP server id is encoded in the function name as
/// `<MCP_SEARCH_META_FUNCTION_NAME_PREFIX>_<server>`. Arguments:
///
/// * `query` (string, required): free-text description of what is wanted.
/// * `top_k` (unsigned integer, optional): how many results to request;
///   defaults to 8 when absent.
///
/// Returns a JSON array with one serialized catalog item per match.
///
/// # Errors
///
/// Fails when `query` is missing or not a string, when `top_k` is present but
/// not an unsigned integer, when no MCP registry is configured, or when a
/// catalog item cannot be serialized.
fn search_mcp_tools(config: &GlobalConfig, cmd_name: &str, json_data: &Value) -> Result<Value> {
    // Only strip the prefix from the front of the name. A blanket `replace`
    // would also delete the same text if it appeared inside the server id.
    let prefix = format!("{MCP_SEARCH_META_FUNCTION_NAME_PREFIX}_");
    let server = cmd_name.strip_prefix(prefix.as_str()).unwrap_or(cmd_name);

    let query = json_data
        .get("query")
        .ok_or_else(|| anyhow!("Missing 'query' in arguments"))?
        .as_str()
        .ok_or_else(|| anyhow!("Invalid 'query' in arguments"))?;

    let top_k = match json_data.get("top_k") {
        None => 8,
        Some(raw) => raw
            .as_u64()
            .ok_or_else(|| anyhow!("Invalid 'top_k' in arguments"))?,
    };
    // Saturate instead of silently truncating on targets where `usize` is
    // narrower than 64 bits.
    let top_k = usize::try_from(top_k).unwrap_or(usize::MAX);

    let registry_arc = {
        let cfg = config.read();
        cfg.mcp_registry
            .clone()
            .with_context(|| "MCP is not configured")?
    };

    // Surface serialization failures instead of replacing the item with `null`.
    let catalog_items = registry_arc
        .search_tools_server(server, query, top_k)
        .into_iter()
        .map(|item| serde_json::to_value(&item))
        .collect::<Result<Vec<_>, _>>()?;
    Ok(Value::Array(catalog_items))
}
/// Handles a call to the per-server "invoke" meta function.
///
/// The MCP server id is encoded in the function name as
/// `<MCP_INVOKE_META_FUNCTION_NAME_PREFIX>_<server>`. Arguments:
///
/// * `tool` (string, required): name of the tool to run on that server.
/// * `arguments` (optional): forwarded to the tool as-is; an empty JSON object
///   is sent when absent.
///
/// # Errors
///
/// Fails when `tool` is missing or not a string, when no MCP registry is
/// configured, or when the invocation or the conversion of its result to JSON
/// fails.
async fn invoke_mcp_tool(
    config: &GlobalConfig,
    cmd_name: &str,
    json_data: &Value,
) -> Result<Value> {
    // Only strip the prefix from the front of the name. A blanket `replace`
    // would also delete the same text if it appeared inside the server id.
    let prefix = format!("{MCP_INVOKE_META_FUNCTION_NAME_PREFIX}_");
    let server = cmd_name.strip_prefix(prefix.as_str()).unwrap_or(cmd_name);

    let tool = json_data
        .get("tool")
        .ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
        .as_str()
        .ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;
    let arguments = json_data
        .get("arguments")
        .cloned()
        .unwrap_or_else(|| json!({}));

    // The config read handle is confined to this block so it is gone before
    // the `.await` below.
    let registry_arc = {
        let cfg = config.read();
        cfg.mcp_registry
            .clone()
            .with_context(|| "MCP is not configured")?
    };

    let result = registry_arc.invoke(server, tool, arguments).await?;
    Ok(serde_json::to_value(result)?)
}
fn extract_call_config_from_agent( fn extract_call_config_from_agent(
&self, &self,
config: &GlobalConfig, config: &GlobalConfig,
@@ -866,7 +995,9 @@ pub fn run_llm_function(
agent_name: Option<String>, agent_name: Option<String>,
) -> Result<Option<String>> { ) -> Result<Option<String>> {
let mut bin_dirs: Vec<PathBuf> = vec![]; let mut bin_dirs: Vec<PathBuf> = vec![];
let mut command_name = cmd_name.clone();
if let Some(agent_name) = agent_name { if let Some(agent_name) = agent_name {
command_name = cmd_args[0].clone();
let dir = Config::agent_bin_dir(&agent_name); let dir = Config::agent_bin_dir(&agent_name);
if dir.exists() { if dir.exists() {
bin_dirs.push(dir); bin_dirs.push(dir);
@@ -889,9 +1020,13 @@ pub fn run_llm_function(
let cmd_name = polyfill_cmd_name(&cmd_name, &bin_dirs); let cmd_name = polyfill_cmd_name(&cmd_name, &bin_dirs);
let exit_code = run_command(&cmd_name, &cmd_args, Some(envs)) let exit_code = run_command(&cmd_name, &cmd_args, Some(envs))
.map_err(|err| anyhow!("Unable to run {cmd_name}, {err}"))?; .map_err(|err| anyhow!("Unable to run {command_name}, {err}"))?;
if exit_code != 0 { if exit_code != 0 {
bail!("Tool call exited with {exit_code}"); let tool_error_message =
format!("⚠️ Tool call '{command_name}' threw exit code {exit_code} ⚠️");
println!("{}", warning_text(&tool_error_message));
let tool_error_json = format!("{{\"tool_call_error\":\"{}\"}}", &tool_error_message);
return Ok(Some(tool_error_json));
} }
let mut output = None; let mut output = None;
if temp_file.exists() { if temp_file.exists() {
@@ -920,3 +1055,97 @@ fn polyfill_cmd_name<T: AsRef<Path>>(cmd_name: &str, bin_dir: &[T]) -> String {
} }
cmd_name cmd_name
} }
#[derive(Debug, Clone)]
pub struct ToolCallTracker {
last_calls: VecDeque<ToolCall>,
max_repeats: usize,
chain_len: usize,
}
impl ToolCallTracker {
pub fn new(max_repeats: usize, chain_len: usize) -> Self {
Self {
last_calls: VecDeque::new(),
max_repeats,
chain_len,
}
}
pub fn default() -> Self {
Self::new(2, 3)
}
pub fn check_loop(&self, new_call: &ToolCall) -> Option<String> {
if self.last_calls.len() < self.max_repeats {
return None;
}
if let Some(last) = self.last_calls.back()
&& self.calls_match(last, new_call)
{
let mut repeat_count = 1;
for i in (1..self.last_calls.len()).rev() {
if self.calls_match(&self.last_calls[i - 1], &self.last_calls[i]) {
repeat_count += 1;
if repeat_count >= self.max_repeats {
return Some(self.create_loop_message());
}
} else {
break;
}
}
}
let start = self.last_calls.len().saturating_sub(self.chain_len);
let chain: Vec<_> = self.last_calls.iter().skip(start).collect();
if chain.len() == self.chain_len {
let mut is_repeating = true;
for i in 0..chain.len() - 1 {
if !self.calls_match(chain[i], chain[i + 1]) {
is_repeating = false;
break;
}
}
if is_repeating && self.calls_match(chain[chain.len() - 1], new_call) {
return Some(self.create_loop_message());
}
}
None
}
fn calls_match(&self, a: &ToolCall, b: &ToolCall) -> bool {
a.name == b.name && a.arguments == b.arguments
}
fn create_loop_message(&self) -> String {
let message = r#"{"error":{"message":"⚠️ Tool-call loop detected! ⚠️","code":400,"param":"Use the output of the last call to this function and parameter-set then move on to the next step of workflow, change tools/parameters called, or request assistance in the conversation sream"}}"#;
if self.last_calls.len() >= self.chain_len {
let start = self.last_calls.len().saturating_sub(self.chain_len);
let chain: Vec<_> = self.last_calls.iter().skip(start).collect();
let mut loopset = "[".to_string();
for c in chain {
loopset +=
format!("{{\"name\":{},\"parameters\":{}}},", c.name, c.arguments).as_str();
}
let _ = loopset.pop();
loopset.push(']');
format!(
"{},\"call_history\":{}}}}}",
&message[..(&message.len() - 2)],
loopset
)
} else {
message.to_string()
}
}
pub fn record_call(&mut self, call: ToolCall) {
if self.last_calls.len() >= self.chain_len * self.max_repeats {
self.last_calls.pop_front();
}
self.last_calls.push_back(call);
}
}
+117 -27
View File
@@ -2,6 +2,7 @@ use crate::config::Config;
use crate::utils::{AbortSignal, abortable_run_with_spinner}; use crate::utils::{AbortSignal, abortable_run_with_spinner};
use crate::vault::interpolate_secrets; use crate::vault::interpolate_secrets;
use anyhow::{Context, Result, anyhow}; use anyhow::{Context, Result, anyhow};
use bm25::{Document, Language, SearchEngine, SearchEngineBuilder};
use futures_util::future::BoxFuture; use futures_util::future::BoxFuture;
use futures_util::{StreamExt, TryStreamExt, stream}; use futures_util::{StreamExt, TryStreamExt, stream};
use indoc::formatdoc; use indoc::formatdoc;
@@ -9,7 +10,7 @@ use rmcp::model::{CallToolRequestParam, CallToolResult};
use rmcp::service::RunningService; use rmcp::service::RunningService;
use rmcp::transport::TokioChildProcess; use rmcp::transport::TokioChildProcess;
use rmcp::{RoleClient, ServiceExt}; use rmcp::{RoleClient, ServiceExt};
use serde::Deserialize; use serde::{Deserialize, Serialize};
use serde_json::{Value, json}; use serde_json::{Value, json};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
@@ -20,10 +21,46 @@ use std::sync::Arc;
use tokio::process::Command; use tokio::process::Command;
pub const MCP_INVOKE_META_FUNCTION_NAME_PREFIX: &str = "mcp_invoke"; pub const MCP_INVOKE_META_FUNCTION_NAME_PREFIX: &str = "mcp_invoke";
pub const MCP_LIST_META_FUNCTION_NAME_PREFIX: &str = "mcp_list"; pub const MCP_SEARCH_META_FUNCTION_NAME_PREFIX: &str = "mcp_search";
pub const MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX: &str = "mcp_describe";
type ConnectedServer = RunningService<RoleClient, ()>; type ConnectedServer = RunningService<RoleClient, ()>;
/// One tool entry in a server's searchable catalog.
///
/// Entries are built from the tool list an MCP server reports at startup and
/// are serialized to JSON when returned from a tool search.
#[derive(Clone, Debug, Default, Serialize)]
pub struct CatalogItem {
/// Tool name as reported by the MCP server; also used as the catalog key.
pub name: String,
/// Id of the MCP server that exposes this tool.
pub server: String,
/// Tool description reported by the server; empty when none was provided.
pub description: String,
}
/// Searchable tool catalog for a single MCP server.
#[derive(Debug)]
struct ServerCatalog {
/// BM25 index over `items`, produced by `ServerCatalog::build_bm25`.
engine: SearchEngine<String>,
/// Catalog entries keyed by tool name.
items: HashMap<String, CatalogItem>,
}
impl ServerCatalog {
    /// Builds an English-language BM25 index over the given catalog entries.
    ///
    /// Each entry becomes one document whose id is the tool name and whose
    /// text is the name, the description and a `server:<id>` line, joined by
    /// newlines.
    pub fn build_bm25(items: &HashMap<String, CatalogItem>) -> SearchEngine<String> {
        let documents = items.values().map(|item| Document {
            id: item.name.clone(),
            contents: format!(
                "{}\n{}\nserver:{}",
                item.name, item.description, item.server
            ),
        });

        SearchEngineBuilder::<String>::with_documents(Language::English, documents).build()
    }
}
impl Clone for ServerCatalog {
    /// Clones the catalog entries and builds a fresh BM25 index for the copy
    /// rather than copying the existing engine.
    // NOTE(review): presumably `SearchEngine` does not implement `Clone`; confirm.
    fn clone(&self) -> Self {
        let engine = Self::build_bm25(&self.items);
        let items = self.items.clone();
        Self { engine, items }
    }
}
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
struct McpServersConfig { struct McpServersConfig {
#[serde(rename = "mcpServers")] #[serde(rename = "mcpServers")]
@@ -50,7 +87,8 @@ enum JsonField {
pub struct McpRegistry { pub struct McpRegistry {
log_path: Option<PathBuf>, log_path: Option<PathBuf>,
config: Option<McpServersConfig>, config: Option<McpServersConfig>,
servers: HashMap<String, Arc<RunningService<RoleClient, ()>>>, servers: HashMap<String, Arc<ConnectedServer>>,
catalogs: HashMap<String, ServerCatalog>,
} }
impl McpRegistry { impl McpRegistry {
@@ -173,7 +211,7 @@ impl McpRegistry {
.collect() .collect()
}; };
let results: Vec<(String, Arc<_>)> = stream::iter( let results: Vec<(String, Arc<_>, ServerCatalog)> = stream::iter(
server_ids server_ids
.into_iter() .into_iter()
.map(|id| async { self.start_server(id).await }), .map(|id| async { self.start_server(id).await }),
@@ -182,13 +220,24 @@ impl McpRegistry {
.try_collect() .try_collect()
.await?; .await?;
self.servers = results.into_iter().collect(); self.servers = results
.clone()
.into_iter()
.map(|(id, server, _)| (id, server))
.collect();
self.catalogs = results
.into_iter()
.map(|(id, _, catalog)| (id, catalog))
.collect();
} }
Ok(()) Ok(())
} }
async fn start_server(&self, id: String) -> Result<(String, Arc<ConnectedServer>)> { async fn start_server(
&self,
id: String,
) -> Result<(String, Arc<ConnectedServer>, ServerCatalog)> {
let server = self let server = self
.config .config
.as_ref() .as_ref()
@@ -231,14 +280,33 @@ impl McpRegistry {
.await .await
.with_context(|| format!("Failed to start MCP server: {}", &server.command))?, .with_context(|| format!("Failed to start MCP server: {}", &server.command))?,
); );
debug!( let tools = service.list_tools(None).await?;
"Available tools for MCP server {id}: {:?}", debug!("Available tools for MCP server {id}: {tools:?}");
service.list_tools(None).await?
); let mut items_vec = Vec::new();
for t in tools.tools {
let name = t.name.to_string();
let description = t.description.unwrap_or_default().to_string();
items_vec.push(CatalogItem {
name,
server: id.clone(),
description,
});
}
let mut items_map = HashMap::new();
items_vec.into_iter().for_each(|it| {
items_map.insert(it.name.clone(), it);
});
let catalog = ServerCatalog {
engine: ServerCatalog::build_bm25(&items_map),
items: items_map,
};
info!("Started MCP server: {id}"); info!("Started MCP server: {id}");
Ok((id.to_string(), service)) Ok((id.to_string(), service, catalog))
} }
pub async fn stop_all_servers(mut self) -> Result<Self> { pub async fn stop_all_servers(mut self) -> Result<Self> {
@@ -268,26 +336,48 @@ impl McpRegistry {
} }
} }
pub fn catalog(&self) -> BoxFuture<'static, Result<Value>> { pub fn search_tools_server(&self, server: &str, query: &str, top_k: usize) -> Vec<CatalogItem> {
let servers: Vec<(String, Arc<ConnectedServer>)> = self let Some(catalog) = self.catalogs.get(server) else {
return vec![];
};
let engine = &catalog.engine;
let raw = engine.search(query, top_k.min(20));
raw.into_iter()
.filter_map(|r| catalog.items.get(&r.document.id))
.take(top_k)
.cloned()
.collect()
}
pub async fn describe(&self, server_id: &str, tool: &str) -> Result<Value> {
let server = self
.servers .servers
.iter() .iter()
.map(|(id, s)| (id.clone(), s.clone())) .filter(|(id, _)| &server_id == id)
.collect(); .map(|(_, s)| s.clone())
.next()
.ok_or(anyhow!("{server_id} MCP server not found in config"))?;
Box::pin(async move { let tool_schema = server
let mut out = Vec::with_capacity(servers.len()); .list_tools(None)
for (id, server) in servers { .await?
let tools = server.list_tools(None).await?; .tools
let resources = server.list_resources(None).await.unwrap_or_default(); .into_iter()
out.push(json!({ .find(|it| it.name == tool)
"server": id, .ok_or(anyhow!(
"tools": tools, "{tool} not found in {server_id} MCP server catalog"
"resources": resources, ))?
})); .input_schema;
Ok(json!({
"type": "object",
"properties": {
"tool": {
"type": "string",
},
"arguments": tool_schema
} }
Ok(Value::Array(out)) }))
})
} }
pub fn invoke( pub fn invoke(