19 Commits

Author SHA1 Message Date
github-actions[bot]
2d4fad596c bump: version 0.1.2 → 0.1.3 [skip ci] 2025-12-13 20:57:37 +00:00
7259e59d2a ci: Prep for 0.1.3 release 2025-12-13 13:38:09 -07:00
cec04c4597 style: Improved error message for un-fully configured MCP configuration 2025-12-13 13:37:01 -07:00
github-actions[bot]
a7f5677195 chore: bump Cargo.toml to 0.1.3 2025-12-13 20:28:10 +00:00
github-actions[bot]
6075f0a190 bump: version 0.1.2 → 0.1.3 [skip ci] 2025-12-13 20:27:58 +00:00
15310a9e2c chore: Updated the models 2025-12-11 09:05:41 -07:00
f7df54f2f7 docs: Removed the warning about MCP token usage since that has been fixed 2025-12-05 12:38:15 -07:00
212d4bace4 docs: Fixed an unclosed backtick typo in the Environment Variables docs 2025-12-05 12:37:59 -07:00
f4b3267c89 docs: Fixed typo in vault readme 2025-12-05 11:05:14 -07:00
9eeeb11871 style: Applied formatting 2025-12-03 15:06:50 -07:00
b8db3f689d Merge branch 'main' of github.com:Dark-Alex-17/loki 2025-12-03 14:57:03 -07:00
3b21ce2aa5 feat: Improved MCP implementation to minimize the tokens needed to utilize it so it doesn't quickly overwhelm the token space for a given model 2025-12-03 12:12:51 -07:00
Alex Clarke
9bf4fcd943 ci: Updated the README to be a bit more clear in some sections 2025-11-26 15:53:54 -07:00
github-actions[bot]
c1f5cfbbda bump: version 0.1.1 → 0.1.2 [skip ci] 2025-11-08 23:13:34 +00:00
46517a4e15 refactor: Gave the GitHub MCP server a default placeholder value that doesn't require the vault 2025-11-08 16:09:32 -07:00
github-actions[bot]
efbe76e1fc bump: version 0.1.1 → 0.1.2 [skip ci] 2025-11-08 23:02:40 +00:00
245c567d30 bug: Removed the github MCP server and slack MCP server from mcp.json so users can just use Loki without any other setup and add more later 2025-11-08 15:59:05 -07:00
Alex Clarke
cbb3d2c34a build: Removed the remaining IDE metadata directories 2025-11-07 18:21:58 -07:00
bddec85fa5 build: Added forgotten IDE configuration directories into my .gitignore 2025-11-07 18:18:32 -07:00
15 changed files with 903 additions and 643 deletions
+2 -1
View File
@@ -2,5 +2,6 @@
/tmp /tmp
/.env /.env
!cli/** !cli/**
/.idea/ .idea/
/loki.iml /loki.iml
/.idea/
-10
View File
@@ -1,10 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Zeppelin ignored files
/ZeppelinRemoteNotebooks/
+12
View File
@@ -1,3 +1,15 @@
## v0.1.3 (2025-12-13)
### Feat
- Improved MCP implementation to minimize the tokens needed to utilize it so it doesn't quickly overwhelm the token space for a given model
## v0.1.2 (2025-11-08)
### Refactor
- Gave the GitHub MCP server a default placeholder value that doesn't require the vault
## v0.1.1 (2025-11-08) ## v0.1.1 (2025-11-08)
## v0.1.0 (2025-11-07) ## v0.1.0 (2025-11-07)
Generated
+364 -279
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,6 +1,6 @@
[package] [package]
name = "loki-ai" name = "loki-ai"
version = "0.1.1" version = "0.1.3"
edition = "2024" edition = "2024"
authors = ["Alex Clarke <alex.j.tusa@gmail.com>"] authors = ["Alex Clarke <alex.j.tusa@gmail.com>"]
description = "An all-in-one, batteries included LLM CLI Tool" description = "An all-in-one, batteries included LLM CLI Tool"
+16 -31
View File
@@ -19,7 +19,6 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
## Quick Links ## Quick Links
* [AIChat Migration Guide](./docs/AICHAT-MIGRATION.md): Coming from AIChat? Follow the migration guide to get started. * [AIChat Migration Guide](./docs/AICHAT-MIGRATION.md): Coming from AIChat? Follow the migration guide to get started.
* [History](#history): A history of how Loki came to be.
* [Installation](#install): Install Loki * [Installation](#install): Install Loki
* [Getting Started](#getting-started): Get started with Loki by doing first-run setup steps. * [Getting Started](#getting-started): Get started with Loki by doing first-run setup steps.
* [REPL](./docs/REPL.md): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki. * [REPL](./docs/REPL.md): Interactive Read-Eval-Print Loop for conversational interactions with LLMs and Loki.
@@ -41,21 +40,7 @@ Coming from [AIChat](https://github.com/sigoden/aichat)? Follow the [migration g
* [Client Configurations](./docs/clients/CLIENTS.md): Configuration instructions for various LLM providers. * [Client Configurations](./docs/clients/CLIENTS.md): Configuration instructions for various LLM providers.
* [Patching API Requests](./docs/clients/PATCHES.md): Learn how to patch API requests for advanced customization. * [Patching API Requests](./docs/clients/PATCHES.md): Learn how to patch API requests for advanced customization.
* [Custom Themes](./docs/THEMES.md): Change the look and feel of Loki to your preferences with custom themes. * [Custom Themes](./docs/THEMES.md): Change the look and feel of Loki to your preferences with custom themes.
* [History](#history): A history of how Loki came to be.
---
## History
Loki originally started as a fork of the fantastic [AIChat CLI](https://github.com/sigoden/aichat). The purpose was to
simply fix a bug in how MCP servers worked with AIChat so that I could specify different ones for agents. However, it
has since evolved far beyond that and become a passion project with a life of its own!
Loki now has first class MCP server support (with support for local and remote servers alike), a built-in vault for
interpolating secrets in configuration files, built-in agents, built-in macros, dynamic tab completions, integrated
custom functions (no `argc` dependency), improved documentation, and much more with many more plans for the future!
The original kudos goes out to all the developers of the wonderful AIChat project!
---
## Prerequisites ## Prerequisites
Loki requires the following tools to be installed on your system: Loki requires the following tools to be installed on your system:
@@ -164,21 +149,6 @@ guide you through the process when you first attempt to access the vault. So, to
loki --list-secrets loki --list-secrets
``` ```
### First Time Setup
In order for Loki to function correctly, you'll need to add a few secrets to the Loki vault so the MCP servers can
function.
**GitHub MCP Server:**
* `GITHUB_PERSONAL_ACCESS_TOKEN` - A GitHub Personal Access Token with `repo` and `workflow` scopes.
See [Creating a GitHub Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens)
#### Add the secrets to the Loki vault
You can add the secrets to the Loki vault using the following commands (First time use will prompt you to create a vault
password file):
```sh
loki --add-secret GITHUB_PERSONAL_ACCESS_TOKEN
```
### Tab-Completions ### Tab-Completions
You can also enable tab completions to make using Loki easier. To do so, add the following to your shell profile: You can also enable tab completions to make using Loki easier. To do so, add the following to your shell profile:
```shell ```shell
@@ -272,5 +242,20 @@ The appearance of Loki can be modified using the following settings:
| `user_agent` | `null` | The name of the `User-Agent` that should be passed in the `User-Agent` header on all requests to model providers | | `user_agent` | `null` | The name of the `User-Agent` that should be passed in the `User-Agent` header on all requests to model providers |
| `save_shell_history` | `true` | Enables or disables REPL command history | | `save_shell_history` | `true` | Enables or disables REPL command history |
---
## History
Loki originally started as a fork of the fantastic [AIChat CLI](https://github.com/sigoden/aichat). The purpose was to
simply fix a bug in how MCP servers worked with AIChat so that I could specify different ones for agents. However, it
has since evolved far beyond that and become a passion project with a life of its own!
Loki now has first class MCP server support (with support for local and remote servers alike), a built-in vault for
interpolating secrets in configuration files, built-in agents, built-in macros, dynamic tab completions, integrated
custom functions (no `argc` dependency), improved documentation, and much more with many more plans for the future!
The original kudos goes out to all the developers of the wonderful AIChat project!
---
## Creator ## Creator
* [Alex Clarke](https://github.com/Dark-Alex-17) * [Alex Clarke](https://github.com/Dark-Alex-17)
+1 -10
View File
@@ -11,21 +11,12 @@
"ghcr.io/github/github-mcp-server" "ghcr.io/github/github-mcp-server"
], ],
"env": { "env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "{{GITHUB_PERSONAL_ACCESS_TOKEN}}" "GITHUB_PERSONAL_ACCESS_TOKEN": "YOUR_GITHUB_TOKEN"
} }
}, },
"docker": { "docker": {
"command": "uvx", "command": "uvx",
"args": ["mcp-server-docker"] "args": ["mcp-server-docker"]
},
"slack": {
"command": "npx",
"args": ["-y", "slack-mcp-server@latest", "--transport", "stdio"],
"env": {
"SLACK_MCP_XOXC_TOKEN": "{{SLACK_MCP_XOXC_TOKEN}}",
"SLACK_MCP_XOXD_TOKEN": "{{SLACK_MCP_XOXD_TOKEN}}",
"SLACK_MCP_ADD_MESSAGE_TOOL": true
}
} }
} }
} }
+2 -2
View File
@@ -3,8 +3,8 @@ Loki originally started as a fork of AIChat but has since evolved into its own s
As a result, there's some changes you'll need to make to your AIChat configuration to be able to use Loki. As a result, there's some changes you'll need to make to your AIChat configuration to be able to use Loki.
Be sure you've followed the [first-time setup steps](../README.md#first-time-setup) so that the Loki configuration Be sure you've run `loki` at least once so that the Loki configuration directory and subdirectories exist and is
directory and subdirectories exist and is populated with the built-in defaults. populated with the built-in defaults.
## Global Configuration File ## Global Configuration File
You should be able to copy/paste your AIChat configuration file into your Loki configuration directory. Since the You should be able to copy/paste your AIChat configuration file into your Loki configuration directory. Since the
+1 -1
View File
@@ -84,7 +84,7 @@ You can also customize the location of full agent configurations using the follo
| Environment Variable | Description | | Environment Variable | Description |
|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| |------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|
| `<AGENT_NAME>_CONFIG_FILE | Customize the location of the agent's configuration file; e.g. `SQL_CONFIG_FILE` | | `<AGENT_NAME>_CONFIG_FILE` | Customize the location of the agent's configuration file; e.g. `SQL_CONFIG_FILE` |
| `<AGENT_NAME>_MODEL` | Customize the `model` used for the agent; e.g `SQL_MODEL` | | `<AGENT_NAME>_MODEL` | Customize the `model` used for the agent; e.g `SQL_MODEL` |
| `<AGENT_NAME>_TEMPERATURE` | Customize the `temperature` used for the agent; e.g. `SQL_TEMPERATURE` | | `<AGENT_NAME>_TEMPERATURE` | Customize the `temperature` used for the agent; e.g. `SQL_TEMPERATURE` |
| `<AGENT_NAME>_TOP_P` | Customize the `top_p` used for the agent; e.g. `SQL_TOP_P` | | `<AGENT_NAME>_TOP_P` | Customize the `top_p` used for the agent; e.g. `SQL_TOP_P` |
+1 -1
View File
@@ -114,7 +114,7 @@ At the time of writing, the following files support Loki secret injection:
|-------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------| |-------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
| `config.yaml` | The main Loki configuration file | Cannot use secret injection on the `vault_password_file` field | | `config.yaml` | The main Loki configuration file | Cannot use secret injection on the `vault_password_file` field |
| `functions/mcp.json` | The MCP server configuration file | | | `functions/mcp.json` | The MCP server configuration file | |
| `<agent>/tools.<py/sh>` | Tool files for agents | Specific configuration and only supported for Agents, not all global tools ([see below](#environment-variable-secret-injection-in-agents) | | `<agent>/tools.<py/sh>` | Tool files for agents | Specific configuration and only supported for Agents, not all global tools ([see below](#environment-variable-secret-injection-in-agents)) |
Note that all paths are relative to the Loki configuration directory. The directory varies by system, so you can find yours by Note that all paths are relative to the Loki configuration directory. The directory varies by system, so you can find yours by
+1 -3
View File
@@ -83,9 +83,7 @@ enabled_mcp_servers: null # Which MCP servers to enable by default (e.g.
``` ```
A special note about `enabled_mcp_servers`: a user can set this to `all` to enable all configured MCP servers in the A special note about `enabled_mcp_servers`: a user can set this to `all` to enable all configured MCP servers in the
`functions/mcp.json` configuration. However, **this should be used with caution**. When there is a significant number `functions/mcp.json` configuration.
of configured MCP servers, enabling all MCP servers may overwhelm the context length of a model, and quickly exceed
token limits.
(See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.) (See the [Configuration Example](../../config.example.yaml) file for an example global configuration with all options.)
+209 -216
View File
@@ -3,6 +3,20 @@
# - https://platform.openai.com/docs/api-reference/chat # - https://platform.openai.com/docs/api-reference/chat
- provider: openai - provider: openai
models: models:
- name: gpt-5.1
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-5.1-chat-latest
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-5 - name: gpt-5
max_input_tokens: 400000 max_input_tokens: 400000
max_output_tokens: 128000 max_output_tokens: 128000
@@ -31,13 +45,6 @@
output_price: 0.4 output_price: 0.4
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: gpt-5-codex
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: gpt-4.1 - name: gpt-4.1
max_input_tokens: 1047576 max_input_tokens: 1047576
max_output_tokens: 32768 max_output_tokens: 32768
@@ -259,6 +266,30 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: claude-haiku-4-5-20251001
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5-20251001:thinking
real_name: claude-haiku-4-5-20251001
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
- name: claude-opus-4-1-20250805 - name: claude-opus-4-1-20250805
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -660,6 +691,29 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: claude-haiku-4-5@20251001
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5@20251001:thinking
real_name: claude-haiku-4-5@20251001
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
- name: claude-opus-4-1@20250805 - name: claude-opus-4-1@20250805
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -817,6 +871,31 @@
thinking: thinking:
type: enabled type: enabled
budget_tokens: 16000 budget_tokens: 16000
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0:thinking
real_name: us.anthropic.claude-haiku-4-5-20251001-v1:0
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
patch:
body:
inferenceConfig:
temperature: null
topP: null
additionalModelRequestFields:
thinking:
type: enabled
budget_tokens: 16000
- name: us.anthropic.claude-opus-4-1-20250805-v1:0 - name: us.anthropic.claude-opus-4-1-20250805-v1:0
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -1004,6 +1083,12 @@
require_max_tokens: true require_max_tokens: true
input_price: 0 input_price: 0
output_price: 0 output_price: 0
- name: '@cf/qwen/qwen3-30b-a3b-fp8'
max_input_tokens: 131072
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/qwen/qwen2.5-coder-32b-instruct' - name: '@cf/qwen/qwen2.5-coder-32b-instruct'
max_input_tokens: 131072 max_input_tokens: 131072
max_output_tokens: 2048 max_output_tokens: 2048
@@ -1030,8 +1115,8 @@
max_batch_size: 100 max_batch_size: 100
# Links: # Links:
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Wm9cvy6rl # - https://cloud.baidu.com/doc/qianfan/s/rmh4stp0j
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Qm9cw2s7m # - https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya
- provider: ernie - provider: ernie
models: models:
- name: ernie-4.5-turbo-128k - name: ernie-4.5-turbo-128k
@@ -1043,8 +1128,12 @@
input_price: 0.42 input_price: 0.42
output_price: 1.26 output_price: 1.26
supports_vision: true supports_vision: true
- name: ernie-x1-turbo-32k - name: ernie-5.0-thinking-preview
max_input_tokens: 32768 max_input_tokens: 131072
input_price: 1.4
output_price: 5.6
- name: ernie-x1.1-preview
max_input_tokens: 65536
input_price: 0.14 input_price: 0.14
output_price: 0.56 output_price: 0.56
- name: bge-large-zh - name: bge-large-zh
@@ -1064,75 +1153,31 @@
max_input_tokens: 1024 max_input_tokens: 1024
input_price: 0.07 input_price: 0.07
# Links: # Links:
# - https://help.aliyun.com/zh/model-studio/getting-started/models # - https://help.aliyun.com/zh/model-studio/getting-started/models
# - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api # - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api
- provider: qianwen - provider: qianwen
models: models:
- name: qwen-max-latest
max_input_tokens: 32678
max_output_tokens: 8192
input_price: 1.6
output_price: 6.4
supports_function_calling: true
- name: qwen-plus-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.112
output_price: 0.28
supports_function_calling: true
- name: qwen-turbo-latest
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.042
output_price: 0.084
supports_function_calling: true
- name: qwen-long
max_input_tokens: 1000000
input_price: 0.07
output_price: 0.28
- name: qwen-omni-turbo-latest
max_input_tokens: 32768
max_output_tokens: 2048
supports_vision: true
- name: qwen-coder-plus-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
- name: qwen-coder-turbo-latest
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.28
output_price: 0.84
- name: qwen-vl-max-latest
max_input_tokens: 30720
max_output_tokens: 2048
input_price: 0.42
output_price: 1.26
supports_vision: true
- name: qwen-vl-plus-latest
max_input_tokens: 30000
max_output_tokens: 2048
input_price: 0.21
output_price: 0.63
supports_vision: true
- name: qwen3-max - name: qwen3-max
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 2.1 supports_function_calling: true
output_price: 8.4 - name: qwen-plus
max_input_tokens: 131072
supports_function_calling: true
- name: qwen-flash
max_input_tokens: 1000000
supports_function_calling: true supports_function_calling: true
- name: qwen3-vl-plus - name: qwen3-vl-plus
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.42
output_price: 4.2
supports_vision: true supports_vision: true
- name: qwen3-max-preview - name: qwen3-vl-flash
max_input_tokens: 262144 max_input_tokens: 262144
max_output_tokens: 32768 supports_vision: true
input_price: 1.4 - name: qwen-coder-plus
output_price: 5.6 max_input_tokens: 1000000
supports_function_calling: true - name: qwen-coder-flash
max_input_tokens: 1000000
- name: qwen3-next-80b-a3b-instruct - name: qwen3-next-80b-a3b-instruct
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.14 input_price: 0.14
@@ -1160,6 +1205,16 @@
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.105 input_price: 0.105
output_price: 1.05 output_price: 1.05
- name: qwen3-vl-32b-instruct
max_input_tokens: 131072
input_price: 0.28
output_price: 1.12
supports_vision: true
- name: qwen3-vl-8b-instruct
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
supports_vision: true
- name: qwen3-coder-480b-a35b-instruct - name: qwen3-coder-480b-a35b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 1.26 input_price: 1.26
@@ -1168,32 +1223,10 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.315 input_price: 0.315
output_price: 1.26 output_price: 1.26
- name: qwen2.5-72b-instruct - name: deepseek-v3.2-exp
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.56
output_price: 1.68
supports_function_calling: true
- name: qwen2.5-vl-72b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 2.24
output_price: 6.72
supports_vision: true
- name: qwen2.5-coder-32b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
supports_function_calling: true
- name: deepseek-v3.1
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.28 input_price: 0.28
output_price: 1.12 output_price: 0.42
- name: deepseek-r1-0528
max_input_tokens: 65536
input_price: 0.28
output_price: 1.12
- name: text-embedding-v4 - name: text-embedding-v4
type: embedding type: embedding
input_price: 0.1 input_price: 0.1
@@ -1247,10 +1280,10 @@
# - https://platform.moonshot.cn/docs/api/chat#%E5%85%AC%E5%BC%80%E7%9A%84%E6%9C%8D%E5%8A%A1%E5%9C%B0%E5%9D%80 # - https://platform.moonshot.cn/docs/api/chat#%E5%85%AC%E5%BC%80%E7%9A%84%E6%9C%8D%E5%8A%A1%E5%9C%B0%E5%9D%80
- provider: moonshot - provider: moonshot
models: models:
- name: kimi-latest - name: kimi-k2-turbo-preview
max_input_tokens: 131072 max_input_tokens: 262144
input_price: 1.4 input_price: 1.12
output_price: 4.2 output_price: 8.12
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: kimi-k2-0905-preview - name: kimi-k2-0905-preview
@@ -1259,16 +1292,15 @@
output_price: 2.24 output_price: 2.24
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: kimi-k2-turbo-preview - name: kimi-k2-thinking-turbo
max_input_tokens: 131072 max_input_tokens: 262144
input_price: 1.12 input_price: 1.12
output_price: 4.48 output_price: 8.12
supports_vision: true supports_vision: true
supports_function_calling: true - name: kimi-k2-thinking
- name: kimi-thinking-preview max_input_tokens: 262144
max_input_tokens: 131072 input_price: 0.56
input_price: 28 output_price: 2.24
output_price: 28
supports_vision: true supports_vision: true
# Links: # Links:
@@ -1293,7 +1325,7 @@
# - https://open.bigmodel.cn/dev/api#glm-4 # - https://open.bigmodel.cn/dev/api#glm-4
- provider: zhipuai - provider: zhipuai
models: models:
- name: glm-4.5 - name: glm-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.28 input_price: 0.28
output_price: 1.12 output_price: 1.12
@@ -1353,25 +1385,35 @@
input_price: 0.112 input_price: 0.112
# Links: # Links:
# - https://platform.minimaxi.com/document/pricing # - https://platform.minimaxi.com/docs/guides/pricing
# - https://platform.minimaxi.com/document/ChatCompletion%20v2 # - https://platform.minimaxi.com/document/ChatCompletion%20v2
- provider: minimax - provider: minimax
models: models:
- name: minimax-text-01 - name: minimax-m2
max_input_tokens: 1000192 max_input_tokens: 204800
input_price: 0.14 input_price: 0.294
output_price: 1.12 output_price: 1.176
supports_vision: true supports_function_calling: true
- name: minimax-m1
max_input_tokens: 131072
input_price: 0.112
output_price: 1.12
# Links: # Links:
# - https://openrouter.ai/models # - https://openrouter.ai/models
# - https://openrouter.ai/docs/api-reference/chat-completion # - https://openrouter.ai/docs/api-reference/chat-completion
- provider: openrouter - provider: openrouter
models: models:
- name: openai/gpt-5.1
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5.1-chat
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5 - name: openai/gpt-5
max_input_tokens: 400000 max_input_tokens: 400000
max_output_tokens: 128000 max_output_tokens: 128000
@@ -1400,13 +1442,6 @@
output_price: 0.4 output_price: 0.4
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: openai/gpt-5-codex
max_input_tokens: 400000
max_output_tokens: 128000
input_price: 1.25
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4.1 - name: openai/gpt-4.1
max_input_tokens: 1047576 max_input_tokens: 1047576
max_output_tokens: 32768 max_output_tokens: 32768
@@ -1563,6 +1598,14 @@
output_price: 15 output_price: 15
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: anthropic/claude-haiku-4.5
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 1
output_price: 5
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-opus-4.1 - name: anthropic/claude-opus-4.1
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 8192 max_output_tokens: 8192
@@ -1696,11 +1739,10 @@
patch: patch:
body: body:
include_reasoning: true include_reasoning: true
- name: qwen/qwen-max - name: qwen/qwen3-max
max_input_tokens: 32768 max_input_tokens: 262144
max_output_tokens: 8192 input_price: 1.2
input_price: 1.6 output_price: 6
output_price: 6.4
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-plus - name: qwen/qwen-plus
max_input_tokens: 131072 max_input_tokens: 131072
@@ -1708,22 +1750,6 @@
input_price: 0.4 input_price: 0.4
output_price: 1.2 output_price: 1.2
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-turbo
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.05
output_price: 0.2
supports_function_calling: true
- name: qwen/qwen-vl-plus
max_input_tokens: 7500
input_price: 0.21
output_price: 0.63
supports_vision: true
- name: qwen/qwen3-max
max_input_tokens: 262144
input_price: 1.2
output_price: 6
supports_function_calling: true
- name: qwen/qwen3-next-80b-a3b-instruct - name: qwen/qwen3-next-80b-a3b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.1 input_price: 0.1
@@ -1733,7 +1759,7 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.1 input_price: 0.1
output_price: 0.8 output_price: 0.8
- name: qwen/qwen3-235b-a22b-2507 - name: qwen/qwen3-235b-a22b-2507 # Qwen3 235B A22B Instruct 2507
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.12 input_price: 0.12
output_price: 0.59 output_price: 0.59
@@ -1750,6 +1776,16 @@
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.071 input_price: 0.071
output_price: 0.285 output_price: 0.285
- name: qwen/qwen3-vl-32b-instruct
max_input_tokens: 262144
input_price: 0.35
output_price: 1.1
supports_vision: true
- name: qwen/qwen3-vl-8b-instruct
max_input_tokens: 262144
input_price: 0.08
output_price: 0.50
supports_vision: true
- name: qwen/qwen3-coder-plus - name: qwen/qwen3-coder-plus
max_input_tokens: 128000 max_input_tokens: 128000
input_price: 1 input_price: 1
@@ -1760,30 +1796,26 @@
input_price: 0.3 input_price: 0.3
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen3-coder # Qwen3 Coder 480B A35B
max_input_tokens: 262144
input_price: 0.22
output_price: 0.95
supports_function_calling: true
- name: qwen/qwen3-coder-30b-a3b-instruct - name: qwen/qwen3-coder-30b-a3b-instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.052 input_price: 0.052
output_price: 0.207 output_price: 0.207
supports_function_calling: true supports_function_calling: true
- name: qwen/qwen-2.5-72b-instruct
max_input_tokens: 131072
input_price: 0.35
output_price: 0.4
supports_function_calling: true
- name: qwen/qwen2.5-vl-72b-instruct
max_input_tokens: 32000
input_price: 0.7
output_price: 0.7
supports_vision: true
- name: qwen/qwen-2.5-coder-32b-instruct
max_input_tokens: 32768
input_price: 0.18
output_price: 0.18
- name: moonshotai/kimi-k2-0905 - name: moonshotai/kimi-k2-0905
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.296 input_price: 0.296
output_price: 1.185 output_price: 1.185
supports_function_calling: true supports_function_calling: true
- name: moonshotai/kimi-k2-thinking
max_input_tokens: 262144
input_price: 0.45
output_price: 2.35
supports_function_calling: true
- name: moonshotai/kimi-dev-72b - name: moonshotai/kimi-dev-72b
max_input_tokens: 131072 max_input_tokens: 131072
input_price: 0.29 input_price: 0.29
@@ -1804,6 +1836,11 @@
input_price: 0.2 input_price: 0.2
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: amazon/nova-premier-v1
max_input_tokens: 1000000
input_price: 2.5
output_price: 12.5
supports_vision: true
- name: amazon/nova-pro-v1 - name: amazon/nova-pro-v1
max_input_tokens: 300000 max_input_tokens: 300000
max_output_tokens: 5120 max_output_tokens: 5120
@@ -1850,29 +1887,15 @@
patch: patch:
body: body:
include_reasoning: true include_reasoning: true
- name: minimax/minimax-01 - name: minimax/minimax-m2
max_input_tokens: 1000192 max_input_tokens: 196608
input_price: 0.2 input_price: 0.15
output_price: 1.1 output_price: 0.45
- name: z-ai/glm-4.6 - name: z-ai/glm-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.5 input_price: 0.5
output_price: 1.75 output_price: 1.75
supports_function_calling: true supports_function_calling: true
- name: z-ai/glm-4.5
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
supports_function_calling: true
- name: z-ai/glm-4.5-air
max_input_tokens: 131072
input_price: 0.2
output_price: 1.1
- name: z-ai/glm-4.5v
max_input_tokens: 65536
input_price: 0.5
output_price: 1.7
supports_vision: true
# Links: # Links:
# - https://github.com/marketplace?type=models # - https://github.com/marketplace?type=models
@@ -2068,10 +2091,6 @@
input_price: 0.08 input_price: 0.08
output_price: 0.3 output_price: 0.3
supports_vision: true supports_vision: true
- name: meta-llama/Llama-3.3-70B-Instruct
max_input_tokens: 131072
input_price: 0.23
output_price: 0.40
- name: Qwen/Qwen3-Next-80B-A3B-Instruct - name: Qwen/Qwen3-Next-80B-A3B-Instruct
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.14 input_price: 0.14
@@ -2100,27 +2119,15 @@
input_price: 0.07 input_price: 0.07
output_price: 0.27 output_price: 0.27
supports_function_calling: true supports_function_calling: true
- name: Qwen/Qwen3-235B-A22B
max_input_tokens: 40960
input_price: 0.15
output_price: 0.6
- name: Qwen/Qwen3-30B-A3B - name: Qwen/Qwen3-30B-A3B
max_input_tokens: 40960 max_input_tokens: 40960
input_price: 0.1 input_price: 0.1
output_price: 0.3 output_price: 0.3
- name: Qwen/Qwen3-32B - name: Qwen/Qwen3-VL-8B-Instruct
max_input_tokens: 40960 max_input_tokens: 262144
input_price: 0.1 input_price: 0.18
output_price: 0.3 output_price: 0.69
- name: Qwen/Qwen2.5-72B-Instruct supports_vision: true
max_input_tokens: 32768
input_price: 0.23
output_price: 0.40
supports_function_calling: true
- name: Qwen/Qwen2.5-Coder-32B-Instruct
max_input_tokens: 32768
input_price: 0.07
output_price: 0.16
- name: deepseek-ai/DeepSeek-V3.2-Exp - name: deepseek-ai/DeepSeek-V3.2-Exp
max_input_tokens: 163840 max_input_tokens: 163840
input_price: 0.27 input_price: 0.27
@@ -2145,35 +2152,21 @@
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.06 input_price: 0.06
output_price: 0.12 output_price: 0.12
- name: mistralai/Devstral-Small-2507
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
- name: moonshotai/Kimi-K2-Instruct-0905 - name: moonshotai/Kimi-K2-Instruct-0905
max_input_tokens: 262144 max_input_tokens: 262144
input_price: 0.5 input_price: 0.5
output_price: 2.0 output_price: 2.0
supports_function_calling: true supports_function_calling: true
- name: moonshotai/Kimi-K2-Thinking
max_input_tokens: 262144
input_price: 0.55
output_price: 2.5
supports_function_calling: true
- name: zai-org/GLM-4.6 - name: zai-org/GLM-4.6
max_input_tokens: 202752 max_input_tokens: 202752
input_price: 0.6 input_price: 0.6
output_price: 1.9 output_price: 1.9
supports_function_calling: true supports_function_calling: true
- name: zai-org/GLM-4.5
max_input_tokens: 131072
input_price: 0.55
output_price: 2.0
supports_function_calling: true
- name: zai-org/GLM-4.5-Air
max_input_tokens: 131072
input_price: 0.2
output_price: 1.1
supports_function_calling: true
- name: zai-org/GLM-4.5V
max_input_tokens: 65536
input_price: 0.5
output_price: 1.7
supports_vision: true
- name: BAAI/bge-large-en-v1.5 - name: BAAI/bge-large-en-v1.5
type: embedding type: embedding
input_price: 0.01 input_price: 0.01
+23 -10
View File
@@ -24,7 +24,8 @@ use crate::utils::*;
use crate::config::macros::Macro; use crate::config::macros::Macro;
use crate::mcp::{ use crate::mcp::{
MCP_INVOKE_META_FUNCTION_NAME_PREFIX, MCP_LIST_META_FUNCTION_NAME_PREFIX, McpRegistry, MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, MCP_INVOKE_META_FUNCTION_NAME_PREFIX,
MCP_SEARCH_META_FUNCTION_NAME_PREFIX, McpRegistry,
}; };
use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets}; use crate::vault::{GlobalVault, Vault, create_vault_password_file, interpolate_secrets};
use anyhow::{Context, Result, anyhow, bail}; use anyhow::{Context, Result, anyhow, bail};
@@ -799,7 +800,7 @@ impl Config {
|| s == "all" || s == "all"
}) { }) {
bail!( bail!(
"Some of the specified MCP servers in 'enabled_mcp_servers' are configured. Please check your MCP server configuration." "Some of the specified MCP servers in 'enabled_mcp_servers' are not fully configured. Please check your MCP server configuration."
); );
} }
} }
@@ -1972,7 +1973,8 @@ impl Config {
.iter() .iter()
.filter(|v| { .filter(|v| {
!v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.map(|v| v.name.to_string()) .map(|v| v.name.to_string())
.collect(); .collect();
@@ -2015,7 +2017,8 @@ impl Config {
.into_iter() .into_iter()
.filter(|v| { .filter(|v| {
!v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.collect(); .collect();
let tool_names: HashSet<String> = agent_functions let tool_names: HashSet<String> = agent_functions
@@ -2051,7 +2054,8 @@ impl Config {
.iter() .iter()
.filter(|v| { .filter(|v| {
v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) || v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.map(|v| v.name.to_string()) .map(|v| v.name.to_string())
.collect(); .collect();
@@ -2062,8 +2066,10 @@ impl Config {
let item = item.trim(); let item = item.trim();
let item_invoke_name = let item_invoke_name =
format!("{}_{item}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX); format!("{}_{item}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX);
let item_list_name = let item_search_name =
format!("{}_{item}", MCP_LIST_META_FUNCTION_NAME_PREFIX); format!("{}_{item}", MCP_SEARCH_META_FUNCTION_NAME_PREFIX);
let item_describe_name =
format!("{}_{item}", MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX);
if let Some(values) = self.mapping_mcp_servers.get(item) { if let Some(values) = self.mapping_mcp_servers.get(item) {
server_names.extend( server_names.extend(
values values
@@ -2077,7 +2083,12 @@ impl Config {
), ),
format!( format!(
"{}_{}", "{}_{}",
MCP_LIST_META_FUNCTION_NAME_PREFIX, MCP_SEARCH_META_FUNCTION_NAME_PREFIX,
v.to_string()
),
format!(
"{}_{}",
MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX,
v.to_string() v.to_string()
), ),
] ]
@@ -2086,7 +2097,8 @@ impl Config {
) )
} else if mcp_declaration_names.contains(&item_invoke_name) { } else if mcp_declaration_names.contains(&item_invoke_name) {
server_names.insert(item_invoke_name); server_names.insert(item_invoke_name);
server_names.insert(item_list_name); server_names.insert(item_search_name);
server_names.insert(item_describe_name);
} }
} }
} }
@@ -2112,7 +2124,8 @@ impl Config {
.into_iter() .into_iter()
.filter(|v| { .filter(|v| {
v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) v.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) || v.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
|| v.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}) })
.collect(); .collect();
let tool_names: HashSet<String> = agent_functions let tool_names: HashSet<String> = agent_functions
+150 -48
View File
@@ -4,7 +4,10 @@ use crate::{
}; };
use crate::config::ensure_parent_exists; use crate::config::ensure_parent_exists;
use crate::mcp::{MCP_INVOKE_META_FUNCTION_NAME_PREFIX, MCP_LIST_META_FUNCTION_NAME_PREFIX}; use crate::mcp::{
MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX, MCP_INVOKE_META_FUNCTION_NAME_PREFIX,
MCP_SEARCH_META_FUNCTION_NAME_PREFIX,
};
use crate::parsers::{bash, python}; use crate::parsers::{bash, python};
use anyhow::{Context, Result, anyhow, bail}; use anyhow::{Context, Result, anyhow, bail};
use indexmap::IndexMap; use indexmap::IndexMap;
@@ -247,19 +250,13 @@ impl Functions {
pub fn clear_mcp_meta_functions(&mut self) { pub fn clear_mcp_meta_functions(&mut self) {
self.declarations.retain(|d| { self.declarations.retain(|d| {
!d.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) !d.name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX)
&& !d.name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) && !d.name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX)
&& !d.name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX)
}); });
} }
pub fn append_mcp_meta_functions(&mut self, mcp_servers: Vec<String>) { pub fn append_mcp_meta_functions(&mut self, mcp_servers: Vec<String>) {
let mut invoke_function_properties = IndexMap::new(); let mut invoke_function_properties = IndexMap::new();
invoke_function_properties.insert(
"server".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
..Default::default()
},
);
invoke_function_properties.insert( invoke_function_properties.insert(
"tool".to_string(), "tool".to_string(),
JsonSchema { JsonSchema {
@@ -275,32 +272,86 @@ impl Functions {
}, },
); );
let mut search_function_properties = IndexMap::new();
search_function_properties.insert(
"query".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
description: Some("Generalized explanation of what you want to do".into()),
..Default::default()
},
);
search_function_properties.insert(
"top_k".to_string(),
JsonSchema {
type_value: Some("integer".to_string()),
description: Some("How many results to return, between 1 and 20".into()),
default: Some(Value::from(8usize)),
..Default::default()
},
);
let mut describe_function_properties = IndexMap::new();
describe_function_properties.insert(
"tool".to_string(),
JsonSchema {
type_value: Some("string".to_string()),
description: Some("The name of the tool; e.g., search_issues".into()),
..Default::default()
},
);
for server in mcp_servers { for server in mcp_servers {
let search_function_name = format!("{}_{server}", MCP_SEARCH_META_FUNCTION_NAME_PREFIX);
let describe_function_name =
format!("{}_{server}", MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX);
let invoke_function_name = format!("{}_{server}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX); let invoke_function_name = format!("{}_{server}", MCP_INVOKE_META_FUNCTION_NAME_PREFIX);
let invoke_function_declaration = FunctionDeclaration { let invoke_function_declaration = FunctionDeclaration {
name: invoke_function_name.clone(), name: invoke_function_name.clone(),
description: formatdoc!( description: formatdoc!(
r#" r#"
Invoke the specified tool on the {server} MCP server. Always call {invoke_function_name} first to find the Invoke the specified tool on the {server} MCP server. Always call {describe_function_name} first to
correct names of tools before calling '{invoke_function_name}'. find the correct invocation schema for the given tool.
"# "#
), ),
parameters: JsonSchema { parameters: JsonSchema {
type_value: Some("object".to_string()), type_value: Some("object".to_string()),
properties: Some(invoke_function_properties.clone()), properties: Some(invoke_function_properties.clone()),
required: Some(vec!["server".to_string(), "tool".to_string()]), required: Some(vec!["tool".to_string()]),
..Default::default() ..Default::default()
}, },
agent: false, agent: false,
}; };
let list_functions_declaration = FunctionDeclaration { let search_functions_declaration = FunctionDeclaration {
name: format!("{}_{}", MCP_LIST_META_FUNCTION_NAME_PREFIX, server), name: search_function_name.clone(),
description: format!("List all the available tools for the {server} MCP server"), description: formatdoc!(
parameters: JsonSchema::default(), r#"
Find candidate tools by keywords for the {server} MCP server. Returns small suggestions; fetch
schemas with {describe_function_name}.
"#
),
parameters: JsonSchema {
type_value: Some("object".to_string()),
properties: Some(search_function_properties.clone()),
required: Some(vec!["query".to_string()]),
..Default::default()
},
agent: false,
};
let describe_functions_declaration = FunctionDeclaration {
name: describe_function_name.clone(),
description: "Get the full JSON schema for exactly one MCP tool.".to_string(),
parameters: JsonSchema {
type_value: Some("object".to_string()),
properties: Some(describe_function_properties.clone()),
required: Some(vec!["tool".to_string()]),
..Default::default()
},
agent: false, agent: false,
}; };
self.declarations.push(invoke_function_declaration); self.declarations.push(invoke_function_declaration);
self.declarations.push(list_functions_declaration); self.declarations.push(search_functions_declaration);
self.declarations.push(describe_functions_declaration);
} }
} }
@@ -771,39 +822,14 @@ impl ToolCall {
} }
let output = match cmd_name.as_str() { let output = match cmd_name.as_str() {
_ if cmd_name.starts_with(MCP_LIST_META_FUNCTION_NAME_PREFIX) => { _ if cmd_name.starts_with(MCP_SEARCH_META_FUNCTION_NAME_PREFIX) => {
let registry_arc = { Self::search_mcp_tools(config, &cmd_name, &json_data)?
let cfg = config.read(); }
cfg.mcp_registry _ if cmd_name.starts_with(MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX) => {
.clone() Self::describe_mcp_tool(config, &cmd_name, json_data).await?
.with_context(|| "MCP is not configured")?
};
registry_arc.catalog().await?
} }
_ if cmd_name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) => { _ if cmd_name.starts_with(MCP_INVOKE_META_FUNCTION_NAME_PREFIX) => {
let server = json_data Self::invoke_mcp_tool(config, &cmd_name, &json_data).await?
.get("server")
.ok_or_else(|| anyhow!("Missing 'server' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'server' in arguments"))?;
let tool = json_data
.get("tool")
.ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;
let arguments = json_data
.get("arguments")
.cloned()
.unwrap_or_else(|| json!({}));
let registry_arc = {
let cfg = config.read();
cfg.mcp_registry
.clone()
.with_context(|| "MCP is not configured")?
};
let result = registry_arc.invoke(server, tool, arguments).await?;
serde_json::to_value(result)?
} }
_ => match run_llm_function(cmd_name, cmd_args, envs, agent_name)? { _ => match run_llm_function(cmd_name, cmd_args, envs, agent_name)? {
Some(contents) => serde_json::from_str(&contents) Some(contents) => serde_json::from_str(&contents)
@@ -816,6 +842,82 @@ impl ToolCall {
Ok(output) Ok(output)
} }
async fn describe_mcp_tool(
config: &GlobalConfig,
cmd_name: &str,
json_data: Value,
) -> Result<Value> {
let server_id = cmd_name.replace(&format!("{MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX}_"), "");
let tool = json_data
.get("tool")
.ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;
let registry_arc = {
let cfg = config.read();
cfg.mcp_registry
.clone()
.with_context(|| "MCP is not configured")?
};
let result = registry_arc.describe(&server_id, tool).await?;
Ok(serde_json::to_value(result)?)
}
fn search_mcp_tools(config: &GlobalConfig, cmd_name: &str, json_data: &Value) -> Result<Value> {
let server = cmd_name.replace(&format!("{MCP_SEARCH_META_FUNCTION_NAME_PREFIX}_"), "");
let query = json_data
.get("query")
.ok_or_else(|| anyhow!("Missing 'query' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'query' in arguments"))?;
let top_k = json_data
.get("top_k")
.cloned()
.unwrap_or_else(|| Value::from(8u64))
.as_u64()
.ok_or_else(|| anyhow!("Invalid 'top_k' in arguments"))? as usize;
let registry_arc = {
let cfg = config.read();
cfg.mcp_registry
.clone()
.with_context(|| "MCP is not configured")?
};
let catalog_items = registry_arc
.search_tools_server(&server, query, top_k)
.into_iter()
.map(|it| serde_json::to_value(&it).unwrap_or_default())
.collect();
Ok(Value::Array(catalog_items))
}
async fn invoke_mcp_tool(
config: &GlobalConfig,
cmd_name: &str,
json_data: &Value,
) -> Result<Value> {
let server = cmd_name.replace(&format!("{MCP_INVOKE_META_FUNCTION_NAME_PREFIX}_"), "");
let tool = json_data
.get("tool")
.ok_or_else(|| anyhow!("Missing 'tool' in arguments"))?
.as_str()
.ok_or_else(|| anyhow!("Invalid 'tool' in arguments"))?;
let arguments = json_data
.get("arguments")
.cloned()
.unwrap_or_else(|| json!({}));
let registry_arc = {
let cfg = config.read();
cfg.mcp_registry
.clone()
.with_context(|| "MCP is not configured")?
};
let result = registry_arc.invoke(&server, tool, arguments).await?;
Ok(serde_json::to_value(result)?)
}
fn extract_call_config_from_agent( fn extract_call_config_from_agent(
&self, &self,
config: &GlobalConfig, config: &GlobalConfig,
+117 -27
View File
@@ -2,6 +2,7 @@ use crate::config::Config;
use crate::utils::{AbortSignal, abortable_run_with_spinner}; use crate::utils::{AbortSignal, abortable_run_with_spinner};
use crate::vault::interpolate_secrets; use crate::vault::interpolate_secrets;
use anyhow::{Context, Result, anyhow}; use anyhow::{Context, Result, anyhow};
use bm25::{Document, Language, SearchEngine, SearchEngineBuilder};
use futures_util::future::BoxFuture; use futures_util::future::BoxFuture;
use futures_util::{StreamExt, TryStreamExt, stream}; use futures_util::{StreamExt, TryStreamExt, stream};
use indoc::formatdoc; use indoc::formatdoc;
@@ -9,7 +10,7 @@ use rmcp::model::{CallToolRequestParam, CallToolResult};
use rmcp::service::RunningService; use rmcp::service::RunningService;
use rmcp::transport::TokioChildProcess; use rmcp::transport::TokioChildProcess;
use rmcp::{RoleClient, ServiceExt}; use rmcp::{RoleClient, ServiceExt};
use serde::Deserialize; use serde::{Deserialize, Serialize};
use serde_json::{Value, json}; use serde_json::{Value, json};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
@@ -20,10 +21,46 @@ use std::sync::Arc;
use tokio::process::Command; use tokio::process::Command;
pub const MCP_INVOKE_META_FUNCTION_NAME_PREFIX: &str = "mcp_invoke"; pub const MCP_INVOKE_META_FUNCTION_NAME_PREFIX: &str = "mcp_invoke";
pub const MCP_LIST_META_FUNCTION_NAME_PREFIX: &str = "mcp_list"; pub const MCP_SEARCH_META_FUNCTION_NAME_PREFIX: &str = "mcp_search";
pub const MCP_DESCRIBE_META_FUNCTION_NAME_PREFIX: &str = "mcp_describe";
type ConnectedServer = RunningService<RoleClient, ()>; type ConnectedServer = RunningService<RoleClient, ()>;
#[derive(Clone, Debug, Default, Serialize)]
pub struct CatalogItem {
pub name: String,
pub server: String,
pub description: String,
}
#[derive(Debug)]
struct ServerCatalog {
engine: SearchEngine<String>,
items: HashMap<String, CatalogItem>,
}
impl ServerCatalog {
pub fn build_bm25(items: &HashMap<String, CatalogItem>) -> SearchEngine<String> {
let docs = items.values().map(|it| {
let contents = format!("{}\n{}\nserver:{}", it.name, it.description, it.server);
Document {
id: it.name.clone(),
contents,
}
});
SearchEngineBuilder::<String>::with_documents(Language::English, docs).build()
}
}
impl Clone for ServerCatalog {
fn clone(&self) -> Self {
Self {
engine: Self::build_bm25(&self.items),
items: self.items.clone(),
}
}
}
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
struct McpServersConfig { struct McpServersConfig {
#[serde(rename = "mcpServers")] #[serde(rename = "mcpServers")]
@@ -50,7 +87,8 @@ enum JsonField {
pub struct McpRegistry { pub struct McpRegistry {
log_path: Option<PathBuf>, log_path: Option<PathBuf>,
config: Option<McpServersConfig>, config: Option<McpServersConfig>,
servers: HashMap<String, Arc<RunningService<RoleClient, ()>>>, servers: HashMap<String, Arc<ConnectedServer>>,
catalogs: HashMap<String, ServerCatalog>,
} }
impl McpRegistry { impl McpRegistry {
@@ -173,7 +211,7 @@ impl McpRegistry {
.collect() .collect()
}; };
let results: Vec<(String, Arc<_>)> = stream::iter( let results: Vec<(String, Arc<_>, ServerCatalog)> = stream::iter(
server_ids server_ids
.into_iter() .into_iter()
.map(|id| async { self.start_server(id).await }), .map(|id| async { self.start_server(id).await }),
@@ -182,13 +220,24 @@ impl McpRegistry {
.try_collect() .try_collect()
.await?; .await?;
self.servers = results.into_iter().collect(); self.servers = results
.clone()
.into_iter()
.map(|(id, server, _)| (id, server))
.collect();
self.catalogs = results
.into_iter()
.map(|(id, _, catalog)| (id, catalog))
.collect();
} }
Ok(()) Ok(())
} }
async fn start_server(&self, id: String) -> Result<(String, Arc<ConnectedServer>)> { async fn start_server(
&self,
id: String,
) -> Result<(String, Arc<ConnectedServer>, ServerCatalog)> {
let server = self let server = self
.config .config
.as_ref() .as_ref()
@@ -231,14 +280,33 @@ impl McpRegistry {
.await .await
.with_context(|| format!("Failed to start MCP server: {}", &server.command))?, .with_context(|| format!("Failed to start MCP server: {}", &server.command))?,
); );
debug!( let tools = service.list_tools(None).await?;
"Available tools for MCP server {id}: {:?}", debug!("Available tools for MCP server {id}: {tools:?}");
service.list_tools(None).await?
); let mut items_vec = Vec::new();
for t in tools.tools {
let name = t.name.to_string();
let description = t.description.unwrap_or_default().to_string();
items_vec.push(CatalogItem {
name,
server: id.clone(),
description,
});
}
let mut items_map = HashMap::new();
items_vec.into_iter().for_each(|it| {
items_map.insert(it.name.clone(), it);
});
let catalog = ServerCatalog {
engine: ServerCatalog::build_bm25(&items_map),
items: items_map,
};
info!("Started MCP server: {id}"); info!("Started MCP server: {id}");
Ok((id.to_string(), service)) Ok((id.to_string(), service, catalog))
} }
pub async fn stop_all_servers(mut self) -> Result<Self> { pub async fn stop_all_servers(mut self) -> Result<Self> {
@@ -268,26 +336,48 @@ impl McpRegistry {
} }
} }
pub fn catalog(&self) -> BoxFuture<'static, Result<Value>> { pub fn search_tools_server(&self, server: &str, query: &str, top_k: usize) -> Vec<CatalogItem> {
let servers: Vec<(String, Arc<ConnectedServer>)> = self let Some(catalog) = self.catalogs.get(server) else {
return vec![];
};
let engine = &catalog.engine;
let raw = engine.search(query, top_k.min(20));
raw.into_iter()
.filter_map(|r| catalog.items.get(&r.document.id))
.take(top_k)
.cloned()
.collect()
}
pub async fn describe(&self, server_id: &str, tool: &str) -> Result<Value> {
let server = self
.servers .servers
.iter() .iter()
.map(|(id, s)| (id.clone(), s.clone())) .filter(|(id, _)| &server_id == id)
.collect(); .map(|(_, s)| s.clone())
.next()
.ok_or(anyhow!("{server_id} MCP server not found in config"))?;
Box::pin(async move { let tool_schema = server
let mut out = Vec::with_capacity(servers.len()); .list_tools(None)
for (id, server) in servers { .await?
let tools = server.list_tools(None).await?; .tools
let resources = server.list_resources(None).await.unwrap_or_default(); .into_iter()
out.push(json!({ .find(|it| it.name == tool)
"server": id, .ok_or(anyhow!(
"tools": tools, "{tool} not found in {server_id} MCP server catalog"
"resources": resources, ))?
})); .input_schema;
Ok(json!({
"type": "object",
"properties": {
"tool": {
"type": "string",
},
"arguments": tool_schema
} }
Ok(Value::Array(out)) }))
})
} }
pub fn invoke( pub fn invoke(