From fa203722b2291cebdcde725a3060c721d153a452 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Thu, 7 May 2026 08:35:52 -0600 Subject: [PATCH] chore: updated models.yaml --- models.yaml | 1312 +++++++++++++++++++++------------------------------ 1 file changed, 537 insertions(+), 775 deletions(-) diff --git a/models.yaml b/models.yaml index 987c091..f440e21 100644 --- a/models.yaml +++ b/models.yaml @@ -211,23 +211,31 @@ supports_function_calling: true - name: gemini-2.5-flash max_input_tokens: 1048576 - max_output_tokens: 65535 - input_price: 0.3 - output_price: 2.5 + max_output_tokens: 65536 + input_price: 0 + output_price: 0 supports_vision: true supports_function_calling: true - name: gemini-2.5-pro max_input_tokens: 1048576 max_output_tokens: 65536 - input_price: 1.25 - output_price: 10 + input_price: 0 + output_price: 0 supports_vision: true supports_function_calling: true - name: gemini-2.5-flash-lite + max_input_tokens: 1000000 + max_output_tokens: 64000 + input_price: 0 + output_price: 0 + supports_vision: true + supports_function_calling: true + - name: gemini-3-pro-preview + max_input_tokens: 1048576 + supports_vision: true + supports_function_calling: true + - name: gemini-3-flash-preview max_input_tokens: 1048576 - max_output_tokens: 65535 - input_price: 0.1 - output_price: 0.4 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash @@ -245,11 +253,10 @@ supports_vision: true supports_function_calling: true - name: gemma-3-27b-it - supports_vision: true - max_input_tokens: 128000 - max_output_tokens: 65536 - input_price: 0.04 - output_price: 0.15 + max_input_tokens: 131072 + max_output_tokens: 8192 + input_price: 0 + output_price: 0 - name: text-embedding-004 type: embedding input_price: 0 @@ -432,7 +439,7 @@ thinking: type: enabled budget_tokens: 16000 - - name: claude-sonnet-4-20250514 + - name: claude-sonnet-4-5-20250929 max_input_tokens: 200000 max_output_tokens: 8192 require_max_tokens: true @@ -440,8 +447,8 @@ output_price: 15 supports_vision: true supports_function_calling: true - - name: claude-sonnet-4-20250514:thinking - real_name: claude-sonnet-4-20250514 + - name: claude-sonnet-4-5-20250929:thinking + real_name: claude-sonnet-4-5-20250929 max_input_tokens: 200000 max_output_tokens: 24000 require_max_tokens: true @@ -456,22 +463,23 @@ thinking: type: enabled budget_tokens: 16000 - - name: claude-3-7-sonnet-20250219 + - name: claude-haiku-4-5-20251001 max_input_tokens: 200000 max_output_tokens: 8192 require_max_tokens: true - input_price: 3 - output_price: 15 + input_price: 1 + output_price: 5 supports_vision: true supports_function_calling: true - - name: claude-3-7-sonnet-20250219:thinking - real_name: claude-3-7-sonnet-20250219 + - name: claude-haiku-4-5-20251001:thinking + real_name: claude-haiku-4-5-20251001 max_input_tokens: 200000 max_output_tokens: 24000 require_max_tokens: true - input_price: 3 - output_price: 15 + input_price: 1 + output_price: 5 supports_vision: true + supports_function_calling: true patch: body: temperature: null @@ -494,6 +502,12 @@ # - https://docs.mistral.ai/api/ - provider: mistral models: + - name: mistral-large-latest + max_output_tokens: 262144 + input_price: 0.5 + output_price: 1.5 + supports_function_calling: true + supports_vision: true - name: mistral-medium-latest max_input_tokens: 131072 input_price: 0.4 @@ -507,28 +521,33 @@ supports_function_calling: true supports_vision: true - name: magistral-medium-latest - max_input_tokens: 40960 + max_input_tokens: 131072 input_price: 2 output_price: 5 - name: magistral-small-latest - max_input_tokens: 40960 + max_input_tokens: 131072 input_price: 0.5 output_price: 1.5 - name: devstral-medium-latest - max_input_tokens: 256000 + max_input_tokens: 262144 input_price: 0.4 output_price: 2 supports_function_calling: true - name: devstral-small-latest - max_input_tokens: 256000 + max_input_tokens: 262144 input_price: 0.1 output_price: 0.3 supports_function_calling: true - name: codestral-latest - max_input_tokens: 256000 + max_input_tokens: 262144 input_price: 0.3 output_price: 0.9 supports_function_calling: true + - name: ministral-14b-latest + max_input_tokens: 262144 + input_price: 0.2 + output_price: 0.2 + supports_function_calling: true - name: mistral-embed type: embedding max_input_tokens: 8092 @@ -577,8 +596,8 @@ output_price: 10 supports_vision: true - name: command-r7b-12-2024 - max_input_tokens: 128000 - max_output_tokens: 4000 + max_input_tokens: 131072 + max_output_tokens: 4096 input_price: 0.0375 output_price: 0.15 - name: embed-v4.0 @@ -614,23 +633,17 @@ # - https://docs.x.ai/docs/api-reference#chat-completions - provider: xai models: - - name: grok-4 - supports_vision: true - max_input_tokens: 256000 - input_price: 3 - output_price: 15 - supports_function_calling: true - - name: grok-4-fast-non-reasoning + - name: grok-4-1-fast-non-reasoning max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - - name: grok-4-fast-reasoning + - name: grok-4-1-fast-reasoning max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - - name: grok-code-fast + - name: grok-code-fast-1 max_input_tokens: 256000 input_price: 0.2 output_price: 1.5 @@ -755,16 +768,24 @@ supports_function_calling: true - name: gemini-2.5-flash-lite max_input_tokens: 1048576 - max_output_tokens: 65535 - input_price: 0.1 + max_output_tokens: 65536 + input_price: 0.3 output_price: 0.4 supports_vision: true supports_function_calling: true + - name: gemini-3-pro-preview + max_input_tokens: 1048576 + supports_vision: true + supports_function_calling: true + - name: gemini-3-flash-preview + max_input_tokens: 1048576 + supports_vision: true + supports_function_calling: true - name: gemini-2.0-flash-001 max_input_tokens: 1048576 max_output_tokens: 8192 - input_price: 0.1 - output_price: 0.4 + input_price: 0.15 + output_price: 0.6 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash-lite-001 @@ -774,6 +795,75 @@ output_price: 0.3 supports_vision: true supports_function_calling: true + - name: claude-opus-4-6 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + supports_function_calling: true + - name: claude-opus-4-6:thinking + real_name: claude-opus-4-6 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + patch: + body: + temperature: null + top_p: null + thinking: + type: enabled + budget_tokens: 16000 + - name: claude-sonnet-4-6 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + supports_function_calling: true + - name: claude-sonnet-4-6:thinking + real_name: claude-sonnet-4-6 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + patch: + body: + temperature: null + top_p: null + thinking: + type: enabled + budget_tokens: 16000 + - name: claude-opus-4-5@20251101 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + supports_function_calling: true + - name: claude-opus-4-5@20251101:thinking + real_name: claude-opus-4-5@20251101 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + patch: + body: + temperature: null + top_p: null + thinking: + type: enabled + budget_tokens: 16000 - name: claude-sonnet-4-5@20250929 max_input_tokens: 200000 max_output_tokens: 8192 @@ -820,98 +910,6 @@ thinking: type: enabled budget_tokens: 16000 - - name: claude-opus-4-1@20250805 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: claude-opus-4-1@20250805:thinking - real_name: claude-opus-4-1@20250805 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - patch: - body: - temperature: null - top_p: null - thinking: - type: enabled - budget_tokens: 16000 - - name: claude-opus-4@20250514 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: claude-opus-4@20250514:thinking - real_name: claude-opus-4@20250514 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - patch: - body: - temperature: null - top_p: null - thinking: - type: enabled - budget_tokens: 16000 - - name: claude-sonnet-4@20250514 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: claude-sonnet-4@20250514:thinking - real_name: claude-sonnet-4@20250514 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - patch: - body: - temperature: null - top_p: null - thinking: - type: enabled - budget_tokens: 16000 - - name: claude-3-7-sonnet@20250219 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: claude-3-7-sonnet@20250219:thinking - real_name: claude-3-7-sonnet@20250219 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - patch: - body: - temperature: null - top_p: null - thinking: - type: enabled - budget_tokens: 16000 - name: claude-3-5-haiku@20241022 max_input_tokens: 200000 max_output_tokens: 8192 @@ -952,6 +950,81 @@ # - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-call.html - provider: bedrock models: + - name: us.anthropic.claude-opus-4-6-v1 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + supports_function_calling: true + - name: us.anthropic.claude-opus-4-6-v1:thinking + real_name: us.anthropic.claude-opus-4-6-v1 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + patch: + body: + inferenceConfig: + temperature: null + topP: null + additionalModelRequestFields: + thinking: + type: enabled + budget_tokens: 16000 + - name: us.anthropic.claude-sonnet-4-6 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + supports_function_calling: true + - name: us.anthropic.claude-sonnet-4-6:thinking + real_name: us.anthropic.claude-sonnet-4-6 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + patch: + body: + inferenceConfig: + temperature: null + topP: null + additionalModelRequestFields: + thinking: + type: enabled + budget_tokens: 16000 + - name: us.anthropic.claude-opus-4-5-20251101-v1:0 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + supports_function_calling: true + - name: us.anthropic.claude-opus-4-5-20251101-v1:0:thinking + real_name: us.anthropic.claude-opus-4-5-20251101-v1:0 + max_input_tokens: 200000 + max_output_tokens: 24000 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + patch: + body: + inferenceConfig: + temperature: null + topP: null + additionalModelRequestFields: + thinking: + type: enabled + budget_tokens: 16000 - name: us.anthropic.claude-sonnet-4-5-20250929-v1:0 max_input_tokens: 200000 max_output_tokens: 8192 @@ -1002,114 +1075,6 @@ thinking: type: enabled budget_tokens: 16000 - - name: us.anthropic.claude-opus-4-1-20250805-v1:0 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: us.anthropic.claude-opus-4-1-20250805-v1:0:thinking - real_name: us.anthropic.claude-opus-4-1-20250805-v1:0 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - patch: - body: - inferenceConfig: - temperature: null - topP: null - additionalModelRequestFields: - thinking: - type: enabled - budget_tokens: 16000 - - name: us.anthropic.claude-opus-4-20250514-v1:0 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: us.anthropic.claude-opus-4-20250514-v1:0:thinking - real_name: us.anthropic.claude-opus-4-20250514-v1:0 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - patch: - body: - inferenceConfig: - temperature: null - topP: null - additionalModelRequestFields: - thinking: - type: enabled - budget_tokens: 16000 - - name: us.anthropic.claude-sonnet-4-20250514-v1:0 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: us.anthropic.claude-sonnet-4-20250514-v1:0:thinking - real_name: us.anthropic.claude-sonnet-4-20250514-v1:0 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - patch: - body: - inferenceConfig: - temperature: null - topP: null - additionalModelRequestFields: - thinking: - type: enabled - budget_tokens: 16000 - - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0:thinking - real_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 - max_input_tokens: 200000 - max_output_tokens: 24000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - patch: - body: - inferenceConfig: - temperature: null - topP: null - additionalModelRequestFields: - thinking: - type: enabled - budget_tokens: 16000 - - name: anthropic.claude-3-5-haiku-20241022-v1:0 - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 0.8 - output_price: 4 - supports_vision: true - supports_function_calling: true - name: us.meta.llama4-maverick-17b-instruct-v1:0 max_input_tokens: 131072 max_output_tokens: 8192 @@ -1201,6 +1166,12 @@ require_max_tokens: true input_price: 0 output_price: 0 + - name: '@cf/zai-org/glm-4.7-flash' + max_input_tokens: 131072 + max_output_tokens: 2048 + require_max_tokens: true + input_price: 0 + output_price: 0 - name: '@cf/google/gemma-3-12b-it' max_input_tokens: 131072 max_output_tokens: 2048 @@ -1264,40 +1235,55 @@ # - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api - provider: qianwen models: - - name: qwen3-max - input_price: 1.2 - output_price: 6 - max_output_tokens: 32768 + - name: qwen3.5-plus max_input_tokens: 262144 supports_function_calling: true - - name: qwen-plus - input_price: 0.4 - output_price: 1.2 - max_output_tokens: 32768 - max_input_tokens: 1000000 + patch: + body: + enable_thinking: false + - name: qwen3.5-plus:thinking + real_name: qwen3.5-plus + max_input_tokens: 262144 supports_function_calling: true - - name: qwen-flash - max_input_tokens: 1000000 + - name: qwen3-max + max_input_tokens: 262144 supports_function_calling: true + - name: qwen3-max:thinking + real_name: qwen3-max + max_input_tokens: 262144 + supports_function_calling: true + patch: + body: + enable_thinking: true - name: qwen3-vl-plus max_input_tokens: 262144 supports_vision: true - name: qwen3-vl-flash max_input_tokens: 262144 supports_vision: true - - name: qwen-coder-plus + - name: qwen3-coder-plus max_input_tokens: 1000000 - - name: qwen-coder-flash + - name: qwen3-coder-flash max_input_tokens: 1000000 - - name: qwen3-next-80b-a3b-instruct + - name: qwen3.5-397b-a17b max_input_tokens: 262144 - input_price: 0.09 - output_price: 1.1 + supports_function_calling: true + patch: + body: + enable_thinking: false + - name: qwen3.5-397b-a17b:thinking + real_name: qwen3.5-397b-a17b + max_input_tokens: 262144 + supports_function_calling: true + - name: qwen3-next-80b-a3b-instruct + max_input_tokens: 131072 + input_price: 0.14 + output_price: 0.56 supports_function_calling: true - name: qwen3-next-80b-a3b-thinking - max_input_tokens: 128000 - input_price: 0.15 - output_price: 1.2 + max_input_tokens: 131072 + input_price: 0.14 + output_price: 1.4 - name: qwen3-235b-a22b-instruct-2507 max_input_tokens: 131072 input_price: 0.28 @@ -1305,43 +1291,33 @@ supports_function_calling: true - name: qwen3-235b-a22b-thinking-2507 max_input_tokens: 131072 - input_price: 0 - output_price: 0 + input_price: 0.28 + output_price: 2.8 - name: qwen3-30b-a3b-instruct-2507 - max_output_tokens: 262144 - max_input_tokens: 262144 - input_price: 0.09 - output_price: 0.3 + max_input_tokens: 131072 + input_price: 0.105 + output_price: 0.42 supports_function_calling: true - name: qwen3-30b-a3b-thinking-2507 - max_input_tokens: 32768 - input_price: 0.051 - output_price: 0.34 + max_input_tokens: 131072 + input_price: 0.105 + output_price: 1.05 - name: qwen3-vl-32b-instruct - max_output_tokens: 32768 - max_input_tokens: 131072 - input_price: 0.104 - output_price: 0.416 - supports_vision: true - - name: qwen3-vl-8b-instruct - max_output_tokens: 32768 - max_input_tokens: 131072 - input_price: 0.08 - output_price: 0.5 - supports_vision: true - - name: qwen3-coder-480b-a35b-instruct - max_input_tokens: 262144 - input_price: 1.26 - output_price: 5.04 - - name: qwen3-coder-30b-a3b-instruct - max_output_tokens: 32768 - max_input_tokens: 160000 - input_price: 0.07 - output_price: 0.27 - - name: deepseek-v3.2-exp max_input_tokens: 131072 input_price: 0.28 - output_price: 0.42 + output_price: 1.12 + supports_vision: true + - name: qwen3-vl-8b-instruct + max_input_tokens: 131072 + input_price: 0.07 + output_price: 0.28 + supports_vision: true + - name: qwen3-coder-next + max_input_tokens: 262144 + - name: qwen3-coder-480b-a35b-instruct + max_input_tokens: 262144 + - name: qwen3-coder-30b-a3b-instruct + max_input_tokens: 262144 - name: text-embedding-v4 type: embedding input_price: 0.1 @@ -1361,27 +1337,20 @@ # - https://cloud.tencent.com/document/product/1729/111007 - provider: hunyuan models: - - name: hunyuan-turbos-latest - max_input_tokens: 28000 + - name: hunyuan-2.0-instruct-20251111 + max_input_tokens: 131072 input_price: 0.112 output_price: 0.28 supports_function_calling: true - - name: hunyuan-t1-latest - max_input_tokens: 28000 + - name: hunyuan-2.0-thinking-20251109 + max_input_tokens: 131072 input_price: 0.14 output_price: 0.56 - - name: hunyuan-lite - max_input_tokens: 250000 - input_price: 0 - output_price: 0 supports_function_calling: true - - name: hunyuan-turbos-vision - max_input_tokens: 6144 + - name: hunyuan-vision-1.5-instruct + max_input_tokens: 24576 input_price: 0.42 - output_price: 0.84 - supports_vision: true - - name: hunyuan-t1-vision - max_input_tokens: 24000 + output_price: 1.26 supports_vision: true - name: hunyuan-embedding type: embedding @@ -1395,6 +1364,12 @@ # - https://platform.moonshot.cn/docs/api/chat#%E5%85%AC%E5%BC%80%E7%9A%84%E6%9C%8D%E5%8A%A1%E5%9C%B0%E5%9D%80 - provider: moonshot models: + - name: kimi-k2.5 + max_input_tokens: 262144 + input_price: 0.56 + output_price: 2.94 + supports_vision: true + supports_function_calling: true - name: kimi-k2-turbo-preview max_input_tokens: 262144 input_price: 1.12 @@ -1413,9 +1388,9 @@ output_price: 8.12 supports_vision: true - name: kimi-k2-thinking - max_input_tokens: 131072 - input_price: 0.47 - output_price: 2 + max_input_tokens: 262144 + input_price: 0.56 + output_price: 2.24 supports_vision: true # Links: @@ -1424,10 +1399,10 @@ - provider: deepseek models: - name: deepseek-chat - max_input_tokens: 163840 - max_output_tokens: 163840 - input_price: 0.32 - output_price: 0.89 + max_input_tokens: 64000 + max_output_tokens: 8192 + input_price: 0.56 + output_price: 1.68 supports_function_calling: true - name: deepseek-reasoner max_input_tokens: 64000 @@ -1440,54 +1415,41 @@ # - https://open.bigmodel.cn/dev/api#glm-4 - provider: zhipuai models: - - name: glm-4.6 + - name: glm-5 max_input_tokens: 202752 - input_price: 0.28 - output_price: 1.12 supports_function_calling: true - - name: glm-4.5 - max_input_tokens: 131072 - input_price: 0.28 - output_price: 1.12 - - name: glm-4.5-x - max_input_tokens: 131072 - input_price: 1.12 - output_price: 4.48 + - name: glm-5:instruct + real_name: glm-5 + max_input_tokens: 202752 supports_function_calling: true - - name: glm-4.5-air - max_input_tokens: 131072 - input_price: 0.084 - output_price: 0.56 - - name: glm-4.5-airx - max_input_tokens: 131072 - input_price: 0.56 - output_price: 2.24 + patch: + body: + thinking: + type: disabled + - name: glm-4.7 + max_input_tokens: 202752 supports_function_calling: true - - name: glm-4.5-flash - max_input_tokens: 131072 + - name: glm-4.7:instruct + real_name: glm-4.7 + max_input_tokens: 202752 + supports_function_calling: true + patch: + body: + thinking: + type: disabled + - name: glm-4.7-flash + max_input_tokens: 202752 input_price: 0 output_price: 0 - - name: glm-4.5v + supports_function_calling: true + - name: glm-4.6v max_input_tokens: 65536 - input_price: 0.56 - output_price: 1.68 supports_vision: true - - name: glm-z1-air - max_input_tokens: 131072 - input_price: 0.07 - output_price: 0.07 - - name: glm-z1-airx - max_input_tokens: 131072 - input_price: 0.7 - output_price: 0.7 - - name: glm-z1-flashx - max_input_tokens: 131072 - input_price: 0.014 - output_price: 0.014 - - name: glm-z1-flash - max_input_tokens: 131072 + - name: glm-4.6v-flash + max_input_tokens: 65536 input_price: 0 output_price: 0 + supports_vision: true - name: embedding-3 type: embedding max_input_tokens: 8192 @@ -1500,15 +1462,29 @@ input_price: 0.112 # Links: -# - https://platform.minimaxi.com/docs/guides/pricing +# - https://platform.minimaxi.com/docs/guides/pricing-paygo # - https://platform.minimaxi.com/document/ChatCompletion%20v2 - provider: minimax models: - - name: minimax-m2 - max_output_tokens: 65536 - max_input_tokens: 196608 - input_price: 0.255 - output_price: 1 + - name: minimax-m2.5 + max_input_tokens: 204800 + input_price: 0.294 + output_price: 1.176 + supports_function_calling: true + - name: minimax-m2.5-highspeed + max_input_tokens: 204800 + input_price: 0.588 + output_price: 2.352 + supports_function_calling: true + - name: minimax-m2.1 + max_input_tokens: 204800 + input_price: 0.294 + output_price: 1.176 + supports_function_calling: true + - name: minimax-m2.1-highspeed + max_input_tokens: 204800 + input_price: 0.588 + output_price: 2.352 supports_function_calling: true # Links: @@ -1516,18 +1492,11 @@ # - https://openrouter.ai/docs/api-reference/chat-completion - provider: openrouter models: - - name: openai/gpt-5.1 + - name: openai/gpt-5.2 max_input_tokens: 400000 max_output_tokens: 128000 - input_price: 1.25 - output_price: 10 - supports_vision: true - supports_function_calling: true - - name: openai/gpt-5.1-chat - max_input_tokens: 128000 - max_output_tokens: 16384 - input_price: 1.25 - output_price: 10 + input_price: 1.75 + output_price: 14 supports_vision: true supports_function_calling: true - name: openai/gpt-5 @@ -1537,13 +1506,6 @@ output_price: 10 supports_vision: true supports_function_calling: true - - name: openai/gpt-5-chat - max_input_tokens: 128000 - max_output_tokens: 16384 - input_price: 1.25 - output_price: 10 - supports_vision: true - supports_function_calling: true - name: openai/gpt-5-mini max_input_tokens: 400000 max_output_tokens: 128000 @@ -1565,164 +1527,82 @@ output_price: 8 supports_vision: true supports_function_calling: true - - name: openai/gpt-4.1-mini - max_input_tokens: 1047576 - max_output_tokens: 32768 - input_price: 0.4 - output_price: 1.6 - supports_vision: true - supports_function_calling: true - - name: openai/gpt-4.1-nano - max_input_tokens: 1047576 - max_output_tokens: 32768 - input_price: 0.1 - output_price: 0.4 - supports_vision: true - supports_function_calling: true - name: openai/gpt-4o - max_output_tokens: 16384 max_input_tokens: 128000 input_price: 2.5 output_price: 10 supports_vision: true supports_function_calling: true - - name: openai/gpt-4o-mini - max_output_tokens: 16384 - max_input_tokens: 128000 - input_price: 0.15 - output_price: 0.6 - supports_vision: true - supports_function_calling: true - - name: openai/o4-mini - max_output_tokens: 100000 - max_input_tokens: 200000 - input_price: 1.1 - output_price: 4.4 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - max_tokens: null - temperature: null - top_p: null - - name: openai/o4-mini-high - max_output_tokens: 100000 - max_input_tokens: 200000 - input_price: 1.1 - output_price: 4.4 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - reasoning_effort: high - max_tokens: null - temperature: null - top_p: null - - name: openai/o3 - max_output_tokens: 100000 - max_input_tokens: 200000 - input_price: 2 - output_price: 8 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - max_tokens: null - temperature: null - top_p: null - - name: openai/o3-high - real_name: openai/o3 - max_input_tokens: 200000 - input_price: 2 - output_price: 8 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - reasoning_effort: high - temperature: null - top_p: null - - name: openai/o3-mini - max_output_tokens: 100000 - max_input_tokens: 200000 - input_price: 1.1 - output_price: 4.4 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - temperature: null - top_p: null - - name: openai/o3-mini-high - max_output_tokens: 100000 - max_input_tokens: 200000 - input_price: 1.1 - output_price: 4.4 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - temperature: null - top_p: null - name: openai/gpt-oss-120b max_input_tokens: 131072 - input_price: 0.039 - output_price: 0.19 + input_price: 0.09 + output_price: 0.45 supports_function_calling: true - name: openai/gpt-oss-20b max_input_tokens: 131072 - input_price: 0.03 - output_price: 0.14 + input_price: 0.04 + output_price: 0.16 supports_function_calling: true - name: google/gemini-2.5-flash - max_output_tokens: 65535 max_input_tokens: 1048576 input_price: 0.3 output_price: 2.5 supports_vision: true supports_function_calling: true - name: google/gemini-2.5-pro - max_output_tokens: 65536 max_input_tokens: 1048576 input_price: 1.25 output_price: 10 supports_vision: true supports_function_calling: true - name: google/gemini-2.5-flash-lite - max_output_tokens: 65535 max_input_tokens: 1048576 - input_price: 0.1 + input_price: 0.3 output_price: 0.4 supports_vision: true - name: google/gemini-2.0-flash-001 - max_output_tokens: 8192 - max_input_tokens: 1048576 - input_price: 0.1 - output_price: 0.4 + max_input_tokens: 1000000 + input_price: 0.15 + output_price: 0.6 supports_vision: true supports_function_calling: true - name: google/gemini-2.0-flash-lite-001 - max_output_tokens: 8192 max_input_tokens: 1048576 input_price: 0.075 output_price: 0.3 supports_vision: true supports_function_calling: true - name: google/gemma-3-27b-it - max_output_tokens: 65536 + max_input_tokens: 131072 + input_price: 0.1 + output_price: 0.2 + - name: anthropic/claude-opus-4.6 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 supports_vision: true - max_input_tokens: 128000 - input_price: 0.04 - output_price: 0.15 + supports_function_calling: true + - name: anthropic/claude-sonnet-4.6 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + supports_function_calling: true + - name: anthropic/claude-opus-4.5 + max_input_tokens: 200000 + max_output_tokens: 8192 + require_max_tokens: true + input_price: 5 + output_price: 25 + supports_vision: true + supports_function_calling: true - name: anthropic/claude-sonnet-4.5 - max_input_tokens: 1000000 - max_output_tokens: 64000 + max_input_tokens: 200000 + max_output_tokens: 8192 require_max_tokens: true input_price: 3 output_price: 15 @@ -1730,71 +1610,33 @@ supports_function_calling: true - name: anthropic/claude-haiku-4.5 max_input_tokens: 200000 - max_output_tokens: 64000 + max_output_tokens: 8192 require_max_tokens: true input_price: 1 output_price: 5 supports_vision: true supports_function_calling: true - - name: anthropic/claude-opus-4.1 - max_input_tokens: 200000 - max_output_tokens: 32000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: anthropic/claude-opus-4 - max_input_tokens: 200000 - max_output_tokens: 32000 - require_max_tokens: true - input_price: 15 - output_price: 75 - supports_vision: true - supports_function_calling: true - - name: anthropic/claude-sonnet-4 - max_input_tokens: 1000000 - max_output_tokens: 64000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: anthropic/claude-3.7-sonnet - max_input_tokens: 200000 - max_output_tokens: 64000 - require_max_tokens: true - input_price: 3 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: anthropic/claude-3.5-haiku - max_input_tokens: 200000 - max_output_tokens: 8192 - require_max_tokens: true - input_price: 0.8 - output_price: 4 - supports_vision: true - supports_function_calling: true - name: meta-llama/llama-4-maverick - max_output_tokens: 16384 max_input_tokens: 1048576 - input_price: 0.15 + input_price: 0.18 output_price: 0.6 supports_vision: true supports_function_calling: true - name: meta-llama/llama-4-scout - max_output_tokens: 16384 max_input_tokens: 327680 input_price: 0.08 output_price: 0.3 supports_vision: true supports_function_calling: true - name: meta-llama/llama-3.3-70b-instruct - max_output_tokens: 16384 max_input_tokens: 131072 - input_price: 0.1 - output_price: 0.32 + input_price: 0.12 + output_price: 0.3 + - name: mistralai/mistral-large-2512 + max_input_tokens: 262144 + input_price: 0.5 + output_price: 1.5 + supports_function_calling: true - name: mistralai/mistral-medium-3.1 max_input_tokens: 131072 input_price: 0.4 @@ -1802,142 +1644,119 @@ supports_function_calling: true supports_vision: true - name: mistralai/mistral-small-3.2-24b-instruct - max_output_tokens: 131072 - max_input_tokens: 131072 - input_price: 0.06 - output_price: 0.18 - supports_vision: true - - name: mistralai/magistral-medium-2506 - max_input_tokens: 40960 - input_price: 2 - output_price: 5 - - name: mistralai/magistral-medium-2506:thinking - max_input_tokens: 40960 - input_price: 2 - output_price: 5 - - name: mistralai/magistral-small-2506 - max_input_tokens: 40960 - input_price: 0.5 - output_price: 1.5 - - name: mistralai/devstral-medium - max_input_tokens: 131072 - input_price: 0.4 - output_price: 2 - supports_function_calling: true - - name: mistralai/devstral-small max_input_tokens: 131072 input_price: 0.1 output_price: 0.3 + supports_vision: true + - name: mistralai/devstral-2512 + max_input_tokens: 262144 + input_price: 0.5 + output_price: 0.22 + supports_function_calling: true + - name: mistralai/devstral-small + max_input_tokens: 131072 + input_price: 0.07 + output_price: 0.28 supports_function_calling: true - name: mistralai/codestral-2508 max_input_tokens: 256000 input_price: 0.3 output_price: 0.9 supports_function_calling: true + - name: mistralai/ministral-14b-2512 + max_input_tokens: 262144 + input_price: 0.2 + output_price: 0.2 + supports_function_calling: true - name: ai21/jamba-large-1.7 - max_output_tokens: 4096 max_input_tokens: 256000 input_price: 2 output_price: 8 supports_function_calling: true - - name: ai21/jamba-mini-1.7 - max_input_tokens: 256000 - input_price: 0.2 - output_price: 0.4 - supports_function_calling: true - name: cohere/command-a - max_output_tokens: 8192 max_input_tokens: 256000 input_price: 2.5 output_price: 10 supports_function_calling: true - name: cohere/command-r7b-12-2024 max_input_tokens: 128000 - max_output_tokens: 4000 + max_output_tokens: 4096 input_price: 0.0375 output_price: 0.15 - - name: deepseek/deepseek-v3.2-exp - max_output_tokens: 65536 + - name: deepseek/deepseek-v3.2 max_input_tokens: 163840 - input_price: 0.27 - output_price: 0.41 - - name: deepseek/deepseek-v3.1-terminus - max_input_tokens: 163840 - input_price: 0.21 - output_price: 0.79 - - name: deepseek/deepseek-chat-v3.1 - max_output_tokens: 7168 - max_input_tokens: 32768 - input_price: 0.15 - output_price: 0.75 - - name: deepseek/deepseek-r1-0528 - max_output_tokens: 65536 - max_input_tokens: 163840 - input_price: 0.4 - output_price: 1.75 - patch: - body: - include_reasoning: true + input_price: 0.25 + output_price: 0.38 - name: qwen/qwen3-max - max_output_tokens: 32768 max_input_tokens: 262144 input_price: 1.2 output_price: 6 supports_function_calling: true - - name: qwen/qwen-plus + - name: qwen/qwen3-max-thinking + max_input_tokens: 262144 + input_price: 1.2 + output_price: 6 + supports_function_calling: true + - name: qwen/qwen3.5-plus-02-15 max_input_tokens: 1000000 - max_output_tokens: 32768 + max_output_tokens: 8192 input_price: 0.4 - output_price: 1.2 + output_price: 2.4 + supports_function_calling: true + - name: qwen/qwen3.5-397b-a17b + max_input_tokens: 262144 + max_output_tokens: 8192 + input_price: 0.15 + output_price: 1 supports_function_calling: true - name: qwen/qwen3-next-80b-a3b-instruct max_input_tokens: 262144 - input_price: 0.09 - output_price: 1.1 + input_price: 0.1 + output_price: 0.8 supports_function_calling: true - name: qwen/qwen3-next-80b-a3b-thinking - max_input_tokens: 128000 - input_price: 0.15 - output_price: 1.2 - - name: qwen/qwen5-235b-a22b-2507 # Qwen3 235B A22B Instruct 2507 + max_input_tokens: 262144 + input_price: 0.1 + output_price: 0.8 + - name: qwen/qwen3-235b-a22b-2507 # Qwen3 235B A22B Instruct 2507 max_input_tokens: 262144 input_price: 0.12 output_price: 0.59 supports_function_calling: true - name: qwen/qwen3-235b-a22b-thinking-2507 - max_input_tokens: 131072 - input_price: 0 - output_price: 0 - - name: qwen/qwen3-30b-a3b-instruct-2507 - max_output_tokens: 262144 max_input_tokens: 262144 - input_price: 0.09 - output_price: 0.3 - - name: qwen/qwen3-30b-a3b-thinking-2507 - max_input_tokens: 32768 - input_price: 0.051 - output_price: 0.34 - - name: qwen/qwen3-vl-32b-instruct - max_output_tokens: 32768 + input_price: 0.118 + output_price: 0.118 + - name: qwen/qwen3-30b-a3b-instruct-2507 max_input_tokens: 131072 - input_price: 0.104 - output_price: 0.416 + input_price: 0.2 + output_price: 0.8 + - name: qwen/qwen3-30b-a3b-thinking-2507 + max_input_tokens: 262144 + input_price: 0.071 + output_price: 0.285 + - name: qwen/qwen3-vl-32b-instruct + max_input_tokens: 262144 + input_price: 0.35 + output_price: 1.1 supports_vision: true - name: qwen/qwen3-vl-8b-instruct - max_output_tokens: 32768 - max_input_tokens: 131072 + max_input_tokens: 262144 input_price: 0.08 - output_price: 0.5 + output_price: 0.50 supports_vision: true + - name: qwen/qwen3-coder-next + max_input_tokens: 262144 + input_price: 0.12 + output_price: 0.75 + supports_function_calling: true - name: qwen/qwen3-coder-plus - max_output_tokens: 65536 - max_input_tokens: 1000000 + max_input_tokens: 128000 input_price: 1 output_price: 5 supports_function_calling: true - name: qwen/qwen3-coder-flash - max_output_tokens: 65536 - max_input_tokens: 1000000 + max_input_tokens: 128000 input_price: 0.3 output_price: 1.5 supports_function_calling: true @@ -1947,47 +1766,38 @@ output_price: 0.95 supports_function_calling: true - name: qwen/qwen3-coder-30b-a3b-instruct - max_output_tokens: 32768 - max_input_tokens: 160000 - input_price: 0.07 - output_price: 0.27 + max_input_tokens: 262144 + input_price: 0.052 + output_price: 0.207 + supports_function_calling: true + - name: moonshotai/kimi-k2.5 + max_input_tokens: 262144 + input_price: 0.57 + output_price: 2.85 + supports_vision: true supports_function_calling: true - name: moonshotai/kimi-k2-0905 - max_input_tokens: 131072 - input_price: 0.4 - output_price: 2 + max_input_tokens: 262144 + input_price: 0.296 + output_price: 1.185 + supports_vision: true supports_function_calling: true - name: moonshotai/kimi-k2-thinking - max_input_tokens: 131072 - input_price: 0.47 - output_price: 2 + max_input_tokens: 262144 + input_price: 0.45 + output_price: 2.35 supports_function_calling: true - - name: moonshotai/kimi-dev-72b - max_input_tokens: 131072 - input_price: 0.29 - output_price: 1.15 - supports_function_calling: true - - name: x-ai/grok-4 - supports_vision: true - max_input_tokens: 256000 - input_price: 3 - output_price: 15 - supports_function_calling: true - - name: x-ai/grok-4-fast - max_output_tokens: 30000 - supports_vision: true + - name: x-ai/grok-4.1-fast max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - name: x-ai/grok-code-fast-1 - max_output_tokens: 10000 max_input_tokens: 256000 input_price: 0.2 output_price: 1.5 supports_function_calling: true - name: amazon/nova-premier-v1 - max_output_tokens: 32000 max_input_tokens: 1000000 input_price: 2.5 output_price: 12.5 @@ -2010,49 +1820,57 @@ input_price: 0.035 output_price: 0.14 - name: perplexity/sonar-pro - max_output_tokens: 8000 - supports_vision: true max_input_tokens: 200000 input_price: 3 output_price: 15 - name: perplexity/sonar - supports_vision: true max_input_tokens: 127072 input_price: 1 output_price: 1 - name: perplexity/sonar-reasoning-pro - supports_vision: true max_input_tokens: 128000 input_price: 2 output_price: 8 patch: body: include_reasoning: true - - name: perplexity/sonar-reasoning - max_input_tokens: 127000 - input_price: 1 - output_price: 5 - patch: - body: - include_reasoning: true - name: perplexity/sonar-deep-research - max_input_tokens: 128000 + max_input_tokens: 200000 input_price: 2 output_price: 8 patch: body: include_reasoning: true - - name: minimax/minimax-m2 - max_output_tokens: 65536 + - name: minimax/minimax-m2.5 max_input_tokens: 196608 - input_price: 0.255 - output_price: 1 - - name: z-ai/glm-4.6 - max_output_tokens: 131072 - max_input_tokens: 202752 - input_price: 0.35 - output_price: 1.71 + input_price: 0.3 + output_price: 1.1 supports_function_calling: true + - name: minimax/minimax-m2.1 + max_input_tokens: 196608 + input_price: 0.12 + output_price: 0.48 + supports_function_calling: true + - name: z-ai/glm-5 + max_input_tokens: 204800 + input_price: 0.95 + output_price: 2.55 + supports_function_calling: true + - name: z-ai/glm-4.7 + max_input_tokens: 202752 + input_price: 0.16 + output_price: 0.80 + supports_function_calling: true + - name: z-ai/glm-4.7-flash + max_input_tokens: 202752 + input_price: 0.07 + output_price: 0.40 + supports_function_calling: true + - name: z-ai/glm-4.6v + max_input_tokens: 131072 + input_price: 0.3 + output_price: 0.9 + supports_vision: true # Links: # - https://github.com/marketplace?type=models @@ -2063,11 +1881,6 @@ max_output_tokens: 128000 supports_vision: true supports_function_calling: true - - name: gpt-5-chat - max_input_tokens: 400000 - max_output_tokens: 128000 - supports_vision: true - supports_function_calling: true - name: gpt-5-mini max_input_tokens: 400000 max_output_tokens: 128000 @@ -2083,90 +1896,10 @@ max_output_tokens: 32768 supports_vision: true supports_function_calling: true - - name: gpt-4.1-mini - max_input_tokens: 1047576 - max_output_tokens: 32768 - supports_vision: true - supports_function_calling: true - - name: gpt-4.1-nano - max_input_tokens: 1047576 - max_output_tokens: 32768 - supports_vision: true - supports_function_calling: true - name: gpt-4o max_input_tokens: 128000 max_output_tokens: 16384 supports_function_calling: true - - name: gpt-4o-mini - max_input_tokens: 128000 - max_output_tokens: 16384 - supports_function_calling: true - - name: o4-mini - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - max_tokens: null - temperature: null - top_p: null - - name: o4-mini-high - real_name: o4-mini - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - reasoning_effort: high - max_tokens: null - temperature: null - top_p: null - - name: o3 - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - max_tokens: null - temperature: null - top_p: null - - name: o3-high - real_name: o3 - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - reasoning_effort: high - max_tokens: null - temperature: null - top_p: null - - name: o3-mini - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - max_tokens: null - temperature: null - top_p: null - - name: o3-mini-high - real_name: o3-mini - max_input_tokens: 200000 - supports_vision: true - supports_function_calling: true - system_prompt_prefix: Formatting re-enabled - patch: - body: - reasoning_effort: high - max_tokens: null - temperature: null - top_p: null - name: text-embedding-3-large type: embedding max_tokens_per_chunk: 8191 @@ -2248,6 +1981,16 @@ input_price: 0.08 output_price: 0.3 supports_vision: true + - name: Qwen/Qwen3-Max + max_input_tokens: 262144 + input_price: 1.2 + output_price: 6 + supports_function_calling: true + - name: Qwen/Qwen3-Max-Thinking + max_input_tokens: 262144 + input_price: 1.2 + output_price: 6 + supports_function_calling: true - name: Qwen/Qwen3-Next-80B-A3B-Instruct max_input_tokens: 262144 input_price: 0.14 @@ -2285,22 +2028,11 @@ input_price: 0.18 output_price: 0.69 supports_vision: true - - name: deepseek-ai/DeepSeek-V3.2-Exp + - name: deepseek-ai/DeepSeek-V3.2 max_input_tokens: 163840 - input_price: 0.27 - output_price: 0.40 - - name: deepseek-ai/DeepSeek-V3.1-Terminus - max_input_tokens: 163840 - input_price: 0.27 - output_price: 1.0 - - name: deepseek-ai/DeepSeek-V3.1 - max_input_tokens: 163840 - input_price: 0.3 - output_price: 1.0 - - name: deepseek-ai/DeepSeek-R1-0528 - max_input_tokens: 163840 - input_price: 0.5 - output_price: 2.15 + input_price: 0.26 + output_price: 0.39 + supports_function_calling: true - name: google/gemma-3-27b-it max_input_tokens: 131072 input_price: 0.1 @@ -2309,6 +2041,11 @@ max_input_tokens: 32768 input_price: 0.06 output_price: 0.12 + - name: moonshotai/Kimi-K2.5 + max_input_tokens: 262144 + input_price: 0.5 + output_price: 2.8 + supports_function_calling: true - name: moonshotai/Kimi-K2-Instruct-0905 max_input_tokens: 262144 input_price: 0.5 @@ -2319,11 +2056,36 @@ input_price: 0.55 output_price: 2.5 supports_function_calling: true - - name: zai-org/GLM-4.6 - max_input_tokens: 202752 - input_price: 0.6 - output_price: 1.9 + - name: MiniMaxAI/MiniMax-M2.5 + max_input_tokens: 196608 + input_price: 0.27 + output_price: 0.95 supports_function_calling: true + - name: MiniMaxAI/MiniMax-M2.1 + max_input_tokens: 196608 + input_price: 0.27 + output_price: 0.95 + supports_function_calling: true + - name: zai-org/GLM-5 + max_input_tokens: 202752 + input_price: 0.8 + output_price: 2.56 + supports_function_calling: true + - name: zai-org/GLM-4.7 + max_input_tokens: 202752 + input_price: 0.43 + output_price: 1.75 + supports_function_calling: true + - name: zai-org/GLM-4.7-Flash + max_input_tokens: 202752 + input_price: 0.06 + output_price: 0.4 + supports_function_calling: true + - name: zai-org/GLM-4.6V + max_input_tokens: 131072 + input_price: 0.3 + output_price: 0.9 + supports_vision: true - name: BAAI/bge-large-en-v1.5 type: embedding input_price: 0.01