use super::{
    ApiPatch, MessageContentToolCalls, RequestPatch, list_all_models, list_client_names,
    message::{Message, MessageContent, MessageContentPart},
};

use crate::config::Config;
use crate::utils::{estimate_token_length, strip_think_tag};

use anyhow::{Result, bail};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fmt::Display;
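
// Rough token-accounting overheads used by `total_tokens` and `guard_max_input_tokens`
// below; they are heuristics layered on top of `estimate_token_length`, not exact
// tokenizer counts.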
const PER_MESSAGES_TOKENS: usize = 5;
const BASIS_TOKENS: usize = 2;

#[derive(Debug, Clone)]
pub struct Model {
    client_name: String,
    data: ModelData,
}

impl Default for Model {
    fn default() -> Self {
        Model::new("", "")
    }
}

impl Model {
    pub fn new(client_name: &str, name: &str) -> Self {
        Self {
            client_name: client_name.into(),
            data: ModelData::new(name),
        }
    }

    pub fn from_config(client_name: &str, models: &[ModelData]) -> Vec<Self> {
        models
            .iter()
            .map(|v| Model {
                client_name: client_name.to_string(),
                data: v.clone(),
            })
            .collect()
    }

    pub fn retrieve_model(config: &Config, model_id: &str, model_type: ModelType) -> Result<Self> {
        let models = list_all_models(config);
        let (client_name, model_name) = match model_id.split_once(':') {
            Some((client_name, model_name)) => {
                if model_name.is_empty() {
                    (client_name, None)
                } else {
                    (client_name, Some(model_name))
                }
            }
            None => (model_id, None),
        };
        match model_name {
            Some(model_name) => {
                if let Some(model) = models.iter().find(|v| v.id() == model_id) {
                    if model.model_type() == model_type {
                        return Ok((*model).clone());
                    } else {
                        bail!("Model '{model_id}' is not a {model_type} model")
                    }
                }
                if list_client_names(config)
                    .into_iter()
                    .any(|v| *v == client_name)
                    && model_type.can_create_from_name()
                {
                    let mut new_model = Self::new(client_name, model_name);
                    new_model.data.model_type = model_type.to_string();
                    return Ok(new_model);
                }
            }
            None => {
                if let Some(found) = models
                    .iter()
                    .find(|v| v.client_name == client_name && v.model_type() == model_type)
                {
                    return Ok((*found).clone());
                }
            }
        };
        bail!("Unknown {model_type} model '{model_id}'")
    }
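
    // Resolution works in two shapes: a full "client:name" id must match a configured
    // model of the requested type, while a bare client name picks that client's first
    // configured model of the type. As a hypothetical example, "openai:gpt-4o" would be
    // an exact lookup and "openai" alone a first-match lookup; for chat and reranker
    // types an unknown name under a known client is also accepted and created on the
    // fly (see `ModelType::can_create_from_name`), whereas embedding models must be
    // configured explicitly.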

    pub fn id(&self) -> String {
        if self.data.name.is_empty() {
            self.client_name.to_string()
        } else {
            format!("{}:{}", self.client_name, self.data.name)
        }
    }

    pub fn client_name(&self) -> &str {
        &self.client_name
    }

    pub fn name(&self) -> &str {
        &self.data.name
    }

    pub fn real_name(&self) -> &str {
        self.data.real_name.as_deref().unwrap_or(&self.data.name)
    }

    pub fn model_type(&self) -> ModelType {
        if self.data.model_type.starts_with("embed") {
            ModelType::Embedding
        } else if self.data.model_type.starts_with("rerank") {
            ModelType::Reranker
        } else {
            ModelType::Chat
        }
    }
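
    // The configured `type` string is matched by prefix, so values such as "embedding",
    // "embeddings", "rerank", or "reranker" all resolve to the corresponding variant,
    // and anything else falls back to `ModelType::Chat`.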

    pub fn description(&self) -> String {
        match self.model_type() {
            ModelType::Chat => {
                let ModelData {
                    max_input_tokens,
                    max_output_tokens,
                    input_price,
                    output_price,
                    supports_vision,
                    supports_function_calling,
                    ..
                } = &self.data;
                let max_input_tokens = stringify_option_value(max_input_tokens);
                let max_output_tokens = stringify_option_value(max_output_tokens);
                let input_price = stringify_option_value(input_price);
                let output_price = stringify_option_value(output_price);
                let mut capabilities = vec![];
                if *supports_vision {
                    capabilities.push('👁');
                };
                if *supports_function_calling {
                    capabilities.push('⚒');
                };
                let capabilities: String = capabilities
                    .into_iter()
                    .map(|v| format!("{v} "))
                    .collect::<Vec<String>>()
                    .join("");
                format!(
                    "{max_input_tokens:>8} / {max_output_tokens:>8} | {input_price:>6} / {output_price:>6} {capabilities:>6}"
                )
            }
            ModelType::Embedding => {
                let ModelData {
                    input_price,
                    max_tokens_per_chunk,
                    max_batch_size,
                    ..
                } = &self.data;
                let max_tokens = stringify_option_value(max_tokens_per_chunk);
                let max_batch = stringify_option_value(max_batch_size);
                let price = stringify_option_value(input_price);
                format!("max-tokens:{max_tokens};max-batch:{max_batch};price:{price}")
            }
            ModelType::Reranker => String::new(),
        }
    }
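
    // Chat descriptions are fixed-width columns of token limits, prices, and capability
    // icons; unset fields render as "-" via `stringify_option_value`. With hypothetical
    // data (128000/8192 tokens, prices 2/6, vision + function calling) the line would
    // look roughly like "  128000 /     8192 |      2 /      6   👁 ⚒ ".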

    pub fn patch(&self) -> Option<&Value> {
        self.data.patch.as_ref()
    }

    pub fn max_input_tokens(&self) -> Option<usize> {
        self.data.max_input_tokens
    }

    pub fn max_output_tokens(&self) -> Option<isize> {
        self.data.max_output_tokens
    }

    pub fn no_stream(&self) -> bool {
        self.data.no_stream
    }

    pub fn no_system_message(&self) -> bool {
        self.data.no_system_message
    }

    pub fn system_prompt_prefix(&self) -> Option<&str> {
        self.data.system_prompt_prefix.as_deref()
    }

    pub fn max_tokens_per_chunk(&self) -> Option<usize> {
        self.data.max_tokens_per_chunk
    }

    pub fn default_chunk_size(&self) -> usize {
        self.data.default_chunk_size.unwrap_or(1000)
    }

    pub fn max_batch_size(&self) -> Option<usize> {
        self.data.max_batch_size
    }

    pub fn max_tokens_param(&self) -> Option<isize> {
        if self.data.require_max_tokens {
            self.data.max_output_tokens
        } else {
            None
        }
    }

    pub fn set_max_tokens(
        &mut self,
        max_output_tokens: Option<isize>,
        require_max_tokens: bool,
    ) -> &mut Self {
        match max_output_tokens {
            None | Some(0) => self.data.max_output_tokens = None,
            _ => self.data.max_output_tokens = max_output_tokens,
        }
        self.data.require_max_tokens = require_max_tokens;
        self
    }
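
    // Hypothetical usage sketch: `None` or `Some(0)` clears the output-token limit, and
    // `require_max_tokens` decides whether `max_tokens_param` later exposes it.
    //
    //     model.set_max_tokens(Some(4096), true);
    //     assert_eq!(model.max_tokens_param(), Some(4096));
    //     model.set_max_tokens(Some(0), false);
    //     assert_eq!(model.max_tokens_param(), None);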

    pub fn messages_tokens(&self, messages: &[Message]) -> usize {
        let messages_len = messages.len();
        messages
            .iter()
            .enumerate()
            .map(|(i, v)| match &v.content {
                MessageContent::Text(text) => {
                    if v.role.is_assistant() && i != messages_len - 1 {
                        estimate_token_length(&strip_think_tag(text))
                    } else {
                        estimate_token_length(text)
                    }
                }
                MessageContent::Array(list) => list
                    .iter()
                    .map(|v| match v {
                        MessageContentPart::Text { text } => estimate_token_length(text),
                        MessageContentPart::ImageUrl { .. } => 0,
                    })
                    .sum(),
                MessageContent::ToolCalls(MessageContentToolCalls {
                    tool_results, text, ..
                }) => {
                    estimate_token_length(text)
                        + tool_results
                            .iter()
                            .map(|v| {
                                serde_json::to_string(v)
                                    .map(|v| estimate_token_length(&v))
                                    .unwrap_or_default()
                            })
                            .sum::<usize>()
                }
            })
            .sum()
    }

    pub fn total_tokens(&self, messages: &[Message]) -> usize {
        if messages.is_empty() {
            return 0;
        }
        let num_messages = messages.len();
        let message_tokens = self.messages_tokens(messages);
        if messages[num_messages - 1].role.is_user() {
            num_messages * PER_MESSAGES_TOKENS + message_tokens
        } else {
            (num_messages - 1) * PER_MESSAGES_TOKENS + message_tokens
        }
    }
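
    // Worked example with hypothetical numbers: for three messages whose contents
    // estimate to 100 tokens, `total_tokens` adds PER_MESSAGES_TOKENS per message but
    // only counts the last one when it is a user turn: 3 * 5 + 100 = 115 if the user
    // speaks last, otherwise 2 * 5 + 100 = 110. `guard_max_input_tokens` then adds
    // BASIS_TOKENS before comparing against `max_input_tokens`.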

    pub fn guard_max_input_tokens(&self, messages: &[Message]) -> Result<()> {
        let total_tokens = self.total_tokens(messages) + BASIS_TOKENS;
        if let Some(max_input_tokens) = self.data.max_input_tokens
            && total_tokens >= max_input_tokens
        {
            bail!("Exceed max_input_tokens limit")
        }
        Ok(())
    }
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ModelData {
    pub name: String,
    #[serde(default = "default_model_type", rename = "type")]
    pub model_type: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub real_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_input_tokens: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_price: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_price: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub patch: Option<Value>,

    // chat-only properties
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<isize>,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub require_max_tokens: bool,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub supports_vision: bool,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub supports_function_calling: bool,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    no_stream: bool,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    no_system_message: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    system_prompt_prefix: Option<String>,

    // embedding-only properties
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens_per_chunk: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default_chunk_size: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_batch_size: Option<usize>,
}
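
// Given the serde attributes above, `model_type` (de)serializes as `type`, and unset or
// false fields are omitted on output. A hypothetical YAML-style entry could therefore
// look like:
//
//     name: gpt-4o
//     max_input_tokens: 128000
//     max_output_tokens: 16384
//     supports_vision: true
//     supports_function_calling: true
//
// with `type` defaulting to "chat" via `default_model_type` when it is left out.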

impl ModelData {
    pub fn new(name: &str) -> Self {
        Self {
            name: name.to_string(),
            model_type: default_model_type(),
            ..Default::default()
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderModels {
    pub provider: String,
    pub models: Vec<ModelData>,
}

fn default_model_type() -> String {
    "chat".into()
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ModelType {
    Chat,
    Embedding,
    Reranker,
}

impl Display for ModelType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ModelType::Chat => write!(f, "chat"),
            ModelType::Embedding => write!(f, "embedding"),
            ModelType::Reranker => write!(f, "reranker"),
        }
    }
}

impl ModelType {
    pub fn can_create_from_name(self) -> bool {
        match self {
            ModelType::Chat => true,
            ModelType::Embedding => false,
            ModelType::Reranker => true,
        }
    }

    pub fn api_name(self) -> &'static str {
        match self {
            ModelType::Chat => "chat_completions",
            ModelType::Embedding => "embeddings",
            ModelType::Reranker => "rerank",
        }
    }

    pub fn extract_patch(self, patch: &RequestPatch) -> Option<&ApiPatch> {
        match self {
            ModelType::Chat => patch.chat_completions.as_ref(),
            ModelType::Embedding => patch.embeddings.as_ref(),
            ModelType::Reranker => patch.rerank.as_ref(),
        }
    }
}

fn stringify_option_value<T>(value: &Option<T>) -> String
where
    T: Display,
{
    match value {
        Some(value) => value.to_string(),
        None => "-".to_string(),
    }
}
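
// e.g. `stringify_option_value(&Some(128000))` yields "128000" while `&None::<usize>`
// yields "-", which is how unset limits and prices appear in `description` above.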