From e7bb668ac7a177a656462164d6726e34704e4df2 Mon Sep 17 00:00:00 2001
From: Alex Clarke
Date: Tue, 19 May 2026 12:25:53 -0600
Subject: [PATCH] fix: update the estimate_token_length function to use the
 standard character count method

---
 src/utils/mod.rs | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 8735a39..457190d 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -34,7 +34,6 @@ use is_terminal::IsTerminal;
 use std::borrow::Cow;
 use std::sync::LazyLock;
 use std::{cmp, env, path::PathBuf, process};
-use unicode_segmentation::UnicodeSegmentation;
 
 pub static CODE_BLOCK_RE: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"(?ms)```\w*(.*)```").unwrap());
@@ -74,21 +73,8 @@ pub fn parse_bool(value: &str) -> Option<bool> {
 }
 
 pub fn estimate_token_length(text: &str) -> usize {
-    let words: Vec<&str> = text.unicode_words().collect();
-    let mut output: f32 = 0.0;
-    for word in words {
-        if word.is_ascii() {
-            output += 1.3;
-        } else {
-            let count = word.chars().count();
-            if count == 1 {
-                output += 1.0
-            } else {
-                output += (count as f32) * 0.5;
-            }
-        }
-    }
-    output.ceil() as usize
+    let weighted: usize = text.chars().map(|c| if c.is_ascii() { 1 } else { 2 }).sum();
+    weighted.div_ceil(4)
 }
 
 pub fn strip_think_tag(text: &str) -> Cow<'_, str> {