From e7bb668ac7a177a656462164d6726e34704e4df2 Mon Sep 17 00:00:00 2001
From: Alex Clarke <alex.j.tusa@gmail.com>
Date: Tue, 19 May 2026 12:25:53 -0600
Subject: [PATCH] fix: update the estimate_token_length function to use a
 weighted character-count heuristic (~4 chars per token)

---
 src/utils/mod.rs | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 8735a39..457190d 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -34,7 +34,6 @@ use is_terminal::IsTerminal;
 use std::borrow::Cow;
 use std::sync::LazyLock;
 use std::{cmp, env, path::PathBuf, process};
-use unicode_segmentation::UnicodeSegmentation;
 
 pub static CODE_BLOCK_RE: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"(?ms)```\w*(.*)```").unwrap());
@@ -74,21 +73,8 @@ pub fn parse_bool(value: &str) -> Option<bool> {
 }
 
 pub fn estimate_token_length(text: &str) -> usize {
-    let words: Vec<&str> = text.unicode_words().collect();
-    let mut output: f32 = 0.0;
-    for word in words {
-        if word.is_ascii() {
-            output += 1.3;
-        } else {
-            let count = word.chars().count();
-            if count == 1 {
-                output += 1.0
-            } else {
-                output += (count as f32) * 0.5;
-            }
-        }
-    }
-    output.ceil() as usize
+    let weighted: usize = text.chars().map(|c| if c.is_ascii() { 1 } else { 2 }).sum();
+    weighted.div_ceil(4)
 }
 
 pub fn strip_think_tag(text: &str) -> Cow<'_, str> {