/// Upper bound, in bytes, on the text a `ThinkStripper` keeps buffered
/// between `feed` calls: 2^16 bytes, i.e. 64 KiB.
pub const THINK_BUF_MAX: usize = 1 << 16;
/// Streaming filter state for removing `<think>…</think>` and
/// `<thinking>…</thinking>` blocks from text that arrives in chunks.
///
/// `Default` is derived: an empty `String` and `false` are exactly the
/// pristine state that `ThinkStripper::new` builds, so the two constructors
/// stay equivalent without a hand-written impl.
#[derive(Debug, Clone, Default)]
pub struct ThinkStripper {
    /// Text received but neither emitted nor discarded yet (for example the
    /// beginning of a tag that was cut off by a chunk boundary).
    carry: String,
    /// `true` while the scanner is between an opening tag and its closing tag.
    inside: bool,
}
impl ThinkStripper {
    /// Number of trailing bytes kept while waiting for a closing tag: the
    /// length of the longest closing tag, so a tag split across two chunks
    /// can still be recognized.
    const CLOSE_TAG_MAX: usize = "</thinking>".len();

    /// Creates a stripper with an empty buffer, positioned outside any block.
    pub fn new() -> Self {
        Self {
            carry: String::new(),
            inside: false,
        }
    }

    /// Returns the number of bytes currently held back in the internal buffer.
    pub fn buffered_bytes(&self) -> usize {
        self.carry.len()
    }

    /// Discards all buffered text and returns to the "outside a block" state.
    pub fn reset(&mut self) {
        self.carry.clear();
        self.inside = false;
    }

    /// Consumes the next chunk of streamed text and returns the part that is
    /// safe to show: everything outside think blocks that can already be
    /// classified.
    ///
    /// Text that may still turn out to be the start of a tag is held back and
    /// re-examined on the next call; it is only emitted once more input
    /// arrives or the buffer cap forces it out.
    ///
    /// The size cap is applied *after* scanning, never instead of it, so an
    /// oversized chunk still has its blocks removed and a closing tag inside
    /// it is still honoured.
    pub fn feed(&mut self, delta: &str) -> String {
        self.carry.push_str(delta);
        let mut out = String::new();
        self.drain_into(&mut out);
        // While inside a block `drain_into` already trims the buffer to a few
        // bytes. Outside a block the only thing left is an unterminated
        // opening-tag prefix; once that exceeds the cap, give up on it and
        // emit it as literal text so the buffer stays bounded.
        if !self.inside && self.carry.len() > THINK_BUF_MAX {
            out.push_str(&self.carry);
            self.carry.clear();
        }
        out
    }

    /// Moves every byte of `carry` that can be classified either into `out`
    /// (visible text) or into the void (block content), updating `inside`
    /// as tags are crossed. Returns when the remaining buffer cannot be
    /// classified without more input.
    fn drain_into(&mut self, out: &mut String) {
        loop {
            if self.inside {
                match find_close_tag(&self.carry) {
                    Some((_close_start, close_end)) => {
                        // Drop the block content together with its closing tag.
                        self.carry.drain(..close_end);
                        self.inside = false;
                    }
                    None => {
                        // Block content is never emitted, so only a tail long
                        // enough to contain a split closing tag is retained.
                        let tail_start = self.carry.len().saturating_sub(Self::CLOSE_TAG_MAX);
                        // Never cut through a multi-byte character.
                        let cut = prev_boundary(&self.carry, tail_start);
                        self.carry.drain(..cut);
                        return;
                    }
                }
            } else {
                match find_open_tag(&self.carry) {
                    TagScan::None => {
                        out.push_str(&self.carry);
                        self.carry.clear();
                        return;
                    }
                    TagScan::Complete { start, end } => {
                        out.push_str(&self.carry[..start]);
                        self.carry.drain(..end);
                        self.inside = true;
                    }
                    TagScan::PartialAt(pos) => {
                        // Emit the text before the candidate tag and keep the
                        // candidate for the next call.
                        out.push_str(&self.carry[..pos]);
                        self.carry.drain(..pos);
                        return;
                    }
                }
            }
        }
    }
}
/// Outcome of scanning buffered text for an opening think tag.
///
/// The derives make scan results printable and comparable, so they can be
/// asserted on directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TagScan {
    /// Nothing in the text is, or could still become, an opening tag.
    None,
    /// A complete opening tag occupies the byte range `start..end`.
    Complete { start: usize, end: usize },
    /// The text from this byte offset to the end may be the beginning of an
    /// opening tag whose remainder has not arrived yet.
    PartialAt(usize),
}
/// Scans `s` for the first `<` that either starts a complete opening tag or
/// could still grow into one once more input arrives.
///
/// Returns `TagScan::Complete` with the tag's byte range, `TagScan::PartialAt`
/// with the offset of a possible tag prefix, or `TagScan::None` when every `<`
/// in `s` is definitely ordinary text.
fn find_open_tag(s: &str) -> TagScan {
    let mut from = 0;
    while let Some(offset) = s[from..].find('<') {
        let at = from + offset;
        let tail = &s[at..];
        if let Some(len) = parse_open_tag(tail) {
            return TagScan::Complete {
                start: at,
                end: at + len,
            };
        }
        if could_become_open_tag(tail) {
            return TagScan::PartialAt(at);
        }
        // This `<` is plain text; keep looking after it.
        from = at + 1;
    }
    TagScan::None
}

/// Reports whether `tail` (which starts at a `<` and runs to the end of the
/// buffered text) could turn into an opening tag if more input were appended.
///
/// That is the case when `tail` is a case-insensitive prefix of `<thinking`,
/// or when it is a full tag name followed by ASCII whitespace with no `>`
/// yet. A name followed by any other character (`<thinkpad`, `<thinkingx`)
/// can never become a tag, so it is not held back. The check works on bytes
/// and allocates nothing.
fn could_become_open_tag(tail: &str) -> bool {
    const LONG: &[u8] = b"<thinking";
    const SHORT: &[u8] = b"<think";
    let bytes = tail.as_bytes();
    // Still typing the name: `<`, `<th`, `<think`, `<thinki`, ... `<thinking`.
    if bytes.len() <= LONG.len() && bytes.eq_ignore_ascii_case(&LONG[..bytes.len()]) {
        return true;
    }
    // Name is complete and an attribute section has begun but not ended.
    [LONG, SHORT].iter().any(|&name| {
        bytes.len() > name.len()
            && bytes[..name.len()].eq_ignore_ascii_case(name)
            && bytes[name.len()].is_ascii_whitespace()
            && !bytes[name.len()..].contains(&b'>')
    })
}
/// Tries to read a complete opening tag at the very start of `s`.
///
/// A tag is `<think` or `<thinking` (ASCII case-insensitive) followed either
/// directly by `>` or by ASCII whitespace and, somewhere later, a `>`.
/// Returns the byte length of the tag including the closing `>`, or `None`
/// when `s` does not begin with a complete tag.
fn parse_open_tag(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    let begins_with = |name: &[u8]| {
        bytes.len() >= name.len() && bytes[..name.len()].eq_ignore_ascii_case(name)
    };
    // Try the longer name first so `<thinking` is not read as `<think` + "ing".
    let name_len = if begins_with(b"<thinking") {
        9
    } else if begins_with(b"<think") {
        6
    } else {
        return None;
    };
    // The matched name is pure ASCII, so `name_len` is a char boundary.
    match *bytes.get(name_len)? {
        b'>' => Some(name_len + 1),
        next if next.is_ascii_whitespace() => {
            s[name_len..].find('>').map(|gt| name_len + gt + 1)
        }
        _ => None,
    }
}
/// Locates the earliest closing tag (`</thinking>` or `</think>`, ASCII
/// case-insensitive) in `s`.
///
/// Returns the tag's `(start, end)` byte offsets, or `None` if neither tag
/// occurs.
fn find_close_tag(s: &str) -> Option<(usize, usize)> {
    const CLOSERS: [&str; 2] = ["</thinking>", "</think>"];
    // ASCII lowercasing never changes byte lengths, so offsets found in the
    // folded copy are valid offsets into `s` as well.
    let folded = s.to_ascii_lowercase();
    CLOSERS
        .iter()
        .filter_map(|&tag| folded.find(tag).map(|at| (at, at + tag.len())))
        .min_by_key(|&(at, _)| at)
}
/// Returns the largest byte offset that is at most `idx` and lies on a UTF-8
/// character boundary of `s`.
///
/// Offset 0 always qualifies, so the search cannot come up empty; an `idx`
/// past the end of `s` resolves to `s.len()`.
fn prev_boundary(s: &str, idx: usize) -> usize {
    (0..=idx)
        .rev()
        .find(|&at| s.is_char_boundary(at))
        .unwrap_or(0)
}
// Unit tests for `ThinkStripper`: single-chunk and split-chunk stripping,
// tag variants, the buffer bound, and `reset`.
#[cfg(test)]
mod tests {
use super::*;
// Text without any tag is returned unchanged.
#[test]
fn no_tags_passes_through() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("hello world"), "hello world");
}
// A whole block inside one chunk is removed; text on both sides is kept.
#[test]
fn complete_block_in_one_feed() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("a<think>secret</think>b"), "ab");
}
// An opening tag cut in half by a chunk boundary is held back, then
// recognized once the second chunk completes it.
#[test]
fn tag_split_across_feeds() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("hello <thi"), "hello ");
assert_eq!(s.feed("nk>secret</think> world"), " world");
}
// Multi-byte characters inside and after a block must not cause a slicing
// panic, and the text after the block is emitted intact.
#[test]
fn utf8_boundary_at_feed_edge_no_panic() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("abc<thi"), "abc");
assert_eq!(s.feed("nk>密</think>你好"), "你好");
}
// Tag names are matched regardless of ASCII case.
#[test]
fn case_insensitive_tag() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("<THINK>a</THINK>b"), "b");
let mut s2 = ThinkStripper::new();
assert_eq!(s2.feed("<Think>a</Think>b"), "b");
}
// The longer `<thinking>` spelling is stripped as well.
#[test]
fn thinking_tag_also_stripped() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("<thinking>a</thinking>b"), "b");
}
// An opening tag may carry attributes after the name.
#[test]
fn tag_with_attributes() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("<think key=\"v\">a</think>b"), "b");
}
// Only the buffer bound is asserted here; the value returned by `feed`
// is deliberately ignored.
#[test]
fn unclosed_block_capped_at_buf_limit() {
let mut s = ThinkStripper::new();
let junk = "x".repeat(100_000);
let input = format!("<think>{}", junk);
let _ = s.feed(&input);
assert!(s.buffered_bytes() <= THINK_BUF_MAX);
}
// `<` and `>` that do not form a think tag are ordinary text.
#[test]
fn literal_angle_bracket_outside_tag_preserved() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("a < b > c"), "a < b > c");
}
// Several blocks in one chunk are all removed.
#[test]
fn multiple_blocks() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("a<think>x</think>b<think>y</think>c"), "abc");
}
// An unclosed block leaves the stripper swallowing later text (the first
// assertion expects an empty result) until `reset` is called.
#[test]
fn reset_clears_stuck_inside_state() {
let mut s = ThinkStripper::new();
let _ = s.feed("prefix <think>still thinking when we got cut");
assert_eq!(
s.feed("hello from the next model"),
"",
"without reset, text leaks through the stuck inside=true state",
);
s.reset();
assert_eq!(
s.feed("hello from the next model"),
"hello from the next model"
);
}
// Calling `reset` on a fresh stripper changes nothing observable.
#[test]
fn reset_from_pristine_state_is_a_noop() {
let mut s = ThinkStripper::new();
s.reset();
assert_eq!(s.feed("plain text"), "plain text");
}
// `reset` also drops a held-back partial tag, so it cannot combine with
// text fed afterwards.
#[test]
fn reset_clears_partial_carry_at_feed_boundary() {
let mut s = ThinkStripper::new();
assert_eq!(s.feed("hello <thi"), "hello ");
assert!(s.buffered_bytes() > 0);
s.reset();
assert_eq!(s.buffered_bytes(), 0);
assert_eq!(s.feed("not a tag: <3"), "not a tag: <3");
}
}