use std::sync::OnceLock;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthChar;
/// Reports whether the CJK width mode is switched on.
///
/// The `ATOMCODE_CJK_WIDTH` environment variable is consulted on the first
/// call only; the answer is cached in a process-wide `OnceLock`, so later
/// changes to the variable have no effect. The mode is on when the variable
/// is exactly `"1"` or equals `"true"` ignoring ASCII case. An unset or
/// unreadable variable leaves it off.
fn is_cjk_locale() -> bool {
    static ENABLED: OnceLock<bool> = OnceLock::new();
    *ENABLED.get_or_init(|| match std::env::var("ATOMCODE_CJK_WIDTH") {
        Ok(value) => value == "1" || value.eq_ignore_ascii_case("true"),
        Err(_) => false,
    })
}
pub(crate) fn cell_char_width(ch: char) -> Option<usize> {
if is_cjk_locale() {
UnicodeWidthChar::width_cjk(ch)
} else {
UnicodeWidthChar::width(ch)
}
}
/// Display width, in terminal cells, of one grapheme cluster `g`.
///
/// * An empty string is 0 columns wide.
/// * A cluster made of a single scalar has that scalar's [`cell_char_width`]
///   (`None` counts as 0).
/// * A multi-scalar cluster containing a ZWJ (U+200D), VS-16 (U+FE0F), a
///   skin-tone modifier (U+1F3FB..=U+1F3FF) or a regional indicator
///   (U+1F1E6..=U+1F1FF) is treated as one emoji and is always 2 columns.
/// * Any other multi-scalar cluster is as wide as its widest scalar, so
///   zero-width combining marks add nothing.
pub(crate) fn cluster_width(g: &str) -> usize {
    let mut rest = g.chars();
    let lead = match rest.next() {
        Some(c) => c,
        None => return 0,
    };
    let lead_w = cell_char_width(lead).unwrap_or(0);
    if rest.as_str().is_empty() {
        // Single-scalar cluster: nothing to aggregate.
        return lead_w;
    }

    let is_emoji_marker = |c: char| {
        matches!(
            c,
            '\u{200D}' | '\u{FE0F}' | '\u{1F3FB}'..='\u{1F3FF}' | '\u{1F1E6}'..='\u{1F1FF}'
        )
    };
    if g.chars().any(is_emoji_marker) {
        return 2;
    }

    // No emoji marker: the cluster occupies as many cells as its widest member.
    rest.fold(lead_w, |widest, c| {
        widest.max(cell_char_width(c).unwrap_or(0))
    })
}
pub fn display_width(s: &str) -> usize {
s.graphemes(true).map(cluster_width).sum()
}
pub fn wrap_line_to_width(line: &str, max_cols: usize) -> Vec<String> {
if max_cols == 0 || line.is_empty() {
return vec![line.to_string()];
}
let mut chunks: Vec<String> = Vec::new();
let mut current = String::new();
let mut cur_width = 0usize;
let bytes = line.as_bytes();
let mut i = 0;
while i < line.len() {
if bytes[i] == 0x1b {
let start = i;
i += 1;
while i < line.len() {
let c = bytes[i];
i += 1;
if c.is_ascii_alphabetic() || c == b'~' {
break;
}
}
current.push_str(&line[start..i]);
continue;
}
let next = line[i..]
.grapheme_indices(true)
.nth(1)
.map(|(idx, _)| idx + i)
.unwrap_or(line.len());
let g = &line[i..next];
let w = cluster_width(g);
if cur_width + w > max_cols && !current.is_empty() {
chunks.push(std::mem::take(&mut current));
cur_width = 0;
}
current.push_str(g);
cur_width += w;
i = next;
}
if !current.is_empty() {
chunks.push(current);
}
if chunks.is_empty() {
chunks.push(String::new());
}
chunks
}
/// Soft-wraps `text` to `max_cols` cells and locates the cursor in the result.
///
/// Returns `(rows, cursor_row, cursor_col)`.
///
/// * `"\n"` and `"\r\n"` start a new row and are not stored in any row.
///   (`"\r\n"` is a single grapheme cluster, so it has to be matched
///   explicitly.)
/// * A tab is measured as `crate::render::cell::SOFT_TAB_WIDTH` cells; every
///   other cluster is measured with [`cluster_width`].
/// * A cluster that would overflow the current row moves to a fresh row,
///   unless the current row is still empty (an oversized cluster stays put).
/// * `cursor_byte` is a byte offset into `text`. The cursor is reported at the
///   position of the first cluster starting at or after that offset, so an
///   offset inside a cluster snaps to the next cluster boundary. An offset at
///   or past the end of `text` puts the cursor after the last cluster.
/// * With `max_cols == 0` nothing can be laid out: one empty row is returned
///   and the cursor is at `(0, 0)`.
pub fn wrap_with_cursor(
    text: &str,
    max_cols: usize,
    cursor_byte: usize,
) -> (Vec<String>, usize, usize) {
    if max_cols == 0 {
        return (vec![String::new()], 0, 0);
    }

    // `rows` holds finished rows; `row` is the one being filled, so the index
    // of the current row is always `rows.len()`.
    let mut rows: Vec<String> = Vec::new();
    let mut row = String::new();
    let mut col = 0usize;
    let mut cursor: Option<(usize, usize)> = None;

    for (byte_pos, g) in text.grapheme_indices(true) {
        let is_newline = g == "\n" || g == "\r\n";
        let w = if is_newline {
            0
        } else if g == "\t" {
            crate::render::cell::SOFT_TAB_WIDTH
        } else {
            cluster_width(g)
        };

        // Wrap before placing the cursor, so a cursor sitting on the cluster
        // that overflows is reported at the start of the new row.
        if !is_newline && col + w > max_cols && !row.is_empty() {
            rows.push(std::mem::take(&mut row));
            col = 0;
        }

        // `>=` rather than `==`: an offset inside the previous cluster lands
        // here instead of being missed and falling through to end-of-text.
        if cursor.is_none() && byte_pos >= cursor_byte {
            cursor = Some((rows.len(), col));
        }

        if is_newline {
            rows.push(std::mem::take(&mut row));
            col = 0;
        } else {
            row.push_str(g);
            col += w;
        }
    }

    // Cursor at or beyond the end of the text: place it after the last cluster.
    let (cursor_row, cursor_col) = cursor.unwrap_or((rows.len(), col));
    rows.push(row);
    (rows, cursor_row, cursor_col)
}
/// Returns the part of `s` visible in a window that starts at display column
/// `start_col` and is at most `max_cols` cells wide.
///
/// Clusters are never split. A cluster that begins before `start_col` is
/// dropped entirely, even if it straddles the window edge, and no padding is
/// inserted for it. A zero-width cluster sitting exactly at `start_col` is
/// dropped as well, unless a visible cluster has already been taken. Output
/// stops at the first cluster that would push the result past `max_cols`.
pub fn slice_cols(s: &str, start_col: usize, max_cols: usize) -> String {
    let mut out = String::new();
    let mut used = 0usize;
    // Column at which the cluster currently being examined begins.
    let mut cursor = 0usize;

    for cluster in s.graphemes(true) {
        let width = cluster_width(cluster);
        let before_window = cursor < start_col || cursor + width <= start_col;
        if !before_window {
            if used + width > max_cols {
                break;
            }
            out.push_str(cluster);
            used += width;
        }
        cursor += width;
    }
    out
}
/// Returns the longest prefix of `s`, cut on grapheme-cluster boundaries, whose
/// display width does not exceed `max_cols`.
///
/// A cluster that does not fit in the remaining space ends the prefix; it is
/// never split. `max_cols == 0` always yields an empty string.
pub fn truncate_to_width(s: &str, max_cols: usize) -> String {
    if max_cols == 0 {
        return String::new();
    }

    let mut kept = String::with_capacity(s.len());
    // Columns still available; counts down as clusters are accepted.
    let mut room = max_cols;
    for cluster in s.graphemes(true) {
        let width = cluster_width(cluster);
        if width > room {
            break;
        }
        room -= width;
        kept.push_str(cluster);
    }
    kept
}
/// Shortens `s` to at most `max_cols` display columns, marking the cut with a
/// trailing `…`.
///
/// * `max_cols == 0` yields an empty string.
/// * A string that already fits is returned unchanged.
/// * Otherwise the result is a cluster-aligned prefix of `s` followed by `…`,
///   with the prefix sized so that prefix and ellipsis together stay within
///   `max_cols`. The ellipsis is measured with [`display_width`] rather than
///   assumed to be one column, because its width depends on the active width
///   mode.
/// * If even the ellipsis does not fit, the result is a plain
///   [`truncate_to_width`] prefix with no marker.
pub fn truncate_with_ellipsis(s: &str, max_cols: usize) -> String {
    const ELLIPSIS: &str = "…";

    if max_cols == 0 {
        return String::new();
    }
    if display_width(s) <= max_cols {
        return s.to_string();
    }

    let ellipsis_w = display_width(ELLIPSIS);
    if ellipsis_w > max_cols {
        // No room for the marker itself; staying within the budget wins.
        return truncate_to_width(s, max_cols);
    }

    // The budget may be 0 (e.g. `max_cols == 1`), in which case only the
    // ellipsis is emitted and the result still fits.
    let mut acc = truncate_to_width(s, max_cols - ellipsis_w);
    acc.push_str(ELLIPSIS);
    acc
}
/// Shortens a filesystem path to at most `max_cols` display columns, keeping
/// the last path segment visible.
///
/// * `max_cols == 0` yields an empty string.
/// * A path that already fits is returned unchanged.
/// * Otherwise everything up to the last `/` or `\` is replaced by `.../`. A
///   path with no separator is treated as a single segment and still gets the
///   `.../` prefix.
/// * If `.../` plus the last segment is still too wide, the segment is
///   truncated on a cluster boundary to the space left after the prefix.
/// * If `max_cols` leaves no room for the prefix plus at least one column of
///   the segment, the prefix is dropped and as much of the last segment as
///   fits is returned, so the result never exceeds `max_cols`.
pub fn truncate_path(path: &str, max_cols: usize) -> String {
    const ELLIPSIS_PREFIX: &str = ".../";

    if max_cols == 0 {
        return String::new();
    }
    if display_width(path) <= max_cols {
        return path.to_string();
    }

    // Both separators are single ASCII bytes, so `i + 1` is a char boundary.
    let last_segment = path
        .rfind(|c: char| matches!(c, '/' | '\\'))
        .map_or(path, |i| &path[i + 1..]);

    let candidate = format!("{}{}", ELLIPSIS_PREFIX, last_segment);
    if display_width(&candidate) <= max_cols {
        return candidate;
    }

    let prefix_w = display_width(ELLIPSIS_PREFIX);
    if max_cols <= prefix_w {
        // The prefix alone would use up (or exceed) the budget; showing part
        // of the name is more useful than overflowing the column limit.
        return truncate_to_width(last_segment, max_cols);
    }

    let truncated_last = truncate_to_width(last_segment, max_cols - prefix_w);
    format!("{}{}", ELLIPSIS_PREFIX, truncated_last)
}
// Unit tests for the display-width helpers in this file: width measurement,
// truncation, column slicing, wrapping (with and without a cursor), and path
// shortening.
#[cfg(test)]
mod tests {
use super::*;
// --- display_width basics ---
#[test]
fn ascii_width_equals_len() {
assert_eq!(display_width("hello"), 5);
}
#[test]
fn cjk_char_is_width_two() {
assert_eq!(display_width("你好"), 4);
assert_eq!(display_width("a你b"), 4); // 1 + 2 + 1
}
#[test]
fn emoji_width_is_two() {
assert_eq!(display_width("👍"), 2);
}
// --- truncate_to_width tests ---
#[test]
fn truncate_to_width_respects_boundary() {
// 11-char ASCII input, limit width 5 → first 5 chars
assert_eq!(truncate_to_width("hello world", 5), "hello");
}
#[test]
fn truncate_to_width_cjk_never_splits_char() {
// "你好world" = 2+2+1+1+1+1+1 = 9 cols; limit 3 → "你" (width 2), not "你\xXX"
let out = truncate_to_width("你好world", 3);
assert_eq!(out, "你");
assert_eq!(display_width(&out), 2);
}
#[test]
fn truncate_to_width_zero_width_safe() {
// A zero-column budget yields an empty string.
assert_eq!(truncate_to_width("abc", 0), "");
}
#[test]
fn truncate_to_width_exact_fit() {
// "你好" = 4 cols, which exactly fills a 4-col budget.
assert_eq!(truncate_to_width("你好", 4), "你好");
}
#[test]
fn truncate_to_width_preserves_under_limit() {
assert_eq!(truncate_to_width("hi", 10), "hi");
}
// --- slice_cols tests ---
#[test]
fn slice_cols_window_midway() {
// "abcdefghij" start 3, width 4 → "defg"
assert_eq!(slice_cols("abcdefghij", 3, 4), "defg");
}
#[test]
fn slice_cols_cjk_straddle_skipped() {
// "你好world" = 2+2+1+1+1+1+1. start_col=1 straddles "你" → skip it.
// Then start at col 2 with 4 cols → "好wo".
assert_eq!(slice_cols("你好world", 1, 4), "好wo");
}
#[test]
fn slice_cols_past_end_empty() {
// A window that starts beyond the end of the text is empty.
assert_eq!(slice_cols("abc", 10, 5), "");
}
#[test]
fn slice_cols_start_zero_matches_truncate() {
assert_eq!(slice_cols("hello world", 0, 5), "hello");
}
// --- wrap_with_cursor tests ---
#[test]
fn wrap_with_cursor_short_text_single_row() {
let (lines, r, c) = wrap_with_cursor("hi", 10, 2);
assert_eq!(lines, vec!["hi".to_string()]);
assert_eq!((r, c), (0, 2));
}
#[test]
fn wrap_with_cursor_overflow_moves_to_next_row() {
let (lines, r, c) = wrap_with_cursor("abcdef", 3, 3);
assert_eq!(lines, vec!["abc".to_string(), "def".to_string()]);
// cursor at byte 3 (between abc and def) → start of row 1
assert_eq!((r, c), (1, 0));
}
#[test]
fn wrap_with_cursor_honours_explicit_newline() {
// Byte 4 is the 'd' on the second row, one column in.
let (lines, r, c) = wrap_with_cursor("ab\ncd", 10, 4);
assert_eq!(lines, vec!["ab".to_string(), "cd".to_string()]);
assert_eq!((r, c), (1, 1));
}
#[test]
fn wrap_with_cursor_end_of_buffer() {
// A cursor at text.len() sits after the last character.
let (lines, r, c) = wrap_with_cursor("hello", 10, 5);
assert_eq!(lines, vec!["hello".to_string()]);
assert_eq!((r, c), (0, 5));
}
#[test]
fn wrap_with_cursor_cjk_widths() {
// "你好" = 4 cols. max=3 → wraps after "你" (width 2 fits, next
// char 好 (w=2) would overflow 2+2=4>3, so wrap).
let (lines, _, _) = wrap_with_cursor("你好", 3, 0);
assert_eq!(lines, vec!["你".to_string(), "好".to_string()]);
}
#[test]
fn wrap_with_cursor_tab_counts_as_soft_tab_width() {
// Pasted, tab-indented code: a `\t` is DRAWN as SOFT_TAB_WIDTH (4)
// columns by `push_str_cells`, so the cursor-column model must agree.
// Regression: tabs were measured as 0 cols, so the caret rendered
// SOFT_TAB_WIDTH columns left of the real insertion point on every
// tab-indented line.
let tab_w = crate::render::cell::SOFT_TAB_WIDTH;
// Two physical lines; the 2nd is indented with two tabs then "cd".
let text = "ab\n\t\tcd";
let (lines, row, col) = wrap_with_cursor(text, 80, text.len());
assert_eq!(lines.len(), 2);
assert_eq!(row, 1, "cursor on the 2nd line");
// end-of-line column = 2 tabs * tab_w + width("cd")
assert_eq!(col, tab_w * 2 + 2);
}
// --- truncate_path tests ---
#[test]
fn truncate_path_short_path_unchanged() {
// Path fits within max_cols → returned as-is.
assert_eq!(truncate_path("~/foo", 20), "~/foo");
}
#[test]
fn truncate_path_keeps_last_segment() {
// Long path: keep last segment with ".../" prefix.
assert_eq!(
truncate_path("~/Documents/WPSDrive/NotLoginPage", 20),
".../NotLoginPage"
);
}
#[test]
fn truncate_path_exact_fit() {
// ".../NotLoginPage" = 16 cols. At max_cols = 16 it should fit.
assert_eq!(
truncate_path("~/Documents/WPSDrive/NotLoginPage", 16),
".../NotLoginPage"
);
}
#[test]
fn truncate_path_very_tight_budget() {
// Even a single-char last segment + ".../" = 5 cols should fit.
assert_eq!(truncate_path("~/a/b/c", 6), ".../c");
}
#[test]
fn truncate_path_last_segment_too_long() {
// Last segment itself exceeds budget after ".../" prefix.
// ".../" = 4 cols, budget for last segment = 10 - 4 = 6 cols.
// "NotLoginPage" = 12 cols → truncated to 6 cols.
assert_eq!(
truncate_path("~/Documents/WPSDrive/NotLoginPage", 10),
".../NotLog"
);
}
#[test]
fn truncate_path_no_separator() {
// No path separators → treat entire string as the "last segment".
// "verylongname" = 12 cols, max 8 → ".../" + 4 cols of name.
assert_eq!(truncate_path("verylongname", 8), ".../very");
}
#[test]
fn truncate_path_windows_backslash() {
// Windows paths with backslash separators.
assert_eq!(
truncate_path(r"~\Documents\WPSDrive\NotLoginPage", 20),
".../NotLoginPage"
);
}
#[test]
fn truncate_path_zero_cols() {
assert_eq!(truncate_path("~/foo", 0), "");
}
#[test]
fn truncate_path_cjk_segment() {
// CJK project name: "项目" = 4 cols, ".../项目" = 8 cols.
assert_eq!(
truncate_path("~/Documents/工作/项目", 20),
".../项目"
);
}
#[test]
fn truncate_path_cjk_tight_budget() {
// "项目" = 4 cols, ".../" = 4 cols, total = 8.
assert_eq!(truncate_path("~/a/b/项目", 8), ".../项目");
}
// --- wrap_line_to_width escape-sequence passthrough tests ---
#[test]
fn wrap_line_to_width_truecolor_sgr_passthrough_zero_width() {
// Truecolor open `\x1b[38;2;198;120;221m` is 19 bytes of escape sequence.
// If the SGR-passthrough loop ever stops handling it correctly, those
// bytes leak into column accounting and downstream wrapping shatters.
// Pin the invariant: the visible content `let x = 1;` is 10 cols, so
// it must fit in a 10-col budget with no wrap.
let tinted = "\x1b[38;2;198;120;221mlet\x1b[23;39m x = 1;";
let chunks = wrap_line_to_width(tinted, 10);
assert_eq!(chunks.len(), 1, "must not wrap when visible width fits, got: {:?}", chunks);
// The tinted line is returned verbatim — escapes still present.
assert!(chunks[0].contains("\x1b[38;2;198;120;221m"));
}
#[test]
fn wrap_line_to_width_truecolor_with_italic_passthrough() {
// `\x1b[3;38;2;124;132;153m` is the COMMENT SGR — 3 (italic) plus
// truecolor fg. Same passthrough guarantee.
let tinted = "\x1b[3;38;2;124;132;153m// comment\x1b[23;39m";
let chunks = wrap_line_to_width(tinted, 10);
assert_eq!(chunks.len(), 1);
}
// --- grapheme-cluster width tests ---
//
// These tests pin terminal-display semantics for multi-codepoint emoji
// clusters: ZWJ sequences (family emoji), VS-16 (text→emoji presentation)
// and skin-tone modifiers are rendered by modern terminals (iTerm2,
// Kitty, WezTerm, recent Terminal.app) as a single emoji of width 2.
// Summing per-char widths gives the wrong answer; cursor placement and
// truncation must use the cluster-aggregated width.
#[test]
fn display_width_zwj_family_is_one_emoji() {
// 👨👩👦 = U+1F468 U+200D U+1F469 U+200D U+1F466
// Sum of per-char widths = 2+0+2+0+2 = 6. As one ZWJ-joined emoji = 2.
let family = "👨\u{200D}👩\u{200D}👦";
assert_eq!(display_width(family), 2);
}
#[test]
fn display_width_heart_with_vs16_is_emoji_width() {
// ❤️ = U+2764 (width 1, text presentation) + U+FE0F (VS-16, forces
// emoji presentation). Rendered as an emoji = width 2.
assert_eq!(display_width("❤\u{FE0F}"), 2);
}
#[test]
fn display_width_emoji_with_skin_tone_modifier() {
// 👍🏽 = U+1F44D U+1F3FD. Sum = 4; cluster = 2.
assert_eq!(display_width("👍\u{1F3FD}"), 2);
}
#[test]
fn truncate_to_width_does_not_split_zwj_cluster() {
// Cluster has display width 2. Budget of 2 must accept the whole
// cluster, not return "👨\u{200D}" (a broken cluster — the trailing
// ZWJ leaks into whatever string is concatenated after).
let family = "👨\u{200D}👩\u{200D}👦";
assert_eq!(truncate_to_width(family, 2), family);
}
#[test]
fn truncate_to_width_drops_cluster_that_does_not_fit() {
// Budget < cluster width → drop the whole cluster (don't emit half).
let family = "👨\u{200D}👩\u{200D}👦";
assert_eq!(truncate_to_width(family, 1), "");
}
#[test]
fn wrap_line_to_width_does_not_split_zwj_cluster() {
// The whole cluster is 2 cols, so it fits a 2-col chunk intact.
let family = "👨\u{200D}👩\u{200D}👦";
let chunks = wrap_line_to_width(family, 2);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0], family);
}
#[test]
fn wrap_line_to_width_pushes_zwj_cluster_to_next_chunk() {
// "ab" fills the 2-col chunk; the cluster moves to the next chunk whole.
let family = "👨\u{200D}👩\u{200D}👦";
let input = format!("ab{family}");
let chunks = wrap_line_to_width(&input, 2);
assert_eq!(chunks.len(), 2);
assert_eq!(chunks[0], "ab");
assert_eq!(chunks[1], family);
}
#[test]
fn wrap_line_to_width_sgr_passthrough_still_works() {
// The opening SGR must stay attached to, and precede, the text it colours.
let tinted = "\x1b[38;2;198;120;221mhello\x1b[0m world";
let chunks = wrap_line_to_width(tinted, 5);
assert!(chunks[0].contains("\x1b[38;2;198;120;221m"));
assert!(chunks[0].contains("hello"));
let h_pos = chunks[0].find('h').unwrap();
let sgr_pos = chunks[0].find("\x1b[").unwrap();
assert!(sgr_pos < h_pos, "SGR must precede 'hello' in chunk 0");
}
#[test]
fn slice_cols_does_not_split_zwj_cluster() {
// Window starts at col 2, exactly where the 2-col cluster begins.
let family = "👨\u{200D}👩\u{200D}👦";
let input = format!("ab{family}cd");
let out = slice_cols(&input, 2, 2);
assert_eq!(out, family);
}
#[test]
fn slice_cols_cluster_straddling_start_is_skipped() {
// Window starts at col 3, in the middle of the cluster (cols 2-3):
// the cluster is skipped and the slice begins with what follows it.
let family = "👨\u{200D}👩\u{200D}👦";
let input = format!("ab{family}cd");
let out = slice_cols(&input, 3, 2);
assert_eq!(out, "cd");
}
#[test]
fn wrap_with_cursor_does_not_split_zwj_cluster() {
// "a" (1 col) + cluster (2 cols) = 3 cols, which fits one 3-col row.
let family = "👨\u{200D}👩\u{200D}👦";
let input = format!("a{family}");
let cursor_byte = input.len();
let (lines, r, _c) = wrap_with_cursor(&input, 3, cursor_byte);
assert_eq!(lines.len(), 1);
assert_eq!(lines[0], input);
assert_eq!(r, 0);
}
#[test]
fn wrap_with_cursor_pushes_zwj_cluster_to_new_row() {
// "ab" fills the 2-col row; the cluster wraps to the next row whole.
let family = "👨\u{200D}👩\u{200D}👦";
let input = format!("ab{family}");
let (lines, _r, _c) = wrap_with_cursor(&input, 2, 0);
assert_eq!(lines.len(), 2);
assert_eq!(lines[0], "ab");
assert_eq!(lines[1], family);
}
}