/// Best-effort conversion of a tool-call argument string into parseable JSON.
///
/// Strategies are tried in order and the first one that yields a result wins:
/// 1. pre-escape under-escaped Windows paths and accept the text if it parses;
/// 2. run [`repair_json`] on the pre-escaped text and accept it if it parses;
/// 3. for the `edit_file` tool only, use the positional field extractor;
/// 4. scrape `key: value` pairs and accept them if at least one was found.
///
/// When everything fails the ORIGINAL `args` is returned unchanged, so the
/// caller surfaces the real parse error instead of a misleading stub.
pub fn repair_tool_args(tool_name: &str, args: &str) -> String {
    /// True when `candidate` is syntactically valid JSON.
    fn parses(candidate: &str) -> bool {
        serde_json::from_str::<serde_json::Value>(candidate).is_ok()
    }

    let escaped = pre_escape_windows_paths_in_json(args);
    if parses(&escaped) {
        return escaped;
    }

    let fixed = repair_json(&escaped);
    if parses(&fixed) {
        return fixed;
    }

    if tool_name == "edit_file" {
        let salvaged = extract_edit_file_args(&escaped)
            .and_then(|value| serde_json::to_string(&value).ok());
        if let Some(json) = salvaged {
            return json;
        }
    }

    let fields = extract_json_fields(&escaped);
    let has_fields = fields.as_object().map_or(false, |map| !map.is_empty());
    if has_fields {
        if let Ok(json) = serde_json::to_string(&fields) {
            return json;
        }
    }

    args.to_string()
}
/// Copy `s`, rewriting the body of every double-quoted span that
/// `looks_like_windows_path` flags so its ambiguous backslash escapes are
/// doubled (see `rewrite_windows_path_body`). Text outside quotes and quoted
/// bodies that are not flagged are copied verbatim. An unterminated final
/// quote is tolerated: its body runs to the end of the input and no closing
/// quote is added.
fn pre_escape_windows_paths_in_json(s: &str) -> String {
    /// Index of the first unescaped `"` at or after `from`, or `text.len()`
    /// when the string never closes. A backslash skips the char after it.
    fn closing_quote_index(text: &[char], from: usize) -> usize {
        let mut pos = from;
        while pos < text.len() {
            match text[pos] {
                '\\' if pos + 1 < text.len() => pos += 2,
                '"' => break,
                _ => pos += 1,
            }
        }
        pos
    }

    let text: Vec<char> = s.chars().collect();
    let total = text.len();
    let mut escaped = String::with_capacity(total + 16);
    let mut cursor = 0;

    while cursor < total {
        let current = text[cursor];
        if current != '"' {
            escaped.push(current);
            cursor += 1;
            continue;
        }

        let open = cursor + 1;
        let close = closing_quote_index(&text, open);
        let literal: String = text[open..close].iter().collect();

        escaped.push('"');
        if looks_like_windows_path(&literal) {
            rewrite_windows_path_body(&literal, &mut escaped);
        } else {
            escaped.push_str(&literal);
        }

        if close < total {
            // A real closing quote was found: emit it and step past it.
            escaped.push('"');
            cursor = close + 1;
        } else {
            cursor = close;
        }
    }

    escaped
}
/// Reports whether `s` contains an **under-escaped** Windows drive-letter
/// prefix: an ASCII letter, `:`, and exactly one backslash (a second
/// backslash right after means the path is already escaped and is skipped).
///
/// The prefix only counts in a path-shaped position: at the very start of
/// `s`, or directly after `\` (as in the long-path form `\\?\D:\…`), `'`,
/// or `"` (a quoted path literal embedded in code). Without this context
/// guard, prose or source text such as `category:\n` or `class A:\n` would
/// be mistaken for a drive path.
fn looks_like_windows_path(s: &str) -> bool {
    let raw = s.as_bytes();
    raw.windows(3).enumerate().any(|(at, win)| {
        let drive_prefix = win[0].is_ascii_alphabetic() && win[1] == b':' && win[2] == b'\\';
        if !drive_prefix {
            return false;
        }
        // `X:\\` is the correctly escaped form; nothing to rescue here.
        if raw.get(at + 3) == Some(&b'\\') {
            return false;
        }
        // Accept only at the start of the body or right after a
        // path-shaped delimiter; anything else is treated as prose.
        at == 0 || matches!(raw[at - 1], b'\\' | b'"' | b'\'')
    })
}
/// Append `body` (the text between, but not including, a JSON string's
/// quotes) to `out`, doubling the backslash of every `\t`, `\n`, `\r`, `\b`
/// and `\f` pair. In a body already judged to be a Windows path those pairs
/// are taken to be a literal backslash plus a letter, so doubling makes a
/// JSON parser decode them back to the two characters.
///
/// Every other backslash pair is copied through unchanged:
/// - `\\` is already escaped;
/// - `\"` and `\/` are JSON escapes with no path ambiguity;
/// - `\u` is kept so a Unicode escape such as `\u0041` still decodes to `A`
///   (doubling it would leave the literal text `\u0041` in the path);
/// - any other `\X` is an invalid JSON escape, left for `repair_json` to fix.
///
/// A lone trailing backslash is copied as-is.
fn rewrite_windows_path_body(body: &str, out: &mut String) {
    let mut rest = body.chars();
    while let Some(ch) = rest.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match rest.next() {
            // Backslash was the last char of the body.
            None => out.push('\\'),
            // Ambiguous single-letter escape: emit `\\` + letter.
            Some(letter @ ('t' | 'n' | 'r' | 'b' | 'f')) => {
                out.push_str("\\\\");
                out.push(letter);
            }
            // `\\`, `\"`, `\/`, `\u` and invalid escapes: keep the pair verbatim.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
        }
    }
}
/// For each position in `chars`, true iff that char is structural JSON
/// (outside any string body). The surrounding `"` chars themselves count as
/// structural; everything between them — including escape pairs such as
/// `\"` and `\n` — is non-structural, so structural passes do not mistake
/// string content for grammar. An unterminated string marks everything up
/// to the end of input as non-structural.
fn structural_mask(chars: &[char]) -> Vec<bool> {
    let mut mask = vec![true; chars.len()];
    let mut inside = false;
    let mut pos = 0;
    while pos < chars.len() {
        let ch = chars[pos];
        if !inside {
            inside = ch == '"';
            pos += 1;
        } else if ch == '\\' && pos + 1 < chars.len() {
            // Escape pair: both chars are string content.
            mask[pos] = false;
            mask[pos + 1] = false;
            pos += 2;
        } else if ch == '"' {
            inside = false;
            pos += 1;
        } else {
            mask[pos] = false;
            pos += 1;
        }
    }
    mask
}

/// Double the backslash of every escape JSON does not define.
///
/// JSON only allows `\\ \" \/ \n \r \t \b \f \uXXXX`. Models often write
/// regex such as `@app\.(get|post)`; `\.` becomes `\\.` so the parser reads
/// a literal backslash followed by the dot. A lone trailing backslash is
/// copied unchanged.
fn repair_json_double_invalid_escapes(text: &str) -> String {
    const VALID_ESCAPES: [char; 9] = ['\\', '"', '/', 'n', 'r', 't', 'b', 'f', 'u'];
    let mut out = String::with_capacity(text.len() + 20);
    let mut chars = text.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match chars.next() {
            Some(next) => {
                if !VALID_ESCAPES.contains(&next) {
                    out.push('\\');
                }
                out.push('\\');
                out.push(next);
            }
            None => out.push('\\'),
        }
    }
    out
}

/// Trim surrounding whitespace and a wrapping markdown code fence
/// (```` ```json ```` or a bare ```` ``` ````).
fn repair_json_strip_fences(text: &str) -> &str {
    let mut body = text.trim();
    if let Some(rest) = body.strip_prefix("```json") {
        body = rest;
    }
    if let Some(rest) = body.strip_prefix("```") {
        body = rest;
    }
    if let Some(rest) = body.strip_suffix("```") {
        body = rest;
    }
    body.trim()
}

/// Insert a comma where one quoted token is followed, after whitespace only,
/// by a quoted key (`"…" "key":`). This scan is a heuristic over raw quote
/// characters and is not string-aware.
fn repair_json_insert_missing_commas(text: &str) -> String {
    let mut chars: Vec<char> = text.chars().collect();
    let len = chars.len();
    let mut gaps = Vec::new();
    for quote in 0..len {
        if chars[quote] != '"' {
            continue;
        }
        let after = quote + 1;
        let mut next_quote = after;
        while next_quote < len && chars[next_quote].is_whitespace() {
            next_quote += 1;
        }
        // Require at least one whitespace char between the two quotes.
        if next_quote >= len || chars[next_quote] != '"' || next_quote == after {
            continue;
        }
        let mut key_end = next_quote + 1;
        while key_end < len && chars[key_end] != '"' {
            key_end += 1;
        }
        if key_end + 1 >= len {
            continue;
        }
        let mut colon = key_end + 1;
        while colon < len && chars[colon].is_whitespace() {
            colon += 1;
        }
        if colon < len && chars[colon] == ':' {
            gaps.push(after);
        }
    }
    // Insert back-to-front so earlier indices stay valid.
    for at in gaps.into_iter().rev() {
        chars.insert(at, ',');
    }
    chars.into_iter().collect()
}

/// Wrap bare identifiers in double quotes when they follow a structural `{`
/// or `,` and are themselves followed by `:`. Other words (numbers, `true`,
/// …) are copied unchanged.
fn repair_json_quote_bare_keys(text: &str) -> String {
    let chars: Vec<char> = text.chars().collect();
    let mask = structural_mask(&chars);
    let mut out = String::with_capacity(text.len() + 20);
    let mut pos = 0;
    while pos < chars.len() {
        let opens_member = mask[pos] && matches!(chars[pos], '{' | ',');
        out.push(chars[pos]);
        pos += 1;
        if !opens_member {
            continue;
        }
        while pos < chars.len() && chars[pos].is_whitespace() {
            out.push(chars[pos]);
            pos += 1;
        }
        if pos >= chars.len() || !chars[pos].is_alphanumeric() {
            continue;
        }
        let start = pos;
        while pos < chars.len() && (chars[pos].is_alphanumeric() || chars[pos] == '_') {
            pos += 1;
        }
        let word: String = chars[start..pos].iter().collect();
        let is_key = chars[pos..].iter().find(|c| !c.is_whitespace()) == Some(&':');
        if is_key {
            out.push('"');
            out.push_str(&word);
            out.push('"');
        } else {
            out.push_str(&word);
        }
    }
    out
}

/// Drop every structural `,` whose next non-whitespace char is a structural
/// `}` or `]`. Whitespace between the comma and the closer is kept, so
/// `{"a": 1, }` becomes `{"a": 1 }`. The pass repeats until nothing changes,
/// so runs like `,,}` collapse fully.
fn repair_json_strip_trailing_commas(text: String) -> String {
    let mut current = text;
    loop {
        let chars: Vec<char> = current.chars().collect();
        let mask = structural_mask(&chars);
        let mut next = String::with_capacity(current.len());
        let mut changed = false;
        for (idx, &ch) in chars.iter().enumerate() {
            if mask[idx] && ch == ',' {
                // First char after the comma that is not structural whitespace.
                let follower =
                    (idx + 1..chars.len()).find(|&j| !(mask[j] && chars[j].is_whitespace()));
                if let Some(j) = follower {
                    if mask[j] && matches!(chars[j], '}' | ']') {
                        changed = true;
                        continue;
                    }
                }
            }
            next.push(ch);
        }
        if !changed {
            return next;
        }
        current = next;
    }
}

/// Attempt to repair common JSON issues from LLM output. Passes run in this
/// order:
/// - invalid backslash escapes are doubled;
/// - surrounding whitespace and markdown code fences are stripped;
/// - single quotes become double quotes, only when the text contains no
///   double quote at all;
/// - missing commas between a quoted token and a following quoted key;
/// - unquoted keys are quoted;
/// - trailing commas before `}` or `]`, including when whitespace or
///   newlines separate the comma from the closer;
/// - text not starting with `{` or `[` is wrapped in `{…}`;
/// - missing closing braces are appended.
///
/// Raw (unescaped) newlines inside string values are NOT rewritten here.
/// The result is not guaranteed to parse; callers must validate it.
pub fn repair_json(s: &str) -> String {
    let escaped = repair_json_double_invalid_escapes(s);
    let mut result = repair_json_strip_fences(&escaped).to_string();

    // Simple heuristic: only swap quote styles when there is no double quote
    // anywhere, so apostrophes inside normal strings are not broken.
    if !result.contains('"') && result.contains('\'') {
        result = result.replace('\'', "\"");
    }

    result = repair_json_insert_missing_commas(&result);
    result = repair_json_quote_bare_keys(&result);
    result = repair_json_strip_trailing_commas(result);

    if !result.starts_with('{') && !result.starts_with('[') {
        result = format!("{{{}}}", result);
    }

    // Balance braces, counting only structural ones so `{` inside string
    // values (e.g. source snippets) does not cause over-closing.
    let chars: Vec<char> = result.chars().collect();
    let mask = structural_mask(&chars);
    let structural_count = |target: char| {
        chars
            .iter()
            .zip(mask.iter())
            .filter(|&(&c, &structural)| structural && c == target)
            .count()
    };
    let missing = structural_count('{').saturating_sub(structural_count('}'));
    result.extend(std::iter::repeat('}').take(missing));
    result
}
/// Last-resort scraper: walk `s` and collect every `key: value` pair it can
/// recognise into a flat JSON object, without requiring valid JSON.
///
/// Keys may be double-quoted or bare identifiers. Values are read as a quoted
/// string (JSON escapes decoded), a `true`/`false` word, a number, or an
/// unquoted run of text ending at `,`, `}`, `]` or a newline. A later
/// occurrence of a key overwrites an earlier one. Always returns a
/// `Value::Object`, which is empty when nothing was recognised.
pub fn extract_json_fields(s: &str) -> serde_json::Value {
let mut map = serde_json::Map::new();
let chars: Vec<char> = s.chars().collect();
let len = chars.len();
let mut i = 0;
while i < len {
// Step 1: read a candidate key (quoted or bare); any other char is skipped.
let key = if chars[i] == '"' {
let start = i + 1;
i = start;
// Quoted key: runs to the next `"`; backslash escapes are not interpreted here.
while i < len && chars[i] != '"' {
i += 1;
}
if i >= len {
break;
}
let k: String = chars[start..i].iter().collect();
i += 1;
k
} else if chars[i].is_alphabetic() || chars[i] == '_' {
let start = i;
while i < len && (chars[i].is_alphanumeric() || chars[i] == '_') {
i += 1;
}
chars[start..i].iter().collect()
} else {
i += 1;
continue;
};
while i < len && chars[i].is_whitespace() {
i += 1;
}
// Step 2: a key must be followed by `:`; otherwise drop the candidate and
// resume scanning from the current position.
if i >= len || chars[i] != ':' {
continue;
}
i += 1;
while i < len && chars[i].is_whitespace() {
i += 1;
}
if i >= len {
break;
}
// Step 3: read the value, dispatching on its first character.
if chars[i] == '"' {
let start = i + 1;
i = start;
while i < len && chars[i] != '"' {
// Skip the char after a backslash so an escaped quote does not end the value.
if chars[i] == '\\' {
i += 1;
}
i += 1;
}
// `i` may have stepped past `len` after a trailing backslash; clamp the slice.
let raw: String = chars[start..i.min(len)].iter().collect();
let val = unescape_json_string_contents(&raw);
map.insert(key, serde_json::json!(val));
if i < len {
i += 1;
}
} else if chars[i] == 't' || chars[i] == 'f' {
// Boolean
let start = i;
while i < len && chars[i].is_alphabetic() {
i += 1;
}
let word: String = chars[start..i].iter().collect();
match word.as_str() {
"true" => {
map.insert(key, serde_json::json!(true));
}
"false" => {
map.insert(key, serde_json::json!(false));
}
// Any other word starting with `t`/`f` is stored as a plain string.
_ => {
map.insert(key, serde_json::json!(word));
}
}
} else if chars[i].is_ascii_digit() || chars[i] == '-' {
// Number
let start = i;
while i < len && (chars[i].is_ascii_digit() || chars[i] == '.' || chars[i] == '-') {
i += 1;
}
let num_str: String = chars[start..i].iter().collect();
// Integers are preferred; text that parses as neither i64 nor f64 is dropped.
if let Ok(n) = num_str.parse::<i64>() {
map.insert(key, serde_json::json!(n));
} else if let Ok(f) = num_str.parse::<f64>() {
map.insert(key, serde_json::json!(f));
}
} else {
// Unquoted string value — read until , } ]
let start = i;
while i < len && !matches!(chars[i], ',' | '}' | ']' | '\n') {
i += 1;
}
let val: String = chars[start..i]
.iter()
.collect::<String>()
.trim()
.to_string();
// An empty value (delimiter right after the colon) inserts nothing.
if !val.is_empty() {
map.insert(key, serde_json::json!(val));
}
}
}
serde_json::Value::Object(map)
}
/// Specialized parser for edit_file arguments when JSON parsing fails.
/// Models often generate old_string/new_string with unescaped quotes/newlines.
/// This parser uses the known field order to extract content by position.
pub fn extract_edit_file_args(raw: &str) -> Option<serde_json::Value> {
let fp_marker = raw.find("\"file_path\"")?;
let old_marker = raw.find("\"old_string\"")?;
let new_marker = raw.find("\"new_string\"")?;
if old_marker <= fp_marker || new_marker <= old_marker {
return None;
}
// Extract file_path (simple quoted string before old_string)
let fp_region = &raw[fp_marker + 11..old_marker];
let fp_colon = fp_region.find(':')?;
let fp_val = fp_region[fp_colon + 1..]
.trim()
.trim_matches(|c| c == '"' || c == ',')
.trim();
if fp_val.is_empty() {
return None;
}
let file_path = fp_val.to_string();
// Extract old_string: everything between "old_string": " and ", "new_string"
let old_colon = raw[old_marker..].find(':')?;
let old_start = old_marker + old_colon + 1;
let old_raw = &raw[old_start..new_marker];
let old_string = unescape_field_value(old_raw);
// Extract new_string: everything after "new_string": " to the end
let new_colon = raw[new_marker..].find(':')?;
let new_start = new_marker + new_colon + 1;
let new_raw = &raw[new_start..];
let new_string = unescape_field_value_end(new_raw);
if old_string.is_empty() && new_string.is_empty() {
return None;
}
let replace_all = raw.contains("\"replace_all\"")
&& raw.rfind("true").map_or(false, |t| {
raw.rfind("\"replace_all\"").map_or(false, |r| t > r)
});
Some(serde_json::json!({
"file_path": file_path,
"old_string": old_string,
"new_string": new_string,
"replace_all": replace_all,
}))
}
/// Decode a field value that sits between two keys: trim whitespace and
/// trailing commas, remove the opening quote and exactly ONE closing quote,
/// then JSON-unescape the rest.
///
/// Only one closing quote is removed so that a value which itself ends in a
/// quote (`"x = \"hello\""`) keeps its final character.
fn unescape_field_value(raw: &str) -> String {
    let trimmed = raw.trim().trim_end_matches(',').trim();
    let inner = trimmed.strip_prefix('"').unwrap_or(trimmed);
    let inner = inner.strip_suffix('"').unwrap_or(inner);
    unescape_json_string_contents(inner)
}

/// Decode the LAST string field of an object: remove the opening quote, cut
/// the text at the value's closing quote, then JSON-unescape it.
///
/// The closing quote is located by trying, in order:
/// 1. the exact text `", "replace_all"`;
/// 2. the last `"replace_all"` key preceded by `"`, optional whitespace, `,`
///    and optional whitespace (covers compact `","replace_all"` and
///    pretty-printed layouts);
/// 3. the last `"}`;
/// 4. the last `"` + newline + `}`;
/// 5. a `"`, optional whitespace, then a final `}` at the end of the text
///    (covers `" }` and CRLF endings).
/// If none match, the whole remaining text is used.
fn unescape_field_value_end(raw: &str) -> String {
    /// Index of the closing quote before a `, "replace_all"` key, tolerating
    /// any whitespace around the comma.
    fn end_before_replace_all(inner: &str) -> Option<usize> {
        let key_at = inner.rfind("\"replace_all\"")?;
        let before_comma = inner[..key_at].trim_end().strip_suffix(',')?.trim_end();
        before_comma.strip_suffix('"').map(str::len)
    }

    /// Index of the closing quote before a final `}`, tolerating whitespace.
    fn end_before_final_brace(inner: &str) -> Option<usize> {
        let before_brace = inner.trim_end().strip_suffix('}')?.trim_end();
        before_brace.strip_suffix('"').map(str::len)
    }

    let trimmed = raw.trim();
    let inner = trimmed.strip_prefix('"').unwrap_or(trimmed);
    let end = inner
        .rfind("\", \"replace_all\"")
        .or_else(|| end_before_replace_all(inner))
        .or_else(|| inner.rfind("\"}"))
        .or_else(|| inner.rfind("\"\n}"))
        .or_else(|| end_before_final_brace(inner))
        .unwrap_or(inner.len());
    unescape_json_string_contents(&inner[..end])
}

/// Decode JSON string escapes in `raw` (the text between the quotes) in a
/// single left-to-right pass, so `\\n` yields backslash + `n`, never a
/// newline.
///
/// Handles `\\ \" \/ \n \r \t \b \f` and `\uXXXX`, including UTF-16
/// surrogate pairs written as two consecutive `\u` escapes. A malformed
/// `\u` escape (too short, non-hex digits, or an unpaired surrogate), any
/// unknown `\X` pair, and a lone trailing backslash are copied through
/// unchanged.
fn unescape_json_string_contents(raw: &str) -> String {
    /// Parse exactly four ASCII hex digits from the start of `s`.
    fn hex4(s: &str) -> Option<u32> {
        let digits = s.get(..4)?;
        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        u32::from_str_radix(digits, 16).ok()
    }

    /// Decode the text following `\u`. Returns the char and the number of
    /// bytes consumed (4, or 10 for a surrogate pair).
    fn decode_unicode(rest: &str) -> Option<(char, usize)> {
        let first = hex4(rest)?;
        if (0xD800..0xDC00).contains(&first) {
            let tail = rest.get(4..)?.strip_prefix("\\u")?;
            let second = hex4(tail)?;
            if !(0xDC00..0xE000).contains(&second) {
                return None;
            }
            let code = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
            return char::from_u32(code).map(|c| (c, 10));
        }
        // `from_u32` rejects an unpaired low surrogate.
        char::from_u32(first).map(|c| (c, 4))
    }

    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('/') => out.push('/'),
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some('b') => out.push('\u{0008}'),
            Some('f') => out.push('\u{000C}'),
            Some('u') => match decode_unicode(chars.as_str()) {
                Some((decoded, consumed)) => {
                    out.push(decoded);
                    // The consumed bytes are ASCII, so the cut is a char boundary.
                    chars = chars.as_str()[consumed..].chars();
                }
                None => out.push_str("\\u"),
            },
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
// Unit tests for the JSON repair pipeline. Fixtures are written either as raw
// strings (`r#"…"#`, where a backslash is literal) or as ordinary string
// literals (where `\\` is ONE backslash in the input); the distinction is
// what separates a correctly-escaped path from an under-escaped one.
#[cfg(test)]
mod tests {
use super::*;
// --- repair_json tests ---
// A comma directly before the closing brace must be dropped.
#[test]
fn repair_trailing_comma() {
let input = r#"{"key": "value",}"#;
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should be valid JSON");
assert_eq!(parsed["key"], "value");
}
// Input with no double quotes at all gets its single quotes converted.
#[test]
fn repair_single_quotes() {
let input = "{'key': 'value'}";
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should be valid JSON");
assert_eq!(parsed["key"], "value");
}
// A missing `}` is appended.
#[test]
fn repair_missing_closing_brace() {
let input = r#"{"key": "value""#;
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should be valid JSON");
assert_eq!(parsed["key"], "value");
}
// A bare identifier followed by `:` is wrapped in quotes.
#[test]
fn repair_unquoted_keys() {
let input = r#"{path: "src/main.rs"}"#;
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should be valid JSON");
assert_eq!(parsed["path"], "src/main.rs");
}
#[test]
fn repair_invalid_backslash_escape() {
// \. is not a valid JSON escape — should be doubled to \\.
let input = r#"{"pattern": "app\.rs"}"#;
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should be valid JSON after escape repair");
// After repair \. becomes \\. which JSON parses as literal backslash + dot
assert!(parsed["pattern"].as_str().unwrap().contains('.'));
}
#[test]
fn repair_missing_comma_between_fields() {
let input = r#"{"path": "src" "depth": 2}"#;
let repaired = repair_json(input);
// Should either parse or at least not panic
let _ = serde_json::from_str::<serde_json::Value>(&repaired);
}
// A ```json fence around the payload is stripped.
#[test]
fn repair_markdown_fence_json() {
let input = "```json\n{\"key\": \"value\"}\n```";
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should strip fences");
assert_eq!(parsed["key"], "value");
}
// A bare ``` fence (no language tag) is stripped as well.
#[test]
fn repair_markdown_fence_no_lang() {
let input = "```\n{\"key\": \"value\"}\n```";
let repaired = repair_json(input);
let parsed: serde_json::Value =
serde_json::from_str(&repaired).expect("should strip fences");
assert_eq!(parsed["key"], "value");
}
// --- extract_json_fields tests ---
// Quoted keys with quoted string values.
#[test]
fn extract_fields_basic_key_value() {
let input = r#"{"file_path": "/src/main.rs", "pattern": "hello"}"#;
let result = extract_json_fields(input);
assert_eq!(result["file_path"], "/src/main.rs");
assert_eq!(result["pattern"], "hello");
}
// `true` / `false` words become JSON booleans, not strings.
#[test]
fn extract_fields_boolean_values() {
let input = r#"{"recursive": true, "case_sensitive": false}"#;
let result = extract_json_fields(input);
assert_eq!(result["recursive"], true);
assert_eq!(result["case_sensitive"], false);
}
// Bare (unquoted) identifiers are accepted as keys.
#[test]
fn extract_fields_bare_keys() {
let input = r#"{path: "/tmp/foo", depth: 3}"#;
let result = extract_json_fields(input);
assert_eq!(result["path"], "/tmp/foo");
}
// --- extract_edit_file_args tests ---
#[test]
fn extract_edit_file_standard_escaped_newlines() {
let input = r#"{"file_path": "/src/lib.rs", "old_string": "fn old(){\n}", "new_string": "fn new(){\n}"}"#;
let result = extract_edit_file_args(input).expect("should parse");
assert_eq!(result["file_path"], "/src/lib.rs");
// \n sequences in old_string/new_string get unescaped to real newlines
assert!(result["old_string"].as_str().unwrap().contains('\n'));
assert!(result["new_string"].as_str().unwrap().contains('\n'));
}
// Without the old_string/new_string keys the positional parser gives up.
#[test]
fn extract_edit_file_returns_none_on_missing_markers() {
let input = r#"{"file_path": "/src/lib.rs"}"#;
assert!(extract_edit_file_args(input).is_none());
}
// `"replace_all": true` after new_string is reported as boolean true.
#[test]
fn extract_edit_file_replace_all_true() {
let input = r#"{"file_path": "/src/lib.rs", "old_string": "foo", "new_string": "bar", "replace_all": true}"#;
let result = extract_edit_file_args(input).expect("should parse");
assert_eq!(result["replace_all"], true);
}
// --- repair_tool_args tests ---
// Already-valid JSON must come back byte-for-byte unchanged.
#[test]
fn repair_tool_args_passes_valid_json_through() {
let input = r#"{"file_path":"/tmp/a.rs","content":"x"}"#;
assert_eq!(repair_tool_args("write_file", input), input);
}
// Fence-wrapped JSON fails the first parse and is rescued by repair_json.
#[test]
fn repair_tool_args_fixes_fence_wrapped_json() {
let input = "```json\n{\"file_path\":\"/tmp/a.rs\",\"content\":\"x\"}\n```";
let out = repair_tool_args("write_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("should parse");
assert_eq!(v["file_path"], "/tmp/a.rs");
}
#[test]
fn repair_tool_args_keeps_empty_object_untouched() {
// Empty `{}` is valid JSON — we must not paper over it by inventing fields.
// Callers surface it as a user-visible error instead.
assert_eq!(repair_tool_args("write_file", "{}"), "{}");
}
#[test]
fn repair_tool_args_returns_original_when_unsalvageable() {
// Pure garbage with no extractable key=value pairs → return as-is so
// the tool emits the real parse error (not a misleading repaired stub).
let input = "!!!";
assert_eq!(repair_tool_args("write_file", input), "!!!");
}
// --- Windows-path unescape regression tests ---
//
// Properly-escaped Windows paths arrive in raw form as `\` `\` `t` (3 chars).
// The old `.replace("\\t", "\t")` chain mistakenly matched the literal "\t"
// formed by the second backslash + the t, turning `\\test` into `\<TAB>est`.
#[test]
fn extract_fields_windows_path_keeps_backslash_t() {
// JSON-legal: every Windows backslash doubled.
let input = r#"{"file_path": "D:\\work\\prj\\test-wsd\\run.py"}"#;
let result = extract_json_fields(input);
assert_eq!(
result["file_path"], "D:\\work\\prj\\test-wsd\\run.py",
"escaped backslashes must collapse to single backslashes, not produce TAB",
);
assert!(
!result["file_path"].as_str().unwrap().contains('\t'),
"no tab character should appear",
);
}
#[test]
fn extract_fields_unc_long_path_prefix() {
// \\?\D:\... long-path prefix, fully escaped → \\?\D:\test-wsd\run.py
let input = r#"{"file_path": "\\\\?\\D:\\test-wsd\\run.py"}"#;
let result = extract_json_fields(input);
assert_eq!(result["file_path"], "\\\\?\\D:\\test-wsd\\run.py");
}
#[test]
fn extract_fields_literal_backslash_n_preserved() {
// Raw `\` `\` `n` must decode to `\n` (backslash + n), not a newline —
// sequential `.replace` could swap order and produce a real newline here.
let input = r#"{"x": "a\\nb"}"#;
let result = extract_json_fields(input);
assert_eq!(result["x"], "a\\nb");
assert!(!result["x"].as_str().unwrap().contains('\n'));
}
#[test]
fn extract_fields_real_escapes_still_work() {
// Don't regress the intended behavior: \n → newline, \t → tab, \" → ".
let input = r#"{"a": "line1\nline2", "b": "col1\tcol2", "c": "say \"hi\""}"#;
let result = extract_json_fields(input);
assert_eq!(result["a"], "line1\nline2");
assert_eq!(result["b"], "col1\tcol2");
assert_eq!(result["c"], "say \"hi\"");
}
// Correctly-escaped path literals inside old_string/new_string must decode
// to single backslashes without producing a tab from `\\t`.
#[test]
fn extract_edit_file_windows_path_in_old_string() {
let input = r#"{"file_path": "/src/x.py", "old_string": "p = 'C:\\foo\\test.py'", "new_string": "p = 'C:\\foo\\bar.py'"}"#;
let result = extract_edit_file_args(input).expect("should parse");
assert_eq!(result["old_string"], "p = 'C:\\foo\\test.py'");
assert_eq!(result["new_string"], "p = 'C:\\foo\\bar.py'");
assert!(!result["old_string"].as_str().unwrap().contains('\t'));
}
// The input here is an ordinary string literal, so the JSON text holds
// SINGLE backslashes (`D:\test\foo.py`). That text is valid JSON but would
// decode `\t` and `\f` as control characters; the pre-pass must prevent it.
#[test]
fn repair_tool_args_rescues_windows_path_in_valid_json() {
let input = "{\"file_path\": \"D:\\test\\foo.py\"}";
let out = repair_tool_args("read_file", input);
let v: serde_json::Value =
serde_json::from_str(&out).expect("should be valid JSON after pre-pass");
assert_eq!(v["file_path"], "D:\\test\\foo.py");
let s = v["file_path"].as_str().unwrap();
assert!(!s.contains('\t'), "tab must not appear: got {:?}", s);
assert!(!s.contains('\u{000C}'), "form feed must not appear: got {:?}", s);
}
// A path whose backslashes are already doubled must decode unchanged.
#[test]
fn repair_tool_args_idempotent_on_correctly_escaped_path() {
let input = r#"{"file_path": "D:\\work\\app.py"}"#;
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["file_path"], "D:\\work\\app.py");
}
// Fully-escaped `\\?\D:\…` long-path prefix survives the whole pipeline.
#[test]
fn repair_tool_args_preserves_unc_long_path_prefix() {
let input = r#"{"file_path": "\\\\?\\D:\\test-wsd\\run.py"}"#;
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["file_path"], "\\\\?\\D:\\test-wsd\\run.py");
}
// A string with no drive prefix keeps its real `\t` / `\n` escapes.
#[test]
fn repair_tool_args_non_path_string_with_tab_preserved() {
let input = r#"{"category": "fast\ttab\nnewline"}"#;
let out = repair_tool_args("write_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let s = v["category"].as_str().unwrap();
assert!(s.contains('\t'), "real \\t should remain a tab: {:?}", s);
assert!(s.contains('\n'), "real \\n should remain a newline: {:?}", s);
}
// `y:\n` at the end of a word matches letter + `:` + `\` but is preceded by
// another letter, so the path-context guard must reject it.
#[test]
fn repair_tool_args_word_ending_with_colon_then_backslash_is_not_path() {
let input = r#"{"label": "category:\nimportant"}"#;
let out = repair_tool_args("write_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let s = v["label"].as_str().unwrap();
assert!(s.contains('\n'), "newline should survive: got {:?}", s);
assert!(!s.contains('\\'), "no literal backslash should remain: got {:?}", s);
}
/// Reverted-fix regression pin. A previous attempt added a
/// "skip if body contains `\n` or `\r` escape" guard to
/// `looks_like_windows_path` to defend content-with-embedded-
/// path bodies. It broke Windows paths whose own filenames
/// start with `n` or `r` — `D:\new`, `D:\node_modules`,
/// `D:\readme.txt`, `\nightly\foo`, etc. — because those
/// contain a `\` + `n` (or `\r`) byte pair that the guard
/// misread as a newline escape. Eval matrix went 14 → 27
/// before the revert.
///
/// Pin the loose-path case so any future "body shape" guard
/// has to keep it working.
#[test]
fn repair_tool_args_loose_windows_path_with_n_dir_name_still_rewrites() {
// Raw JSON: `{"file_path": "D:\new\foo.py"}` — model emits
// single-backslash Windows path with a directory called
// `new`. The bytes between the inner quotes are `D` `:`
// `\` `n` `e` `w` `\` `f` `o` `o` `.` `p` `y`. The pre-
// escape pass MUST double the `\n` and `\f` so the path
// round-trips, otherwise serde decodes `\n` → newline and
// the path turns into `D:<newline>ew<formfeed>oo.py`.
let input = "{\"file_path\": \"D:\\new\\foo.py\"}";
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let p = v["file_path"].as_str().unwrap();
assert_eq!(p, "D:\\new\\foo.py", "loose Windows path with `\\n` substring must round-trip; got {:?}", p);
assert!(!p.contains('\n'), "no real newline must leak through: {:?}", p);
assert!(!p.contains('\u{000C}'), "no form feed must leak through: {:?}", p);
}
/// Python source like `class A:\n pass\n` has a single
/// uppercase letter preceded by whitespace, then `:`, then
/// `\` from the JSON `\n` escape. The old tail-of-word guard
/// only rejected multi-letter words, so single-letter "names"
/// (class names, match arms, switch labels) slipped through
/// and every `\n`/`\t` in the file body got doubled, writing
/// the file as one line of literal `\n` characters. This is
/// the v4.23.2 tool-error regression — `notify.py` rewrites
/// turned into 1 line of garbage.
#[test]
fn repair_tool_args_single_letter_label_before_newline_is_not_path() {
let input = r#"{"file_path": "/tmp/notify.py", "content": "class A:\n pass\n"}"#;
let out = repair_tool_args("write_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let content = v["content"].as_str().unwrap();
assert!(
content.contains('\n'),
"newline must survive — file becomes 1-line garbage otherwise: got {:?}",
content
);
assert!(
!content.contains("\\n"),
"literal backslash-n must not appear: got {:?}",
content
);
assert_eq!(content, "class A:\n pass\n");
}
#[test]
fn repair_tool_args_content_with_escaped_windows_path_keeps_newlines() {
// The Windows "审核" ("review") screenshot bug: a write_file whose CONTENT is a
// multi-line Python script that *references* a correctly-escaped
// Windows path (`C:\\Users\\…`). The path made looks_like_windows_path
// fire on the WHOLE content body, and rewrite_windows_path_body then
// doubled every real `\n` newline into a literal backslash-n — landing
// the 4-line script on disk as ONE line of broken Python (the
// `(813 bytes, 1 lines)` in the report), after which `python` exits 1
// and the agent loops forever "fixing the encoding".
//
// Now the gate only fires on UNDER-escaped (single-backslash) drive
// paths, so the already-`\\`-escaped path is left alone and the real
// newlines survive.
let input = r#"{"file_path":"D:\\atomcode\\read_excel.py","content":"import openpyxl\nimport os\nexcel_path = r'C:\\Users\\Administrator\\Desktop\\文章.xlsx'\nprint(os.path.exists(excel_path))\n"}"#;
let out = repair_tool_args("write_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let content = v["content"].as_str().unwrap();
assert!(
content.contains('\n'),
"real newlines must survive — file becomes 1-line garbage otherwise: got {:?}",
content
);
assert!(
!content.contains("\\n"),
"no literal backslash-n must appear: got {:?}",
content
);
// The escaped path must still decode to single backslashes.
assert!(
content.contains(r"C:\Users\Administrator\Desktop\文章.xlsx"),
"embedded Windows path must round-trip: got {:?}",
content
);
assert_eq!(
content.lines().count(),
4,
"should be a 4-line script, not collapsed to 1: got {:?}",
content
);
}
#[test]
fn repair_tool_args_lowercase_drive_letter_recognized() {
// Lowercase `c:\` is also a valid Windows drive prefix.
let input = "{\"file_path\": \"c:\\users\\me\\file.txt\"}";
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["file_path"], "c:\\users\\me\\file.txt");
}
#[test]
fn repair_tool_args_windows_path_in_malformed_json_recovered() {
// Pre-pass + repair_json combined: trailing comma (parses-fail)
// AND single-backslash Windows path. Pre-pass fixes the path
// first, then repair_json strips the trailing comma.
let input = "{\"file_path\": \"D:\\test\\foo.py\",}";
let out = repair_tool_args("read_file", input);
let v: serde_json::Value =
serde_json::from_str(&out).expect("should recover via repair_json");
assert_eq!(v["file_path"], "D:\\test\\foo.py");
}
#[test]
fn repair_tool_args_edit_file_windows_path_in_old_string() {
// edit_file old_string contains a code snippet with a Windows
// path literal — the pre-pass must rewrite that literal so the
// generic JSON parser sees the model's intent.
let input = "{\"file_path\": \"/src/x.py\", \"old_string\": \"path = 'D:\\test'\", \"new_string\": \"path = 'D:\\prod'\"}";
let out = repair_tool_args("edit_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["old_string"], "path = 'D:\\test'");
assert_eq!(v["new_string"], "path = 'D:\\prod'");
}
// The drive letter directly follows an escaped quote (`\"D:\foo.exe\"`), one
// of the delimiters the path-context guard accepts.
#[test]
fn repair_tool_args_windows_path_after_escaped_quote() {
let input = "{\"cmd\": \"run \\\"D:\\foo.exe\\\"\"}";
let out = repair_tool_args("bash", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
let s = v["cmd"].as_str().unwrap();
assert!(s.contains("D:\\foo.exe"), "Windows path inside quoted arg lost: {:?}", s);
assert!(!s.contains('\t'), "no tab corruption: {:?}", s);
}
// Running the pre-pass on its own output must not change it.
#[test]
fn pre_escape_idempotent_under_double_application() {
let once = pre_escape_windows_paths_in_json(r#"{"p": "D:\\a\\b"}"#);
let twice = pre_escape_windows_paths_in_json(&once);
assert_eq!(once, twice, "pre_escape should be idempotent");
}
// A correctly-escaped path containing non-ASCII characters must round-trip
// and must not end up with a literal `\u` sequence in the decoded value.
#[test]
fn pre_escape_preserves_unicode_escape_in_windows_path() {
let input = r#"{"file_path": "C:\\Users\\张三\\file.txt"}"#;
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["file_path"], "C:\\Users\\张三\\file.txt");
let s = v["file_path"].as_str().unwrap();
assert!(
!s.contains("\\u"),
"Unicode escape must not survive as literal `\\u`; got {s:?}",
);
}
// Under-escaped path mixing ambiguous `\t` / `\f` (must be doubled) with a
// real `\u0041` escape (must still decode to `A`).
#[test]
fn pre_escape_mixes_unicode_escape_with_ambiguous_letter() {
let input = "{\"file_path\": \"D:\\test\\u0041\\foo\"}";
let out = repair_tool_args("read_file", input);
let v: serde_json::Value = serde_json::from_str(&out).expect("valid JSON");
assert_eq!(v["file_path"], "D:\\testA\\foo");
}
// A `{` inside a string value must not count toward brace balancing.
#[test]
fn repair_json_brace_balance_ignores_braces_in_strings() {
let input = r#"{"old_string": "fn main() {"}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.unwrap_or_else(|e| panic!("brace balance should not over-close; got {repaired:?}: {e}"));
assert_eq!(v["old_string"], "fn main() {");
}
// `{ class: foo }` inside a string value must not get its "key" quoted.
#[test]
fn repair_json_unquoted_key_does_not_quote_inside_string() {
let input = r#"{"outer": "snippet { class: foo }", "n": 1}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.unwrap_or_else(|e| panic!("unquoted-key fix must not touch string content; got {repaired:?}: {e}"));
assert_eq!(v["outer"], "snippet { class: foo }");
}
// A literal `,}` inside a string value must not be treated as a trailing comma.
#[test]
fn repair_json_trailing_comma_skips_literal_inside_string() {
let input = r#"{"outer": "tail,}", "n": 1}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.unwrap_or_else(|e| panic!("trailing-comma replace must not touch strings; got {repaired:?}: {e}"));
assert_eq!(v["outer"], "tail,}");
}
// Several nested braces inside a string value leave the repair untouched.
#[test]
fn repair_json_handles_multiple_braces_in_source_string() {
let input = r#"{"old_string": "fn x() { if y { return z; } }", "k": 1}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.unwrap_or_else(|e| panic!("nested braces in string must not break repair; got {repaired:?}: {e}"));
assert_eq!(v["old_string"], "fn x() { if y { return z; } }");
}
// Counterpart to the in-string tests: real unquoted keys are still quoted.
#[test]
fn repair_json_unquoted_key_outside_string_still_works() {
let input = r#"{path: "src/main.rs", depth: 2}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.expect("legit unquoted keys must still be wrapped");
assert_eq!(v["path"], "src/main.rs");
assert_eq!(v["depth"], 2);
}
// Counterpart to the in-string tests: a real trailing comma is still removed.
#[test]
fn repair_json_trailing_comma_outside_string_still_removed() {
let input = r#"{"k": "v",}"#;
let repaired = repair_json(input);
let v: serde_json::Value = serde_json::from_str(&repaired)
.expect("legit trailing comma must still be stripped");
assert_eq!(v["k"], "v");
}
}