* M9 — recursively strip "ambient" unicode characters (zero-width joiner,
* BiDi overrides, RTL/LTR marks, BOM, replacement characters) from any
* string that came from the network. Mirrors legacy
* `recursivelySanitizeUnicode` (services/mcp/client.ts:1768).
*
* Why: an MCP server can advertise a tool name or description containing
* RTL override characters that flip how a description renders in the
* permission prompt — used in real-world phishing attacks. We strip the
* dangerous code points; visible non-ASCII text (CJK, accents) is kept.
*/
// Code points removed from untrusted strings, stored as single-character
// strings so membership can be tested per code point. All entries are in the
// Basic Multilingual Plane.
// NOTE(review): U+061C (Arabic letter mark), U+2060 (word joiner) and the tag
// block U+E0000–U+E007F are not in this set — confirm that is intentional.
const STRIPPED_CODE_POINTS = new Set<string>(
  [
    // Zero-width characters
    0x200b, // ZERO WIDTH SPACE
    0x200c, // ZERO WIDTH NON-JOINER
    0x200d, // ZERO WIDTH JOINER
    // Implicit directional marks
    0x200e, // LEFT-TO-RIGHT MARK
    0x200f, // RIGHT-TO-LEFT MARK
    // Explicit directional embeddings and overrides
    0x202a, // LEFT-TO-RIGHT EMBEDDING
    0x202b, // RIGHT-TO-LEFT EMBEDDING
    0x202c, // POP DIRECTIONAL FORMATTING
    0x202d, // LEFT-TO-RIGHT OVERRIDE
    0x202e, // RIGHT-TO-LEFT OVERRIDE
    // Directional isolates
    0x2066, // LEFT-TO-RIGHT ISOLATE
    0x2067, // RIGHT-TO-LEFT ISOLATE
    0x2068, // FIRST STRONG ISOLATE
    0x2069, // POP DIRECTIONAL ISOLATE
    // Byte-order mark and replacement characters
    0xfeff, // ZERO WIDTH NO-BREAK SPACE (BOM)
    0xfffc, // OBJECT REPLACEMENT CHARACTER
    0xfffd, // REPLACEMENT CHARACTER
  ].map((codePoint) => String.fromCodePoint(codePoint)),
);
/**
 * Returns `s` with every code point that appears in `STRIPPED_CODE_POINTS`
 * removed. All other characters are kept in their original order.
 *
 * @param s - The string to clean.
 * @returns The cleaned string; an empty input yields an empty string.
 */
export function sanitizeUnicodeString(s: string): string {
  // Array.from walks the string by code point rather than by UTF-16 unit, so
  // surrogate pairs (e.g. emoji) are tested and re-joined as whole characters.
  return Array.from(s)
    .filter((codePoint) => !STRIPPED_CODE_POINTS.has(codePoint))
    .join("");
}
/**
 * Builds a sanitized copy of `value`, passing every string it contains —
 * object keys included — through `sanitizeUnicodeString`.
 *
 * - Strings are sanitized.
 * - Arrays are mapped element by element into a new array.
 * - Any other non-null object is rebuilt as a plain object from its own
 *   enumerable string-keyed properties (as reported by `Object.entries`).
 * - Every other value (numbers, booleans, `null`, `undefined`, functions, …)
 *   is returned as-is.
 *
 * If two keys become equal after sanitization, the later entry wins.
 *
 * @param value - The data to sanitize; it is not mutated.
 * @returns The sanitized copy, typed as the input type.
 */
export function recursivelySanitizeUnicode<T>(value: T): T {
  if (typeof value === "string") {
    return sanitizeUnicodeString(value) as unknown as T;
  }
  if (Array.isArray(value)) {
    return value.map((v) => recursivelySanitizeUnicode(v)) as unknown as T;
  }
  if (value && typeof value === "object") {
    const out: Record<string, unknown> = {};
    for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
      // Define the property instead of assigning it. `out["__proto__"] = x`
      // would invoke the inherited `__proto__` setter and swap the result's
      // prototype, letting the input inject inherited properties. That key can
      // arrive as an own key from JSON.parse, or appear only after stripping
      // (e.g. "__pro\u200Bto__"). defineProperty always creates a normal own
      // data property.
      Object.defineProperty(out, sanitizeUnicodeString(k), {
        value: recursivelySanitizeUnicode(v),
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    return out as unknown as T;
  }
  return value;
}