#!/usr/bin/env node
import { readFileSync, statSync } from "node:fs";
// System prompt of the OLD summary-request shape: `bench` sends it as the
// system message of `oldSummaryMsgs`, in place of the agent's system prompt.
const SYS_OLD =
"You compress conversation history for a coding agent. Output one prose recap that preserves: " +
"the user's ORIGINAL OBJECTIVE (never paraphrase away nuance or negative constraints like 'do NOT do X'), " +
"all 'do not' / 'never' / 'avoid' instructions, decisions and conclusions reached, " +
"files inspected or modified, important tool results still relevant to ongoing work, " +
"and any open todos. Skip turn-by-turn play-by-play. No tool calls, no markdown headings, no SEARCH/REPLACE blocks — plain prose only.";
// Final user message of the OLD shape; the preservation rules live in SYS_OLD,
// so this instruction itself is short.
const SUMMARY_INSTRUCTION_OLD =
"Summarize the conversation above as plain prose. This summary replaces the original turns to free context — make it self-contained.";
// Final user message of the NEW shape. `bench` pairs it with the agent's own
// system prompt, and the text repeats the preservation rules found in SYS_OLD.
const SUMMARY_INSTRUCTION_NEW =
"Summarize the conversation above as one self-contained prose recap. Preserve the user's " +
"ORIGINAL OBJECTIVE (never paraphrase away negative constraints like 'do NOT do X'), all " +
"'do not' / 'never' / 'avoid' instructions, decisions reached, files inspected or modified, " +
"tool results still relevant, and any open todos. Skip turn-by-turn play-by-play. " +
"Output plain prose only — no tool calls, no markdown headings, no SEARCH/REPLACE blocks.";
/**
 * Rough token estimate for a piece of text.
 *
 * @param {string} s - Text to measure (its `.length` is used).
 * @returns {number} `s.length / 3.5`, rounded up to a whole number.
 */
function approxTokens(s) {
  const CHARS_PER_TOKEN = 3.5;
  const estimate = s.length / CHARS_PER_TOKEN;
  return Math.ceil(estimate);
}
/**
 * Estimates the token footprint of one chat message.
 *
 * String content is measured as-is; any other content is measured through its
 * JSON serialization (a nullish content serializes as an empty JSON string).
 * Tool calls, when present as an array, are measured through their JSON form.
 *
 * @param {{content?: unknown, tool_calls?: unknown}} m - Message to measure.
 * @returns {number} Content estimate + tool-call estimate + a flat 4 per message.
 */
function msgTokens(m) {
  const PER_MESSAGE_OVERHEAD = 4;

  let contentText;
  if (typeof m.content === "string") {
    contentText = m.content;
  } else {
    contentText = JSON.stringify(m.content ?? "");
  }

  let toolCallText = "";
  if (Array.isArray(m.tool_calls)) {
    toolCallText = JSON.stringify(m.tool_calls);
  }

  return approxTokens(contentText) + approxTokens(toolCallText) + PER_MESSAGE_OVERHEAD;
}
/**
 * Reads a JSONL session log and parses every non-blank line as one JSON value.
 *
 * Each line is trimmed before parsing, so a leading UTF-8 byte-order mark or
 * stray surrounding whitespace does not break the first record.
 *
 * @param {string} path - Path of the `.jsonl` file, read as UTF-8.
 * @returns {unknown[]} Parsed values in file order; blank lines are skipped.
 * @throws {SyntaxError} When a non-blank line is not valid JSON. The message
 *   names the file and the 1-based line number; the original parse error is
 *   attached as `cause`.
 */
function loadSession(path) {
  const records = [];
  const lines = readFileSync(path, "utf8").split("\n");
  for (const [index, rawLine] of lines.entries()) {
    // trim() also strips U+FEFF, which JSON.parse would otherwise reject.
    const text = rawLine.trim();
    if (text === "") continue;
    try {
      records.push(JSON.parse(text));
    } catch (err) {
      // Same error type as JSON.parse, but pointing at the offending line.
      throw new SyntaxError(`${path}:${index + 1}: invalid JSON in session log: ${err.message}`, {
        cause: err,
      });
    }
  }
  return records;
}
/**
 * Chooses where to split a message log into a "head" and the remainder.
 *
 * Walks the log from the start and stops at the first message whose running
 * token estimate reaches `headRatio` of the log's total estimate.
 *
 * @param {object[]} messages - Message log, oldest first.
 * @param {number} [headRatio=0.7] - Fraction of total tokens the head should reach.
 * @returns {{boundary: number, total: number, headTokens: number}} `boundary`
 *   is the count of messages in the head, `total` the estimate for the whole
 *   log, `headTokens` the estimate for the head. If the budget is never
 *   reached, the whole log is the head.
 */
function pickFoldPoint(messages, headRatio = 0.7) {
  const perMessage = messages.map((msg) => msgTokens(msg));
  const total = perMessage.reduce((sum, t) => sum + t, 0);
  const headBudget = headRatio * total;

  let running = 0;
  let index = 0;
  while (index < messages.length) {
    running += perMessage[index];
    index += 1;
    if (running >= headBudget) {
      return { boundary: index, total, headTokens: running };
    }
  }

  return { boundary: messages.length, total, headTokens: total };
}
/**
 * Counts how many leading entries two message lists have in common.
 *
 * Entries are compared position by position via their JSON serialization, so
 * two entries match only when they serialize to the same string.
 *
 * @param {unknown[]} a - First message list.
 * @param {unknown[]} b - Second message list.
 * @returns {number} Length of the common prefix (0 when the first entries differ).
 */
function sharedLeadingMessages(a, b) {
  const limit = Math.min(a.length, b.length);
  let matched = 0;
  while (matched < limit && JSON.stringify(a[matched]) === JSON.stringify(b[matched])) {
    matched += 1;
  }
  return matched;
}
/**
 * Total token estimate for a list of messages.
 *
 * @param {object[]} messages - Messages to measure.
 * @returns {number} Sum of the per-message estimates (0 for an empty list).
 */
function tokenCountOf(messages) {
  const perMessage = messages.map((msg) => msgTokens(msg));
  return perMessage.reduce((sum, t) => sum + t, 0);
}
/**
 * Simulates one fold of a recorded session and measures, for the OLD and NEW
 * summary-request shapes, how much of the request is a shared prefix of the
 * main agent's request.
 *
 * The main agent's request is `[agent system, ...head, next message]`. Each
 * summary request is `[system, ...head, summary instruction]`: the OLD shape
 * uses SYS_OLD as its system message, the NEW shape uses `agentSystem`.
 * "Cacheable" tokens are the estimated tokens of the leading messages a
 * summary request shares with the main agent's request.
 *
 * @param {string} path - Path of the session `.jsonl` file.
 * @param {string} agentSystem - System prompt text assumed for the main agent.
 * @returns {object|null} Per-session statistics, or `null` when the log has
 *   fewer than 10 messages.
 */
function bench(path, agentSystem) {
  const log = loadSession(path);
  if (log.length < 10) {
    return null;
  }

  const fold = pickFoldPoint(log);
  const head = log.slice(0, fold.boundary);
  // When the fold lands on the very end of the log there is no "next" message,
  // so a placeholder user turn stands in for it.
  const recentMsg = log[fold.boundary] ?? { role: "user", content: "next instruction" };
  const mainAgentMsgs = [{ role: "system", content: agentSystem }, ...head, recentMsg];

  // Builds one summary request and measures its overlap with the agent request.
  const measureShape = (systemContent, instruction) => {
    const request = [
      { role: "system", content: systemContent },
      ...head,
      { role: "user", content: instruction },
    ];
    const sharedMessages = sharedLeadingMessages(mainAgentMsgs, request);
    const cacheableTokens = tokenCountOf(request.slice(0, sharedMessages));
    const totalRequestTokens = tokenCountOf(request);
    const hitRatio = totalRequestTokens > 0 ? cacheableTokens / totalRequestTokens : 0;
    return { sharedMessages, cacheableTokens, totalRequestTokens, hitRatio };
  };

  const oldShape = measureShape(SYS_OLD, SUMMARY_INSTRUCTION_OLD);
  const newShape = measureShape(agentSystem, SUMMARY_INSTRUCTION_NEW);

  return {
    file: path,
    sizeKB: Math.round(statSync(path).size / 1024),
    logMessages: log.length,
    foldBoundary: fold.boundary,
    totalLogTokens: fold.total,
    headTokens: fold.headTokens,
    OLD: oldShape,
    NEW: newShape,
  };
}
/**
 * Formats a token count for display.
 *
 * @param {number} n - Token count.
 * @returns {string} Thousands with one decimal and a "k" suffix when `n` is at
 *   least 1000, otherwise the number as a plain string.
 */
function fmtTokens(n) {
  return n >= 1000 ? `${(n / 1000).toFixed(1)}k` : String(n);
}
/**
 * Formats a ratio as a percentage with one decimal place.
 *
 * @param {number} r - Ratio, where 1 means 100%.
 * @returns {string} For example `0.5` becomes `"50.0%"`.
 */
function fmtPct(r) {
  const percent = r * 100;
  return `${percent.toFixed(1)}%`;
}
// Per-token prices used by the cost model: 3.00 per million tokens for the
// uncached part of a request, 0.30 per million for the cacheable part.
const PRICE_INPUT = 3.0 / 1e6;
const PRICE_CACHE = 0.30 / 1e6;

/**
 * Prices one summary request under the cost model above.
 *
 * @param {{cacheableTokens: number, totalRequestTokens: number}} r - Token
 *   counts for one request shape.
 * @returns {number} Cacheable tokens at PRICE_CACHE plus the remaining tokens
 *   at PRICE_INPUT.
 */
function costOf(r) {
  const { cacheableTokens, totalRequestTokens } = r;
  const cachedCost = cacheableTokens * PRICE_CACHE;
  const uncachedCost = (totalRequestTokens - cacheableTokens) * PRICE_INPUT;
  return cachedCost + uncachedCost;
}
// Session log paths are the command-line arguments; at least one is required.
const [, , ...files] = process.argv;
if (files.length < 1) {
  console.error("usage: bench-fold-cache-shape.mjs <session.jsonl> [more.jsonl ...]");
  process.exit(2);
}

// Placeholder agent system prompt: 6000 "x" characters passed to every bench run.
const REPRESENTATIVE_SYSTEM = "x".repeat(6000);

// One entry per session that bench did not skip (it returns null for short logs).
const results = [];
for (const file of files) {
  const outcome = bench(file, REPRESENTATIVE_SYSTEM);
  if (outcome) {
    results.push(outcome);
  }
}
// Per-session report: one header line, one line per request shape, and the
// cost difference between the two shapes for a single fold.
console.log("\n=== Per-session fold cache shape ===");
for (const session of results) {
  const { OLD: before, NEW: after } = session;
  const beforeCost = costOf(before);
  const afterCost = costOf(after);
  const savings = beforeCost - afterCost;
  // A zero-cost OLD request would make the percentage meaningless; report 0.
  const savingPct = beforeCost > 0 ? savings / beforeCost : 0;

  const reportLines = [
    `\n${session.file}`,
    ` ${session.logMessages} messages · total ${fmtTokens(session.totalLogTokens)} tok · head ${fmtTokens(session.headTokens)} tok (fold at msg ${session.foldBoundary})`,
    ` OLD shape: shared ${before.sharedMessages} leading msgs → cache hit ${fmtTokens(before.cacheableTokens)} / ${fmtTokens(before.totalRequestTokens)} tok = ${fmtPct(before.hitRatio)} → $${beforeCost.toFixed(5)}`,
    ` NEW shape: shared ${after.sharedMessages} leading msgs → cache hit ${fmtTokens(after.cacheableTokens)} / ${fmtTokens(after.totalRequestTokens)} tok = ${fmtPct(after.hitRatio)} → $${afterCost.toFixed(5)}`,
    ` SAVING per fold: $${savings.toFixed(5)} (${fmtPct(savingPct)})`,
  ];
  for (const line of reportLines) {
    console.log(line);
  }
}
// Aggregate report: totals across every simulated fold, plus overall hit
// ratios and the overall cost difference between the two shapes.
console.log("\n=== Aggregate ===");

/**
 * Division that yields 0 instead of NaN/Infinity when the denominator is not
 * positive. This happens when every session was skipped and all totals are 0;
 * the per-session ratios use the same convention.
 */
function ratioOrZero(numerator, denominator) {
  return denominator > 0 ? numerator / denominator : 0;
}

/** Sums `pick(result)` over all simulated folds. */
function sumOverResults(pick) {
  return results.reduce((acc, r) => acc + pick(r), 0);
}

const oldTotal = sumOverResults((r) => costOf(r.OLD));
const newTotal = sumOverResults((r) => costOf(r.NEW));
const oldCacheTok = sumOverResults((r) => r.OLD.cacheableTokens);
const newCacheTok = sumOverResults((r) => r.NEW.cacheableTokens);
const oldReqTok = sumOverResults((r) => r.OLD.totalRequestTokens);
const newReqTok = sumOverResults((r) => r.NEW.totalRequestTokens);
console.log(`folds simulated: ${results.length}`);
console.log(
  `OLD: ${fmtTokens(oldCacheTok)} cached / ${fmtTokens(oldReqTok)} sent = ${fmtPct(ratioOrZero(oldCacheTok, oldReqTok))} hit · total $${oldTotal.toFixed(4)}`,
);
console.log(
  `NEW: ${fmtTokens(newCacheTok)} cached / ${fmtTokens(newReqTok)} sent = ${fmtPct(ratioOrZero(newCacheTok, newReqTok))} hit · total $${newTotal.toFixed(4)}`,
);
console.log(
  `aggregate saving: $${(oldTotal - newTotal).toFixed(4)} (${fmtPct(ratioOrZero(oldTotal - newTotal, oldTotal))})`,
);