/**
 * Context compaction for long sessions.
 *
 * Pure functions for compaction logic. The session manager handles I/O,
 * and after compaction the session is reloaded.
 */
import {
type AssistantMessage,
clampThinkingLevelForModel,
Effort,
type Message,
type MessageAttribution,
type Model,
type Usage,
} from "@oh-my-pi/pi-ai";
import { countTokens } from "@oh-my-pi/pi-natives";
import { logger, prompt } from "@oh-my-pi/pi-utils";
import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
import { ThinkingLevel } from "../thinking";
import type { AgentMessage, AgentTool } from "../types";
import type { CompactionEntry, SessionEntry } from "./entries";
import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
import {
buildOpenAiNativeHistory,
getPreservedOpenAiRemoteCompactionData,
requestOpenAiRemoteCompaction,
requestRemoteCompaction,
shouldUseOpenAiRemoteCompaction,
withOpenAiRemoteCompactionPreserveData,
} from "./openai";
import autoHandoffThresholdFocusPrompt from "./prompts/auto-handoff-threshold-focus.md" with { type: "text" };
import compactionShortSummaryPrompt from "./prompts/compaction-short-summary.md" with { type: "text" };
import compactionSummaryPrompt from "./prompts/compaction-summary.md" with { type: "text" };
import compactionTurnPrefixPrompt from "./prompts/compaction-turn-prefix.md" with { type: "text" };
import compactionUpdateSummaryPrompt from "./prompts/compaction-update-summary.md" with { type: "text" };
import handoffDocumentPrompt from "./prompts/handoff-document.md" with { type: "text" };
import {
computeFileLists,
createFileOps,
extractFileOpsFromMessage,
type FileOperations,
SUMMARIZATION_SYSTEM_PROMPT,
serializeConversation,
upsertFileOperations,
} from "./utils";
/** File lists stored in the `details` payload of a compaction result. */
export interface CompactionDetails {
  /** Paths that seed the `read` set when a later compaction rebuilds its file operations. */
  readFiles: string[];
  /** Paths that seed the `edited` set when a later compaction rebuilds its file operations. */
  modifiedFiles: string[];
}
/**
 * Extract file operations from messages and previous compaction entries.
 */
/**
 * Build the file-operation record for a compaction pass.
 *
 * @param messages - Messages whose file operations are folded in.
 * @param entries - Session entries; only `entries[prevCompactionIndex]` is read.
 * @param prevCompactionIndex - Index of the previous compaction entry, or a negative value when there is none.
 * @returns A fresh `FileOperations` seeded from the previous compaction's details and the given messages.
 */
function extractFileOperations(
  messages: AgentMessage[],
  entries: SessionEntry[],
  prevCompactionIndex: number,
): FileOperations {
  const ops = createFileOps();

  if (prevCompactionIndex >= 0) {
    const previous = entries[prevCompactionIndex] as CompactionEntry;
    // Details written by an extension are not assumed to follow the CompactionDetails shape.
    const usable = !previous.fromExtension && previous.details;
    if (usable) {
      const seed = previous.details as CompactionDetails;
      if (Array.isArray(seed.readFiles)) {
        for (const path of seed.readFiles) {
          ops.read.add(path);
        }
      }
      if (Array.isArray(seed.modifiedFiles)) {
        for (const path of seed.modifiedFiles) {
          ops.edited.add(path);
        }
      }
    }
  }

  for (const message of messages) {
    extractFileOpsFromMessage(message, ops);
  }
  return ops;
}
/**
 * Extract AgentMessage from an entry if it produces one.
 * Returns undefined for entries that don't contribute to LLM context.
 */
/**
 * Map a session entry to the message it contributes, if any.
 *
 * @param entry - Session entry to inspect.
 * @returns The stored message for `message` entries, a freshly built message for
 *   `custom_message` / `branch_summary` entries, and `undefined` for every other entry type.
 */
function getMessageFromEntry(entry: SessionEntry): AgentMessage | undefined {
  switch (entry.type) {
    case "message":
      return entry.message;
    case "custom_message":
      return createCustomMessage(
        entry.customType,
        entry.content,
        entry.display,
        entry.details,
        entry.timestamp,
        entry.attribution,
      );
    case "branch_summary":
      return createBranchSummaryMessage(entry.summary, entry.fromId, entry.timestamp);
    default:
      return undefined;
  }
}
/** Result produced by `compact`. */
export interface CompactionResult<T = unknown> {
  /** Full summary text; `compact` passes it through `upsertFileOperations` before returning. */
  summary: string;
  /** Short summary produced by a separate, smaller LLM call over the recent messages. */
  shortSummary?: string;
  /** ID of the first entry kept after the cut. */
  firstKeptEntryId: string;
  /** Context tokens taken from the last usable assistant usage before compaction (0 when none exists). */
  tokensBefore: number;
  /** Extra payload; `compact` stores the read/modified file lists here. */
  details?: T;
  /** Data carried across compactions; `compact` fills it via `withOpenAiRemoteCompactionPreserveData`. */
  preserveData?: Record<string, unknown>;
}
/** User-configurable knobs for compaction. */
export interface CompactionSettings {
  /** When false, `shouldCompact` never triggers. */
  enabled: boolean;
  /** `"off"` makes `shouldCompact` return false; the other values are not interpreted in this section. */
  strategy?: "context-full" | "handoff" | "shake" | "off";
  /**
   * Threshold as a percentage of the context window. Used only when `thresholdTokens`
   * is not a positive finite number; clamped to 1..99. Non-positive values select the
   * reserve-based threshold instead.
   */
  thresholdPercent?: number;
  /** Absolute threshold in tokens. A positive finite value wins and is clamped to `[1, contextWindow - 1]`. */
  thresholdTokens?: number;
  /** Floor for the reserved tokens; also scales the `maxTokens` of the summarization calls. */
  reserveTokens: number;
  /** Approximate number of recent tokens to keep un-summarized when choosing the cut point. */
  keepRecentTokens: number;
  /** Not read in this section. NOTE(review): presumably consumed by the session layer — confirm. */
  autoContinue?: boolean;
  /** `false` disables both `remoteEndpoint` and OpenAI remote compaction in `compact`. */
  remoteEnabled?: boolean;
  /** Endpoint handed to `requestRemoteCompaction` by the summary generators when set. */
  remoteEndpoint?: string;
}
/** Default compaction settings. */
export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
  enabled: true,
  strategy: "context-full",
  // Non-positive thresholds are ignored by `resolveThresholdTokens`, which then falls
  // back to `contextWindow - effectiveReserveTokens(...)`.
  thresholdPercent: -1,
  thresholdTokens: -1,
  // Floor only: `effectiveReserveTokens` uses 15% of the context window when that is larger.
  reserveTokens: 16384,
  // Starting budget for `findCutPoint`; `prepareCompaction` may scale it down.
  keepRecentTokens: 20000,
  autoContinue: true,
  remoteEnabled: true,
};
/**
 * Calculate total context tokens from usage.
 * Uses the native totalTokens field when it is non-zero; otherwise falls back to
 * summing the input, output, cacheRead, and cacheWrite components.
 */
/**
 * Total context tokens for a usage record.
 *
 * @param usage - Usage reported for an assistant message.
 * @returns `usage.totalTokens` when truthy, otherwise `input + output + cacheRead + cacheWrite`.
 */
export function calculateContextTokens(usage: Usage): number {
  if (usage.totalTokens) {
    return usage.totalTokens;
  }
  const { input, output, cacheRead, cacheWrite } = usage;
  return input + output + cacheRead + cacheWrite;
}
/**
 * Prompt-side token count for a usage record.
 *
 * @param usage - Usage reported for an assistant message.
 * @returns `input + cacheRead + cacheWrite` when that sum is positive; otherwise
 *   whatever `calculateContextTokens(usage)` yields.
 */
export function calculatePromptTokens(usage: Usage): number {
  const { input, cacheRead, cacheWrite } = usage;
  const promptSide = input + cacheRead + cacheWrite;
  return promptSide > 0 ? promptSide : calculateContextTokens(usage);
}
/**
 * Get usage from an assistant message if available.
 * Skips aborted and error messages as they don't have valid usage data.
 */
/**
 * Read the usage record off an assistant message.
 *
 * @param msg - Any agent message.
 * @returns The message's `usage` when `msg` is an assistant message whose stop reason is
 *   neither `"aborted"` nor `"error"` and whose usage is set; otherwise `undefined`.
 */
function getAssistantUsage(msg: AgentMessage): Usage | undefined {
  if (msg.role !== "assistant" || !("usage" in msg)) {
    return undefined;
  }
  const { stopReason, usage } = msg as AssistantMessage;
  if (stopReason === "aborted" || stopReason === "error") {
    return undefined;
  }
  return usage ? usage : undefined;
}
/**
 * Find the usage of the last assistant message in the session entries that was
 * neither aborted nor errored.
 */
/**
 * Scan session entries from newest to oldest for a usable assistant usage record.
 *
 * @param entries - Session entries in chronological order.
 * @returns The first usage `getAssistantUsage` accepts when walking backwards, or `undefined`.
 */
export function getLastAssistantUsage(entries: SessionEntry[]): Usage | undefined {
  let index = entries.length;
  while (index-- > 0) {
    const candidate = entries[index];
    if (candidate.type !== "message") {
      continue;
    }
    const found = getAssistantUsage(candidate.message);
    if (found) {
      return found;
    }
  }
  return undefined;
}
/**
 * Effective reserve: 15% of the context window (rounded down) or the configured
 * `reserveTokens` floor, whichever is larger.
 */
/**
 * Tokens to hold back from the context window.
 *
 * @param contextWindow - Model context window in tokens.
 * @param settings - Compaction settings; only `reserveTokens` is read.
 * @returns The larger of `floor(contextWindow * 0.15)` and `settings.reserveTokens`.
 */
export function effectiveReserveTokens(contextWindow: number, settings: CompactionSettings): number {
  const proportionalReserve = Math.floor(contextWindow * 0.15);
  const configuredFloor = settings.reserveTokens;
  return Math.max(proportionalReserve, configuredFloor);
}
/**
 * Check if compaction should trigger based on context usage.
 */
/**
 * Decide whether the current context size warrants compaction.
 *
 * @param contextTokens - Tokens currently in context.
 * @param contextWindow - Model context window in tokens.
 * @param settings - Compaction settings.
 * @returns `false` when compaction is disabled, the strategy is `"off"`, or the window is
 *   not positive; otherwise whether `contextTokens` exceeds the resolved threshold.
 */
export function shouldCompact(contextTokens: number, contextWindow: number, settings: CompactionSettings): boolean {
  const disabled = !settings.enabled || settings.strategy === "off";
  if (disabled || contextWindow <= 0) {
    return false;
  }
  return contextTokens > resolveThresholdTokens(contextWindow, settings);
}
/**
 * Resolve the token count above which compaction triggers.
 *
 * Precedence: a positive finite `thresholdTokens` (clamped to `[1, contextWindow - 1]`),
 * then a positive finite `thresholdPercent` (clamped to 1..99), then the reserve-based
 * default `contextWindow - effectiveReserveTokens(...)`.
 *
 * @param contextWindow - Model context window in tokens.
 * @param settings - Compaction settings.
 * @returns The threshold in tokens.
 */
export function resolveThresholdTokens(contextWindow: number, settings: CompactionSettings): number {
  const isPositiveFinite = (value: number | undefined): value is number =>
    typeof value === "number" && Number.isFinite(value) && value > 0;

  const { thresholdTokens: absolute, thresholdPercent: percent } = settings;
  if (isPositiveFinite(absolute)) {
    return Math.min(contextWindow - 1, Math.max(1, absolute));
  }
  if (isPositiveFinite(percent)) {
    const boundedPercent = Math.min(99, Math.max(1, percent));
    return Math.floor(contextWindow * (boundedPercent / 100));
  }
  return contextWindow - effectiveReserveTokens(contextWindow, settings);
}
/**
 * Image content has no tokenizer representation; charge a fixed estimate
 * matching what providers typically bill for inline images.
 */
// Flat per-image charge, in tokens, that `estimateTokens` adds for each image block.
const IMAGE_TOKEN_ESTIMATE = 1200;
/**
 * Estimate token count for a message using cl100k_base via the native
 * tokenizer. This is not Claude's first-party tokenizer (Anthropic doesn't
 * publish one) but is within ~5–10% across English/code text.
 */
/**
 * Estimate the token footprint of a single message.
 *
 * Text fragments are gathered per role and handed to `countTokens` in one call.
 * Each image block adds the flat `IMAGE_TOKEN_ESTIMATE`. Images are charged in
 * `user` content as well as in `hookMessage` / `toolResult` content, so an
 * image-bearing user message is not treated as free when budgeting the cut point.
 *
 * @param message - Message to estimate.
 * @returns Estimated tokens; `0` for roles not handled here and for messages with no countable content.
 */
export function estimateTokens(message: AgentMessage): number {
  const fragments: string[] = [];
  // Flat charge for non-text content, added on top of the tokenizer count.
  let extra = 0;

  // `bashExecution` is matched through a widened cast before the typed switch below.
  if ((message as { role?: string }).role === "bashExecution") {
    const bash = message as { command?: unknown; output?: unknown };
    if (typeof bash.command === "string") fragments.push(bash.command);
    if (typeof bash.output === "string") fragments.push(bash.output);
    return fragments.length === 0 ? 0 : countTokens(fragments);
  }

  switch (message.role) {
    case "user": {
      const content = (message as { content: string | Array<{ type: string; text?: string }> }).content;
      if (typeof content === "string") {
        fragments.push(content);
      } else if (Array.isArray(content)) {
        for (const block of content) {
          if (block.type === "text" && block.text) {
            fragments.push(block.text);
          } else if (block.type === "image") {
            // Same flat charge as images in tool results / hook messages.
            extra += IMAGE_TOKEN_ESTIMATE;
          }
        }
      }
      break;
    }
    case "assistant": {
      const assistant = message as AssistantMessage;
      for (const block of assistant.content) {
        if (block.type === "text") {
          fragments.push(block.text);
        } else if (block.type === "thinking") {
          fragments.push(block.thinking);
        } else if (block.type === "toolCall") {
          // Tool calls are counted as their name plus the JSON-serialized arguments.
          fragments.push(block.name);
          fragments.push(JSON.stringify(block.arguments));
        }
      }
      break;
    }
    case "hookMessage":
    case "toolResult": {
      if (typeof message.content === "string") {
        fragments.push(message.content);
      } else {
        for (const block of message.content) {
          if (block.type === "text" && block.text) {
            fragments.push(block.text);
          } else if (block.type === "image") {
            extra += IMAGE_TOKEN_ESTIMATE;
          }
        }
      }
      break;
    }
    case "branchSummary":
    case "compactionSummary": {
      fragments.push(message.summary);
      break;
    }
    default:
      return 0;
  }

  // Skip the tokenizer call entirely when there is no text to count.
  if (fragments.length === 0) return extra;
  return extra + countTokens(fragments);
}
/**
 * Sum the estimated tokens of every message-producing entry in `[startIndex, endIndex)`.
 *
 * @param entries - Session entries.
 * @param startIndex - First index to include.
 * @param endIndex - Index to stop before.
 * @returns Total of `estimateTokens` over the entries that yield a message.
 */
function estimateEntriesTokens(entries: SessionEntry[], startIndex: number, endIndex: number): number {
  let sum = 0;
  for (let index = startIndex; index < endIndex; index += 1) {
    const message = getMessageFromEntry(entries[index]);
    if (!message) {
      continue;
    }
    sum += estimateTokens(message);
  }
  return sum;
}
/**
 * Find valid cut points: indices of user, assistant, hookMessage, bashExecution,
 * branchSummary, or compactionSummary messages, plus `branch_summary` and
 * `custom_message` entries.
 * Never cut at tool results (they must follow their tool call).
 * When we cut at an assistant message with tool calls, its tool results follow it
 * and will be kept.
 * BashExecutionMessage is treated like a user message (user-initiated context).
 */
/**
 * Collect the indices in `[startIndex, endIndex)` where the session may be cut.
 *
 * @param entries - Session entries.
 * @param startIndex - First index to consider.
 * @param endIndex - Index to stop before.
 * @returns Ascending indices of cuttable entries; tool results and bookkeeping entries are never included.
 */
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
  // Message roles that may begin the kept region.
  const cuttableRoles = ["bashExecution", "hookMessage", "branchSummary", "compactionSummary", "user", "assistant"];
  const indices: number[] = [];

  for (let index = startIndex; index < endIndex; index += 1) {
    const entry = entries[index];
    if (entry.type === "message") {
      if (cuttableRoles.includes(entry.message.role as string)) {
        indices.push(index);
      }
    } else if (entry.type === "branch_summary" || entry.type === "custom_message") {
      // These entries also produce messages, so they are cuttable too.
      indices.push(index);
    }
  }
  return indices;
}
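/**
 * Find the entry that starts the turn containing the given entry index: a user or
 * bashExecution message, or a `branch_summary` / `custom_message` entry.
 * The search runs backwards from `entryIndex` (inclusive) down to `startIndex`.
 * Returns -1 if no turn start is found in that range.
 * BashExecutionMessage is treated like a user message for turn boundaries.
 */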
/**
 * Walk backwards from `entryIndex` to locate the entry that opens the enclosing turn.
 *
 * @param entries - Session entries.
 * @param entryIndex - Index to start searching from (inclusive).
 * @param startIndex - Lowest index to inspect (inclusive).
 * @returns Index of the turn-opening entry, or `-1` when none exists in the range.
 */
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
  for (let cursor = entryIndex; cursor >= startIndex; cursor -= 1) {
    const candidate = entries[cursor];
    switch (candidate.type) {
      case "branch_summary":
      case "custom_message":
        return cursor;
      case "message": {
        const role = candidate.message.role as string;
        if (role === "user" || role === "bashExecution") {
          return cursor;
        }
        break;
      }
      default:
        break;
    }
  }
  return -1;
}
/** Outcome of `findCutPoint`. */
export interface CutPointResult {
  /** Index of the first entry that is kept (everything before it is summarized). */
  firstKeptEntryIndex: number;
  /** Index of the entry that opened the turn being cut into; `-1` when the cut lands on a user message or no turn start was found. */
  turnStartIndex: number;
  /** True when the cut does not land on a user message and a turn start was found. */
  isSplitTurn: boolean;
}
/**
 * Find the cut point in session entries that keeps approximately `keepRecentTokens`.
 *
 * Algorithm: Walk backwards from newest, accumulating estimated message sizes.
 * Stop when we've accumulated >= keepRecentTokens, then cut at the nearest valid
 * cut point at or after that entry.
 *
 * Can cut at user OR assistant messages (never tool results). When cutting at an
 * assistant message with tool calls, its tool results come after and will be kept.
 *
 * Returns CutPointResult with:
 * - firstKeptEntryIndex: the entry index to start keeping from
 * - turnStartIndex: if cutting mid-turn, the user message that started that turn
 * - isSplitTurn: whether we're cutting in the middle of a turn
 *
 * Only considers entries between `startIndex` and `endIndex` (exclusive).
 */
/**
 * Choose where to cut the session so that roughly `keepRecentTokens` of recent messages remain.
 *
 * @param entries - Session entries.
 * @param startIndex - First index eligible for cutting.
 * @param endIndex - Index to stop before.
 * @param keepRecentTokens - Estimated-token budget to keep after the cut.
 * @returns The chosen cut and whether it splits a turn; when no valid cut point exists,
 *   `firstKeptEntryIndex` is `startIndex` and nothing is marked as split.
 */
export function findCutPoint(
  entries: SessionEntry[],
  startIndex: number,
  endIndex: number,
  keepRecentTokens: number,
): CutPointResult {
  const candidates = findValidCutPoints(entries, startIndex, endIndex);
  if (candidates.length === 0) {
    return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
  }

  // Falls back to the earliest candidate when the budget is never reached, or when
  // no candidate lies at or after the entry where it was reached.
  let cut = candidates[0];
  let budgetUsed = 0;
  for (let index = endIndex - 1; index >= startIndex; index -= 1) {
    const entry = entries[index];
    if (entry.type !== "message") continue;

    budgetUsed += estimateTokens(entry.message);
    if (budgetUsed >= keepRecentTokens) {
      const nearest = candidates.find(candidate => candidate >= index);
      if (nearest !== undefined) {
        cut = nearest;
      }
      break;
    }
  }

  // Pull the cut back over any run of non-message entries directly before it,
  // stopping at a message or a compaction boundary.
  while (cut > startIndex) {
    const before = entries[cut - 1];
    if (before.type === "compaction" || before.type === "message") break;
    cut -= 1;
  }

  const cutEntry = entries[cut];
  const startsAtUserMessage = cutEntry.type === "message" && cutEntry.message.role === "user";
  if (startsAtUserMessage) {
    return { firstKeptEntryIndex: cut, turnStartIndex: -1, isSplitTurn: false };
  }
  const turnStartIndex = findTurnStartIndex(entries, cut, startIndex);
  return {
    firstKeptEntryIndex: cut,
    turnStartIndex,
    isSplitTurn: turnStartIndex !== -1,
  };
}
// Prompt templates rendered once at module load, with no template variables.

/** Base prompt `generateSummary` uses when there is no previous summary. */
const SUMMARIZATION_PROMPT = prompt.render(compactionSummaryPrompt);
/** Base prompt `generateSummary` uses when a previous summary is being updated. */
const UPDATE_SUMMARIZATION_PROMPT = prompt.render(compactionUpdateSummaryPrompt);
/** Prompt appended by `generateShortSummary`. */
const SHORT_SUMMARY_PROMPT = prompt.render(compactionShortSummaryPrompt);
/** Handoff prompt returned by `renderHandoffPrompt` when no custom instructions are given. */
const HANDOFF_DOCUMENT_PROMPT = prompt.render(handoffDocumentPrompt);
/** Rendered auto-handoff threshold focus prompt; exported and not otherwise used in this section. */
export const AUTO_HANDOFF_THRESHOLD_FOCUS = prompt.render(autoHandoffThresholdFocusPrompt);
/**
 * Render extra context lines as an `<additional-context>` block of `- ` bullets.
 *
 * @param context - Lines to include, if any.
 * @returns The block followed by a blank line, or `""` when there is nothing to render.
 */
function formatAdditionalContext(context: string[] | undefined): string {
  const items = context ?? [];
  if (items.length === 0) {
    return "";
  }
  const bullets = items.map(item => `- ${item}`);
  // The two trailing empty strings reproduce the block's closing "\n\n".
  return ["<additional-context>", ...bullets, "</additional-context>", "", ""].join("\n");
}
/**
 * Maps the non-special `ThinkingLevel` values to their `Effort` counterparts.
 * Exhaustive over the union; throws for `Off`/`Inherit` to surface logic
 * errors in callers that forgot to filter those out. Never use a TS cast for
 * this — `ThinkingLevel` is a string-union over distinct concepts (Off /
 * Inherit are not Efforts), and a cast hides the contract.
 */
/**
 * Convert a concrete thinking level into the matching effort.
 *
 * @param level - Thinking level; `Off` and `Inherit` must be filtered out by the caller.
 * @returns The `Effort` with the same name as `level`.
 * @throws Error when `level` is `Off` or `Inherit`.
 */
function effortFromThinkingLevel(level: ThinkingLevel): Effort {
  switch (level) {
    // The two non-effort levels are rejected up front; they have no Effort counterpart.
    case ThinkingLevel.Off:
    case ThinkingLevel.Inherit: {
      const reason = `effortFromThinkingLevel: ${level} must be handled by caller`;
      throw new Error(reason);
    }
    case ThinkingLevel.XHigh:
      return Effort.XHigh;
    case ThinkingLevel.High:
      return Effort.High;
    case ThinkingLevel.Medium:
      return Effort.Medium;
    case ThinkingLevel.Low:
      return Effort.Low;
    case ThinkingLevel.Minimal:
      return Effort.Minimal;
  }
}
/**
 * Resolves the reasoning effort to send on a compaction LLM call.
 *
 * - Explicit `Off` → `undefined` (omit reasoning entirely; the user said no thinking).
 * - `undefined` / `Inherit` → historical `Effort.High` default → clamped per model
 *   (preserves current behavior for users who never touched the dial).
 * - Explicit effort → respect user choice → clamped per model.
 *
 * The clamp routes through `clampThinkingLevelForModel`, which returns
 * `undefined` for models with `compat.supportsReasoningEffort: false`
 * (e.g. `xai-oauth/grok-build`). That `undefined` then flows through to the
 * openai-responses mapper where `modelOmitsReasoningEffort` short-circuits
 * the wire param — no `requireSupportedEffort` throw.
 */
/**
 * Pick the reasoning effort for a compaction-related LLM call.
 *
 * @param model - Target model, used for clamping.
 * @param level - Session thinking level, if known.
 * @returns `undefined` for `Off`; otherwise the requested effort (`Effort.High` when
 *   `level` is `undefined` or `Inherit`) as returned by `clampThinkingLevelForModel`.
 */
function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined): Effort | undefined {
  if (level === ThinkingLevel.Off) {
    return undefined;
  }

  let requested: Effort;
  if (level === undefined || level === ThinkingLevel.Inherit) {
    // No explicit choice: keep the historical default.
    requested = Effort.High;
  } else {
    requested = effortFromThinkingLevel(level);
  }
  return clampThinkingLevelForModel(model, requested);
}
/**
 * Build the error thrown when an LLM summarization call ends with
 * `stopReason === "error"`. Carries the provider's HTTP `errorStatus`
 * onto a top-level `.status` field so callers (notably
 * `AgentSession.#isCompactionAuthFailure`) can branch on 401/403 without
 * regex-scraping `error.message`. The `auth_unavailable` synthetic
 * (pi-native gateway) does not populate `errorStatus`, hence the legacy
 * message-based check is still required upstream — see issue #986.
 */
/**
 * Build the `Error` for a failed summarization-style LLM call.
 *
 * @param prefix - Human-readable label placed before the provider message.
 * @param response - Assistant response that ended in an error.
 * @returns An error whose message is `"<prefix>: <errorMessage or 'Unknown error'>"`, with
 *   `status` set to `response.errorStatus` only when that field is defined.
 */
function createSummarizationError(prefix: string, response: AssistantMessage): Error {
  const detail = response.errorMessage || "Unknown error";
  const failure: Error & { status?: number } = new Error(`${prefix}: ${detail}`);

  const { errorStatus } = response;
  if (errorStatus === undefined) {
    // Leave `status` absent rather than assigning undefined.
    return failure;
  }
  failure.status = errorStatus;
  return failure;
}
/**
 * Options for `generateSummary`, which generates a summary of the conversation using the LLM.
 * If previousSummary is provided, `generateSummary` uses the update prompt to merge.
 */
export interface SummaryOptions {
  /** Replaces the built-in base prompt in `generateSummary` when set. */
  promptOverride?: string;
  /** Lines rendered into an `<additional-context>` bullet block inside the prompt. */
  extraContext?: string[];
  /** When set, the summary generators call `requestRemoteCompaction` on this endpoint instead of the model. */
  remoteEndpoint?: string;
  /** Instructions for OpenAI remote compaction in `compact`; `SUMMARIZATION_SYSTEM_PROMPT` is used when absent. */
  remoteInstructions?: string;
  /** Forwarded unchanged to the LLM call options. */
  initiatorOverride?: MessageAttribution;
  /** Forwarded unchanged to the LLM call options. */
  metadata?: Record<string, unknown>;
  /** Replaces the default `convertToLlm` when turning agent messages into LLM messages. */
  convertToLlm?: ConvertToLlm;
  /**
   * Optional telemetry handle. When provided, every LLM call emitted during
   * compaction is wrapped in an OTEL chat span tagged with
   * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
   * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
   */
  telemetry?: AgentTelemetry;
  /**
   * Active session thinking level. Threaded from `agent-session.ts` so
   * compaction honors the user's `/model` thinking selection instead of
   * silently overriding it with `Effort.High` (the historical default).
   * `undefined` / `ThinkingLevel.Inherit` falls back to that historical
   * default; `ThinkingLevel.Off` omits reasoning entirely. See
   * `resolveCompactionEffort` for the conversion contract.
   */
  thinkingLevel?: ThinkingLevel;
}
/**
 * Summarize a run of messages, optionally merging into a previous summary.
 *
 * @param currentMessages - Messages to summarize.
 * @param model - Model used for the local LLM call.
 * @param reserveTokens - Reserve budget; the call's `maxTokens` is 80% of it (rounded down).
 * @param apiKey - API key for the LLM call.
 * @param signal - Optional abort signal.
 * @param customInstructions - Extra focus text appended to the base prompt.
 * @param previousSummary - Existing summary; selects the update prompt and is embedded in the request.
 * @param options - Prompt override, extra context, remote endpoint, and call plumbing.
 * @returns The remote service's summary when `options.remoteEndpoint` is set; otherwise the
 *   text blocks of the model response joined with newlines.
 * @throws Error (via `createSummarizationError`) when the model response has `stopReason === "error"`.
 */
export async function generateSummary(
  currentMessages: AgentMessage[],
  model: Model,
  reserveTokens: number,
  apiKey: string,
  signal?: AbortSignal,
  customInstructions?: string,
  previousSummary?: string,
  options?: SummaryOptions,
): Promise<string> {
  // Base instructions: explicit override first, else update-vs-fresh prompt.
  const defaultPrompt = previousSummary ? UPDATE_SUMMARIZATION_PROMPT : SUMMARIZATION_PROMPT;
  let instructions = options?.promptOverride ? options.promptOverride : defaultPrompt;
  if (customInstructions) {
    instructions = `${instructions}\n\nAdditional focus: ${customInstructions}`;
  }

  const toLlm = options?.convertToLlm ?? convertToLlm;
  const transcript = serializeConversation(toLlm(currentMessages));
  const sections = [`<conversation>\n${transcript}\n</conversation>\n\n`];
  if (previousSummary) {
    sections.push(`<previous-summary>\n${previousSummary}\n</previous-summary>\n\n`);
  }
  sections.push(formatAdditionalContext(options?.extraContext), instructions);
  const promptText = sections.join("");

  // Remote endpoint takes precedence over the local model call.
  const remoteEndpoint = options?.remoteEndpoint;
  if (remoteEndpoint) {
    const { summary } = await requestRemoteCompaction(
      remoteEndpoint,
      { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
      signal,
    );
    return summary;
  }

  const response = await instrumentedCompleteSimple(
    model,
    {
      systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
      messages: [
        {
          role: "user" as const,
          content: [{ type: "text" as const, text: promptText }],
          timestamp: Date.now(),
        },
      ],
    },
    {
      maxTokens: Math.floor(0.8 * reserveTokens),
      signal,
      apiKey,
      reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
      initiatorOverride: options?.initiatorOverride,
      metadata: options?.metadata,
    },
    { telemetry: options?.telemetry, oneshotKind: "compaction_summary" },
  );
  if (response.stopReason === "error") {
    throw createSummarizationError("Summarization failed", response);
  }

  const texts: string[] = [];
  for (const part of response.content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
/** Options for `generateHandoff`. */
export interface HandoffOptions {
  /** System prompt passed through to the handoff LLM call. */
  systemPrompt: string[];
  /** Tool definitions included in the request; `generateHandoff` sends `toolChoice: "none"` with them. */
  tools?: AgentTool<any>[];
  /** Extra focus text rendered into the handoff prompt as `additionalFocus`. */
  customInstructions?: string;
  /** Replaces the default `convertToLlm` when turning agent messages into LLM messages. */
  convertToLlm?: ConvertToLlm;
  /** Forwarded unchanged to the LLM call options. */
  initiatorOverride?: MessageAttribution;
  /** Forwarded unchanged to the LLM call options. */
  metadata?: Record<string, unknown>;
  /**
   * Optional telemetry handle. When provided, the handoff LLM call is
   * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
   */
  telemetry?: AgentTelemetry;
  /**
   * Active session thinking level. Threaded from `agent-session.ts` so
   * handoff generation honors the user's `/model` thinking selection
   * instead of silently overriding it with `Effort.High`. See
   * `resolveCompactionEffort` for the conversion contract.
   */
  thinkingLevel?: ThinkingLevel;
}
/**
 * Produce the handoff-document prompt.
 *
 * @param customInstructions - Optional extra focus for the handoff document.
 * @returns The pre-rendered default prompt when no instructions are given; otherwise the
 *   template rendered with `additionalFocus` set to the instructions.
 */
export function renderHandoffPrompt(customInstructions?: string): string {
  return customInstructions
    ? prompt.render(handoffDocumentPrompt, { additionalFocus: customInstructions })
    : HANDOFF_DOCUMENT_PROMPT;
}
/**
 * Ask the model for a handoff document covering the given conversation.
 *
 * The converted conversation is sent as-is, followed by one extra user message holding the
 * handoff prompt. Tools are passed along, but the call uses `toolChoice: "none"`.
 *
 * @param messages - Conversation to hand off.
 * @param model - Model used for the call.
 * @param apiKey - API key for the call.
 * @param options - System prompt, tools, and call plumbing.
 * @param signal - Optional abort signal.
 * @returns The text blocks of the response joined with newlines.
 * @throws Error (via `createSummarizationError`) when the response has `stopReason === "error"`.
 */
export async function generateHandoff(
  messages: AgentMessage[],
  model: Model,
  apiKey: string,
  options: HandoffOptions,
  signal?: AbortSignal,
): Promise<string> {
  const toLlm = options.convertToLlm ?? convertToLlm;
  const history = toLlm(messages);
  const handoffRequest: Message = {
    role: "user",
    content: [{ type: "text", text: renderHandoffPrompt(options.customInstructions) }],
    attribution: "agent",
    timestamp: Date.now(),
  };
  const requestMessages: Message[] = [...history, handoffRequest];

  const response = await instrumentedCompleteSimple(
    model,
    { systemPrompt: options.systemPrompt, messages: requestMessages, tools: options.tools },
    {
      apiKey,
      signal,
      reasoning: resolveCompactionEffort(model, options.thinkingLevel),
      toolChoice: "none",
      initiatorOverride: options.initiatorOverride,
      metadata: options.metadata,
    },
    { telemetry: options.telemetry, oneshotKind: "handoff" },
  );
  if (response.stopReason === "error") {
    throw createSummarizationError("Handoff generation failed", response);
  }

  const texts: string[] = [];
  for (const part of response.content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
/**
 * Produce a short summary of the recent messages, given the longer history summary.
 *
 * @param recentMessages - Messages kept after the cut.
 * @param historySummary - Full summary of the earlier history, embedded as `<previous-summary>` when set.
 * @param model - Model used for the local LLM call.
 * @param reserveTokens - Reserve budget; `maxTokens` is the smaller of 512 and 20% of it (rounded down).
 * @param apiKey - API key for the LLM call.
 * @param signal - Optional abort signal.
 * @param options - Extra context, remote endpoint, and call plumbing.
 * @returns The remote service's summary when `options.remoteEndpoint` is set; otherwise the
 *   text blocks of the model response joined with newlines.
 * @throws Error (via `createSummarizationError`) when the model response has `stopReason === "error"`.
 */
async function generateShortSummary(
  recentMessages: AgentMessage[],
  historySummary: string | undefined,
  model: Model,
  reserveTokens: number,
  apiKey: string,
  signal?: AbortSignal,
  options?: SummaryOptions,
): Promise<string> {
  const toLlm = options?.convertToLlm ?? convertToLlm;
  const transcript = serializeConversation(toLlm(recentMessages));
  const promptText = [
    `<conversation>\n${transcript}\n</conversation>\n\n`,
    historySummary ? `<previous-summary>\n${historySummary}\n</previous-summary>\n\n` : "",
    formatAdditionalContext(options?.extraContext),
    SHORT_SUMMARY_PROMPT,
  ].join("");

  // Remote endpoint takes precedence over the local model call.
  const remoteEndpoint = options?.remoteEndpoint;
  if (remoteEndpoint) {
    const { summary } = await requestRemoteCompaction(
      remoteEndpoint,
      { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
      signal,
    );
    return summary;
  }

  const response = await instrumentedCompleteSimple(
    model,
    {
      systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
      messages: [{ role: "user", content: [{ type: "text", text: promptText }], timestamp: Date.now() }],
    },
    {
      maxTokens: Math.min(512, Math.floor(0.2 * reserveTokens)),
      signal,
      apiKey,
      reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
      initiatorOverride: options?.initiatorOverride,
      metadata: options?.metadata,
    },
    { telemetry: options?.telemetry, oneshotKind: "compaction_short_summary" },
  );
  if (response.stopReason === "error") {
    throw createSummarizationError("Short summary failed", response);
  }

  const texts: string[] = [];
  for (const part of response.content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
/** Everything `compact` needs, as computed by `prepareCompaction`. */
export interface CompactionPreparation {
  /** ID of the entry at the chosen cut point. */
  firstKeptEntryId: string;
  /** Messages between the previous compaction and the history end; summarized as history. */
  messagesToSummarize: AgentMessage[];
  /** Messages from the turn start up to the cut; non-empty only when the cut splits a turn. */
  turnPrefixMessages: AgentMessage[];
  /** Messages from the cut point to the end of the path. */
  recentMessages: AgentMessage[];
  /** Whether the cut lands in the middle of a turn. */
  isSplitTurn: boolean;
  /** Context tokens from the last usable assistant usage, or 0 when none exists. */
  tokensBefore: number;
  /** Summary of the previous compaction entry, when one exists. */
  previousSummary?: string;
  /** `preserveData` of the previous compaction entry, when one exists. */
  previousPreserveData?: Record<string, unknown>;
  /** File operations gathered from the previous compaction details and the summarized messages. */
  fileOps: FileOperations;
  /** The settings the preparation was computed with. */
  settings: CompactionSettings;
}
/**
 * Work out what a compaction pass over `pathEntries` would summarize and keep.
 *
 * @param pathEntries - Entries on the current session path, oldest first.
 * @param settings - Compaction settings; `keepRecentTokens` drives the cut point.
 * @returns The preparation, or `undefined` when the path already ends in a compaction,
 *   the cut entry has no ID, or there is nothing to summarize.
 */
export function prepareCompaction(
  pathEntries: SessionEntry[],
  settings: CompactionSettings,
): CompactionPreparation | undefined {
  const entryCount = pathEntries.length;
  if (entryCount > 0 && pathEntries[entryCount - 1].type === "compaction") {
    return undefined;
  }

  // Locate the most recent compaction entry; -1 when the path has none.
  let prevCompactionIndex = entryCount - 1;
  while (prevCompactionIndex >= 0 && pathEntries[prevCompactionIndex].type !== "compaction") {
    prevCompactionIndex -= 1;
  }
  const boundaryStart = prevCompactionIndex + 1;
  const boundaryEnd = entryCount;

  const lastUsage = getLastAssistantUsage(pathEntries);
  const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0;

  // When the reported prompt tokens exceed the local estimate, shrink the keep budget
  // by the same ratio.
  let keepRecentTokens = settings.keepRecentTokens;
  if (lastUsage) {
    const estimated = estimateEntriesTokens(pathEntries, boundaryStart, boundaryEnd);
    const reported = calculatePromptTokens(lastUsage);
    const ratio = estimated > 0 ? reported / estimated : 0;
    if (Number.isFinite(ratio) && ratio > 1) {
      keepRecentTokens = Math.max(1, Math.floor(keepRecentTokens / ratio));
    }
  }

  const { firstKeptEntryIndex, turnStartIndex, isSplitTurn } = findCutPoint(
    pathEntries,
    boundaryStart,
    boundaryEnd,
    keepRecentTokens,
  );
  const firstKeptEntryId = pathEntries[firstKeptEntryIndex]?.id;
  if (!firstKeptEntryId) {
    return undefined;
  }

  // Gather the messages produced by entries in [from, to).
  const collectMessages = (from: number, to: number): AgentMessage[] => {
    const collected: AgentMessage[] = [];
    for (let index = from; index < to; index += 1) {
      const message = getMessageFromEntry(pathEntries[index]);
      if (message) collected.push(message);
    }
    return collected;
  };

  const historyEnd = isSplitTurn ? turnStartIndex : firstKeptEntryIndex;
  const messagesToSummarize = collectMessages(boundaryStart, historyEnd);
  const turnPrefixMessages = isSplitTurn ? collectMessages(turnStartIndex, firstKeptEntryIndex) : [];
  const recentMessages = collectMessages(firstKeptEntryIndex, boundaryEnd);
  if (messagesToSummarize.length === 0 && turnPrefixMessages.length === 0) {
    return undefined;
  }

  const prevCompaction =
    prevCompactionIndex >= 0 ? (pathEntries[prevCompactionIndex] as CompactionEntry) : undefined;
  const previousSummary: string | undefined = prevCompaction?.summary;
  const previousPreserveData: Record<string, unknown> | undefined = prevCompaction?.preserveData;

  const fileOps = extractFileOperations(messagesToSummarize, pathEntries, prevCompactionIndex);
  // The turn prefix is empty unless the turn was split, so this loop is a no-op otherwise.
  for (const message of turnPrefixMessages) {
    extractFileOpsFromMessage(message, fileOps);
  }

  return {
    firstKeptEntryId,
    messagesToSummarize,
    turnPrefixMessages,
    recentMessages,
    isSplitTurn,
    tokensBefore,
    previousSummary,
    previousPreserveData,
    fileOps,
    settings,
  };
}
// Rendered once at module load; appended after the conversation by `generateTurnPrefixSummary`.
const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPrompt);
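/**
 * Generate summaries for compaction using prepared data.
 * Returns CompactionResult - SessionManager adds id/parentId when saving.
 *
 * @param preparation - Pre-calculated preparation from prepareCompaction()
 * @param model - Model used for the summarization calls
 * @param apiKey - API key for the summarization calls
 * @param customInstructions - Optional custom focus for the summary
 * @param signal - Optional abort signal
 * @param options - Optional summary options (prompt override, extra context, telemetry, ...)
 */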
/**
 * Run a compaction pass from prepared data.
 *
 * Steps: optionally request OpenAI remote compaction (a failure is logged and ignored),
 * build the history summary (plus a turn-prefix summary when the cut splits a turn),
 * build the short summary, then add the file lists to the summary text.
 *
 * Changes from the earlier behavior:
 * - `firstKeptEntryId` is validated before any LLM call, so an unusable preparation
 *   fails fast instead of after the summaries have been generated.
 * - The caller's `convertToLlm` is forwarded to the short-summary call as well, so all
 *   summary requests convert messages the same way.
 *
 * @param preparation - Output of `prepareCompaction`.
 * @param model - Model used for the summarization calls.
 * @param apiKey - API key for the summarization calls.
 * @param customInstructions - Optional extra focus for the history summary.
 * @param signal - Optional abort signal.
 * @param options - Optional prompt override, extra context, converter, telemetry, thinking level.
 * @returns The compaction result to persist.
 * @throws Error when `preparation.firstKeptEntryId` is empty, or when a summarization call fails.
 */
export async function compact(
  preparation: CompactionPreparation,
  model: Model,
  apiKey: string,
  customInstructions?: string,
  signal?: AbortSignal,
  options?: SummaryOptions,
): Promise<CompactionResult> {
  const {
    firstKeptEntryId,
    messagesToSummarize,
    turnPrefixMessages,
    recentMessages,
    isSplitTurn,
    tokensBefore,
    previousSummary,
    previousPreserveData,
    fileOps,
    settings,
  } = preparation;

  // Fail fast: without an anchor entry the result cannot be used, so spend no LLM calls.
  if (!firstKeptEntryId) {
    throw new Error("First kept entry has no ID - session may need migration");
  }

  const summaryOptions: SummaryOptions = {
    promptOverride: options?.promptOverride,
    extraContext: options?.extraContext,
    // The endpoint comes from settings, not from the caller's options.
    remoteEndpoint: settings.remoteEnabled === false ? undefined : settings.remoteEndpoint,
    remoteInstructions: options?.remoteInstructions,
    initiatorOverride: options?.initiatorOverride,
    metadata: options?.metadata,
    convertToLlm: options?.convertToLlm,
    telemetry: options?.telemetry,
    thinkingLevel: options?.thinkingLevel,
  };

  let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
  if (settings.remoteEnabled !== false && shouldUseOpenAiRemoteCompaction(model)) {
    const previousRemoteCompaction = getPreservedOpenAiRemoteCompactionData(previousPreserveData);
    const remoteMessages = [...messagesToSummarize, ...turnPrefixMessages, ...recentMessages];
    // Only reuse replacement history that was produced for the same provider.
    const previousReplacementHistory =
      previousRemoteCompaction?.provider === model.provider
        ? previousRemoteCompaction.replacementHistory
        : undefined;
    const remoteHistory = buildOpenAiNativeHistory(
      (summaryOptions.convertToLlm ?? convertToLlm)(remoteMessages),
      model,
      previousReplacementHistory,
    );
    if (remoteHistory.length > 0) {
      try {
        const remote = await requestOpenAiRemoteCompaction(
          model,
          apiKey,
          remoteHistory,
          summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
          signal,
        );
        preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
      } catch (err) {
        // Best effort: the summaries below are generated regardless of this outcome.
        logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
          error: err instanceof Error ? err.message : String(err),
          model: model.id,
          provider: model.provider,
        });
      }
    }
  }

  let summary: string;
  if (isSplitTurn && turnPrefixMessages.length > 0) {
    // History and turn-prefix summaries are independent, so run them concurrently.
    const [historyResult, turnPrefixResult] = await Promise.all([
      messagesToSummarize.length > 0
        ? generateSummary(
            messagesToSummarize,
            model,
            settings.reserveTokens,
            apiKey,
            signal,
            customInstructions,
            previousSummary,
            summaryOptions,
          )
        : Promise.resolve("No prior history."),
      generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal, summaryOptions),
    ]);
    summary = `${historyResult}\n\n---\n\n**Turn Context (split turn):**\n\n${turnPrefixResult}`;
  } else if (messagesToSummarize.length > 0) {
    summary = await generateSummary(
      messagesToSummarize,
      model,
      settings.reserveTokens,
      apiKey,
      signal,
      customInstructions,
      previousSummary,
      summaryOptions,
    );
  } else if (previousSummary) {
    summary = previousSummary;
  } else {
    summary = "No prior history.";
  }

  // The short summary sees the summary text before the file lists are added to it.
  const shortSummary = await generateShortSummary(
    recentMessages,
    summary,
    model,
    settings.reserveTokens,
    apiKey,
    signal,
    {
      extraContext: summaryOptions.extraContext,
      remoteEndpoint: summaryOptions.remoteEndpoint,
      initiatorOverride: summaryOptions.initiatorOverride,
      metadata: summaryOptions.metadata,
      convertToLlm: summaryOptions.convertToLlm,
      telemetry: summaryOptions.telemetry,
      thinkingLevel: summaryOptions.thinkingLevel,
    },
  );

  const { readFiles, modifiedFiles } = computeFileLists(fileOps);
  summary = upsertFileOperations(summary, readFiles, modifiedFiles);

  return {
    summary,
    shortSummary,
    firstKeptEntryId,
    tokensBefore,
    details: { readFiles, modifiedFiles } as CompactionDetails,
    preserveData,
  };
}
/**
 * Generate a summary for a turn prefix (when splitting a turn).
 */
/**
 * Summarize the leading part of a turn that the cut point split in two.
 *
 * @param messages - Messages from the turn start up to the cut.
 * @param model - Model used for the LLM call.
 * @param reserveTokens - Reserve budget; the call's `maxTokens` is 50% of it (rounded down).
 * @param apiKey - API key for the LLM call.
 * @param signal - Optional abort signal.
 * @param options - Converter override and call plumbing; any remote endpoint is not used here.
 * @returns The text blocks of the response joined with newlines.
 * @throws Error (via `createSummarizationError`) when the response has `stopReason === "error"`.
 */
async function generateTurnPrefixSummary(
  messages: AgentMessage[],
  model: Model,
  reserveTokens: number,
  apiKey: string,
  signal?: AbortSignal,
  options?: SummaryOptions,
): Promise<string> {
  const toLlm = options?.convertToLlm ?? convertToLlm;
  const transcript = serializeConversation(toLlm(messages));
  const promptText = `<conversation>\n${transcript}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;

  const response = await instrumentedCompleteSimple(
    model,
    {
      systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
      messages: [
        {
          role: "user" as const,
          content: [{ type: "text" as const, text: promptText }],
          timestamp: Date.now(),
        },
      ],
    },
    {
      maxTokens: Math.floor(0.5 * reserveTokens),
      signal,
      apiKey,
      reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
      initiatorOverride: options?.initiatorOverride,
      metadata: options?.metadata,
    },
    { telemetry: options?.telemetry, oneshotKind: "compaction_turn_prefix" },
  );
  if (response.stopReason === "error") {
    throw createSummarizationError("Turn prefix summarization failed", response);
  }

  const texts: string[] = [];
  for (const part of response.content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}