* Test context overflow error handling across providers.
*
* Context overflow occurs when the input (prompt + history) exceeds
* the model's context window. This is different from output token limits.
*
* Expected behavior: All providers should return stopReason: "error"
* with an errorMessage that indicates the context was too large,
* OR (for z.ai) return successfully with usage.input > contextWindow.
*
* The isContextOverflow() function must return true for all providers.
*/
import { afterAll, beforeAll, describe, expect, it } from "bun:test";
import type { ChildProcess } from "node:child_process";
import { execSync, spawn } from "node:child_process";
import { getBundledModel } from "@oh-my-pi/pi-ai/models";
import { complete } from "@oh-my-pi/pi-ai/stream";
import type { AssistantMessage, Context, Model, Usage } from "@oh-my-pi/pi-ai/types";
import { isContextOverflow } from "@oh-my-pi/pi-ai/utils/overflow";
import { $which } from "@oh-my-pi/pi-utils";
import { e2eApiKey, resolveApiKey } from "./oauth";
const oauthTokens = await Promise.all([
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
resolveApiKey("openai-codex"),
]);
const [githubCopilotToken, geminiCliToken, antigravityToken, openaiCodexToken] = oauthTokens;
const LOREM_IPSUM = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. `;
function generateOverflowContent(contextWindow: number): string {
const targetTokens = contextWindow + 10000;
const targetChars = targetTokens * 4 * 1.5;
const repetitions = Math.ceil(targetChars / LOREM_IPSUM.length);
return LOREM_IPSUM.repeat(repetitions);
}
interface OverflowResult {
provider: string;
model: string;
contextWindow: number;
stopReason: string;
errorMessage: string | undefined;
usage: Usage;
hasUsageData: boolean;
response: AssistantMessage;
}
async function testContextOverflow(model: Model, apiKey: string): Promise<OverflowResult> {
const overflowContent = generateOverflowContent(model.contextWindow);
const context: Context = {
systemPrompt: ["You are a helpful assistant."],
messages: [
{
role: "user",
content: overflowContent,
timestamp: Date.now(),
},
],
};
const response = await complete(model, context, { apiKey });
const hasUsageData = response.usage.input > 0 || response.usage.cacheRead > 0;
return {
provider: model.provider,
model: model.id,
contextWindow: model.contextWindow,
stopReason: response.stopReason,
errorMessage: response.errorMessage,
usage: response.usage,
hasUsageData,
response,
};
}
function logResult(result: OverflowResult) {
console.log(`\n${result.provider} / ${result.model}:`);
console.log(` contextWindow: ${result.contextWindow}`);
console.log(` stopReason: ${result.stopReason}`);
console.log(` errorMessage: ${result.errorMessage}`);
console.log(` usage: ${JSON.stringify(result.usage)}`);
console.log(` hasUsageData: ${result.hasUsageData}`);
}
describe("Context overflow error handling", () => {
describe.skipIf(!e2eApiKey("ANTHROPIC_API_KEY"))("Anthropic (API Key)", () => {
it("claude-haiku-4-5 - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("anthropic", "claude-haiku-4-5-20251001");
const result = await testContextOverflow(model, Bun.env.ANTHROPIC_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("ANTHROPIC_API_KEY"))("Anthropic (OAuth)", () => {
it("claude-sonnet-4 - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("anthropic", "claude-sonnet-4-20250514");
const result = await testContextOverflow(model, e2eApiKey("ANTHROPIC_API_KEY")!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe("GitHub Copilot (OAuth)", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("github-copilot", "gpt-4o");
const result = await testContextOverflow(model, githubCopilotToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the limit of \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("github-copilot", "claude-sonnet-4");
const result = await testContextOverflow(model, githubCopilotToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the limit of \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
describe.skipIf(!e2eApiKey("OPENAI_API_KEY"))("OpenAI Completions", () => {
it("gpt-4o-mini - should detect overflow via isContextOverflow", async () => {
const model = { ...getBundledModel("openai", "gpt-4o-mini"), api: "openai-completions" as const };
const result = await testContextOverflow(model, Bun.env.OPENAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("OPENAI_API_KEY"))("OpenAI Responses", () => {
it("gpt-4o - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openai", "gpt-4o");
const result = await testContextOverflow(model, Bun.env.OPENAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the context window/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("GEMINI_API_KEY"))("Google", () => {
it("gemini-2.0-flash - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("google", "gemini-2.0-flash");
const result = await testContextOverflow(model, Bun.env.GEMINI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe("Google Gemini CLI (OAuth)", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("google-gemini-cli", "gemini-2.5-flash");
const result = await testContextOverflow(model, geminiCliToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
describe("Google Antigravity (OAuth)", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("google-antigravity", "gemini-3-flash");
const result = await testContextOverflow(model, antigravityToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("google-antigravity", "claude-sonnet-4-5");
const result = await testContextOverflow(model, antigravityToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
describe("OpenAI Codex (OAuth)", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should detect overflow via isContextOverflow",
async () => {
const model = getBundledModel("openai-codex", "gpt-5.2-codex");
const result = await testContextOverflow(model, openaiCodexToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
describe.skipIf(!e2eApiKey("XAI_API_KEY"))("xAI", () => {
it("grok-3-fast - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("xai", "grok-3-fast");
const result = await testContextOverflow(model, Bun.env.XAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum prompt length is \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("GROQ_API_KEY"))("Groq", () => {
it("llama-3.3-70b-versatile - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("groq", "llama-3.3-70b-versatile");
const result = await testContextOverflow(model, Bun.env.GROQ_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/reduce the length of the messages/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("CEREBRAS_API_KEY"))("Cerebras", () => {
it("qwen-3-235b - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("cerebras", "qwen-3-235b-a22b-instruct-2507");
const result = await testContextOverflow(model, Bun.env.CEREBRAS_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/4(00|13|29).*\(no body\)/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("ZAI_API_KEY"))("z.ai", () => {
it("glm-4.5-flash - should detect overflow via isContextOverflow (silent overflow or rate limit)", async () => {
const model = getBundledModel("zai", "glm-4.5-flash");
const result = await testContextOverflow(model, Bun.env.ZAI_API_KEY!);
logResult(result);
if (result.stopReason === "stop") {
expect(result.hasUsageData).toBe(true);
expect(result.usage.input).toBeGreaterThan(model.contextWindow);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
} else {
console.log(" z.ai returned error (possibly rate limited), skipping overflow detection");
}
}, 120000);
});
describe.skipIf(!e2eApiKey("MISTRAL_API_KEY"))("Mistral", () => {
it("devstral-medium-latest - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("mistral", "devstral-medium-latest");
const result = await testContextOverflow(model, Bun.env.MISTRAL_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!e2eApiKey("OPENROUTER_API_KEY"))("OpenRouter", () => {
it("anthropic/claude-sonnet-4 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openrouter", "anthropic/claude-sonnet-4");
const result = await testContextOverflow(model, Bun.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
it("deepseek/deepseek-v3.2 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openrouter", "deepseek/deepseek-v3.2");
const result = await testContextOverflow(model, Bun.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
it("mistralai/mistral-large-2512 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openrouter", "mistralai/mistral-large-2512");
const result = await testContextOverflow(model, Bun.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
it("google/gemini-2.5-flash via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openrouter", "google/gemini-2.5-flash");
const result = await testContextOverflow(model, Bun.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
it("meta-llama/llama-4-maverick via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getBundledModel("openrouter", "meta-llama/llama-4-maverick");
const result = await testContextOverflow(model, Bun.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
const ollamaInstalled = !!Bun.env.PI_LOCAL_LLM && !!$which("ollama");
describe.skipIf(!ollamaInstalled)("Ollama (local)", () => {
let ollamaProcess: ChildProcess | null = null;
let model: Model<"openai-completions"> | undefined;
beforeAll(async () => {
if (!ollamaInstalled) return;
try {
execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
} catch {
console.log("Pulling gpt-oss:20b model for Ollama overflow tests...");
try {
execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
} catch {
console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
return;
}
}
ollamaProcess = spawn("ollama", ["serve"], {
stdio: "ignore",
});
await new Promise<void>(resolve => {
const checkServer = async () => {
try {
const response = await fetch("http://localhost:11434/api/tags");
if (response.ok) {
resolve();
} else {
setTimeout(checkServer, 500);
}
} catch {
setTimeout(checkServer, 500);
}
};
setTimeout(checkServer, 1000);
});
model = {
id: "gpt-oss:20b",
api: "openai-completions",
provider: "ollama",
baseUrl: "http://localhost:11434/v1",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 16000,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
name: "Ollama GPT-OSS 20B",
};
}, 60000);
afterAll(() => {
if (ollamaProcess) {
ollamaProcess.kill("SIGTERM");
ollamaProcess = null;
}
});
it("gpt-oss:20b - should detect overflow via isContextOverflow (ollama silently truncates)", async () => {
if (!model) return;
const result = await testContextOverflow(model, "ollama");
logResult(result);
if (result.stopReason === "stop" && result.hasUsageData) {
console.log(" Ollama silently truncated input to", result.usage.input, "tokens");
} else if (result.stopReason === "error") {
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}
}, 300000);
});
let lmStudioRunning = false;
if (!Bun.env.PI_NO_LOCAL_LLM) {
try {
execSync("curl -s --max-time 1 http://localhost:1234/v1/models > /dev/null", { stdio: "ignore" });
lmStudioRunning = true;
} catch {
lmStudioRunning = false;
}
}
describe.skipIf(!lmStudioRunning)("LM Studio (local)", () => {
it("should detect overflow via isContextOverflow", async () => {
const model: Model<"openai-completions"> = {
id: "local-model",
api: "openai-completions",
provider: "lm-studio",
baseUrl: "http://localhost:1234/v1",
reasoning: false,
input: ["text"],
contextWindow: 8192,
maxTokens: 2048,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
name: "LM Studio Local Model",
};
const result = await testContextOverflow(model, "lm-studio");
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
let llamaCppRunning = false;
try {
execSync("curl -s --max-time 1 http://localhost:8081/health > /dev/null", { stdio: "ignore" });
llamaCppRunning = true;
} catch {
llamaCppRunning = false;
}
describe.skipIf(!llamaCppRunning)("llama.cpp (local)", () => {
it("should detect overflow via isContextOverflow", async () => {
const model: Model<"openai-completions"> = {
id: "local-model",
api: "openai-completions",
provider: "llama.cpp",
baseUrl: "http://localhost:8081/v1",
reasoning: false,
input: ["text"],
contextWindow: 4096,
maxTokens: 2048,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
name: "llama.cpp Local Model",
};
const result = await testContextOverflow(model, "llama.cpp");
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
});