from langchain_core.messages import MessageLikeRepresentation, AIMessage
def is_token_limit_exceeded(exception: Exception, model_name: str = None) -> bool:
"""Determine if an exception indicates a token/context limit was exceeded.
Args:
exception: The exception to analyze
model_name: Optional model name to optimize provider detection
Returns:
True if the exception indicates a token limit was exceeded, False otherwise
"""
error_str = str(exception).lower()
provider = None
if model_name:
model_str = str(model_name).lower()
if model_str.startswith('openai:'):
provider = 'openai'
elif model_str.startswith('anthropic:'):
provider = 'anthropic'
elif model_str.startswith('gemini:') or model_str.startswith('google:'):
provider = 'gemini'
if provider == 'openai':
return _check_openai_token_limit(exception, error_str)
elif provider == 'anthropic':
return _check_anthropic_token_limit(exception, error_str)
elif provider == 'gemini':
return _check_gemini_token_limit(exception, error_str)
return (
_check_openai_token_limit(exception, error_str) or
_check_anthropic_token_limit(exception, error_str) or
_check_gemini_token_limit(exception, error_str)
)
def _check_openai_token_limit(exception: Exception, error_str: str) -> bool:
"""Check if exception indicates OpenAI token limit exceeded."""
exception_type = str(type(exception))
class_name = exception.__class__.__name__
module_name = getattr(exception.__class__, '__module__', '')
is_openai_exception = (
'openai' in exception_type.lower() or
'openai' in module_name.lower()
)
is_request_error = class_name in ['BadRequestError', 'InvalidRequestError']
if is_openai_exception and is_request_error:
token_keywords = ['token', 'context', 'length', 'maximum context', 'reduce']
if any(keyword in error_str for keyword in token_keywords):
return True
if hasattr(exception, 'code') and hasattr(exception, 'type'):
error_code = getattr(exception, 'code', '')
error_type = getattr(exception, 'type', '')
if (error_code == 'context_length_exceeded' or
error_type == 'invalid_request_error'):
return True
return False
def _check_anthropic_token_limit(exception: Exception, error_str: str) -> bool:
"""Check if exception indicates Anthropic token limit exceeded."""
exception_type = str(type(exception))
class_name = exception.__class__.__name__
module_name = getattr(exception.__class__, '__module__', '')
is_anthropic_exception = (
'anthropic' in exception_type.lower() or
'anthropic' in module_name.lower()
)
is_bad_request = class_name == 'BadRequestError'
if is_anthropic_exception and is_bad_request:
if 'prompt is too long' in error_str:
return True
return False
def _check_gemini_token_limit(exception: Exception, error_str: str) -> bool:
"""Check if exception indicates Google/Gemini token limit exceeded."""
exception_type = str(type(exception))
class_name = exception.__class__.__name__
module_name = getattr(exception.__class__, '__module__', '')
is_google_exception = (
'google' in exception_type.lower() or
'google' in module_name.lower()
)
is_resource_exhausted = class_name in [
'ResourceExhausted',
'GoogleGenerativeAIFetchError'
]
if is_google_exception and is_resource_exhausted:
return True
if 'google.api_core.exceptions.resourceexhausted' in exception_type.lower():
return True
return False
MODEL_TOKEN_LIMITS = {
"openai:gpt-4.1-mini": 1047576,
"openai:gpt-4.1-nano": 1047576,
"openai:gpt-4.1": 1047576,
"openai:gpt-4o-mini": 128000,
"openai:gpt-4o": 128000,
"openai:o4-mini": 200000,
"openai:o3-mini": 200000,
"openai:o3": 200000,
"openai:o3-pro": 200000,
"openai:o1": 200000,
"openai:o1-pro": 200000,
"anthropic:claude-opus-4": 200000,
"anthropic:claude-sonnet-4": 200000,
"anthropic:claude-3-7-sonnet": 200000,
"anthropic:claude-3-5-sonnet": 200000,
"anthropic:claude-3-5-haiku": 200000,
"google:gemini-1.5-pro": 2097152,
"google:gemini-1.5-flash": 1048576,
"google:gemini-pro": 32768,
"cohere:command-r-plus": 128000,
"cohere:command-r": 128000,
"cohere:command-light": 4096,
"cohere:command": 4096,
"mistral:mistral-large": 32768,
"mistral:mistral-medium": 32768,
"mistral:mistral-small": 32768,
"mistral:mistral-7b-instruct": 32768,
"ollama:codellama": 16384,
"ollama:llama2:70b": 4096,
"ollama:llama2:13b": 4096,
"ollama:llama2": 4096,
"ollama:mistral": 32768,
"bedrock:us.amazon.nova-premier-v1:0": 1000000,
"bedrock:us.amazon.nova-pro-v1:0": 300000,
"bedrock:us.amazon.nova-lite-v1:0": 300000,
"bedrock:us.amazon.nova-micro-v1:0": 128000,
"bedrock:us.anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
"bedrock:us.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
"bedrock:us.anthropic.claude-opus-4-20250514-v1:0": 200000,
"anthropic.claude-opus-4-1-20250805-v1:0": 200000,
}
def get_model_token_limit(model_string):
"""Look up the token limit for a specific model.
Args:
model_string: The model identifier string to look up
Returns:
Token limit as integer if found, None if model not in lookup table
"""
for model_key, token_limit in MODEL_TOKEN_LIMITS.items():
if model_key in model_string:
return token_limit
return None
def remove_up_to_last_ai_message(messages: list[MessageLikeRepresentation]) -> list[MessageLikeRepresentation]:
"""Truncate message history by removing up to the last AI message.
This is useful for handling token limit exceeded errors by removing recent context.
Args:
messages: List of message objects to truncate
Returns:
Truncated message list up to (but not including) the last AI message
"""
for i in range(len(messages) - 1, -1, -1):
if isinstance(messages[i], AIMessage):
return messages[:i]
return messages