"""
Gateway runner - entry point for messaging platform integrations.

This module provides:
- start_gateway(): Start all configured platform adapters
- GatewayRunner: Main class managing the gateway lifecycle

Usage:
    # Start the gateway
    python -m gateway.run

    # Or from CLI
    python cli.py --gateway
"""
# Optional bootstrap module: import it when it is installed and continue
# silently when it is not.
# NOTE(review): presumably imported only for its import-time side effects —
# nothing in the visible code references the name; confirm.
try:
    import hermes_bootstrap
except ModuleNotFoundError:
    pass
import asyncio
import dataclasses
import inspect
import json
import logging
import os
import re
import shlex
import sys
import signal
import tempfile
import threading
import time
import sqlite3
from collections import OrderedDict
from contextvars import copy_context
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List, Union
from agent.account_usage import fetch_account_usage, render_account_usage_lines
from agent.async_utils import safe_schedule_threadsafe
from agent.i18n import t
from hermes_cli.config import cfg_get
# Sizing/timeout defaults.
# NOTE(review): none of these four constants is referenced in the visible
# part of the file; judging by the names they are an agent-cache capacity,
# an agent-cache idle TTL, and adapter connect/disconnect timeouts (all in
# seconds where applicable) — confirm against their users.
_AGENT_CACHE_MAX_SIZE = 128
_AGENT_CACHE_IDLE_TTL_SECS = 3600.0
_PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT = 30.0
_ADAPTER_DISCONNECT_TIMEOUT_SECS_DEFAULT = 5.0

# Matches a slash-command mention ("/name"); group 1 is the command name.
# The look-behind rejects a "/" that follows a word character, ":" or "/",
# so the slashes inside "http://..." or "a/b" are not treated as commands.
_TELEGRAM_COMMAND_MENTION_RE = re.compile(r"(?<![\w:/])/([A-Za-z0-9][A-Za-z0-9_-]*)")

# Status lines matching this pattern are dropped (not forwarded) by
# ``_prepare_gateway_status_message`` when the platform is Telegram.
# DOTALL lets the ``.+`` / ``.*`` parts span line breaks.
_TELEGRAM_NOISY_STATUS_RE = re.compile(
    r"("
    r"auxiliary\s+.+\s+failed"
    r"|compression\s+summary\s+failed"
    r"|fallback\s+context\s+marker"
    r"|configured\s+compression\s+model\s+.+\s+failed"
    r"|no\s+auxiliary\s+llm\s+provider\s+configured"
    r"|auto-lowered\s+compression\s+threshold"
    r"|preflight\s+compression"
    r"|rate\s+limited\.\s+waiting\s+\d"
    r"|retrying\s+in\s+\d"
    r"|max\s+retries\s+\(\d+\).*(?:trying\s+fallback|exhausted|invalid\s+responses)"
    r"|stream\s+(?:drop|drop\s+mid\s+tool-call).+retry\s+\d"
    r"|stale\s+connections\s+from\s+a\s+previous\s+provider\s+issue"
    r")",
    re.IGNORECASE | re.DOTALL,
)

# Provider/API failure markers, matched anywhere in a string.
# NOTE(review): not referenced in the visible code — the start-anchored
# ``_GATEWAY_PROVIDER_ERROR_SHAPE_RE`` is what
# ``_looks_like_gateway_provider_error`` actually uses.
_GATEWAY_PROVIDER_ERROR_RE = re.compile(
    r"("
    r"api\s+(?:call\s+)?failed"
    r"|provider\s+authentication\s+failed"
    r"|non-retryable\s+error"
    r"|rate\s+limited\s+after\s+\d+\s+retries"
    r"|error\s+code\s*:"
    r"|\bhttp\s*\d{3}\b"
    r"|incorrect\s+api\s+key"
    r"|invalid\s+api\s+key"
    r")",
    re.IGNORECASE,
)

# The next three patterns classify an error text inside
# ``_gateway_provider_error_reply``; they are tested in the order
# auth -> policy -> rate limit, and the first hit selects the reply.
_GATEWAY_PROVIDER_POLICY_RE = re.compile(
    r"("
    r"cybersecurity\s+risk"
    r"|security\s+policy"
    r"|safety\s+policy"
    r"|policy\s+violation"
    r"|violat(?:e|es|ed|ion)"
    r"|blocked\s+(?:because|by|under)"
    r"|request\s+(?:was\s+)?(?:blocked|rejected)"
    r"|disallowed"
    r"|moderation"
    r")",
    re.IGNORECASE,
)
_GATEWAY_AUTH_ERROR_RE = re.compile(
    r"(provider\s+authentication\s+failed|incorrect\s+api\s+key|invalid\s+api\s+key|\b401\b)",
    re.IGNORECASE,
)
_GATEWAY_RATE_LIMIT_RE = re.compile(
    r"(rate\s+limit|rate-limited|\b429\b|quota|usage\s+limit)",
    re.IGNORECASE,
)

# Token shapes replaced with "[REDACTED]" by
# ``_redact_gateway_user_facing_secrets``. Only the last pattern has a
# capture group (the "Bearer " prefix), which the redactor keeps in place.
# NOTE(review): the prefixes look like OpenAI-style (sk-), GitHub (gh?_),
# Slack (xox?-), Hugging Face (hf_) and GitLab (glpat-) tokens — confirm.
_GATEWAY_SECRET_PATTERNS = (
    re.compile(r"\bsk-[A-Za-z0-9][A-Za-z0-9_\-]{12,}\b"),
    re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b"),
    re.compile(r"\bxox[baprs]-[A-Za-z0-9\-]{20,}\b"),
    re.compile(r"\bhf_[A-Za-z0-9]{20,}\b"),
    re.compile(r"\bglpat-[A-Za-z0-9_\-]{20,}\b"),
    re.compile(r"(?i)\b(Bearer\s+)[A-Za-z0-9._\-]{20,}\b"),
)
def _gateway_platform_value(platform: Any) -> str:
    """Normalize a platform enum member or raw string to a lowercase name.

    Objects exposing a ``value`` attribute contribute that attribute; any
    other object is used directly. Falsy inputs normalize to ``""``;
    everything else is stringified, stripped and lower-cased.
    """
    raw = getattr(platform, "value", platform)
    if not raw:
        return ""
    return str(raw).strip().lower()
def _redact_gateway_user_facing_secrets(text: str) -> str:
    """Mask anything shaped like a credential before text leaves the gateway.

    Every pattern in ``_GATEWAY_SECRET_PATTERNS`` is applied in turn. When a
    pattern defines a capture group (e.g. the ``Bearer `` prefix), group 1 is
    preserved and only the remainder is replaced by ``[REDACTED]``.
    Best-effort only: unknown token shapes pass through untouched.
    """

    def _mask(match) -> str:
        # ``lastindex`` is None when the pattern has no capture group.
        prefix = match.group(1) if match.lastindex else ""
        return prefix + "[REDACTED]"

    result = str(text or "")
    for secret_re in _GATEWAY_SECRET_PATTERNS:
        result = secret_re.sub(_mask, result)
    return result
def _gateway_provider_error_reply(text: str) -> str:
    """Translate a raw provider/API error into a short, user-safe reply.

    The text is classified by the first matching category, checked in the
    order authentication -> policy rejection -> rate limiting. Anything
    that matches none of them receives a generic "failed after retries"
    message.
    """
    auth_reply = (
        "⚠️ Provider authentication failed. Check the configured credentials; "
        "raw provider details are in the gateway logs."
    )
    policy_reply = (
        "⚠️ The model provider rejected the request. I kept the raw provider "
        "error out of chat; check gateway logs for details or try rephrasing."
    )
    rate_reply = "⏱️ The model provider is rate-limiting requests. Please wait a moment and try again."
    generic_reply = (
        "⚠️ The model provider failed after retries. I kept raw provider details "
        "out of chat; check gateway logs for diagnostics."
    )
    # Order matters: the first category that matches wins.
    categories = (
        (_GATEWAY_AUTH_ERROR_RE, auth_reply),
        (_GATEWAY_PROVIDER_POLICY_RE, policy_reply),
        (_GATEWAY_RATE_LIMIT_RE, rate_reply),
    )
    for category_re, reply in categories:
        if category_re.search(text):
            return reply
    return generic_reply
# Provider-error marker anchored at the start of the text; a run of
# non-word characters (emoji, punctuation, whitespace) may precede it.
_GATEWAY_PROVIDER_ERROR_SHAPE_RE = re.compile(
    r"^\s*(\W*\s*)?("
    r"api\s+(?:call\s+)?failed"
    r"|provider\s+authentication\s+failed"
    r"|non-retryable\s+error"
    r"|rate\s+limited\s+after\s+\d+\s+retries"
    r"|error\s+code\s*:"
    r"|http\s*\d{3}\b"
    r"|incorrect\s+api\s+key"
    r"|invalid\s+api\s+key"
    r")",
    re.IGNORECASE,
)


def _looks_like_gateway_provider_error(text: str) -> bool:
    """Decide whether ``text`` is a provider failure envelope, not content.

    Both conditions must hold, so ordinary assistant prose that merely
    mentions an HTTP status is not rewritten:

    * the stripped text is short — at most 400 characters and at most four
      line breaks;
    * a provider-error marker sits at the very start of the text (optionally
      behind a symbol/punctuation prefix) rather than mid-paragraph.
    """
    if not text:
        return False
    candidate = str(text).strip()
    too_long = len(candidate) > 400
    too_many_lines = candidate.count("\n") > 4
    if too_long or too_many_lines:
        return False
    return _GATEWAY_PROVIDER_ERROR_SHAPE_RE.search(candidate) is not None
def _sanitize_gateway_final_response(platform: Any, text: str) -> str:
    """Clean a final reply before it is delivered to a high-noise chat.

    Only Telegram is affected: secrets are redacted, and a reply that looks
    like a raw provider failure is swapped for a concise, safe category
    message. Empty text and every other platform are returned unchanged.
    """
    if not text or _gateway_platform_value(platform) != "telegram":
        return text
    safe_text = _redact_gateway_user_facing_secrets(str(text))
    if not _looks_like_gateway_provider_error(safe_text):
        return safe_text
    return _gateway_provider_error_reply(safe_text)
def _prepare_gateway_status_message(platform: Any, event_type: str, message: str) -> Optional[str]:
    """Filter and sanitize an agent status callback before delivery.

    Returns ``None`` when nothing should be sent: the message is blank, or
    (Telegram only) it matches the noisy-status pattern. Non-Telegram
    platforms receive the stripped text as-is. For Telegram, secrets are
    redacted and provider-error envelopes are replaced by a safe summary.
    ``event_type`` is accepted but not consulted.
    """
    cleaned = str(message or "").strip()
    if not cleaned:
        return None
    if _gateway_platform_value(platform) != "telegram":
        return cleaned
    cleaned = _redact_gateway_user_facing_secrets(cleaned)
    if _TELEGRAM_NOISY_STATUS_RE.search(cleaned):
        return None
    if not _looks_like_gateway_provider_error(cleaned):
        return cleaned
    return _gateway_provider_error_reply(cleaned)
def _telegramize_command_mentions(text: str, platform: Any) -> str:
    """Rewrite ``/command`` mentions into Telegram-valid command names.

    Telegram Bot API command names allow only lowercase letters, digits and
    underscores, so help text is normalized for that platform to keep the
    mentions valid there. Text for any other platform is returned untouched.
    """
    if getattr(platform, "value", platform) != "telegram":
        return text

    from hermes_cli.commands import _sanitize_telegram_name

    def _rewrite(found: re.Match[str]) -> str:
        clean = _sanitize_telegram_name(found.group(1))
        if clean:
            return f"/{clean}"
        # Nothing usable survived sanitization: keep the original mention.
        return found.group(0)

    return _TELEGRAM_COMMAND_MENTION_RE.sub(_rewrite, text)
# Default auto-continue freshness window in seconds (one hour). Used when
# HERMES_AUTO_CONTINUE_FRESHNESS is unset or malformed, and when
# ``_is_fresh_gateway_interruption`` is called without ``window_secs``.
_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60
def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
    """Best-effort conversion of stored gateway timestamps to epoch seconds.

    Missing/unparseable timestamps return None so legacy transcripts keep the
    historical auto-continue behaviour instead of being silently dropped.

    Accepts: ``datetime`` (naive values are interpreted in local time by
    ``datetime.timestamp``), epoch seconds (int/float), epoch milliseconds
    (any magnitude above 10_000_000_000 is divided by 1000), ISO-8601
    strings (with or without a trailing ``Z``), and numeric strings.

    Returns None for ``None``, booleans, blank strings, unsupported types,
    non-finite numbers (NaN / +-inf, including strings such as ``"nan"`` or
    ``"1e400"``) and values whose conversion overflows.
    """
    import math

    def _from_number(number: float) -> Optional[float]:
        # NaN/inf compare False against every freshness window, which would
        # silently mark the transcript stale; treat them as "unknown".
        if not math.isfinite(number):
            return None
        return number / 1000.0 if number > 10_000_000_000 else number

    # bool is an int subclass; it is never a meaningful timestamp.
    if value is None or isinstance(value, bool):
        return None
    try:
        if isinstance(value, datetime):
            return value.timestamp()
        if isinstance(value, (int, float)):
            return _from_number(float(value))
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return None
            try:
                number = float(text)
            except ValueError:
                # Not numeric: try ISO-8601. "Z" is rewritten because older
                # ``fromisoformat`` versions reject the suffix.
                return datetime.fromisoformat(text.replace("Z", "+00:00")).timestamp()
            return _from_number(number)
    except (ValueError, OverflowError, OSError):
        # ValueError: malformed ISO text. OverflowError/OSError: an int too
        # large for float, or a datetime outside the platform's range.
        return None
    return None
def _auto_continue_freshness_window() -> float:
    """Return the auto-continue freshness window, in seconds.

    The value comes from ``HERMES_AUTO_CONTINUE_FRESHNESS`` (bridged from
    ``config.yaml`` ``agent.gateway_auto_continue_freshness`` at gateway
    startup). An unset, empty or unparseable variable yields the module
    default. Non-positive values are returned as-is; the freshness check
    treats them as "gate disabled".
    """
    configured = os.environ.get("HERMES_AUTO_CONTINUE_FRESHNESS")
    if configured not in (None, ""):
        try:
            return float(configured)
        except (TypeError, ValueError):
            pass  # malformed value: fall through to the default
    return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
def _float_env(name: str, default: float) -> float:
    """Read environment variable ``name`` as a float.

    Falls back to ``float(default)`` when the variable is unset, empty, or
    not parseable (e.g. ``HERMES_AGENT_TIMEOUT=abc``), so a typo in the
    environment cannot crash the gateway or an agent turn.
    """
    value = os.environ.get(name)
    if value:
        try:
            return float(value)
        except (TypeError, ValueError):
            pass  # typo in the environment: use the default instead
    return float(default)
def _is_fresh_gateway_interruption(
    value: Any,
    *,
    now: Optional[float] = None,
    window_secs: Optional[float] = None,
) -> bool:
    """Tell whether an interruption marker is recent enough to auto-continue.

    ``value`` is the stored timestamp in any form understood by
    ``_coerce_gateway_timestamp``. ``now`` overrides the current time
    (epoch seconds); ``window_secs`` overrides the default window.

    Two cases are always considered fresh: a non-positive window (the gate
    is disabled) and a timestamp that cannot be determined (legacy
    transcripts and test scaffolding written without timestamps).
    """
    if window_secs is None:
        window = float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
    else:
        window = float(window_secs)
    if window <= 0:
        return True

    stamp = _coerce_gateway_timestamp(value)
    if stamp is None:
        return True

    reference = now if now is not None else time.time()
    return (reference - stamp) <= window
# Assistant-only message fields that are carried over into replay entries.
_ASSISTANT_REPLAY_FIELDS: tuple[str, ...] = (
    "reasoning",
    "reasoning_content",
    "reasoning_details",
    "codex_reasoning_items",
    "codex_message_items",
    "finish_reason",
)


def _build_replay_entry(role: str, content: Any, msg: Dict[str, Any]) -> Dict[str, Any]:
    """Build the replay entry for a message that carries no tool calls.

    The entry always holds ``role`` and ``content``. For assistant messages
    the fields listed in ``_ASSISTANT_REPLAY_FIELDS`` are copied from
    ``msg`` as well, under these rules:

    * a field absent from ``msg`` is skipped;
    * ``reasoning_content`` is kept unless it is ``None`` — an empty string
      is preserved because thinking-mode replay treats it as a meaningful
      sentinel, and dropping it can trigger HTTP 400 from strict providers;
    * every other field is kept only when its value is truthy, since an
      empty list/string there carries no information.
    """
    entry: Dict[str, Any] = {"role": role, "content": content}
    if role != "assistant":
        return entry
    for field in _ASSISTANT_REPLAY_FIELDS:
        if field not in msg:
            continue
        value = msg.get(field)
        if field == "reasoning_content":
            keep = value is not None
        else:
            keep = bool(value)
        if keep:
            entry[field] = value
    return entry
def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
    """Return the ``timestamp`` of the last usable transcript row, if any.

    Rows that are not dicts, have no role, or whose role is
    ``session_meta``/``system`` are skipped. Only the most recent usable
    row is consulted: when it has no timestamp the result is ``None`` and
    older rows are not inspected. ``None`` is also returned for an empty
    or missing history; callers treat that as "fresh".
    """
    if not history:
        return None
    ignored_roles = {"session_meta", "system"}
    usable_rows = (
        row
        for row in reversed(history)
        if isinstance(row, dict)
        and row.get("role")
        and row.get("role") not in ignored_roles
    )
    newest = next(usable_rows, None)
    if newest is None:
        return None
    return newest.get("timestamp")
def _ensure_ssl_certs() -> None:
    """Populate ``SSL_CERT_FILE`` when nothing has set it yet.

    Sources are tried in order and the first hit wins:

    1. the CA file paths reported by ``ssl.get_default_verify_paths()``;
    2. the ``certifi`` bundle, when that package is importable;
    3. a fixed list of well-known CA bundle locations.

    If none of them exists the environment is left untouched.
    """
    if "SSL_CERT_FILE" in os.environ:
        return

    import ssl

    def _adopt(path) -> bool:
        # Export ``path`` when it names an existing file-system entry.
        if path and os.path.exists(path):
            os.environ["SSL_CERT_FILE"] = path
            return True
        return False

    verify_paths = ssl.get_default_verify_paths()
    if _adopt(verify_paths.cafile) or _adopt(verify_paths.openssl_cafile):
        return

    try:
        import certifi
        os.environ["SSL_CERT_FILE"] = certifi.where()
        return
    except ImportError:
        pass

    well_known_bundles = (
        "/etc/ssl/certs/ca-certificates.crt",
        "/etc/pki/tls/certs/ca-bundle.crt",
        "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem",
        "/etc/ssl/ca-bundle.pem",
        "/etc/ssl/cert.pem",
        "/etc/pki/tls/cert.pem",
        "/usr/local/etc/openssl@1.1/cert.pem",
        "/opt/homebrew/etc/openssl@1.1/cert.pem",
    )
    for bundle in well_known_bundles:
        if _adopt(bundle):
            return
def _home_target_env_var(platform_name: str) -> str:
    """Name the environment variable holding a platform's home target.

    Resolution is delegated to ``cron.scheduler._resolve_home_env_var``.
    When that returns nothing usable, the conventional
    ``<PLATFORM>_HOME_CHANNEL`` name (upper-cased platform) is used.
    """
    from cron.scheduler import _resolve_home_env_var

    return _resolve_home_env_var(platform_name) or f"{platform_name.upper()}_HOME_CHANNEL"
def _home_thread_env_var(platform_name: str) -> str:
    """Name the optional thread/topic env var for a platform home target.

    It is the platform's home-target variable name with ``_THREAD_ID``
    appended.
    """
    base_name = _home_target_env_var(platform_name)
    return f"{base_name}_THREAD_ID"
def _restart_notification_pending() -> bool:
    """Report whether a /restart completion marker awaits delivery.

    The marker is the file ``.restart_notify.json`` inside the Hermes home
    directory; only its existence is checked.
    """
    marker = _hermes_home / ".restart_notify.json"
    return marker.exists()
# Flag this process as the gateway. Set before the project imports below run.
# NOTE(review): the consumers of _HERMES_GATEWAY are outside this view.
os.environ["_HERMES_GATEWAY"] = "1"
# Make sure SSL_CERT_FILE is populated (no-op when it is already set).
_ensure_ssl_certs()
# Put the parent of this file's directory first on sys.path so the
# top-level project modules imported below resolve from there.
sys.path.insert(0, str(Path(__file__).parent.parent))
from hermes_constants import get_hermes_home
from utils import atomic_json_write, atomic_yaml_write, base_url_host_matches, is_truthy_value
# Resolved once at import time; later helpers read this module-level value.
_hermes_home = get_hermes_home()
from dotenv import load_dotenv
from hermes_cli.env_loader import load_hermes_dotenv
# NOTE(review): _env_path is not referenced in the visible code.
_env_path = _hermes_home / '.env'
# Load .env from the Hermes home and from the project root (the directory
# two levels above this file).
load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
def _reload_runtime_env_preserving_config_authority() -> None:
    """Reload .env for fresh credentials without letting stale .env override config.

    Gateway processes are long-lived, so per-turn code reloads ~/.hermes/.env to
    pick up rotated API keys. config.yaml remains authoritative for agent budget
    settings such as agent.max_turns; otherwise a stale HERMES_MAX_ITERATIONS in
    .env can replace the startup bridge on later turns.

    Best-effort: a missing, unreadable, or malformed config.yaml — including
    one whose top level is not a mapping — leaves the environment exactly as
    the .env reload produced it. Nothing is raised for config problems.
    """
    load_hermes_dotenv(
        hermes_home=_hermes_home,
        project_env=Path(__file__).resolve().parents[1] / '.env',
    )
    config_path = _hermes_home / 'config.yaml'
    if not config_path.exists():
        return
    try:
        import yaml as _yaml
        with open(config_path, encoding="utf-8") as f:
            cfg = _yaml.safe_load(f) or {}
        from hermes_cli.config import _expand_env_vars
        cfg = _expand_env_vars(cfg)
    except Exception:
        return
    # safe_load returns whatever the document's top level is (list, scalar,
    # ...). Only a mapping can carry an "agent" section; without this guard
    # ``cfg.get`` would raise AttributeError on every turn.
    if not isinstance(cfg, dict):
        return
    agent_cfg = cfg.get("agent", {})
    if isinstance(agent_cfg, dict) and "max_turns" in agent_cfg:
        # Re-assert config.yaml's value over whatever .env just loaded.
        os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"])
# Parses a volume spec of the form "<host>:<container>[:<options>]". The
# container part must start with "/"; named groups: host, container, options.
# NOTE(review): neither constant is referenced in the visible code.
_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
# Container-side paths singled out as media output locations.
_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
# ---------------------------------------------------------------------------
# config.yaml -> environment bridge (runs once, at import time).
# Selected config.yaml settings are copied into os.environ. Any failure
# inside the bridge is reported on stderr and the import continues.
# ---------------------------------------------------------------------------
_config_path = _hermes_home / 'config.yaml'
if _config_path.exists():
    try:
        import yaml as _yaml
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
        from hermes_cli.config import _expand_env_vars
        _cfg = _expand_env_vars(_cfg)
        # Top-level scalar keys become env vars of the same name, but never
        # override a variable that is already set.
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
                os.environ[_key] = str(_val)
        # terminal.* -> TERMINAL_* (these DO overwrite existing env values).
        _terminal_cfg = _cfg.get("terminal", {})
        if _terminal_cfg and isinstance(_terminal_cfg, dict):
            _terminal_env_map = {
                "backend": "TERMINAL_ENV",
                "cwd": "TERMINAL_CWD",
                "timeout": "TERMINAL_TIMEOUT",
                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
                "docker_image": "TERMINAL_DOCKER_IMAGE",
                "docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
                "vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
                "ssh_host": "TERMINAL_SSH_HOST",
                "ssh_user": "TERMINAL_SSH_USER",
                "ssh_port": "TERMINAL_SSH_PORT",
                "ssh_key": "TERMINAL_SSH_KEY",
                "container_cpu": "TERMINAL_CONTAINER_CPU",
                "container_memory": "TERMINAL_CONTAINER_MEMORY",
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
                "docker_env": "TERMINAL_DOCKER_ENV",
                "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
                "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
                    _val = _terminal_cfg[_cfg_key]
                    # Placeholder cwd values are not exported; TERMINAL_CWD
                    # is then resolved by the fallback further down the file.
                    if _cfg_key == "cwd" and str(_val) in {".", "auto", "cwd"}:
                        continue
                    if _cfg_key == "cwd" and isinstance(_val, str):
                        _val = os.path.expanduser(_val)
                    # Structured values are JSON-encoded; scalars stringified.
                    if isinstance(_val, (list, dict)):
                        os.environ[_env_var] = json.dumps(_val)
                    else:
                        os.environ[_env_var] = str(_val)
        # auxiliary.<task>.{provider,model,base_url,api_key} -> AUXILIARY_*.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
                "vision": {
                    "provider": "AUXILIARY_VISION_PROVIDER",
                    "model": "AUXILIARY_VISION_MODEL",
                    "base_url": "AUXILIARY_VISION_BASE_URL",
                    "api_key": "AUXILIARY_VISION_API_KEY",
                },
                "web_extract": {
                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
                "approval": {
                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
                    "model": "AUXILIARY_APPROVAL_MODEL",
                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
                },
            }
            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
                _base_url = str(_task_cfg.get("base_url", "")).strip()
                _api_key = str(_task_cfg.get("api_key", "")).strip()
                # A provider of "auto" (or empty) is not exported; the other
                # fields are exported whenever they are non-empty.
                if _prov and _prov != "auto":
                    os.environ[_env_map["provider"]] = _prov
                if _model:
                    os.environ[_env_map["model"]] = _model
                if _base_url:
                    os.environ[_env_map["base_url"]] = _base_url
                if _api_key:
                    os.environ[_env_map["api_key"]] = _api_key
        # agent.* budget/timeout settings -> HERMES_* variables.
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
            if "gateway_timeout" in _agent_cfg:
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
            if "gateway_timeout_warning" in _agent_cfg:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
            if "gateway_notify_interval" in _agent_cfg:
                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
            if "gateway_auto_continue_freshness" in _agent_cfg:
                os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
                    _agent_cfg["gateway_auto_continue_freshness"]
                )
        # display.* busy-input settings.
        _display_cfg = _cfg.get("display", {})
        if _display_cfg and isinstance(_display_cfg, dict):
            if "busy_input_mode" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
            if "busy_ack_enabled" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
        # timezone: exported only when it is a non-empty string.
        _tz_cfg = _cfg.get("timezone", "")
        if _tz_cfg and isinstance(_tz_cfg, str):
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
        # security.redact_secrets: exported (lower-cased) when present,
        # including an explicit false.
        _security_cfg = _cfg.get("security", {})
        if isinstance(_security_cfg, dict):
            _redact = _security_cfg.get("redact_secrets")
            if _redact is not None:
                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
    except Exception as _bridge_err:
        # Deliberately non-fatal: warn on stderr and keep importing. Settings
        # processed before the failure stay exported.
        print(
            f"  Warning: config.yaml → env bridge failed: "
            f"{type(_bridge_err).__name__}: {_bridge_err}",
            file=sys.stderr,
        )
        print(
            "  Gateway will fall back to .env values, which may not match "
            "your current config.yaml. Run `hermes doctor` to investigate.",
            file=sys.stderr,
        )
# Apply the IPv4 preference when config.yaml sets network.force_ipv4.
# ``_cfg`` only exists if the bridge above got far enough to load the file,
# hence the ``dir()`` probe with an empty-dict fallback.
try:
    from hermes_constants import apply_ipv4_preference
    _network_cfg = (_cfg if '_cfg' in dir() else {}).get("network", {})
    if isinstance(_network_cfg, dict) and _network_cfg.get("force_ipv4"):
        apply_ipv4_preference(force=True)
except Exception as _bootstrap_exc:
    print(f"  Warning: IPv4 preference application failed: {_bootstrap_exc}", file=sys.stderr)
# Print configuration warnings; a failure here is reported, never fatal.
try:
    from hermes_cli.config import print_config_warnings
    print_config_warnings()
except Exception as _bootstrap_exc:
    print(f"  Warning: config validation failed: {_bootstrap_exc}", file=sys.stderr)
# Warn about deprecated cwd-related env vars; likewise non-fatal.
try:
    from hermes_cli.config import warn_deprecated_cwd_env_vars
    warn_deprecated_cwd_env_vars()
except Exception as _bootstrap_exc:
    print(f"  Warning: deprecation check failed: {_bootstrap_exc}", file=sys.stderr)
# Unconditionally set both flags to "1" for the gateway process.
# NOTE(review): their consumers are outside this view; by name they select
# quiet output and ask-before-exec behaviour — confirm.
os.environ["HERMES_QUIET"] = "1"
os.environ["HERMES_EXEC_ASK"] = "1"
# Resolve TERMINAL_CWD: when it is unset, empty, or one of the placeholder
# values, fall back to MESSAGING_CWD and finally to the user's home directory.
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if not _configured_cwd or _configured_cwd in {".", "auto", "cwd"}:
    _fallback = os.getenv("MESSAGING_CWD") or str(Path.home())
    os.environ["TERMINAL_CWD"] = _fallback
from gateway.config import (
Platform,
_BUILTIN_PLATFORM_VALUES,
GatewayConfig,
HomeChannel,
PlatformConfig,
load_gateway_config,
)
from gateway.session import (
SessionStore,
SessionSource,
SessionContext,
build_session_context,
build_session_context_prompt,
build_session_key,
is_shared_multi_user_session,
)
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import (
BasePlatformAdapter,
EphemeralReply,
MessageEvent,
MessageType,
_reply_anchor_for_event,
merge_pending_message_event,
)
from gateway.restart import (
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
GATEWAY_SERVICE_RESTART_EXIT_CODE,
parse_restart_drain_timeout,
)
from gateway.whatsapp_identity import (
canonical_whatsapp_identifier as _canonical_whatsapp_identifier,
expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Unique sentinel object (compared by identity).
# NOTE(review): presumably marks an agent slot that is still being created —
# its usage is outside the visible code; confirm.
_AGENT_PENDING_SENTINEL = object()
def _resolve_runtime_agent_kwargs() -> dict:
    """Resolve provider credentials for gateway-created AIAgent instances.

    The provider named by ``HERMES_INFERENCE_PROVIDER`` is resolved first.
    If that fails with an authentication error, the fallback provider chain
    from config.yaml is tried before giving up.

    Returns:
        A dict with ``api_key``, ``base_url``, ``provider``, ``api_mode``,
        ``command``, ``args`` and ``credential_pool``. A fallback result
        additionally carries ``model``.

    Raises:
        RuntimeError: resolution failed and no fallback could be resolved.
            The message is produced by ``format_runtime_provider_error``
            and the original exception is chained.
    """
    from hermes_cli.runtime_provider import (
        format_runtime_provider_error,
        resolve_runtime_provider,
    )
    from hermes_cli.auth import AuthError

    try:
        runtime = resolve_runtime_provider(
            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
        )
    except AuthError as auth_exc:
        logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc)
        fallback = _try_resolve_fallback_provider()
        if fallback is None:
            raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
        return fallback
    except Exception as exc:
        raise RuntimeError(format_runtime_provider_error(exc)) from exc

    plain_keys = ("api_key", "base_url", "provider", "api_mode", "command")
    kwargs = {key: runtime.get(key) for key in plain_keys}
    # ``args`` is always handed out as a fresh list, never None.
    kwargs["args"] = list(runtime.get("args") or [])
    kwargs["credential_pool"] = runtime.get("credential_pool")
    return kwargs
def _try_resolve_fallback_provider() -> dict | None:
    """Attempt to resolve credentials from the fallback_model/fallback_providers config.

    Reads ``config.yaml`` from the Hermes home and walks the entries of
    ``fallback_providers`` (or, when that is empty, ``fallback_model``); a
    single mapping is treated as a one-element list. The first entry that
    resolves wins.

    Returns:
        The agent kwargs for the first resolvable entry (the same keys as
        the primary path plus ``model``), or ``None`` when the file is
        missing, has no usable fallback configuration, or every entry
        fails.

    Never raises for configuration problems: failures are logged at debug
    level so the caller can surface the original authentication error.
    """
    from hermes_cli.runtime_provider import resolve_runtime_provider
    try:
        import yaml as _y
        cfg_path = _hermes_home / "config.yaml"
        if not cfg_path.exists():
            return None
        with open(cfg_path, encoding="utf-8") as _f:
            cfg = _y.safe_load(_f) or {}
        if not isinstance(cfg, dict):
            # A list/scalar document cannot hold fallback settings.
            logger.debug("Fallback provider lookup skipped: %s is not a mapping", cfg_path)
            return None
        fb = cfg.get("fallback_providers") or cfg.get("fallback_model")
        if not fb:
            return None
        fb_list = fb if isinstance(fb, list) else [fb]
        for entry in fb_list:
            if not isinstance(entry, dict):
                continue
            try:
                runtime = resolve_runtime_provider(
                    requested=entry.get("provider"),
                    explicit_base_url=entry.get("base_url"),
                    explicit_api_key=entry.get("api_key"),
                )
                logger.info(
                    "Fallback provider resolved: %s model=%s",
                    runtime.get("provider"),
                    entry.get("model"),
                )
                return {
                    "api_key": runtime.get("api_key"),
                    "base_url": runtime.get("base_url"),
                    "provider": runtime.get("provider"),
                    "api_mode": runtime.get("api_mode"),
                    "command": runtime.get("command"),
                    "args": list(runtime.get("args") or []),
                    "credential_pool": runtime.get("credential_pool"),
                    "model": entry.get("model"),
                }
            except Exception as fb_exc:
                logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc)
                continue
    except Exception:
        # Still best-effort, but leave a trace: an unreadable or malformed
        # config.yaml would otherwise look like "no fallback configured".
        logger.debug("Fallback provider resolution failed", exc_info=True)
    return None
def _build_media_placeholder(event) -> str:
    """Describe an event's media as text so media-only messages survive.

    A photo or document queued during active processing is later reduced to
    its ``.text``; without a caption the media would vanish. One placeholder
    line is produced per media URL, classified by the matching entry in
    ``media_types`` — or as an image whenever the event's ``message_type``
    is PHOTO. Returns an empty string when the event has no media URLs.
    """
    urls = getattr(event, "media_urls", None) or []
    kinds = getattr(event, "media_types", None) or []
    lines = []
    for index, url in enumerate(urls):
        # URLs without a matching type entry are treated as untyped.
        kind = kinds[index] if index < len(kinds) else ""
        if kind.startswith("image/") or getattr(event, "message_type", None) == MessageType.PHOTO:
            lines.append(f"[User sent an image: {url}]")
            continue
        if kind.startswith("audio/"):
            lines.append(f"[User sent audio: {url}]")
            continue
        lines.append(f"[User sent a file: {url}]")
    return "\n".join(lines)
def _format_duration(seconds: float) -> str:
    """Format a duration as ``M:SS``, or ``H:MM:SS`` from one hour upward.

    The input is rounded to whole seconds and negative values clamp to zero.
    """
    whole = max(int(round(seconds)), 0)
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    if not hours:
        return f"{minutes}:{secs:02d}"
    return f"{hours}:{minutes:02d}:{secs:02d}"
async def _probe_audio_duration(path: str) -> Optional[str]:
    """Best-effort duration probe. Returns formatted MM:SS / HH:MM:SS, or None on failure.

    Strategy, chosen by file extension:

    * ``.wav`` — read the frame count and rate with the stdlib ``wave``
      module;
    * ``.ogg`` / ``.opus`` / ``.oga`` — read the stream length with
      ``mutagen``;
    * anything else, or when the format-specific probe fails — run
      ``ffprobe`` with a 5 second limit.

    Blocking file parsing runs in a worker thread. No probe error is
    propagated; the function returns ``None`` instead. An ``ffprobe`` child
    that is still running when the attempt ends (timeout, error, or
    cancellation) is killed and reaped so it cannot outlive the call.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == ".wav":
        try:
            def _wav_duration() -> float:
                import wave
                with wave.open(path, "rb") as wf:
                    frames = wf.getnframes()
                    # Guard against a zero frame rate in a damaged header.
                    rate = wf.getframerate() or 1
                    return frames / float(rate)
            secs = await asyncio.to_thread(_wav_duration)
            return _format_duration(secs)
        except Exception:
            pass  # fall through to ffprobe
    if ext in (".ogg", ".opus", ".oga"):
        try:
            def _ogg_duration() -> float:
                from mutagen.oggopus import OggOpus
                return float(OggOpus(path).info.length)
            secs = await asyncio.to_thread(_ogg_duration)
            return _format_duration(secs)
        except Exception:
            pass  # mutagen missing or not an Opus stream: try ffprobe
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "ffprobe", "-v", "error", "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1", path,
            stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5.0)
        if proc.returncode == 0:
            return _format_duration(float(stdout.decode().strip()))
    except Exception:
        pass
    finally:
        # wait_for() only cancels communicate() on timeout; the child keeps
        # running unless it is killed explicitly.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except Exception:
                pass  # already exited, or the loop is shutting down
    return None
def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
    """Take the pending event for ``session_key`` from the adapter, whole.

    The complete event object is returned — not just its text — so queued
    follow-ups keep their media metadata and can re-enter the normal
    image/STT/document preprocessing path. The lookup itself is delegated
    to ``adapter.get_pending_message``.
    """
    pending = adapter.get_pending_message(session_key)
    return pending
# Interrupt reasons raised by the gateway itself rather than typed by a user.
_INTERRUPT_REASON_STOP = "Stop requested"
_INTERRUPT_REASON_RESET = "Session reset requested"
_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"

# Lower-cased lookup set for case-insensitive matching.
_CONTROL_INTERRUPT_MESSAGES = frozenset(
    reason.lower()
    for reason in (
        _INTERRUPT_REASON_STOP,
        _INTERRUPT_REASON_RESET,
        _INTERRUPT_REASON_TIMEOUT,
        _INTERRUPT_REASON_SSE_DISCONNECT,
        _INTERRUPT_REASON_GATEWAY_SHUTDOWN,
        _INTERRUPT_REASON_GATEWAY_RESTART,
    )
)


def _is_control_interrupt_message(message: Optional[str]) -> bool:
    """Tell whether an interrupt message is internal control flow.

    Matching ignores case and collapses every run of whitespace to a single
    space. Empty or missing messages are never control messages.
    """
    if not message:
        return False
    collapsed = " ".join(str(message).split())
    return collapsed.lower() in _CONTROL_INTERRUPT_MESSAGES
def _skill_slug_from_frontmatter(skill_md: Path) -> tuple[str | None, str | None]:
"""Derive the /command slug and declared frontmatter name from a SKILL.md.
Matches the exact normalization used by
:func:`agent.skill_commands.scan_skill_commands` so the slug here is the
same string a user types after the leading ``/`` (e.g. a skill with
frontmatter ``name: Stable Diffusion Image Generation`` resolves to
``stable-diffusion-image-generation`` — NOT the parent directory name,
which is commonly shorter/different, e.g. ``stable-diffusion``).
Using the directory name silently broke :func:`_check_unavailable_skill`
for every skill whose directory name drifted from its frontmatter name
(19 such skills on a standard install as of 2026-05), causing a generic
"unknown command" response where a "disabled — enable with …" or
"not installed — install with …" hint was expected.
Returns ``(slug, declared_name)`` or ``(None, None)`` when the file
can't be read or lacks a ``name:`` in its frontmatter.
"""
try:
content = skill_md.read_text(encoding="utf-8", errors="replace")
except Exception:
return None, None
if not content.startswith("---"):
return None, None
end = content.find("\n---", 3)
if end < 0:
return None, None
declared_name: str | None = None
for line in content[3:end].splitlines():
line = line.strip()
if line.startswith("name:"):
raw = line.split(":", 1)[1].strip()
if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in {'"', "'"}:
raw = raw[1:-1]
declared_name = raw.strip()
break
if not declared_name:
return None, None
slug = declared_name.lower().replace(" ", "-").replace("_", "-")
import re as _re
slug = _re.sub(r"[^a-z0-9-]", "", slug)
slug = _re.sub(r"-{2,}", "-", slug).strip("-")
if not slug:
return None, declared_name
return slug, declared_name
def _check_unavailable_skill(command_name: str) -> str | None:
"""Check if a command matches a known-but-inactive skill.
Returns a helpful message if the skill exists but is disabled or only
available as an optional install. Returns None if no match found.
The slug for each on-disk skill is derived from its frontmatter ``name:``
(via :func:`_skill_slug_from_frontmatter`), NOT from its containing
directory name — because the two can differ (e.g. directory
``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``
yields slug ``stable-diffusion-image-generation``). Matching on
directory name would miss that slug entirely and fall through to the
generic "unknown command" path.
"""
normalized = command_name.lower().replace("_", "-")
try:
from tools.skills_tool import _get_disabled_skill_names
from agent.skill_utils import get_all_skills_dirs
disabled = _get_disabled_skill_names()
for skills_dir in get_all_skills_dirs():
if not skills_dir.exists():
continue
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
continue
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
if not slug or not declared_name:
continue
if slug == normalized and declared_name in disabled:
return (
f"The **{command_name}** skill is installed but disabled.\n"
f"Enable it with: `hermes skills config`"
)
from hermes_constants import get_optional_skills_dir
repo_root = Path(__file__).resolve().parent.parent
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"):
slug, _declared = _skill_slug_from_frontmatter(skill_md)
if not slug:
continue
if slug == normalized:
rel = skill_md.parent.relative_to(optional_dir)
parts = list(rel.parts)
install_path = f"official/{'/'.join(parts)}"
return (
f"The **{command_name}** skill is available but not installed.\n"
f"Install it with: `hermes skills install {install_path}`"
)
except Exception:
pass
return None
def _platform_config_key(platform: "Platform") -> str:
    """Return the ``config.yaml`` key for *platform*.

    ``Platform.LOCAL`` is keyed as ``"cli"``; every other member is keyed by
    its own enum ``value``.
    """
    if platform == Platform.LOCAL:
        return "cli"
    return platform.value
def _teams_pipeline_plugin_enabled() -> bool:
    """Report whether the Teams pipeline plugin is listed under ``plugins.enabled``.

    Both spellings (``teams_pipeline`` and ``teams-pipeline``) are accepted.
    A ``plugins.enabled`` value that is not a list counts as "not enabled".
    """
    accepted_ids = ("teams_pipeline", "teams-pipeline")
    enabled = cfg_get(_load_gateway_config(), "plugins", "enabled", default=[])
    return isinstance(enabled, list) and any(
        plugin_id in enabled for plugin_id in accepted_ids
    )
def _load_gateway_config() -> dict:
    """Load and parse ``config.yaml`` from the Hermes home, returning {} on any error.

    Uses the module-level ``_hermes_home`` (so tests that monkeypatch it
    still see their fixture) and shares the mtime-keyed raw-yaml cache
    from ``hermes_cli.config.read_raw_config`` when the paths match.

    Returns:
        The parsed configuration mapping. An empty dict is returned when the
        file is missing, unreadable, unparsable, or when its top-level YAML
        document is not a mapping (for example a bare list or string), so
        callers can always use ``.get()`` on the result.
    """
    config_path = _hermes_home / 'config.yaml'
    try:
        from hermes_cli.config import get_config_path, read_raw_config
        if config_path == get_config_path():
            return read_raw_config()
    except Exception:
        # Fall through to the direct read below.
        pass
    try:
        if config_path.exists():
            import yaml
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
            if isinstance(loaded, dict):
                return loaded
            if loaded is not None:
                # safe_load returns whatever the document holds; anything
                # other than a mapping is not a usable config.
                logger.debug(
                    "Ignoring non-mapping gateway config in %s (got %s)",
                    config_path,
                    type(loaded).__name__,
                )
    except Exception:
        logger.debug(
            "Could not load gateway config from %s", config_path, exc_info=True
        )
    return {}
def _resolve_gateway_model(config: dict | None = None) -> str:
"""Read model from config.yaml — single source of truth.
Without this, temporary AIAgent instances (e.g. /compress) fall
back to the hardcoded default which fails when the active provider is
openai-codex.
"""
cfg = config if config is not None else _load_gateway_config()
model_cfg = cfg.get("model", {})
if isinstance(model_cfg, str):
return model_cfg
elif isinstance(model_cfg, dict):
return model_cfg.get("default") or model_cfg.get("model") or ""
return ""
def _resolve_hermes_bin() -> Optional[list[str]]:
"""Resolve the Hermes update command as argv parts.
Tries in order:
1. ``shutil.which("hermes")`` — standard PATH lookup
2. ``sys.executable -m hermes_cli.main`` — fallback when Hermes is running
from a venv/module invocation and the ``hermes`` shim is not on PATH
Returns argv parts ready for quoting/joining, or ``None`` if neither works.
"""
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
return [hermes_bin]
try:
import importlib.util
if importlib.util.find_spec("hermes_cli") is not None:
return [sys.executable, "-m", "hermes_cli.main"]
except Exception:
pass
return None
def _parse_session_key(session_key: str) -> "dict | None":
"""Parse a session key into its component parts.
Session keys follow the format
``agent:main:{platform}:{chat_type}:{chat_id}[:{extra}...]``.
Returns a dict with ``platform``, ``chat_type``, ``chat_id``, and
optionally ``thread_id`` keys, or None if the key doesn't match.
The 6th element is only returned as ``thread_id`` for chat types where
it is unambiguous (``dm`` and ``thread``). For group/channel sessions
the suffix may be a user_id (per-user isolation) rather than a
thread_id, so we leave ``thread_id`` out to avoid mis-routing.
"""
parts = session_key.split(":")
if len(parts) >= 5 and parts[0] == "agent" and parts[1] == "main":
result = {
"platform": parts[2],
"chat_type": parts[3],
"chat_id": parts[4],
}
if len(parts) > 5 and parts[3] in {"dm", "thread"}:
result["thread_id"] = parts[5]
return result
return None
def _format_gateway_process_notification(evt: dict) -> "str | None":
"""Format a watch pattern event from completion_queue into a [IMPORTANT:] message."""
evt_type = evt.get("type", "completion")
_sid = evt.get("session_id", "unknown")
_cmd = evt.get("command", "unknown")
if evt_type == "watch_disabled":
return f"[IMPORTANT: {evt.get('message', '')}]"
if evt_type == "watch_match":
_pat = evt.get("pattern", "?")
_out = evt.get("output", "")
_sup = evt.get("suppressed", 0)
text = (
f"[IMPORTANT: Background process {_sid} matched "
f"watch pattern \"{_pat}\".\n"
f"Command: {_cmd}\n"
f"Matched output:\n{_out}"
)
if _sup:
text += f"\n({_sup} earlier matches were suppressed by rate limit)"
text += "]"
return text
return None
import weakref as _weakref
# Module-level handle on the live GatewayRunner. The placeholder lambda can be
# called like a dead weak reference (it returns None), so
# ``_gateway_runner_ref()`` is usable before any runner exists.
# ``GatewayRunner.__init__`` rebinds this name to ``_weakref.ref(self)``.
_gateway_runner_ref: _weakref.ref = lambda: None
def _normalize_empty_agent_response(
agent_result: dict,
response: str,
*,
history_len: int = 0,
) -> str:
"""Normalize empty/None agent responses into user-facing messages.
Consolidates the existing ``failed`` handler and adds a catch-all for
the case where the agent did work (api_calls > 0) but returned no text.
Fix for #18765.
"""
if response:
return response
if agent_result.get("failed"):
error_detail = agent_result.get("error", "unknown error")
error_str = str(error_detail).lower()
is_context_failure = any(
p in error_str
for p in ("context", "token", "too large", "too long", "exceed", "payload")
) or ("400" in error_str and history_len > 50)
if is_context_failure:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
return (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
)
api_calls = int(agent_result.get("api_calls", 0) or 0)
if api_calls > 0 and not agent_result.get("interrupted"):
if agent_result.get("partial"):
err = agent_result.get("error", "processing incomplete")
return f"⚠️ Processing stopped: {str(err)[:200]}. Try again."
return (
"⚠️ Processing completed but no response was generated. "
"This may be a transient error — try sending your message again."
)
return response
def _should_clear_resume_pending_after_turn(agent_result: dict) -> bool:
"""Return True only when a gateway turn really completed successfully.
Restart recovery uses ``resume_pending`` as a durable marker for sessions
interrupted during gateway drain. A soft interrupt can still bubble out as
a syntactically normal agent result with an empty final response; clearing
the marker in that case loses the recovery signal and startup auto-resume
has nothing to schedule.
"""
if not isinstance(agent_result, dict):
return False
if agent_result.get("interrupted"):
return False
if agent_result.get("failed") or agent_result.get("partial") or agent_result.get("error"):
return False
if agent_result.get("completed") is False:
return False
return True
def _preserve_queued_followup_history_offset(
current_result: dict,
followup_result: dict,
) -> dict:
"""Carry the outer history offset through queued follow-up drains.
``_process_message_background()`` persists transcript rows only once, after the
entire in-band queued-follow-up chain returns. Each recursive ``_run_agent()``
call advances ``history_offset`` to the history it received, so without
correction the outermost persistence step sees only the *last* queued turn as
"new" and silently drops earlier turns from the same drain chain.
Preserve the earliest (outermost) history offset so the final transcript slice
still includes every queued turn that ran during the chain.
"""
if not isinstance(followup_result, dict):
return followup_result
if not isinstance(current_result, dict):
return followup_result
current_offset = current_result.get("history_offset")
followup_offset = followup_result.get("history_offset")
if not isinstance(current_offset, int):
return followup_result
if isinstance(followup_offset, int) and followup_offset <= current_offset:
return followup_result
merged = dict(followup_result)
merged["history_offset"] = current_offset
return merged
class GatewayRunner:
    """
    Main gateway controller.

    Manages the lifecycle of all platform adapters and routes
    messages to/from the agent.
    """

    # Class-level defaults. ``__init__`` rebinds every one of these on the
    # instance; presumably they exist so that instances created without
    # running ``__init__`` still expose the attributes — TODO confirm.
    # NOTE(review): the dict defaults below are shared by any instance that
    # never rebinds them.
    _running_agents_ts: Dict[str, float] = {}
    _busy_input_mode: str = "interrupt"
    _restart_drain_timeout: float = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
    _exit_code: Optional[int] = None
    _draining: bool = False
    _restart_requested: bool = False
    _restart_task_started: bool = False
    _restart_detached: bool = False
    _restart_via_service: bool = False
    _stop_task: Optional[asyncio.Task] = None
    _session_model_overrides: Dict[str, Dict[str, str]] = {}
    _session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
def __init__(self, config: Optional[GatewayConfig] = None):
    """Build a gateway runner: load settings, create stores, run startup maintenance.

    Args:
        config: Gateway configuration; when falsy, ``load_gateway_config()``
            supplies it.
    """
    global _gateway_runner_ref
    self.config = config or load_gateway_config()
    self.adapters: Dict[Platform, BasePlatformAdapter] = {}
    # This check reads only self.config and the environment.
    self._warn_if_docker_media_delivery_is_risky()
    # Publish a weak handle to this runner at module level.
    _gateway_runner_ref = _weakref.ref(self)

    # Settings resolved once at construction time via the _load_* helpers.
    self._prefill_messages = self._load_prefill_messages()
    self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
    self._reasoning_config = self._load_reasoning_config()
    self._service_tier = self._load_service_tier()
    self._show_reasoning = self._load_show_reasoning()
    self._busy_input_mode = self._load_busy_input_mode()
    self._restart_drain_timeout = self._load_restart_drain_timeout()
    self._provider_routing = self._load_provider_routing()
    self._fallback_model = self._load_fallback_model()

    # The session store asks the process registry whether a session still
    # has active processes.
    from tools.process_registry import process_registry
    self.session_store = SessionStore(
        self.config.sessions_dir, self.config,
        has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
    )
    self.delivery_router = DeliveryRouter(self.config)

    # Lifecycle / shutdown state.
    self._running = False
    self._gateway_loop: Optional[asyncio.AbstractEventLoop] = None
    self._shutdown_event = asyncio.Event()
    self._exit_cleanly = False
    self._exit_with_failure = False
    self._exit_reason: Optional[str] = None
    self._exit_code: Optional[int] = None
    self._draining = False
    self._restart_requested = False
    self._restart_task_started = False
    self._restart_detached = False
    self._restart_via_service = False
    self._stop_task: Optional[asyncio.Task] = None

    # Per-session bookkeeping, keyed by session key unless noted otherwise.
    self._running_agents: Dict[str, Any] = {}
    self._running_agents_ts: Dict[str, float] = {}
    self._pending_messages: Dict[str, str] = {}
    # Overflow FIFO used by _enqueue_fifo / _promote_queued_event.
    self._queued_events: Dict[str, List[MessageEvent]] = {}
    self._pending_native_image_paths_by_session: Dict[str, List[str]] = {}
    self._busy_ack_ts: Dict[str, float] = {}
    self._session_run_generation: Dict[str, int] = {}
    self._session_sources: "OrderedDict[str, SessionSource]" = OrderedDict()
    self._session_sources_max = 512
    import threading as _threading
    self._agent_cache: "OrderedDict[str, tuple]" = OrderedDict()
    self._agent_cache_lock = _threading.Lock()
    self._session_model_overrides: Dict[str, Dict[str, str]] = {}
    self._session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
    self._kanban_notifier_profile = self._active_profile_name()
    self._teams_pipeline_runtime = None
    self._teams_pipeline_runtime_error: Optional[str] = None
    self._pending_approvals: Dict[str, Dict[str, Any]] = {}
    # Platforms awaiting background reconnection (see
    # _handle_adapter_fatal_error).
    self._failed_platforms: Dict[Platform, Dict[str, Any]] = {}
    self._update_prompt_pending: Dict[str, bool] = {}
    import itertools as _itertools
    self._slash_confirm_counter = _itertools.count(1)

    # Best-effort: a failure here must not prevent the gateway from starting.
    try:
        from tools.tirith_security import ensure_installed
        ensure_installed(log_failures=False)
    except Exception:
        pass

    # SQLite-backed session DB is optional; other code checks for None.
    self._session_db = None
    try:
        from hermes_state import SessionDB
        self._session_db = SessionDB()
    except Exception as e:
        logger.warning("SQLite session store not available: %s", e)

    # Opt-in state.db maintenance, enabled by sessions.auto_prune.
    if self._session_db is not None:
        try:
            from hermes_cli.config import load_config as _load_full_config
            _sess_cfg = (_load_full_config().get("sessions") or {})
            if _sess_cfg.get("auto_prune", False):
                self._session_db.maybe_auto_prune_and_vacuum(
                    retention_days=int(_sess_cfg.get("retention_days", 90)),
                    min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
                    vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
                    sessions_dir=self.config.sessions_dir,
                )
        except Exception as exc:
            logger.debug("state.db auto-maintenance skipped: %s", exc)

    # Opt-in checkpoint maintenance, enabled by checkpoints.auto_prune.
    try:
        from hermes_cli.config import load_config as _load_full_config
        _ckpt_cfg = (_load_full_config().get("checkpoints") or {})
        if _ckpt_cfg.get("auto_prune", False):
            from tools.checkpoint_manager import maybe_auto_prune_checkpoints
            maybe_auto_prune_checkpoints(
                retention_days=int(_ckpt_cfg.get("retention_days", 7)),
                min_interval_hours=int(_ckpt_cfg.get("min_interval_hours", 24)),
                delete_orphans=bool(_ckpt_cfg.get("delete_orphans", True)),
                max_total_size_mb=int(_ckpt_cfg.get("max_total_size_mb", 500)),
            )
    except Exception as exc:
        logger.debug("checkpoint auto-maintenance skipped: %s", exc)

    from gateway.pairing import PairingStore
    self.pairing_store = PairingStore()
    from gateway.hooks import HookRegistry
    self.hooks = HookRegistry()
    # Persisted /voice modes, keyed "<platform>:<chat_id>" (see _voice_key).
    self._voice_mode: Dict[str, str] = self._load_voice_modes()
    self._recent_voice_transcripts: Dict[tuple[int, int], List[tuple[float, str]]] = {}
    self._background_tasks: set = set()
def _wire_teams_pipeline_runtime(self) -> None:
    """Bind the Teams meeting pipeline runtime to Graph webhook ingress.

    Does nothing when the msgraph_webhook adapter is not registered or the
    teams_pipeline plugin is not enabled, so the gateway starts cleanly
    whether or not the user has opted into the pipeline. Import and wiring
    failures are logged as warnings rather than raised.
    """
    if Platform.MSGRAPH_WEBHOOK not in self.adapters:
        return
    if not _teams_pipeline_plugin_enabled():
        logger.debug("Teams pipeline plugin is disabled; skipping runtime wiring")
        return

    try:
        from plugins.teams_pipeline.runtime import bind_gateway_runtime
    except Exception as import_error:
        logger.warning("Teams pipeline runtime import failed: %s", import_error)
        return

    try:
        is_bound = bind_gateway_runtime(self)
    except Exception as wiring_error:
        logger.warning("Teams pipeline runtime wiring failed: %s", wiring_error)
        return

    if is_bound:
        logger.info("Teams pipeline runtime bound to msgraph webhook ingress")
        return
    if self._teams_pipeline_runtime_error:
        logger.warning(
            "Teams pipeline runtime unavailable: %s",
            self._teams_pipeline_runtime_error,
        )
def _warn_if_docker_media_delivery_is_risky(self) -> None:
"""Warn when Docker-backed gateways lack an explicit export mount.
MEDIA delivery happens in the gateway process, so paths emitted by the model
must be readable from the host. A plain container-local path like
`/workspace/report.txt` or `/output/report.txt` often exists only inside
Docker, so users commonly need a dedicated export mount such as
`host-dir:/output`.
"""
if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker":
return
connected = self.config.get_connected_platforms()
messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}]
if not messaging_platforms:
return
raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip()
volumes: List[str] = []
if raw_volumes:
try:
parsed = json.loads(raw_volumes)
if isinstance(parsed, list):
volumes = [str(v) for v in parsed if isinstance(v, str)]
except Exception:
logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True)
has_explicit_output_mount = False
for spec in volumes:
match = _DOCKER_VOLUME_SPEC_RE.match(spec)
if not match:
continue
container_path = match.group("container")
if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS:
has_explicit_output_mount = True
break
if has_explicit_output_mount:
return
logger.warning(
"Docker backend is enabled for the messaging gateway but no explicit host-visible "
"output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. "
"This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail "
"for container-local paths like '/workspace/...' or '/output/...'."
)
def _has_setup_skill(self) -> bool:
"""Check if the hermes-agent-setup skill is installed."""
try:
from tools.skill_manager_tool import _find_skill
return _find_skill("hermes-agent-setup") is not None
except Exception:
return False
# JSON file holding per-chat voice modes; read by ``_load_voice_modes`` and
# written by ``_save_voice_modes``. Resolved once, when the class body runs.
_VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
def _voice_key(self, platform: Platform, chat_id: str) -> str:
"""Return a platform-namespaced key for voice mode state."""
return f"{platform.value}:{chat_id}"
def _load_voice_modes(self) -> Dict[str, str]:
try:
data = json.loads(self._VOICE_MODE_PATH.read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {}
if not isinstance(data, dict):
return {}
valid_modes = {"off", "voice_only", "all"}
result = {}
for chat_id, mode in data.items():
if mode not in valid_modes:
continue
key = str(chat_id)
if ":" not in key:
logger.warning(
"Skipping legacy unprefixed voice mode key %r during migration. "
"Re-enable voice mode on that chat to rebuild the prefixed key.",
key,
)
continue
result[key] = mode
return result
def _save_voice_modes(self) -> None:
try:
self._VOICE_MODE_PATH.parent.mkdir(parents=True, exist_ok=True)
self._VOICE_MODE_PATH.write_text(
json.dumps(self._voice_mode, indent=2)
)
except OSError as e:
logger.warning("Failed to save voice modes: %s", e)
def _set_adapter_auto_tts_disabled(self, adapter, chat_id: str, disabled: bool) -> None:
"""Update an adapter's in-memory auto-TTS suppression set if present."""
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
if not isinstance(disabled_chats, set):
return
if disabled:
disabled_chats.add(chat_id)
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
if isinstance(enabled_chats, set):
enabled_chats.discard(chat_id)
else:
disabled_chats.discard(chat_id)
def _set_adapter_auto_tts_enabled(self, adapter, chat_id: str, enabled: bool) -> None:
"""Update an adapter's per-chat auto-TTS opt-in set if present.
Used for ``/voice on``/``/voice tts`` where the user explicitly wants
auto-TTS even when ``voice.auto_tts`` is False globally.
"""
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
if not isinstance(enabled_chats, set):
return
if enabled:
enabled_chats.add(chat_id)
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
if isinstance(disabled_chats, set):
disabled_chats.discard(chat_id)
else:
enabled_chats.discard(chat_id)
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
    """Restore persisted /voice state into a live platform adapter.

    Three adapter fields are populated from config plus ``self._voice_mode``:

    - ``_auto_tts_default``: global default from ``voice.auto_tts`` (only
      when the adapter already has that attribute; ``False`` if the config
      cannot be loaded)
    - ``_auto_tts_enabled_chats``: chats with mode ``voice_only``/``all``
    - ``_auto_tts_disabled_chats``: chats with mode ``off``

    Only ``self._voice_mode`` keys carrying this adapter's
    ``"<platform>:"`` prefix are used. The adapter is left untouched when
    its ``platform`` is not a ``Platform`` or when it has neither chat set.
    """
    platform = getattr(adapter, "platform", None)
    if not isinstance(platform, Platform):
        return

    suppressed = getattr(adapter, "_auto_tts_disabled_chats", None)
    opted_in = getattr(adapter, "_auto_tts_enabled_chats", None)
    has_suppressed = isinstance(suppressed, set)
    has_opted_in = isinstance(opted_in, set)
    if not (has_suppressed or has_opted_in):
        return

    try:
        from hermes_cli.config import load_config as _load_full_config

        voice_cfg = _load_full_config().get("voice") or {}
        global_default = bool(voice_cfg.get("auto_tts", False))
    except Exception:
        global_default = False
    if hasattr(adapter, "_auto_tts_default"):
        adapter._auto_tts_default = global_default

    prefix = f"{platform.value}:"
    skip = len(prefix)
    if has_suppressed:
        suppressed.clear()
        suppressed.update(
            stored_key[skip:]
            for stored_key, mode in self._voice_mode.items()
            if mode == "off" and stored_key.startswith(prefix)
        )
    if has_opted_in:
        opted_in.clear()
        opted_in.update(
            stored_key[skip:]
            for stored_key, mode in self._voice_mode.items()
            if mode in {"voice_only", "all"} and stored_key.startswith(prefix)
        )
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
"""Call adapter.disconnect() defensively, swallowing any error.
Used when adapter.connect() failed or raised — the adapter may
have allocated partial resources (aiohttp.ClientSession, poll
tasks, child subprocesses) that would otherwise leak and surface
as "Unclosed client session" warnings at process exit.
Must tolerate partial-init state and never raise, since callers
use it inside error-handling blocks.
"""
timeout = self._adapter_disconnect_timeout_secs()
try:
if timeout <= 0:
await adapter.disconnect()
else:
await asyncio.wait_for(adapter.disconnect(), timeout=timeout)
except asyncio.TimeoutError:
logger.warning(
"Timed out after %.1fs while disconnecting %s adapter; continuing shutdown",
timeout,
platform.value if platform is not None else "adapter",
)
except Exception as e:
logger.debug(
"Defensive %s disconnect after failed connect raised: %s",
platform.value if platform is not None else "adapter",
e,
)
def _adapter_disconnect_timeout_secs(self) -> float:
"""Return the per-adapter disconnect timeout used during shutdown."""
raw = os.getenv("HERMES_GATEWAY_ADAPTER_DISCONNECT_TIMEOUT", "").strip()
if raw:
try:
timeout = float(raw)
except ValueError:
logger.warning(
"Ignoring invalid HERMES_GATEWAY_ADAPTER_DISCONNECT_TIMEOUT=%r",
raw,
)
else:
return max(0.0, timeout)
return _ADAPTER_DISCONNECT_TIMEOUT_SECS_DEFAULT
def _platform_connect_timeout_secs(self) -> float:
"""Return the per-platform connect timeout used during startup/retry."""
raw = os.getenv("HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT", "").strip()
if raw:
try:
timeout = float(raw)
except ValueError:
logger.warning(
"Ignoring invalid HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT=%r",
raw,
)
else:
return max(0.0, timeout)
return _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT
async def _connect_adapter_with_timeout(self, adapter, platform) -> bool:
"""Connect an adapter without allowing one platform to block others."""
timeout = self._platform_connect_timeout_secs()
if timeout <= 0:
return await adapter.connect()
try:
return await asyncio.wait_for(adapter.connect(), timeout=timeout)
except asyncio.TimeoutError as exc:
raise TimeoutError(
f"{platform.value} connect timed out after {timeout:g}s"
) from exc
@property
def should_exit_cleanly(self) -> bool:
    """Whether a clean exit was requested (set by ``_request_clean_exit``)."""
    return self._exit_cleanly
@property
def should_exit_with_failure(self) -> bool:
    """Whether the gateway should exit reporting a failure.

    ``_handle_adapter_fatal_error`` sets the flag when no platform remains
    and the failing adapter's error was retryable.
    """
    return self._exit_with_failure
@property
def exit_reason(self) -> Optional[str]:
    """Human-readable reason recorded for the exit, or ``None`` if none was set."""
    return self._exit_reason
@property
def exit_code(self) -> Optional[int]:
    """Exit code recorded in ``_exit_code``; ``None`` until something sets it."""
    return self._exit_code
def _session_key_for_source(self, source: SessionSource) -> str:
"""Resolve the current session key for a source, honoring gateway config when available."""
if hasattr(self, "session_store") and self.session_store is not None:
try:
session_key = self.session_store._generate_session_key(source)
if isinstance(session_key, str) and session_key:
return session_key
except Exception:
pass
config = getattr(self, "config", None)
return build_session_key(
source,
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
)
def _telegram_topic_mode_enabled(self, source: SessionSource) -> bool:
    """Return whether Telegram DM topic mode is active for this chat.

    Only Telegram ``dm`` sources can be in topic mode. The flag is read from
    the session DB; a missing DB, a failed read, or any stored value other
    than exactly ``True`` counts as disabled.
    """
    is_telegram_dm = source.platform == Platform.TELEGRAM and source.chat_type == "dm"
    if not is_telegram_dm:
        return False

    db = getattr(self, "_session_db", None)
    if db is None:
        return False

    try:
        stored = db.is_telegram_topic_mode_enabled(
            chat_id=str(source.chat_id),
            user_id=str(source.user_id),
        )
    except Exception:
        logger.debug("Failed to read Telegram topic mode state", exc_info=True)
        return False
    return stored is True
# thread_id strings treated as "not inside a user-created topic": the empty
# string (no thread id) and "1" (presumably Telegram's General topic id —
# confirm against the Telegram adapter).
_TELEGRAM_GENERAL_TOPIC_IDS = frozenset({"", "1"})
def _is_telegram_topic_root_lobby(self, source: SessionSource) -> bool:
    """True for the main Telegram DM (or General topic) when topic mode has made it a lobby.

    Requires a Telegram ``dm`` source with topic mode enabled whose
    ``thread_id`` (as a string, empty when missing) is one of
    ``_TELEGRAM_GENERAL_TOPIC_IDS``.
    """
    is_telegram_dm = source.platform == Platform.TELEGRAM and source.chat_type == "dm"
    if not (is_telegram_dm and self._telegram_topic_mode_enabled(source)):
        return False
    thread = str(source.thread_id or "")
    return thread in self._TELEGRAM_GENERAL_TOPIC_IDS
def _is_telegram_topic_lane(self, source: SessionSource) -> bool:
    """True for a user-created Telegram private-chat topic lane.

    Requires a Telegram ``dm`` source with topic mode enabled whose
    ``thread_id`` is non-empty and not one of
    ``_TELEGRAM_GENERAL_TOPIC_IDS``.
    """
    is_telegram_dm = source.platform == Platform.TELEGRAM and source.chat_type == "dm"
    if not (is_telegram_dm and self._telegram_topic_mode_enabled(source)):
        return False
    thread = str(source.thread_id or "")
    return bool(thread) and thread not in self._TELEGRAM_GENERAL_TOPIC_IDS
# Minimum number of seconds between two lobby reminders for the same chat;
# compared against time.monotonic() deltas in
# ``_should_send_telegram_lobby_reminder``.
_TELEGRAM_LOBBY_REMINDER_COOLDOWN_S = 30.0
def _should_send_telegram_lobby_reminder(self, source: SessionSource) -> bool:
"""Rate-limit root-DM lobby reminders to one message per cooldown window.
A user who forgets multi-session mode is enabled and types several
prompts in the root DM would otherwise get a reminder for every
message. Cap it so the first one lands and the rest stay quiet.
"""
if not hasattr(self, "_telegram_lobby_reminder_ts"):
self._telegram_lobby_reminder_ts = {}
chat_id = str(source.chat_id or "")
if not chat_id:
return True
import time as _time
now = _time.monotonic()
last = self._telegram_lobby_reminder_ts.get(chat_id, 0.0)
if now - last < self._TELEGRAM_LOBBY_REMINDER_COOLDOWN_S:
return False
self._telegram_lobby_reminder_ts[chat_id] = now
return True
def _telegram_topic_root_lobby_message(self) -> str:
return (
"This main chat is reserved for system commands.\n\n"
"To start a new Hermes chat, open the All Messages topic at the top "
"of this bot interface and send any message there. Telegram will "
"create a new topic for that message; each topic works as an "
"independent Hermes session."
)
def _telegram_topic_root_new_message(self) -> str:
return (
"To start a new parallel Hermes chat, open the All Messages topic "
"at the top of this bot interface and send any message there. "
"Telegram will create a new topic for it.\n\n"
"Each topic is an independent Hermes session. Use /new inside an "
"existing topic only if you want to replace that topic's current session."
)
def _telegram_topic_new_header(self, source: SessionSource) -> Optional[str]:
if not self._is_telegram_topic_lane(source):
return None
return (
"Started a new Hermes session in this topic.\n\n"
"Tip: for parallel work, open All Messages and send a message there "
"to create a separate topic instead of using /new here. /new replaces "
"the session attached to the current topic."
)
def _record_telegram_topic_binding(
self,
source: SessionSource,
session_entry,
) -> None:
"""Persist the Telegram topic -> Hermes session binding for topic lanes."""
session_db = getattr(self, "_session_db", None)
if session_db is None or not source.chat_id or not source.thread_id:
return
session_db.bind_telegram_topic(
chat_id=str(source.chat_id),
thread_id=str(source.thread_id),
user_id=str(source.user_id or ""),
session_key=session_entry.session_key,
session_id=session_entry.session_id,
)
def _recover_telegram_topic_thread_id(
    self,
    source: SessionSource,
) -> Optional[str]:
    """Pin DM-topic routing to the user's last-active topic.

    Telegram fragments topic-mode DMs two ways: a Reply on a message in
    another topic delivers ``message_thread_id`` for *that* topic, and
    ``_build_message_event`` strips the thread_id on plain replies (#3206 —
    needed for non-topic users). Both route the user to the wrong session.

    When topic mode is on and the inbound thread id is missing/General or is
    not a known topic for this chat, the thread id of the first listed
    binding belonging to this user is returned (presumably the most recent
    one; the ordering comes from
    ``list_telegram_topic_bindings_for_chat`` — confirm).

    Returns:
        The thread id to route to, or ``None`` to leave the source alone.
    """
    eligible = (
        source.platform == Platform.TELEGRAM
        and source.chat_type == "dm"
        and source.chat_id
        and source.user_id
        and self._telegram_topic_mode_enabled(source)
    )
    if not eligible:
        return None

    db = getattr(self, "_session_db", None)
    if db is None:
        return None

    try:
        bindings = db.list_telegram_topic_bindings_for_chat(
            chat_id=str(source.chat_id),
        )
    except Exception:
        logger.debug("topic-recover: read failed", exc_info=True)
        return None
    if not bindings:
        return None

    inbound = str(source.thread_id or "")
    inbound_is_lobby = not inbound or inbound in self._TELEGRAM_GENERAL_TOPIC_IDS
    known_threads = {str(entry.get("thread_id") or "") for entry in bindings}
    if inbound in known_threads and not inbound_is_lobby:
        # Already addressed to a real, known topic: nothing to fix.
        return None

    user_id = str(source.user_id)
    own_binding = next(
        (entry for entry in bindings if str(entry.get("user_id") or "") == user_id),
        None,
    )
    if own_binding is None:
        return None
    recovered = str(own_binding.get("thread_id") or "")
    return recovered if recovered and recovered != inbound else None
def _resolve_session_agent_runtime(
    self,
    *,
    source: Optional[SessionSource] = None,
    session_key: Optional[str] = None,
    user_config: Optional[dict] = None,
) -> tuple[str, dict]:
    """Resolve model/runtime for a session, honoring session-scoped /model overrides.

    Resolution order:

    1. If the session has an override that carries an ``api_key``, its
       provider bundle (provider/api_key/base_url/api_mode) is returned
       directly, without resolving global runtime state.
    2. Otherwise the global runtime kwargs are resolved; a ``model`` entry
       in them replaces the configured model.
    3. An override without an ``api_key`` is then layered on top through
       ``_apply_session_model_override``.
    4. If no model is known but a provider is, the provider's default model
       is used when it can be looked up.

    Args:
        source: Used to derive the session key when ``session_key`` is empty.
        session_key: Explicit session key; takes precedence over ``source``.
        user_config: Config mapping passed to ``_resolve_gateway_model``.

    Returns:
        ``(model, runtime_kwargs)``.
    """
    key = session_key
    if not key and source is not None:
        try:
            key = self._session_key_for_source(source)
        except Exception:
            key = None

    model = _resolve_gateway_model(user_config)
    override = self._session_model_overrides.get(key) if key else None

    if not override:
        logger.debug(
            "No session model override: session=%s config_model=%s override_keys=%s",
            key or "", model,
            list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
        )
    else:
        override_model = override.get("model", model)
        bundle = {
            field: override.get(field)
            for field in ("provider", "api_key", "base_url", "api_mode")
        }
        if bundle.get("api_key"):
            logger.debug(
                "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
                key or "", model, override_model,
                bundle.get("provider"),
            )
            return override_model, bundle
        logger.debug(
            "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
            key or "", model, override_model,
        )

    runtime_kwargs = _resolve_runtime_agent_kwargs()
    runtime_model = runtime_kwargs.pop("model", None)
    if runtime_model:
        logger.info(
            "Runtime provider supplied explicit model override: %s -> %s",
            model,
            runtime_model,
        )
        model = runtime_model

    if override and key:
        model, runtime_kwargs = self._apply_session_model_override(
            key, model, runtime_kwargs
        )

    if not model and runtime_kwargs.get("provider"):
        provider = runtime_kwargs["provider"]
        try:
            from hermes_cli.models import get_default_model_for_provider

            model = get_default_model_for_provider(provider)
            if model:
                logger.info(
                    "No model configured — defaulting to %s for provider %s",
                    model, runtime_kwargs["provider"],
                )
        except Exception:
            pass

    return model, runtime_kwargs
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
    """Build the effective model/runtime config for a single turn.

    Always uses the session's primary model/provider. If `/fast` is enabled
    (a truthy ``self._service_tier``) and the model supports Priority
    Processing / Anthropic fast mode, ``request_overrides`` carries the
    overrides so the API call is marked accordingly; otherwise it is an
    empty dict.

    Args:
        user_message: Not used by this method.
        model: Model name for the turn.
        runtime_kwargs: Resolved runtime settings; only the keys copied
            below are read.

    Returns:
        A dict with ``model``, ``runtime``, ``signature`` (a hashable tuple
        of the model and the routing-relevant runtime fields) and
        ``request_overrides``.
    """
    from hermes_cli.models import resolve_fast_mode_overrides

    copied_fields = ("api_key", "base_url", "provider", "api_mode", "command")
    runtime = {name: runtime_kwargs.get(name) for name in copied_fields}
    runtime["args"] = list(runtime_kwargs.get("args") or [])
    runtime["credential_pool"] = runtime_kwargs.get("credential_pool")

    signature = (
        model,
        runtime["provider"],
        runtime["base_url"],
        runtime["api_mode"],
        runtime["command"],
        tuple(runtime["args"]),
    )

    request_overrides = {}
    if getattr(self, "_service_tier", None):
        try:
            request_overrides = resolve_fast_mode_overrides(model) or {}
        except Exception:
            request_overrides = {}

    return {
        "model": model,
        "runtime": runtime,
        "signature": signature,
        "request_overrides": request_overrides,
    }
async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
"""React to an adapter failure after startup.
If the error is retryable (e.g. network blip, DNS failure), queue the
platform for background reconnection instead of giving up permanently.
"""
logger.error(
"Fatal %s adapter error (%s): %s",
adapter.platform.value,
adapter.fatal_error_code or "unknown",
adapter.fatal_error_message or "unknown error",
)
self._update_platform_runtime_status(
adapter.platform.value,
platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
error_code=adapter.fatal_error_code,
error_message=adapter.fatal_error_message,
)
existing = self.adapters.get(adapter.platform)
if existing is adapter:
try:
await adapter.disconnect()
finally:
self.adapters.pop(adapter.platform, None)
self.delivery_router.adapters = self.adapters
if adapter.fatal_error_retryable:
platform_config = self.config.platforms.get(adapter.platform)
if platform_config and adapter.platform not in self._failed_platforms:
self._failed_platforms[adapter.platform] = {
"config": platform_config,
"attempts": 0,
"next_retry": time.monotonic() + 30,
}
logger.info(
"%s queued for background reconnection",
adapter.platform.value,
)
if not self.adapters and not self._failed_platforms:
self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
if adapter.fatal_error_retryable:
self._exit_with_failure = True
logger.error("No connected messaging platforms remain. Shutting down gateway for service restart.")
else:
logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
await self.stop()
elif not self.adapters and self._failed_platforms:
logger.warning(
"No connected messaging platforms remain, but %d platform(s) "
"queued for reconnection — gateway staying alive, watcher will "
"retry in background.",
len(self._failed_platforms),
)
def _request_clean_exit(self, reason: str) -> None:
self._exit_cleanly = True
self._exit_reason = reason
self._shutdown_event.set()
def _running_agent_count(self) -> int:
return len(self._running_agents)
def _status_action_label(self) -> str:
return "restart" if self._restart_requested else "shutdown"
def _status_action_gerund(self) -> str:
return "restarting" if self._restart_requested else "shutting down"
def _queue_during_drain_enabled(self) -> bool:
return self._restart_requested and self._busy_input_mode in {"queue", "steer"}
def _enqueue_fifo(self, session_key: str, queued_event: "MessageEvent", adapter: Any) -> None:
"""Append a /queue event to the FIFO chain for a session."""
if adapter is None:
return
pending_slot = getattr(adapter, "_pending_messages", None)
if pending_slot is None:
return
queued_events = getattr(self, "_queued_events", None)
if queued_events is None:
queued_events = {}
self._queued_events = queued_events
if session_key in pending_slot:
queued_events.setdefault(session_key, []).append(queued_event)
else:
pending_slot[session_key] = queued_event
def _promote_queued_event(
self,
session_key: str,
adapter: Any,
pending_event: Optional["MessageEvent"],
) -> Optional["MessageEvent"]:
"""Promote the next overflow item after the slot was drained.
Called at the drain site after _dequeue_pending_event consumed
(or failed to consume) the slot. If there's an overflow item:
- When pending_event is None (slot was empty), return the
overflow head as the new pending_event.
- When pending_event already exists (slot was populated by an
interrupt follow-up or similar), stage the overflow head in
the slot so the NEXT recursion picks it up.
Returns the (possibly updated) pending_event for drain to use.
"""
queued_events = getattr(self, "_queued_events", None)
if not queued_events:
return pending_event
overflow = queued_events.get(session_key)
if not overflow:
return pending_event
next_queued = overflow.pop(0)
if not overflow:
queued_events.pop(session_key, None)
if pending_event is None:
return next_queued
if adapter is not None and hasattr(adapter, "_pending_messages"):
adapter._pending_messages[session_key] = next_queued
else:
queued_events.setdefault(session_key, []).insert(0, next_queued)
return pending_event
def _queue_depth(self, session_key: str, *, adapter: Any = None) -> int:
"""Total pending /queue items for a session — slot + overflow."""
queued_events = getattr(self, "_queued_events", None) or {}
depth = len(queued_events.get(session_key, []))
if adapter is not None and session_key in getattr(adapter, "_pending_messages", {}):
depth += 1
return depth
@staticmethod
def _is_goal_continuation_event(event_or_text: Any) -> bool:
"""Return True for synthetic /goal continuation turns.
Goal continuations are normal queued user-role events, so pause/clear
must distinguish them from real user /queue messages before removing or
suppressing them.
"""
text = getattr(event_or_text, "text", event_or_text) or ""
return str(text).startswith("[Continuing toward your standing goal]\nGoal:")
def _clear_goal_pending_continuations(self, session_key: str, adapter: Any) -> int:
"""Remove queued synthetic /goal continuations for one session.
User-issued /goal pause/clear can race with a continuation already
queued by the judge. Remove only synthetic goal continuations while
preserving normal /queue and user follow-up events.
"""
removed = 0
pending_slot = getattr(adapter, "_pending_messages", None) if adapter is not None else None
if isinstance(pending_slot, dict):
pending_event = pending_slot.get(session_key)
if self._is_goal_continuation_event(pending_event):
pending_slot.pop(session_key, None)
removed += 1
queued_events = getattr(self, "_queued_events", None)
if isinstance(queued_events, dict):
overflow = queued_events.get(session_key) or []
if overflow:
kept = []
for queued_event in overflow:
if self._is_goal_continuation_event(queued_event):
removed += 1
else:
kept.append(queued_event)
if kept:
queued_events[session_key] = kept
else:
queued_events.pop(session_key, None)
return removed
def _goal_still_active_for_session(self, session_id: str) -> bool:
"""Best-effort fresh DB check before running a queued continuation."""
if not session_id:
return False
try:
from hermes_cli.goals import GoalManager
return GoalManager(session_id=session_id).is_active()
except Exception as exc:
logger.debug("goal continuation: active-state recheck failed: %s", exc)
return False
def _update_runtime_status(self, gateway_state: Optional[str] = None, exit_reason: Optional[str] = None) -> None:
try:
from gateway.status import write_runtime_status
write_runtime_status(
gateway_state=gateway_state,
exit_reason=exit_reason,
restart_requested=self._restart_requested,
active_agents=self._running_agent_count(),
)
except Exception:
pass
def _update_platform_runtime_status(
self,
platform: str,
*,
platform_state: Optional[str] = None,
error_code: Optional[str] = None,
error_message: Optional[str] = None,
) -> None:
try:
from gateway.status import write_runtime_status
write_runtime_status(
platform=platform,
platform_state=platform_state,
error_code=error_code,
error_message=error_message,
)
except Exception:
pass
def _pause_failed_platform(self, platform, *, reason: str = "") -> None:
"""Mark a queued platform as paused — keep it in ``_failed_platforms``
but stop the reconnect watcher from hammering it.
Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive
retryable failures, and by ``/platform pause <name>`` for manual
intervention. Paused platforms are surfaced in ``/platform list``
and resumed with ``/platform resume <name>``.
"""
info = getattr(self, "_failed_platforms", {}).get(platform)
if info is None:
return
if info.get("paused"):
return
info["paused"] = True
info["pause_reason"] = reason or "auto-paused after repeated failures"
info["next_retry"] = float("inf")
try:
self._update_platform_runtime_status(
platform.value,
platform_state="paused",
error_code=None,
error_message=info["pause_reason"],
)
except Exception:
pass
logger.warning(
"%s paused after %d consecutive failures (%s) — "
"fix the underlying issue then run `/platform resume %s` "
"to retry, or `hermes gateway restart` to restart the gateway.",
platform.value, info.get("attempts", 0),
info["pause_reason"], platform.value,
)
def _resume_paused_platform(self, platform) -> bool:
"""Unpause a platform — reset its attempt counter and schedule an
immediate retry. Returns True if the platform was paused and is
now queued; False if it wasn't paused (or wasn't in the queue).
"""
info = getattr(self, "_failed_platforms", {}).get(platform)
if info is None:
return False
if not info.get("paused"):
return False
info["paused"] = False
info.pop("pause_reason", None)
info["attempts"] = 0
info["next_retry"] = time.monotonic()
try:
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
error_code=None,
error_message=None,
)
except Exception:
pass
logger.info("%s resumed — retrying on next watcher tick", platform.value)
return True
@staticmethod
def _load_prefill_messages() -> List[Dict[str, Any]]:
"""Load ephemeral prefill messages from config or env var.
Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
the prefill_messages_file key in ~/.hermes/config.yaml.
Relative paths are resolved from ~/.hermes/.
"""
file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
if not file_path:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
file_path = cfg.get("prefill_messages_file", "")
except Exception:
pass
if not file_path:
return []
path = Path(file_path).expanduser()
if not path.is_absolute():
path = _hermes_home / path
if not path.exists():
logger.warning("Prefill messages file not found: %s", path)
return []
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, list):
logger.warning("Prefill messages file must contain a JSON array: %s", path)
return []
return data
except Exception as e:
logger.warning("Failed to load prefill messages from %s: %s", path, e)
return []
@staticmethod
def _load_ephemeral_system_prompt() -> str:
"""Load ephemeral system prompt from config or env var.
Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
agent.system_prompt in ~/.hermes/config.yaml.
"""
prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
if prompt:
return prompt
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return (cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
except Exception:
pass
return ""
@staticmethod
def _load_reasoning_config() -> dict | None:
    """Load reasoning effort from config.yaml.

    Reads ``agent.reasoning_effort`` and passes it through
    ``parse_reasoning_effort``. Valid: "none", "minimal", "low", "medium",
    "high", "xhigh". Returns None to use the default (medium); a non-empty
    value that does not parse is logged as a warning.
    """
    from hermes_constants import parse_reasoning_effort

    configured = ""
    try:
        import yaml as _y

        config_file = _hermes_home / "config.yaml"
        if config_file.exists():
            with open(config_file, encoding="utf-8") as handle:
                loaded = _y.safe_load(handle) or {}
            configured = str(cfg_get(loaded, "agent", "reasoning_effort", default="") or "").strip()
    except Exception:
        pass

    parsed = parse_reasoning_effort(configured)
    # `configured` is already stripped, so non-empty means non-blank.
    if configured and parsed is None:
        logger.warning("Unknown reasoning_effort '%s', using default (medium)", configured)
    return parsed
@staticmethod
def _parse_reasoning_command_args(raw_args: str) -> tuple[str, bool]:
"""Parse `/reasoning` args into `(value, persist_global)`.
`/reasoning <level>` is session-scoped by default. `--global` may be
supplied in any position to persist the change to config.yaml.
"""
import shlex
text = str(raw_args or "").strip().replace("—", "--")
if not text:
return "", False
try:
tokens = shlex.split(text)
except ValueError:
tokens = text.split()
persist_global = False
value_tokens = []
for token in tokens:
if token == "--global":
persist_global = True
else:
value_tokens.append(token)
return " ".join(value_tokens).strip().lower(), persist_global
def _resolve_session_reasoning_config(
    self,
    *,
    source: Optional[SessionSource] = None,
    session_key: Optional[str] = None,
) -> dict | None:
    """Resolve reasoning effort for a session, honoring session overrides.

    The session key is taken from ``session_key`` or derived from
    ``source``. A stored override for that key wins; otherwise the value
    from ``_load_reasoning_config`` is returned.
    """
    key = session_key
    if not key and source is not None:
        try:
            key = self._session_key_for_source(source)
        except Exception:
            key = None

    session_overrides = getattr(self, "_session_reasoning_overrides", {}) or {}
    if key and key in session_overrides:
        return session_overrides[key]
    return self._load_reasoning_config()
def _set_session_reasoning_override(
self,
session_key: str,
reasoning_config: Optional[dict],
) -> None:
"""Set or clear the session-scoped reasoning override."""
if not session_key:
return
if not hasattr(self, "_session_reasoning_overrides"):
self._session_reasoning_overrides = {}
if reasoning_config is None:
self._session_reasoning_overrides.pop(session_key, None)
else:
self._session_reasoning_overrides[session_key] = dict(reasoning_config)
@staticmethod
def _load_service_tier() -> str | None:
"""Load Priority Processing setting from config.yaml.
Reads agent.service_tier from config.yaml. Accepted values mirror the CLI:
"fast"/"priority"/"on" => "priority", while "normal"/"off" disables it.
Returns None when unset or unsupported.
"""
raw = ""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
except Exception:
pass
value = raw.lower()
if not value or value in {"normal", "default", "standard", "off", "none"}:
return None
if value in {"fast", "priority", "on"}:
return "priority"
logger.warning("Unknown service_tier '%s', ignoring", raw)
return None
@staticmethod
def _load_show_reasoning() -> bool:
"""Load show_reasoning toggle from config.yaml display section."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return is_truthy_value(
cfg_get(cfg, "display", "show_reasoning"),
default=False,
)
except Exception:
pass
return False
@staticmethod
def _load_busy_input_mode() -> str:
"""Load gateway drain-time busy-input behavior from config/env."""
mode = os.getenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "").strip().lower()
if not mode:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
except Exception:
pass
if mode == "queue":
return "queue"
if mode == "steer":
return "steer"
return "interrupt"
@staticmethod
def _load_restart_drain_timeout() -> float:
    """Load graceful gateway restart/stop drain timeout in seconds.

    HERMES_RESTART_DRAIN_TIMEOUT wins when non-blank; otherwise
    ``agent.restart_drain_timeout`` from config.yaml is used. The raw value
    goes through ``parse_restart_drain_timeout``; when the result equals
    the default and the raw text is not a number, a warning is logged.
    """
    configured = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
    if not configured:
        try:
            import yaml as _y

            config_file = _hermes_home / "config.yaml"
            if config_file.exists():
                with open(config_file, encoding="utf-8") as handle:
                    loaded = _y.safe_load(handle) or {}
                configured = str(cfg_get(loaded, "agent", "restart_drain_timeout", default="") or "").strip()
        except Exception:
            pass

    timeout = parse_restart_drain_timeout(configured)
    fell_back_to_default = bool(configured) and timeout == DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
    if fell_back_to_default:
        # The default can also be an explicitly configured number; only
        # warn when the raw text is not numeric at all.
        try:
            float(configured)
        except (TypeError, ValueError):
            logger.warning(
                "Invalid restart_drain_timeout '%s', using default %.0fs",
                configured,
                DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
            )
    return timeout
@staticmethod
def _load_background_notifications_mode() -> str:
"""Load background process notification mode from config or env var.
Modes:
- ``all`` — push running-output updates *and* the final message (default)
- ``result`` — only the final completion message (regardless of exit code)
- ``error`` — only the final message when exit code is non-zero
- ``off`` — no watcher messages at all
"""
mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
if not mode:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
raw = cfg_get(cfg, "display", "background_process_notifications")
if raw is False:
mode = "off"
elif raw not in {None, ""}:
mode = str(raw)
except Exception:
pass
mode = (mode or "all").strip().lower()
valid = {"all", "result", "error", "off"}
if mode not in valid:
logger.warning(
"Unknown background_process_notifications '%s', defaulting to 'all'",
mode,
)
return "all"
return mode
@staticmethod
def _load_provider_routing() -> dict:
"""Load OpenRouter provider routing preferences from config.yaml."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return cfg.get("provider_routing", {}) or {}
except Exception:
pass
return {}
@staticmethod
def _load_fallback_model() -> list | dict | None:
"""Load fallback provider chain from config.yaml.
Returns a list of provider dicts (``fallback_providers``), a single
dict (legacy ``fallback_model``), or None if not configured.
AIAgent.__init__ normalizes both formats into a chain.
"""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or None
if fb:
return fb
except Exception:
pass
return None
def _snapshot_running_agents(self) -> Dict[str, Any]:
    """Return a copy of the running-agent map without pending placeholders."""
    live: Dict[str, Any] = {}
    for key, running in self._running_agents.items():
        if running is _AGENT_PENDING_SENTINEL:
            continue
        live[key] = running
    return live
def _queue_or_replace_pending_event(self, session_key: str, event: MessageEvent) -> None:
    """Merge an event into the pending slot of its platform's adapter.

    Does nothing when no adapter is registered for the event's platform.
    """
    adapter = self.adapters.get(event.source.platform)
    if adapter:
        merge_pending_message_event(adapter._pending_messages, session_key, event)
async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
    """Handle a message that arrives for a session that is already busy.

    Depending on gateway state and the configured busy-input mode the
    message is dropped (unauthorized sender), queued, steered into the
    running agent, or used to interrupt it. An acknowledgement is sent
    back to the chat, rate-limited per session.

    Args:
        event: The incoming message event.
        session_key: Key of the session the event belongs to.

    Returns:
        ``False`` only when the gateway is not draining and no adapter is
        registered for the event's platform; ``True`` in every other case.
    """
    # Unauthorized senders are dropped; only a log line is produced.
    if not self._is_user_authorized(event.source):
        logger.warning(
            "Dropping message from unauthorized user in active session: "
            "user=%s (%s), platform=%s, session=%s",
            event.source.user_id,
            event.source.user_name,
            event.source.platform.value if event.source.platform else "unknown",
            session_key,
        )
        return True
    # While draining, no new turn is started: the message is either queued
    # for later or refused, and the sender is told which.
    if self._draining:
        adapter = self.adapters.get(event.source.platform)
        if not adapter:
            return True
        reply_anchor = self._reply_anchor_for_event(event)
        thread_meta = self._thread_metadata_for_source(event.source, reply_anchor)
        if self._queue_during_drain_enabled():
            self._queue_or_replace_pending_event(session_key, event)
            message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
        else:
            message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
        await adapter._send_with_retry(
            chat_id=event.source.chat_id,
            content=message,
            # Telegram DM with a thread: reply to the anchor. Any other
            # Telegram thread: no reply target. Everything else: reply to
            # the incoming message.
            reply_to=(
                reply_anchor
                if event.source.platform == Platform.TELEGRAM
                and event.source.chat_type == "dm"
                and event.source.thread_id
                else (None if event.source.platform == Platform.TELEGRAM and event.source.thread_id else event.message_id)
            ),
            metadata=thread_meta,
        )
        return True
    adapter = self.adapters.get(event.source.platform)
    if not adapter:
        return False
    running_agent = self._running_agents.get(session_key)
    effective_mode = self._busy_input_mode
    steered = False
    if effective_mode == "steer":
        steer_text = (event.text or "").strip()
        # Steering needs non-empty text and a real agent (not the pending
        # placeholder) that exposes steer().
        can_steer = (
            steer_text
            and running_agent is not None
            and running_agent is not _AGENT_PENDING_SENTINEL
            and hasattr(running_agent, "steer")
        )
        if can_steer:
            try:
                steered = bool(running_agent.steer(steer_text))
            except Exception as exc:
                logger.warning("Gateway steer failed for session %s: %s", session_key, exc)
                steered = False
        # A steer that could not be delivered degrades to queueing.
        if not steered:
            effective_mode = "queue"
    # Anything not steered is merged into the adapter's pending slot,
    # in both queue and interrupt modes.
    if not steered:
        merge_pending_message_event(adapter._pending_messages, session_key, event)
    is_queue_mode = effective_mode == "queue"
    is_steer_mode = effective_mode == "steer"
    if effective_mode == "interrupt" and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
        try:
            running_agent.interrupt(event.text)
        except Exception:
            pass
    # Only the exact value "true" (case-insensitive) keeps the ack enabled.
    busy_ack_enabled = os.environ.get("HERMES_GATEWAY_BUSY_ACK_ENABLED", "true").lower() == "true"
    if not busy_ack_enabled:
        logger.debug("Busy ack suppressed for session %s", session_key)
        return True
    # At most one ack per session every 30 seconds.
    _BUSY_ACK_COOLDOWN = 30
    now = time.time()
    last_ack = self._busy_ack_ts.get(session_key, 0)
    if now - last_ack < _BUSY_ACK_COOLDOWN:
        return True
    self._busy_ack_ts[session_key] = now
    # Optional progress details appended to the ack; failures are ignored.
    status_parts = []
    if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
        try:
            summary = running_agent.get_activity_summary()
            iteration = summary.get("api_call_count", 0)
            max_iter = summary.get("max_iterations", 0)
            current_tool = summary.get("current_tool")
            start_ts = self._running_agents_ts.get(session_key, 0)
            if start_ts:
                elapsed_min = int((now - start_ts) / 60)
                if elapsed_min > 0:
                    status_parts.append(f"{elapsed_min} min elapsed")
            if max_iter:
                status_parts.append(f"iteration {iteration}/{max_iter}")
            if current_tool:
                status_parts.append(f"running: {current_tool}")
        except Exception:
            pass
    status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
    if is_steer_mode:
        message = (
            f"⏩ Steered into current run{status_detail}. "
            f"Your message arrives after the next tool call."
        )
    elif is_queue_mode:
        message = (
            f"⏳ Queued for the next turn{status_detail}. "
            f"I'll respond once the current task finishes."
        )
    else:
        message = (
            f"⚡ Interrupting current task{status_detail}. "
            f"I'll respond to your message shortly."
        )
    # Append the busy-input hint unless BUSY_INPUT_FLAG is already marked
    # seen, then mark it; any failure here only skips the hint.
    try:
        from agent.onboarding import (
            BUSY_INPUT_FLAG,
            busy_input_hint_gateway,
            is_seen,
            mark_seen,
        )
        _user_cfg = _load_gateway_config()
        if not is_seen(_user_cfg, BUSY_INPUT_FLAG):
            if is_steer_mode:
                _hint_mode = "steer"
            elif is_queue_mode:
                _hint_mode = "queue"
            else:
                _hint_mode = "interrupt"
            message = (
                f"{message}\n\n"
                f"{busy_input_hint_gateway(_hint_mode)}"
            )
            mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
    except Exception as _onb_err:
        logger.debug("Failed to apply busy-input onboarding hint: %s", _onb_err)
    reply_anchor = self._reply_anchor_for_event(event)
    thread_meta = self._thread_metadata_for_source(event.source, reply_anchor)
    try:
        await adapter._send_with_retry(
            chat_id=event.source.chat_id,
            content=message,
            # Same reply-target rule as in the draining branch above.
            reply_to=(
                reply_anchor
                if event.source.platform == Platform.TELEGRAM
                and event.source.chat_type == "dm"
                and event.source.thread_id
                else (None if event.source.platform == Platform.TELEGRAM and event.source.thread_id else event.message_id)
            ),
            metadata=thread_meta,
        )
    except Exception as e:
        # The ack is a courtesy; a failed send does not change the result.
        logger.debug("Failed to send busy-ack: %s", e)
    return True
async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
snapshot = self._snapshot_running_agents()
last_active_count = self._running_agent_count()
last_status_at = 0.0
def _maybe_update_status(force: bool = False) -> None:
nonlocal last_active_count, last_status_at
now = asyncio.get_running_loop().time()
active_count = self._running_agent_count()
if force or active_count != last_active_count or (now - last_status_at) >= 1.0:
self._update_runtime_status("draining")
last_active_count = active_count
last_status_at = now
if not self._running_agents:
_maybe_update_status(force=True)
return snapshot, False
_maybe_update_status(force=True)
if timeout <= 0:
return snapshot, True
deadline = asyncio.get_running_loop().time() + timeout
while self._running_agents and asyncio.get_running_loop().time() < deadline:
_maybe_update_status()
await asyncio.sleep(0.1)
timed_out = bool(self._running_agents)
_maybe_update_status(force=True)
return snapshot, timed_out
def _interrupt_running_agents(self, reason: str) -> None:
    """Interrupt every running agent with ``reason``.

    Pending placeholders are skipped and a failing interrupt is logged
    without stopping the loop.
    """
    # Iterate a copy so changes to the map during the loop are harmless.
    running = list(self._running_agents.items())
    for key, running_agent in running:
        if running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                running_agent.interrupt(reason)
                logger.debug("Interrupted running agent for session %s during shutdown", key)
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)
async def _notify_active_sessions_of_shutdown(self) -> None:
    """Send shutdown/restart notifications to active chats and home channels.

    Called at the very start of stop() — adapters are still connected so
    messages can be delivered. Best-effort: individual send failures are
    logged and swallowed so they never block the shutdown sequence.

    Each (platform, chat, thread) destination receives at most one
    successful notification, and platforms whose config has
    ``gateway_restart_notification`` set falsy are skipped.
    """
    active = self._snapshot_running_agents()
    action = "restarting" if self._restart_requested else "shutting down"
    hint = (
        "Your current task will be interrupted. "
        "Send any message after restart and I'll try to resume where you left off."
        if self._restart_requested
        else "Your current task will be interrupted."
    )
    msg = f"⚠️ Gateway {action} — {hint}"
    # Destinations already notified successfully: (platform, chat_id, thread_id).
    notified: set[tuple[str, str, Optional[str]]] = set()
    # Pass 1: chats that currently have a running agent.
    for session_key in active:
        # Resolve the destination: session store origin first, then the
        # cached source, and finally the parsed session key.
        source = None
        try:
            if getattr(self, "session_store", None) is not None:
                self.session_store._ensure_loaded()
                entry = self.session_store._entries.get(session_key)
                source = getattr(entry, "origin", None) if entry else None
        except Exception as e:
            logger.debug(
                "Failed to load session origin for shutdown notification %s: %s",
                session_key,
                e,
            )
        if source is None:
            source = self._get_cached_session_source(session_key)
        if source is not None:
            platform_str = source.platform.value
            chat_id = str(source.chat_id)
            thread_id = source.thread_id
        else:
            _parsed = _parse_session_key(session_key)
            if not _parsed:
                continue
            platform_str = _parsed["platform"]
            chat_id = _parsed["chat_id"]
            thread_id = _parsed.get("thread_id")
        dedup_key = (platform_str, chat_id, str(thread_id) if thread_id else None)
        if dedup_key in notified:
            continue
        try:
            platform = Platform(platform_str)
            adapter = self.adapters.get(platform)
            if not adapter:
                continue
            platform_cfg = self.config.platforms.get(platform)
            if platform_cfg is not None and not platform_cfg.gateway_restart_notification:
                logger.info(
                    "Shutdown notification suppressed for active session: %s has gateway_restart_notification=false",
                    platform_str,
                )
                continue
            metadata = {"thread_id": thread_id} if thread_id else None
            result = await adapter.send(chat_id, msg, metadata=metadata)
            # A result that explicitly reports success=False is a failed
            # send; the destination is not marked as notified.
            if result is not None and getattr(result, "success", True) is False:
                logger.debug(
                    "Failed to send shutdown notification to %s:%s: %s",
                    platform_str,
                    chat_id,
                    getattr(result, "error", "send returned success=False"),
                )
                continue
            notified.add(dedup_key)
            logger.info(
                "Sent shutdown notification to active chat %s:%s",
                platform_str, chat_id,
            )
        except Exception as e:
            logger.debug(
                "Failed to send shutdown notification to %s:%s: %s",
                platform_str, chat_id, e,
            )
    # Pass 2: the configured home channel of every connected platform,
    # unless pass 1 already notified that same destination.
    for platform, adapter in list(self.adapters.items()):
        home = self.config.get_home_channel(platform)
        if not home or not home.chat_id:
            continue
        platform_cfg = self.config.platforms.get(platform)
        if platform_cfg is not None and not platform_cfg.gateway_restart_notification:
            logger.info(
                "Shutdown notification suppressed for home channel: %s has gateway_restart_notification=false",
                platform.value,
            )
            continue
        dedup_key = (platform.value, str(home.chat_id), str(home.thread_id) if home.thread_id else None)
        if dedup_key in notified:
            continue
        try:
            metadata = {"thread_id": home.thread_id} if home.thread_id else None
            # The metadata keyword is only passed when there is a thread.
            if metadata:
                result = await adapter.send(str(home.chat_id), msg, metadata=metadata)
            else:
                result = await adapter.send(str(home.chat_id), msg)
            if result is not None and getattr(result, "success", True) is False:
                logger.debug(
                    "Failed to send shutdown notification to home channel %s:%s: %s",
                    platform.value,
                    home.chat_id,
                    getattr(result, "error", "send returned success=False"),
                )
                continue
            notified.add(dedup_key)
            logger.info(
                "Sent shutdown notification to home channel %s:%s",
                platform.value,
                home.chat_id,
            )
        except Exception as e:
            logger.debug(
                "Failed to send shutdown notification to home channel %s:%s: %s",
                platform.value,
                home.chat_id,
                e,
            )
def _finalize_shutdown_agents(self, active_agents: Dict[str, Any]) -> None:
for agent in active_agents.values():
try:
from hermes_cli.plugins import invoke_hook as _invoke_hook
_invoke_hook(
"on_session_finalize",
session_id=getattr(agent, "session_id", None),
platform="gateway",
)
except Exception:
pass
self._cleanup_agent_resources(agent)
def _cleanup_agent_resources(self, agent: Any) -> None:
"""Best-effort cleanup for temporary or cached agent instances."""
if agent is None:
return
try:
if hasattr(agent, "shutdown_memory_provider"):
session_messages = getattr(agent, "_session_messages", None)
if isinstance(session_messages, list):
agent.shutdown_memory_provider(session_messages)
else:
agent.shutdown_memory_provider()
except Exception:
pass
try:
if hasattr(agent, "close"):
agent.close()
except Exception:
pass
try:
from agent.auxiliary_client import cleanup_stale_async_clients
cleanup_stale_async_clients()
except Exception:
pass
# A session whose restart-failure counter reaches this value (compared with
# >=) is auto-suspended by _suspend_stuck_loop_sessions.
_STUCK_LOOP_THRESHOLD = 3
# Name of the JSON file, resolved under _hermes_home, that stores the
# per-session restart-failure counters.
_STUCK_LOOP_FILE = ".restart_failure_counts"
def _increment_restart_failure_counts(self, active_session_keys: set) -> None:
    """Increment restart-failure counters for sessions active at shutdown.

    Persists to a JSON file so counters survive across restarts.
    Sessions NOT in active_session_keys are removed (they completed
    successfully, so the loop is broken).

    An unreadable, unparsable, or wrongly shaped counter file is treated
    as empty, and a non-numeric stored value restarts that counter at
    zero. Write failures are ignored.

    Args:
        active_session_keys: Keys of the sessions still active at shutdown.
    """
    import json

    path = _hermes_home / self._STUCK_LOOP_FILE
    try:
        counts = json.loads(path.read_text(encoding="utf-8")) if path.exists() else {}
    except Exception:
        counts = {}
    # Valid JSON is not necessarily a mapping (e.g. a list or a number).
    if not isinstance(counts, dict):
        counts = {}

    new_counts = {}
    for key in active_session_keys:
        previous = counts.get(key, 0)
        # bool is an int subclass; reject it along with any non-number.
        if isinstance(previous, bool) or not isinstance(previous, (int, float)):
            previous = 0
        new_counts[key] = previous + 1

    try:
        atomic_json_write(path, new_counts, indent=None)
    except Exception:
        pass
def _suspend_stuck_loop_sessions(self) -> int:
    """Suspend sessions that have been active across too many restarts.

    Returns the number of sessions suspended. Called on gateway startup
    AFTER suspend_recently_active() to catch the stuck-loop pattern:
    session loads → agent gets stuck → gateway restarts → repeat.

    A missing, unparsable, or wrongly shaped counter file yields 0, and
    non-numeric counter values are ignored.
    """
    import json

    path = _hermes_home / self._STUCK_LOOP_FILE
    if not path.exists():
        return 0
    try:
        counts = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return 0
    # Valid JSON is not necessarily a mapping (e.g. a list or a number).
    if not isinstance(counts, dict):
        return 0

    stuck_keys = [
        key
        for key, value in counts.items()
        if isinstance(value, (int, float))
        and not isinstance(value, bool)
        and value >= self._STUCK_LOOP_THRESHOLD
    ]

    suspended = 0
    for session_key in stuck_keys:
        try:
            entry = self.session_store._entries.get(session_key)
            if entry and not entry.suspended:
                entry.suspended = True
                suspended += 1
                logger.warning(
                    "Auto-suspended stuck session %s (active across %d "
                    "consecutive restarts — likely a stuck loop)",
                    session_key, counts[session_key],
                )
        except Exception:
            pass

    if suspended:
        try:
            self.session_store._save()
        except Exception:
            pass
        # The counter file is removed only after something was suspended.
        # Removing it on every startup would reset the counters each time
        # and the threshold could never be reached.
        try:
            path.unlink(missing_ok=True)
        except Exception:
            pass
    return suspended
def _clear_restart_failure_count(self, session_key: str) -> None:
    """Clear the restart-failure counter for a session that completed OK.

    Called after a successful agent turn to signal the loop is broken.
    The counter file is rewritten without the session, or deleted when no
    counters remain. All failures are ignored.
    """
    import json

    counter_file = _hermes_home / self._STUCK_LOOP_FILE
    if not counter_file.exists():
        return
    try:
        remaining = json.loads(counter_file.read_text())
        if session_key not in remaining:
            return
        del remaining[session_key]
        if not remaining:
            counter_file.unlink(missing_ok=True)
        else:
            atomic_json_write(counter_file, remaining, indent=None)
    except Exception:
        pass
async def _launch_detached_restart_command(self) -> None:
    """Spawn a detached helper that runs ``hermes gateway restart`` after this process exits.

    On Windows the helper is a small Python watcher script that polls for
    up to 120 seconds until this PID is gone and then starts the restart
    command. Elsewhere it is a ``bash -lc`` loop using ``kill -0``, run
    through ``setsid`` when that binary is available.

    Logs an error and returns when the hermes binary cannot be located.
    """
    import shutil
    import subprocess

    hermes_cmd = _resolve_hermes_bin()
    if not hermes_cmd:
        logger.error("Could not locate hermes binary for detached /restart")
        return
    current_pid = os.getpid()

    if sys.platform == "win32":
        import textwrap
        from hermes_cli._subprocess_compat import windows_detach_popen_kwargs

        cmd_argv = [*hermes_cmd, "gateway", "restart"]
        # This text is executed via `python -c`, so its indentation is
        # significant; dedent() strips the common leading margin.
        watcher = textwrap.dedent(
            """
            import os, subprocess, sys, time
            pid = int(sys.argv[1])
            cmd = sys.argv[2:]
            deadline = time.monotonic() + 120
            def _alive(p):
                # On Windows, os.kill(pid, 0) is NOT a no-op — it maps to
                # GenerateConsoleCtrlEvent(0, pid) (bpo-14484). Use the
                # Win32 handle-based existence check instead.
                if os.name == 'nt':
                    import ctypes
                    k32 = ctypes.windll.kernel32
                    k32.OpenProcess.restype = ctypes.c_void_p
                    k32.WaitForSingleObject.restype = ctypes.c_uint
                    k32.GetLastError.restype = ctypes.c_uint
                    h = k32.OpenProcess(0x1000 | 0x100000, False, int(p))
                    if not h:
                        return k32.GetLastError() != 87
                    try:
                        return k32.WaitForSingleObject(h, 0) == 0x102
                    finally:
                        k32.CloseHandle(h)
                try:
                    os.kill(int(p), 0)
                    return True
                except ProcessLookupError:
                    return False
                except PermissionError:
                    return True
                except OSError:
                    return False
            while time.monotonic() < deadline:
                if not _alive(pid):
                    break
                time.sleep(0.2)
            _CREATE_NEW_PROCESS_GROUP = 0x00000200
            _DETACHED_PROCESS = 0x00000008
            _CREATE_NO_WINDOW = 0x08000000
            subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                creationflags=_CREATE_NEW_PROCESS_GROUP | _DETACHED_PROCESS | _CREATE_NO_WINDOW,
            )
            """
        ).strip()
        subprocess.Popen(
            [sys.executable, "-c", watcher, str(current_pid), *cmd_argv],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            **windows_detach_popen_kwargs(),
        )
        return

    # POSIX: wait until this PID disappears, then run the restart command.
    # Every part of the command is shell-quoted before interpolation.
    cmd = " ".join(shlex.quote(part) for part in hermes_cmd)
    shell_cmd = (
        f"while kill -0 {current_pid} 2>/dev/null; do sleep 0.2; done; "
        f"{cmd} gateway restart"
    )
    argv = ["bash", "-lc", shell_cmd]
    setsid_bin = shutil.which("setsid")
    if setsid_bin:
        # Run through setsid when it is installed.
        argv.insert(0, setsid_bin)
    subprocess.Popen(
        argv,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
    )
def request_restart(self, *, detached: bool = False, via_service: bool = False) -> bool:
    """Schedule a gateway restart as a background task.

    Records the restart flags on the instance and creates a task that
    calls ``stop(restart=True, ...)`` after a 0.05 second delay.

    Returns:
        False when a restart task was already started, True otherwise.
    """
    if self._restart_task_started:
        return False

    self._restart_requested = True
    self._restart_detached = detached
    self._restart_via_service = via_service
    self._restart_task_started = True

    async def _restart_after_delay() -> None:
        await asyncio.sleep(0.05)
        await self.stop(restart=True, detached_restart=detached, service_restart=via_service)

    restart_task = asyncio.create_task(_restart_after_delay())
    # Keep the task referenced in the set until it is done.
    self._background_tasks.add(restart_task)
    restart_task.add_done_callback(self._background_tasks.discard)
    return True
# `resume_reason` values for which _schedule_resume_pending_sessions will
# auto-resume a session; entries with any other reason are left alone.
_AUTO_RESUME_REASONS = frozenset(
    {"restart_timeout", "shutdown_timeout", "restart_interrupted"}
)
def _schedule_resume_pending_sessions(self) -> int:
    """Kick off a synthetic turn for each fresh, restart-interrupted session.

    Candidates are session-store entries that are ``resume_pending``, not
    suspended, have an ``origin`` and whose ``resume_reason`` is listed in
    ``_AUTO_RESUME_REASONS``.  An entry is skipped when its marker
    timestamp (``last_resume_marked_at`` or, failing that, ``updated_at``)
    is older than the freshness window, or when no adapter is registered
    in ``self.adapters`` for its platform; such sessions simply keep their
    ``resume_pending`` state.

    The synthetic event is internal and carries empty text; the recovery
    wording is expected to come from the existing ``resume_pending``
    handling in ``_handle_message_with_agent``.

    Returns:
        The number of sessions for which a resume task was scheduled
        (``0`` if the store could not be enumerated).
    """
    freshness_secs = _auto_continue_freshness_window()

    store = self.session_store
    eligible = []
    try:
        with store._lock:
            store._ensure_loaded_locked()
            for candidate in store._entries.values():
                if not candidate.resume_pending or candidate.suspended:
                    continue
                if candidate.origin is None:
                    continue
                if candidate.resume_reason not in self._AUTO_RESUME_REASONS:
                    continue
                eligible.append(candidate)
    except Exception as exc:
        logger.warning("Failed to enumerate resume-pending sessions: %s", exc)
        return 0

    reference_time = datetime.now()
    resumed = 0
    for session in eligible:
        stamp = session.last_resume_marked_at or session.updated_at
        if stamp is not None:
            age_secs = (reference_time - stamp).total_seconds()
            if age_secs > freshness_secs:
                # Too old to auto-continue.
                continue

        origin = session.origin
        target_adapter = self.adapters.get(origin.platform)
        if target_adapter is None:
            logger.debug(
                "Skipping auto-resume for %s: adapter not ready for %s",
                session.session_key,
                getattr(origin.platform, "value", origin.platform),
            )
            continue

        resume_event = MessageEvent(
            text="",
            message_type=MessageType.TEXT,
            source=origin,
            internal=True,
        )
        resume_task = asyncio.create_task(target_adapter.handle_message(resume_event))
        # Keep a strong reference until the task finishes.
        self._background_tasks.add(resume_task)
        resume_task.add_done_callback(self._background_tasks.discard)
        resumed += 1

    if resumed:
        logger.info(
            "Scheduled auto-resume for %d restart-interrupted session(s)",
            resumed,
        )
    return resumed
async def start(self) -> bool:
"""
Start the gateway and all configured platform adapters.
Returns True if at least one adapter connected successfully.
"""
# NOTE(review): the code below also returns True when nothing connected
# (see the two ``return True`` statements) — confirm the docstring's
# return contract against callers.
logger.info("Starting Hermes Gateway...")
# Remember the loop we are running on; None when no loop is running.
try:
self._gateway_loop = asyncio.get_running_loop()
except RuntimeError:
self._gateway_loop = None
logger.info("Session storage: %s", self.config.sessions_dir)
# Best-effort check of the systemd unit's stop timeout against the
# configured restart drain timeout; a reported mismatch is logged.
try:
from gateway.shutdown_forensics import check_systemd_timing_alignment
_alignment = check_systemd_timing_alignment(self._restart_drain_timeout)
if _alignment is not None and _alignment.get("mismatch"):
logger.warning(
"Stale systemd unit detected: %s has TimeoutStopSec=%.0fs but "
"drain_timeout=%.0fs (expected >=%.0fs). systemd may SIGKILL the "
"gateway mid-drain. Run `hermes gateway service install --replace` "
"to regenerate the unit, or shorten agent.restart_drain_timeout.",
_alignment.get("unit", "(unknown)"),
_alignment["timeout_stop_sec"],
_alignment["drain_timeout"],
_alignment["expected_min"],
)
except Exception as _e:
logger.debug("check_systemd_timing_alignment failed: %s", _e)
# Log the iteration budget read from HERMES_MAX_ITERATIONS (default "90").
try:
_effective_max_iter = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
logger.info(
"Agent budget: max_iterations=%d (agent.max_turns from config.yaml, "
"or HERMES_MAX_ITERATIONS from .env, or default 90)",
_effective_max_iter,
)
except Exception:
pass
# Report whether secret redaction is on, based on HERMES_REDACT_SECRETS
# (default "true").
try:
_redact_raw = os.getenv("HERMES_REDACT_SECRETS", "true")
_redact_on = _redact_raw.lower() in {"1", "true", "yes", "on"}
if _redact_on:
logger.info(
"Secret redaction: ENABLED (tool output, logs, and chat "
"responses are scrubbed before delivery)"
)
else:
logger.warning(
"Secret redaction: DISABLED (HERMES_REDACT_SECRETS=%s). "
"API keys and tokens may appear verbatim in chat output, "
"session JSONs, and logs. Set security.redact_secrets: true "
"in config.yaml to re-enable.",
_redact_raw,
)
except Exception:
pass
# Log the active profile when it is something other than "default".
try:
from hermes_cli.profiles import get_active_profile_name
_profile = get_active_profile_name()
if _profile and _profile != "default":
logger.info("Active profile: %s", _profile)
except Exception:
pass
# Publish the "starting" gateway state; failures are ignored.
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="starting", exit_reason=None)
except Exception:
pass
# Security-advisory check: a non-empty message is logged at warning level.
try:
from hermes_cli.security_advisories import (
detect_compromised,
gateway_log_message,
)
_adv_hits = detect_compromised()
_adv_msg = gateway_log_message(_adv_hits)
if _adv_msg:
logger.warning("%s", _adv_msg)
logger.warning(
"Run `hermes doctor` on the gateway host for full "
"remediation steps."
)
except Exception:
logger.debug(
"security advisory check failed at gateway startup",
exc_info=True,
)
# Environment variables that carry allowlists for the built-in platforms.
_builtin_allowed_vars = (
"TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
"TELEGRAM_GROUP_ALLOWED_USERS",
"TELEGRAM_GROUP_ALLOWED_CHATS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
"FEISHU_ALLOWED_USERS",
"WECOM_ALLOWED_USERS",
"WECOM_CALLBACK_ALLOWED_USERS",
"WEIXIN_ALLOWED_USERS",
"BLUEBUBBLES_ALLOWED_USERS",
"QQ_ALLOWED_USERS",
"YUANBAO_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS",
)
# Per-platform "allow everyone" switches for the built-in platforms.
_builtin_allow_all_vars = (
"TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS",
"FEISHU_ALLOW_ALL_USERS",
"WECOM_ALLOW_ALL_USERS",
"WECOM_CALLBACK_ALLOW_ALL_USERS",
"WEIXIN_ALLOW_ALL_USERS",
"BLUEBUBBLES_ALLOW_ALL_USERS",
"QQ_ALLOW_ALL_USERS",
"YUANBAO_ALLOW_ALL_USERS",
)
# Plugin platforms contribute their own env var names via the registry;
# if the registry cannot be read the tuples stay empty.
_plugin_allowed_vars: tuple = ()
_plugin_allow_all_vars: tuple = ()
try:
from gateway.platform_registry import platform_registry
_plugin_allowed_vars = tuple(
e.allowed_users_env for e in platform_registry.plugin_entries()
if e.allowed_users_env
)
_plugin_allow_all_vars = tuple(
e.allow_all_env for e in platform_registry.plugin_entries()
if e.allow_all_env
)
except Exception:
pass
# Warn when neither an allowlist nor an allow-all flag is set anywhere.
_any_allowlist = any(
os.getenv(v) for v in _builtin_allowed_vars + _plugin_allowed_vars
)
_allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in {"true", "1", "yes"} or any(
os.getenv(v, "").lower() in {"true", "1", "yes"}
for v in _builtin_allow_all_vars + _plugin_allow_all_vars
)
if not _any_allowlist and not _allow_all:
logger.warning(
"No user allowlists configured. All unauthorized users will be denied. "
"Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
"or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
)
# Plugin discovery; a failure is logged and startup continues.
try:
from hermes_cli.plugins import discover_plugins
discover_plugins()
except Exception:
logger.warning(
"plugin discovery failed at gateway startup", exc_info=True,
)
# Register shell hooks from the loaded config, passing accept_hooks=False.
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
register_from_config(load_config(), accept_hooks=False)
except Exception:
logger.debug(
"shell-hook registration failed at gateway startup",
exc_info=True,
)
self.hooks.discover_and_load()
# Restore background processes recorded in the process-registry checkpoint.
try:
from tools.process_registry import process_registry
recovered = process_registry.recover_from_checkpoint()
if recovered:
logger.info("Recovered %s background process(es) from previous run", recovered)
except Exception as e:
logger.warning("Process checkpoint recovery: %s", e)
# A ".clean_shutdown" marker means the previous gateway exited cleanly:
# consume the marker and skip session suspension. Without it, recently
# active sessions are handed to suspend_recently_active().
_clean_marker = _hermes_home / ".clean_shutdown"
if _clean_marker.exists():
logger.info("Previous gateway exited cleanly — skipping session suspension")
try:
_clean_marker.unlink()
except Exception:
pass
else:
try:
suspended = self.session_store.suspend_recently_active()
if suspended:
logger.info("Marked %d in-flight session(s) as resumable from previous run", suspended)
except Exception as e:
logger.warning("Session suspension on startup failed: %s", e)
# Suspend sessions flagged by the stuck-loop check.
try:
stuck = self._suspend_stuck_loop_sessions()
if stuck:
logger.warning("Auto-suspended %d stuck-loop session(s)", stuck)
except Exception as e:
logger.debug("Stuck-loop detection failed: %s", e)
# Connect every enabled platform, sorting failures into retryable and
# non-retryable buckets.
connected_count = 0
enabled_platform_count = 0
startup_nonretryable_errors: list[str] = []
startup_retryable_errors: list[str] = []
for platform, platform_config in self.config.platforms.items():
if not platform_config.enabled:
continue
enabled_platform_count += 1
adapter = self._create_adapter(platform, platform_config)
if not adapter:
# No adapter object: log and move on. Nothing is queued for retry.
_pval = platform.value
_builtin_names = {m.value for m in Platform.__members__.values()}
if _pval not in _builtin_names:
logger.warning(
"No adapter for '%s' — is the plugin installed? "
"(platform is enabled in config.yaml but no plugin registered it)",
_pval,
)
else:
logger.warning("No adapter available for %s", _pval)
continue
# Wire the adapter's callbacks before attempting to connect.
adapter.set_message_handler(self._handle_message)
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
adapter.set_session_store(self.session_store)
adapter.set_busy_session_handler(self._handle_active_session_busy_message)
logger.info("Connecting to %s...", platform.value)
self._update_platform_runtime_status(
platform.value,
platform_state="connecting",
error_code=None,
error_message=None,
)
try:
success = await self._connect_adapter_with_timeout(adapter, platform)
if success:
self.adapters[platform] = adapter
self._sync_voice_mode_state_to_adapter(adapter)
connected_count += 1
self._update_platform_runtime_status(
platform.value,
platform_state="connected",
error_code=None,
error_message=None,
)
logger.info("✓ %s connected", platform.value)
else:
logger.warning("✗ %s failed to connect", platform.value)
await self._safe_adapter_disconnect(adapter, platform)
if adapter.has_fatal_error:
# The adapter reported a fatal error: record it, and queue the
# platform for reconnect only when the error is retryable.
self._update_platform_runtime_status(
platform.value,
platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
error_code=adapter.fatal_error_code,
error_message=adapter.fatal_error_message,
)
target = (
startup_retryable_errors
if adapter.fatal_error_retryable
else startup_nonretryable_errors
)
target.append(
f"{platform.value}: {adapter.fatal_error_message}"
)
if adapter.fatal_error_retryable:
# First retry is due 30 seconds from now on the monotonic clock.
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
else:
# Plain connect failure without a fatal error: treated as retryable.
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
error_code=None,
error_message="failed to connect",
)
startup_retryable_errors.append(
f"{platform.value}: failed to connect"
)
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
except Exception as e:
# An exception while connecting is also treated as retryable.
logger.error("✗ %s error: %s", platform.value, e)
await self._safe_adapter_disconnect(adapter, platform)
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
error_code=None,
error_message=str(e),
)
startup_retryable_errors.append(f"{platform.value}: {e}")
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
# Nothing connected: request a clean exit on non-retryable errors,
# otherwise keep running.
if connected_count == 0:
if startup_nonretryable_errors:
reason = "; ".join(startup_nonretryable_errors)
logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
except Exception:
pass
# A clean exit is requested and True is returned to the caller.
self._request_clean_exit(reason)
return True
if enabled_platform_count > 0:
if startup_retryable_errors:
reason = "; ".join(startup_retryable_errors)
logger.warning(
"Gateway started with no connected platforms — "
"%d platform(s) queued for retry: %s",
len(self._failed_platforms), reason,
)
try:
from gateway.status import write_runtime_status
write_runtime_status(
gateway_state="degraded",
exit_reason=None,
)
except Exception:
pass
# NOTE(review): there is no ``else:`` between the retryable-errors
# handling above and this warning. Confirm whether it is meant to be
# logged only when no adapter object could be created at all.
logger.warning(
"No adapter could be created for any of the %d configured platform(s). "
"Check that required dependencies are installed and credentials are set. "
"Gateway will continue for cron job execution.",
enabled_platform_count,
)
else:
logger.warning("No messaging platforms enabled.")
logger.info("Gateway will continue running for cron job execution.")
# Share the adapter mapping with the delivery router, then mark the
# gateway as running.
self.delivery_router.adapters = self.adapters
self._wire_teams_pipeline_runtime()
self._running = True
self._update_runtime_status("running")
hook_count = len(self.hooks.loaded_hooks)
if hook_count:
logger.info("%s hook(s) loaded", hook_count)
# Emit the startup hook with the values of the connected platforms.
await self.hooks.emit("gateway:startup", {
"platforms": [p.value for p in self.adapters.keys()],
})
if connected_count > 0:
logger.info("Gateway running with %s platform(s)", connected_count)
# Build the channel directory from the connected adapters; a failure is
# logged at warning level and does not stop startup.
try:
from gateway.channel_directory import build_channel_directory
directory = await build_channel_directory(self.adapters)
ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
logger.info("Channel directory built: %d target(s)", ch_count)
except Exception as e:
logger.warning("Channel directory build failed: %s", e)
# If no update notification went out and an update marker file exists,
# schedule the update-notification watch.
notified = await self._send_update_notification()
if not notified and any(
path.exists()
for path in (
_hermes_home / ".update_pending.json",
_hermes_home / ".update_pending.claimed.json",
)
):
self._schedule_update_notification_watch()
if connected_count > 0:
await asyncio.sleep(1.0)
# Restart notification: the target it was delivered to (if any) is
# excluded from the home-channel startup notifications.
restart_notification_pending = _restart_notification_pending()
delivered_restart_target = await self._send_restart_notification()
if restart_notification_pending or delivered_restart_target is not None:
skip_home_targets = (
{delivered_restart_target} if delivered_restart_target else None
)
await self._send_home_channel_startup_notifications(
skip_targets=skip_home_targets,
)
self._schedule_resume_pending_sessions()
# Start a watcher task for each process recovered from the checkpoint.
try:
from tools.process_registry import process_registry
while process_registry.pending_watchers:
watcher = process_registry.pending_watchers.pop(0)
asyncio.create_task(self._run_process_watcher(watcher))
logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
except Exception as e:
logger.error("Recovered watcher setup error: %s", e)
# Long-running background watchers.
# NOTE(review): these tasks are not added to self._background_tasks, so
# no reference to them is kept here — confirm that is intentional.
asyncio.create_task(self._session_expiry_watcher())
asyncio.create_task(self._kanban_notifier_watcher())
asyncio.create_task(self._kanban_dispatcher_watcher())
# The reconnect watcher is only started when a platform was queued for retry.
if self._failed_platforms:
logger.info(
"Starting reconnection watcher for %d failed platform(s): %s",
len(self._failed_platforms),
", ".join(p.value for p in self._failed_platforms),
)
asyncio.create_task(self._platform_reconnect_watcher())
asyncio.create_task(self._handoff_watcher())
logger.info("Press Ctrl+C to stop")
return True
async def _handoff_watcher(self, interval: float = 2.0) -> None:
    """Poll for pending session handoffs and process them one by one.

    After an initial 5-second delay the loop runs while ``self._running``
    is true.  On every tick it asks ``self._session_db`` for pending
    handoff rows and, for each row that has an ``id`` and that this
    gateway manages to claim via ``claim_handoff``:

    * awaits ``_process_handoff(row)`` and then records success with
      ``complete_handoff``;
    * on any exception logs a warning and records the failure text with
      ``fail_handoff``.

    When no session DB is attached the tick is skipped.  Unexpected
    errors in a tick are logged at debug level and the loop continues;
    cancellation is always propagated.

    Args:
        interval: Seconds to sleep between ticks.
    """
    await asyncio.sleep(5)
    while self._running:
        try:
            if self._session_db is None:
                # No DB to poll yet; wait one interval and re-check.
                await asyncio.sleep(interval)
                continue

            for handoff in self._session_db.list_pending_handoffs():
                sid = handoff.get("id")
                # Skip rows without an id and rows that cannot be claimed.
                if not sid or not self._session_db.claim_handoff(sid):
                    continue
                try:
                    await self._process_handoff(handoff)
                    self._session_db.complete_handoff(sid)
                except Exception as err:
                    logger.warning(
                        "Handoff for session %s failed: %s",
                        sid, err, exc_info=True,
                    )
                    self._session_db.fail_handoff(sid, str(err))
        except asyncio.CancelledError:
            raise
        except Exception as tick_err:
            logger.debug("Handoff watcher tick error: %s", tick_err, exc_info=True)
        await asyncio.sleep(interval)
async def _process_handoff(self, row: Dict[str, Any]) -> None:
    """Carry out a single handoff row; any failure surfaces as an exception.

    Steps, in order:

    1. Resolve ``row["handoff_platform"]`` to a ``Platform`` with a
       connected adapter and a configured home channel.
    2. Ask the adapter to create a handoff thread in the home channel;
       if that raises or yields nothing, fall back to the home channel's
       own ``thread_id`` (if any).
    3. Build the destination ``SessionSource`` and session key, make sure
       a session exists for it, and switch that key to the CLI session id
       taken from ``row["id"]``.
    4. Evict any cached agent / running-agent state for the key.
    5. Dispatch an internal synthetic ``MessageEvent`` through
       ``self._handle_message`` and, when it returns text, send that text
       to the home channel.

    Args:
        row: Handoff row; reads ``id``, ``handoff_platform`` and ``title``.

    Raises:
        RuntimeError: For an empty or unknown platform, an inactive
            adapter, a missing home channel, a failed session switch, or
            a failed ``adapter.send``.
    """
    from gateway.config import Platform
    from gateway.session import SessionSource, build_session_key
    from gateway.platforms.base import MessageEvent

    cli_session_id = row["id"]

    # --- resolve platform, adapter and home channel ---------------------
    platform_name = (row.get("handoff_platform") or "").strip().lower()
    if not platform_name:
        raise RuntimeError("handoff_platform is empty")
    try:
        platform = Platform(platform_name)
    except (ValueError, KeyError):
        raise RuntimeError(f"unknown platform '{platform_name}'")

    adapter = self.adapters.get(platform)
    if not adapter:
        raise RuntimeError(
            f"platform '{platform_name}' is not active in this gateway"
        )

    home = self.config.get_home_channel(platform)
    if not home or not home.chat_id:
        raise RuntimeError(
            f"no home channel configured for {platform_name}; "
            f"run /sethome on the desired chat first"
        )
    home_chat_id = str(home.chat_id)

    # --- try to open a dedicated thread for the handoff ------------------
    cli_title = row.get("title") or cli_session_id[:8]
    try:
        created_thread = await adapter.create_handoff_thread(
            home_chat_id, f"Hermes — {cli_title}",
        )
    except Exception as exc:
        logger.debug(
            "Handoff: create_handoff_thread raised on %s: %s",
            platform_name, exc, exc_info=True,
        )
        created_thread = None

    if created_thread:
        effective_thread_id = created_thread
        dest_chat_type = "thread"
    else:
        # No new thread: reuse the home channel's thread, if it has one.
        effective_thread_id = str(home.thread_id) if home.thread_id else None
        dest_chat_type = "dm"

    # --- bind the destination session key to the CLI session -------------
    dest_source = SessionSource(
        platform=platform,
        chat_id=home_chat_id,
        chat_name=home.name,
        chat_type=dest_chat_type,
        user_id="system:handoff",
        user_name="Handoff",
        thread_id=effective_thread_id,
    )
    platform_cfg = self.config.platforms.get(platform)
    if platform_cfg:
        extra = platform_cfg.extra
    else:
        extra = {}
    session_key = build_session_key(
        dest_source,
        group_sessions_per_user=extra.get("group_sessions_per_user", True),
        thread_sessions_per_user=extra.get("thread_sessions_per_user", False),
    )
    self.session_store.get_or_create_session(dest_source)
    if self.session_store.switch_session(session_key, cli_session_id) is None:
        raise RuntimeError(
            f"could not switch session key {session_key} → {cli_session_id}"
        )

    # Drop cached / running agent state held under this session key.
    self._evict_cached_agent(session_key)
    self._release_running_agent_state(session_key)

    # --- run the synthetic turn -------------------------------------------
    synthetic_event = MessageEvent(
        text=(
            f"[Session was just handed off from CLI (\"{cli_title}\") to this "
            f"channel. The full prior conversation history is loaded above. "
            f"Briefly confirm you're working here and summarize what we were "
            f"working on, so the user can continue from this device.]"
        ),
        source=dest_source,
        internal=True,
    )
    logger.info(
        "Handoff: dispatching synthetic turn for CLI session %s → %s "
        "(home=%s, thread=%s, session_key=%s)",
        cli_session_id, platform_name, home.chat_id, effective_thread_id,
        session_key,
    )
    response_text = await self._handle_message(synthetic_event)
    if not response_text:
        return

    # --- deliver the reply --------------------------------------------------
    send_metadata = {"thread_id": effective_thread_id} if effective_thread_id else None
    try:
        result = await adapter.send(
            chat_id=home_chat_id,
            content=response_text,
            metadata=send_metadata,
        )
    except Exception as exc:
        raise RuntimeError(f"adapter.send failed: {exc}") from exc
    # A result without a ``success`` attribute counts as success.
    if not getattr(result, "success", True):
        err = getattr(result, "error", "send returned success=False")
        raise RuntimeError(f"adapter.send failed: {err}")
async def _session_expiry_watcher(self, interval: int = 300):
"""Background task that finalizes expired sessions.
Runs every ``interval`` seconds (default 5 min). For each session
whose reset policy has expired, invokes ``on_session_finalize``
hooks, cleans up the cached AIAgent's tool resources, evicts the
cache entry so it can be garbage-collected, and marks the session
so it won't be finalized again.
"""
# Initial 60-second delay before the first sweep.
await asyncio.sleep(60)
# Failed finalize attempts per session id, kept for the life of this task.
_finalize_failures: dict[str, int] = {}
_MAX_FINALIZE_RETRIES = 3
while self._running:
try:
self.session_store._ensure_loaded()
# Collect entries that are expired but not yet flagged expiry_finalized.
_expired_entries = []
for key, entry in list(self.session_store._entries.items()):
if entry.expiry_finalized:
continue
if not self.session_store._is_session_expired(entry):
continue
_expired_entries.append((key, entry))
if _expired_entries:
# Per-platform tally for the summary log line.
# presumably the third ":"-separated field of a session key is the
# platform — verify against the session-key builder.
_platforms: dict[str, int] = {}
for _k, _e in _expired_entries:
_parts = _k.split(":")
_plat = _parts[2] if len(_parts) > 2 else "unknown"
_platforms[_plat] = _platforms.get(_plat, 0) + 1
_plat_summary = ", ".join(
f"{p}:{c}" for p, c in sorted(_platforms.items())
)
logger.info(
"Session expiry: %d sessions to finalize (%s)",
len(_expired_entries), _plat_summary,
)
for key, entry in _expired_entries:
try:
# Hook errors are swallowed so they do not block finalization.
try:
from hermes_cli.plugins import invoke_hook as _invoke_hook
_parts = key.split(":")
_platform = _parts[2] if len(_parts) > 2 else ""
_invoke_hook(
"on_session_finalize",
session_id=entry.session_id,
platform=_platform,
)
except Exception:
pass
# Find the agent for this key: first in the agent cache (an entry
# may be a tuple whose first item is the agent), then among the
# running agents.
_cached_agent = None
_cache_lock = getattr(self, "_agent_cache_lock", None)
if _cache_lock is not None:
with _cache_lock:
_cached = self._agent_cache.get(key)
_cached_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
if _cached_agent is None:
_cached_agent = self._running_agents.get(key)
# The pending sentinel is a placeholder, not an agent to clean up.
if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL:
self._cleanup_agent_resources(_cached_agent)
self._evict_cached_agent(key)
# Persist the finalized flag under the store lock.
with self.session_store._lock:
entry.expiry_finalized = True
self.session_store._save()
logger.debug(
"Session expiry finalized for %s",
entry.session_id,
)
_finalize_failures.pop(entry.session_id, None)
except Exception as e:
# Count the failure; after _MAX_FINALIZE_RETRIES attempts the
# session is marked finalized anyway so it is not retried again.
failures = _finalize_failures.get(entry.session_id, 0) + 1
_finalize_failures[entry.session_id] = failures
if failures >= _MAX_FINALIZE_RETRIES:
logger.warning(
"Session finalize gave up after %d attempts for %s: %s. "
"Marking as finalized to prevent infinite retry loop.",
failures, entry.session_id, e,
)
with self.session_store._lock:
entry.expiry_finalized = True
self.session_store._save()
_finalize_failures.pop(entry.session_id, None)
else:
logger.debug(
"Session finalize failed (%d/%d) for %s: %s",
failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
)
if _expired_entries:
# Summarize how many entries ended this sweep flagged as finalized.
_done = sum(
1 for _, e in _expired_entries if e.expiry_finalized
)
_failed = len(_expired_entries) - _done
if _failed:
logger.info(
"Session expiry done: %d finalized, %d pending retry",
_done, _failed,
)
else:
logger.info(
"Session expiry done: %d finalized", _done,
)
# Evict idle cached agents; a failure here is only logged.
try:
_idle_evicted = self._sweep_idle_cached_agents()
if _idle_evicted:
logger.info(
"Agent cache idle sweep: evicted %d agent(s)",
_idle_evicted,
)
except Exception as _e:
logger.debug("Idle agent sweep failed: %s", _e)
# At most once per 3600 seconds, prune store entries older than
# config.session_store_max_age_days; a value of 0 or unset skips pruning.
_last_prune_ts = getattr(self, "_last_session_store_prune_ts", 0.0)
_prune_interval = 3600.0
if time.time() - _last_prune_ts > _prune_interval:
try:
_max_age = int(
getattr(self.config, "session_store_max_age_days", 0) or 0
)
if _max_age > 0:
_pruned = self.session_store.prune_old_entries(_max_age)
if _pruned:
logger.info(
"SessionStore prune: dropped %d stale entries",
_pruned,
)
except Exception as _e:
logger.debug("SessionStore prune failed: %s", _e)
self._last_session_store_prune_ts = time.time()
except Exception as e:
logger.debug("Session expiry watcher error: %s", e)
# Sleep in 1-second steps so a change of self._running is noticed
# within about a second.
for _ in range(interval):
if not self._running:
break
await asyncio.sleep(1)
def _active_profile_name(self) -> str:
"""Return the profile name this gateway represents."""
try:
from hermes_cli.profiles import get_active_profile_name
return get_active_profile_name() or "default"
except Exception:
return "default"
async def _kanban_notifier_watcher(self, interval: float = 5.0) -> None:
"""Poll ``kanban_notify_subs`` and deliver terminal events to users.
For each subscription row, fetches ``task_events`` newer than the
stored cursor with kind in the terminal set (``completed``,
``blocked``, ``gave_up``, ``crashed``, ``timed_out``). Sends one
message per new event to ``(platform, chat_id, thread_id)``,
then advances the cursor. When the task's status is ``done`` or
``archived``, the subscription is removed.
Runs in the gateway event loop; all SQLite work is pushed to a
thread via ``asyncio.to_thread`` so the loop never blocks on the
WAL lock. Failures in one tick don't stop subsequent ticks.
**Multi-board:** iterates every board discovered on disk per
tick. Subscriptions live inside each board's own DB and cannot
cross boards, so delivery semantics are unchanged — this is
purely a fan-out of the single-DB poll.
"""
from gateway.config import Platform as _Platform
# Without the kanban DB module the notifier cannot run; log and exit.
try:
from hermes_cli import kanban_db as _kb
except Exception:
logger.warning("kanban notifier: kanban_db not importable; notifier disabled")
return
TERMINAL_KINDS = ("completed", "blocked", "gave_up", "crashed", "timed_out")
MAX_SEND_FAILURES = 3
# Reuse the failure-counter dict already stored on self, if there is one.
# Keys are (task_id, platform, chat_id, thread_id) tuples.
sub_fail_counts: dict[tuple, int] = getattr(
self, "_kanban_sub_fail_counts", {}
)
self._kanban_sub_fail_counts = sub_fail_counts
# Profile this notifier acts for; resolved once and stored on self.
notifier_profile = getattr(self, "_kanban_notifier_profile", None)
if not notifier_profile:
notifier_profile = self._active_profile_name()
self._kanban_notifier_profile = notifier_profile
# Initial 5-second delay before the first tick.
await asyncio.sleep(5)
while self._running:
try:
# _collect is run through asyncio.to_thread below. It claims unseen
# terminal events per subscription and returns one dict per
# subscription that has events to deliver.
def _collect():
deliveries: list[dict] = []
active_platforms = {
getattr(platform, "value", str(platform)).lower()
for platform in self.adapters.keys()
}
if not active_platforms:
logger.debug("kanban notifier: no connected adapters; skipping tick")
return deliveries
# If listing boards fails, fall back to the default board only.
try:
boards = _kb.list_boards(include_archived=False)
except Exception:
boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
# Boards that resolve to the same DB path are polled only once.
seen_db_paths: set[str] = set()
for board_meta in boards:
slug = board_meta.get("slug") or _kb.DEFAULT_BOARD
db_path = board_meta.get("db_path")
try:
resolved_db_path = str(Path(db_path).expanduser().resolve()) if db_path else str(_kb.kanban_db_path(slug).resolve())
except Exception:
resolved_db_path = f"slug:{slug}"
if resolved_db_path in seen_db_paths:
logger.debug(
"kanban notifier: skipping duplicate board slug %s for DB %s",
slug, resolved_db_path,
)
continue
seen_db_paths.add(resolved_db_path)
try:
conn = _kb.connect(board=slug)
except Exception as exc:
logger.debug("kanban notifier: cannot open board %s: %s", slug, exc)
continue
try:
subs = _kb.list_notify_subs(conn)
if not subs:
logger.debug("kanban notifier: board %s has no subscriptions", slug)
for sub in subs:
# Subscriptions that name a different notifier profile are skipped;
# ones with no profile set are handled by whoever polls them.
owner_profile = sub.get("notifier_profile") or None
if owner_profile and owner_profile != notifier_profile:
logger.debug(
"kanban notifier: subscription for %s owned by profile %s; current profile %s skipping",
sub.get("task_id"), owner_profile, notifier_profile,
)
continue
# Events are only claimed for platforms with a connected adapter.
platform = (sub.get("platform") or "").lower()
if platform not in active_platforms:
logger.debug(
"kanban notifier: subscription for %s on %s skipped; adapter not connected",
sub.get("task_id"), platform or "<missing>",
)
continue
# Returns the cursor before the claim, the cursor after it, and the events.
old_cursor, cursor, events = _kb.claim_unseen_events_for_sub(
conn,
task_id=sub["task_id"],
platform=sub["platform"],
chat_id=sub["chat_id"],
thread_id=sub.get("thread_id") or "",
kinds=TERMINAL_KINDS,
)
if not events:
continue
task = _kb.get_task(conn, sub["task_id"])
logger.debug(
"kanban notifier: claimed %d event(s) for %s on board %s cursor %s→%s",
len(events), sub["task_id"], slug, old_cursor, cursor,
)
deliveries.append({
"sub": sub,
"old_cursor": old_cursor,
"cursor": cursor,
"events": events,
"task": task,
"board": slug,
})
finally:
conn.close()
return deliveries
deliveries = await asyncio.to_thread(_collect)
for d in deliveries:
sub = d["sub"]
task = d["task"]
board_slug = d.get("board")
platform_str = (sub["platform"] or "").lower()
# A platform string that is not a valid Platform value: advance the
# cursor past the claimed events and skip delivery.
try:
plat = _Platform(platform_str)
except ValueError:
await asyncio.to_thread(
self._kanban_advance, sub, d["cursor"], board_slug,
)
continue
# The adapter may have gone away between collection and delivery;
# rewind the claim in that case.
adapter = self.adapters.get(plat)
if adapter is None:
logger.debug(
"kanban notifier: adapter %s disconnected before delivery for %s; rewinding claim",
platform_str, sub["task_id"],
)
await asyncio.to_thread(
self._kanban_rewind,
sub,
d["cursor"],
d.get("old_cursor", 0),
board_slug,
)
continue
# Title falls back to the task id and is capped at 120 characters.
title = (task.title if task else sub["task_id"])[:120]
for ev in d["events"]:
kind = ev.kind
who = (task.assignee if task and task.assignee else None)
tag = f"@{who} " if who else ""
# Build the message text for this event kind.
if kind == "completed":
# Optional second line: first line of the payload summary
# (max 200 chars), else first line of task.result (max 160).
handoff = ""
payload_summary = None
if ev.payload and ev.payload.get("summary"):
payload_summary = str(ev.payload["summary"])
if payload_summary:
h = payload_summary.strip().splitlines()[0][:200]
handoff = f"\n{h}"
elif task and task.result:
r = task.result.strip().splitlines()[0][:160]
handoff = f"\n{r}"
msg = (
f"✔ {tag}Kanban {sub['task_id']} done"
f" — {title}{handoff}"
)
elif kind == "blocked":
reason = ""
if ev.payload and ev.payload.get("reason"):
reason = f": {str(ev.payload['reason'])[:160]}"
msg = f"⏸ {tag}Kanban {sub['task_id']} blocked{reason}"
elif kind == "gave_up":
err = ""
if ev.payload and ev.payload.get("error"):
err = f"\n{str(ev.payload['error'])[:200]}"
msg = (
f"✖ {tag}Kanban {sub['task_id']} gave up "
f"after repeated spawn failures{err}"
)
elif kind == "crashed":
msg = (
f"✖ {tag}Kanban {sub['task_id']} worker crashed "
f"(pid gone); dispatcher will retry"
)
elif kind == "timed_out":
limit = 0
if ev.payload and ev.payload.get("limit_seconds"):
limit = int(ev.payload["limit_seconds"])
msg = (
f"⏱ {tag}Kanban {sub['task_id']} timed out "
f"(max_runtime={limit}s); will retry"
)
else:
# Event kinds without a message template are not delivered.
continue
metadata: dict[str, Any] = {}
if sub.get("thread_id"):
metadata["thread_id"] = sub["thread_id"]
sub_key = (
sub["task_id"], sub["platform"],
sub["chat_id"], sub.get("thread_id") or "",
)
try:
await adapter.send(
sub["chat_id"], msg, metadata=metadata,
)
logger.debug(
"kanban notifier: delivered %s event for %s to %s/%s on board %s",
kind, sub["task_id"], platform_str, sub["chat_id"], board_slug,
)
# Artifact upload errors are logged at debug level and do not count
# as a send failure.
if kind == "completed":
try:
await self._deliver_kanban_artifacts(
adapter=adapter,
chat_id=sub["chat_id"],
metadata=metadata,
event_payload=getattr(ev, "payload", None),
task=task,
)
except Exception as art_exc:
logger.debug(
"kanban notifier: artifact delivery for %s failed: %s",
sub["task_id"], art_exc,
)
# A successful send clears the failure counter for this subscription.
sub_fail_counts.pop(sub_key, None)
except Exception as exc:
# Send failed: bump the counter. At MAX_SEND_FAILURES the subscription
# is removed; below that the claimed cursor is rewound.
fails = sub_fail_counts.get(sub_key, 0) + 1
sub_fail_counts[sub_key] = fails
logger.warning(
"kanban notifier: send failed for %s on %s "
"(attempt %d/%d): %s",
sub["task_id"], platform_str, fails,
MAX_SEND_FAILURES, exc,
)
if fails >= MAX_SEND_FAILURES:
logger.warning(
"kanban notifier: dropping subscription "
"%s on %s after %d consecutive send failures",
sub["task_id"], platform_str, fails,
)
await asyncio.to_thread(self._kanban_unsub, sub, board_slug)
sub_fail_counts.pop(sub_key, None)
else:
await asyncio.to_thread(
self._kanban_rewind,
sub,
d["cursor"],
d.get("old_cursor", 0),
board_slug,
)
break
else:
# Loop ``else``: reached only when the event loop above finished
# without ``break``; the cursor is then advanced.
await asyncio.to_thread(
self._kanban_advance, sub, d["cursor"], board_slug,
)
# A task whose status is done or archived has its subscription removed.
task_terminal = task and task.status in {"done", "archived"}
if task_terminal:
await asyncio.to_thread(
self._kanban_unsub, sub, board_slug,
)
except Exception as exc:
logger.warning("kanban notifier tick failed: %s", exc)
# Sleep in 1-second steps (at least one) so the task returns within
# about a second once self._running becomes false.
for _ in range(int(max(1, interval))):
if not self._running:
return
await asyncio.sleep(1)
def _kanban_advance(
    self, sub: dict, cursor: int, board: Optional[str] = None,
) -> None:
    """Store a new notify cursor for one subscription (blocking helper).

    Synchronous; the notifier calls it through ``asyncio.to_thread``.
    A connection to ``board`` is opened for the call and always closed
    afterwards.

    Args:
        sub: Subscription row; reads ``task_id``, ``platform``,
            ``chat_id`` and the optional ``thread_id``.
        cursor: Cursor value to store.
        board: Board whose DB holds the subscription.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect(board=board)
    try:
        target = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            # A missing or empty thread id is stored as "".
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.advance_notify_cursor(db, new_cursor=cursor, **target)
    finally:
        db.close()
def _kanban_unsub(self, sub: dict, board: Optional[str] = None) -> None:
    """Remove one notify subscription from its board's DB (blocking helper).

    Opens a connection to ``board``, removes the subscription identified
    by ``task_id`` / ``platform`` / ``chat_id`` / ``thread_id`` and
    always closes the connection afterwards.

    Args:
        sub: Subscription row; ``thread_id`` is optional.
        board: Board whose DB holds the subscription.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect(board=board)
    try:
        target = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            # A missing or empty thread id is passed as "".
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.remove_notify_sub(db, **target)
    finally:
        db.close()
def _kanban_rewind(
    self,
    sub: dict,
    claimed_cursor: int,
    old_cursor: int,
    board: Optional[str] = None,
) -> None:
    """Roll a claimed notify cursor back to its previous value (blocking helper).

    Used by the notifier when claimed events could not be delivered.
    Opens a connection to ``board`` and always closes it afterwards.

    Args:
        sub: Subscription row; reads ``task_id``, ``platform``,
            ``chat_id`` and the optional ``thread_id``.
        claimed_cursor: Cursor value produced by the claim being undone.
        old_cursor: Cursor value from before the claim.
        board: Board whose DB holds the subscription.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect(board=board)
    try:
        target = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            # A missing or empty thread id is passed as "".
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.rewind_notify_cursor(
            db,
            claimed_cursor=claimed_cursor,
            old_cursor=old_cursor,
            **target,
        )
    finally:
        db.close()
async def _deliver_kanban_artifacts(
self,
*,
adapter,
chat_id: str,
metadata: dict,
event_payload: Optional[dict],
task,
) -> None:
"""Upload artifact files referenced by a completed kanban task.
Workers passing ``kanban_complete(artifacts=[...])`` ship absolute
file paths through the completion event so downstream humans get
the deliverable as a native upload instead of a path printed in
chat.
Sources scanned, in priority order:
1. ``event_payload['artifacts']`` (explicit list — preferred)
2. ``event_payload['summary']`` (truncated first line)
3. ``task.result`` (legacy fallback)
Files are deduplicated, missing files are silently skipped (the
path may have been mentioned for reference only), and delivery
errors are logged but do not break the notifier loop.
"""
from pathlib import Path as _Path
candidates: list[str] = []
seen: set[str] = set()
def _add(path: str) -> None:
if not path:
return
expanded = os.path.expanduser(path)
if expanded in seen:
return
if not os.path.isfile(expanded):
return
seen.add(expanded)
candidates.append(expanded)
if isinstance(event_payload, dict):
raw = event_payload.get("artifacts")
if isinstance(raw, (list, tuple)):
for item in raw:
if isinstance(item, str):
_add(item)
summary = event_payload.get("summary")
if isinstance(summary, str) and summary:
paths, _ = adapter.extract_local_files(summary)
for p in paths:
_add(p)
if task is not None and getattr(task, "result", None):
result_text = str(task.result)
paths, _ = adapter.extract_local_files(result_text)
for p in paths:
_add(p)
if not candidates:
return
_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
from urllib.parse import quote as _quote
image_paths = [p for p in candidates if _Path(p).suffix.lower() in _IMAGE_EXTS]
other_paths = [p for p in candidates if _Path(p).suffix.lower() not in _IMAGE_EXTS]
if image_paths:
try:
batch = [(f"file://{_quote(p)}", "") for p in image_paths]
await adapter.send_multiple_images(
chat_id=chat_id, images=batch, metadata=metadata,
)
except Exception as exc:
logger.warning(
"kanban notifier: image batch upload failed: %s", exc,
)
for path in other_paths:
ext = _Path(path).suffix.lower()
try:
if ext in _VIDEO_EXTS:
await adapter.send_video(
chat_id=chat_id, video_path=path, metadata=metadata,
)
else:
await adapter.send_document(
chat_id=chat_id, file_path=path, metadata=metadata,
)
except Exception as exc:
logger.warning(
"kanban notifier: artifact upload (%s) failed: %s",
path, exc,
)
async def _kanban_dispatcher_watcher(self) -> None:
    """Embedded kanban dispatcher — one tick every `dispatch_interval_seconds`.

    Gated by `kanban.dispatch_in_gateway` in config.yaml (default True) and
    by the ``HERMES_KANBAN_DISPATCH_IN_GATEWAY`` environment variable
    (``0``/``false``/``no``/``off`` disables it). When enabled, the gateway
    hosts the dispatcher for this profile; when disabled, the coroutine
    returns immediately and an external daemon is expected.

    Each tick runs, in order and each inside ``asyncio.to_thread`` so
    SQLite work never blocks the event loop:

    1. the auto-decomposer (when ``kanban.auto_decompose`` is truthy),
    2. :func:`kanban_db.dispatch_once` for every non-archived board,
    3. a cheap "is anything spawnable still waiting?" probe that feeds the
       stuck-dispatcher warning.

    Failures inside a tick are logged and do not stop subsequent ticks.
    The loop checks ``self._running`` between ticks and once per second
    while sleeping. ``asyncio.CancelledError`` is logged and re-raised.

    Config keys read from the ``kanban`` section: ``dispatch_in_gateway``,
    ``dispatch_interval_seconds``, ``max_spawn``, ``max_in_progress``,
    ``failure_limit``, ``dispatch_stale_timeout_seconds``,
    ``auto_decompose``, ``auto_decompose_per_tick``. Invalid values are
    logged and replaced by a default instead of aborting the watcher.
    """
    try:
        from hermes_cli.config import load_config as _load_config
    except Exception:
        logger.warning("kanban dispatcher: config loader unavailable; disabled")
        return

    env_override = os.environ.get("HERMES_KANBAN_DISPATCH_IN_GATEWAY", "").strip().lower()
    if env_override in {"0", "false", "no", "off"}:
        logger.info("kanban dispatcher: disabled via HERMES_KANBAN_DISPATCH_IN_GATEWAY env")
        return

    try:
        cfg = _load_config()
    except Exception as exc:
        logger.warning("kanban dispatcher: cannot load config (%s); disabled", exc)
        return

    # A bare ``kanban:`` key in YAML loads as None; treat any non-mapping
    # value as "no kanban settings" instead of crashing on ``.get``.
    raw_kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
    kanban_cfg = raw_kanban_cfg if isinstance(raw_kanban_cfg, dict) else {}

    if not kanban_cfg.get("dispatch_in_gateway", True):
        logger.info(
            "kanban dispatcher: disabled via config kanban.dispatch_in_gateway=false"
        )
        return

    try:
        from hermes_cli import kanban_db as _kb
    except Exception:
        logger.warning("kanban dispatcher: kanban_db not importable; dispatcher disabled")
        return

    # Tick interval: falsy values fall back to 60s, unparsable values are
    # logged and replaced by 60s, and anything below 1s is clamped to 1s.
    raw_interval = kanban_cfg.get("dispatch_interval_seconds", 60) or 60
    try:
        interval = float(raw_interval)
    except (TypeError, ValueError):
        logger.warning(
            "kanban dispatcher: invalid kanban.dispatch_interval_seconds=%r; "
            "using default 60",
            raw_interval,
        )
        interval = 60.0
    interval = max(interval, 1.0)

    max_spawn = kanban_cfg.get("max_spawn", None)
    if max_spawn is not None:
        logger.info("kanban dispatcher: max_spawn=%s", max_spawn)

    # Optional cap on in-progress tasks; must be an integer >= 1 to apply.
    raw_max_in_progress = kanban_cfg.get("max_in_progress", None)
    max_in_progress = None
    if raw_max_in_progress is not None:
        try:
            max_in_progress = int(raw_max_in_progress)
        except (TypeError, ValueError):
            logger.warning(
                "kanban dispatcher: invalid kanban.max_in_progress=%r; ignoring",
                raw_max_in_progress,
            )
            max_in_progress = None
        else:
            if max_in_progress < 1:
                logger.warning(
                    "kanban dispatcher: kanban.max_in_progress=%r is below 1; ignoring",
                    raw_max_in_progress,
                )
                max_in_progress = None
            else:
                logger.info("kanban dispatcher: max_in_progress=%s", max_in_progress)

    raw_failure_limit = kanban_cfg.get("failure_limit", _kb.DEFAULT_FAILURE_LIMIT)
    try:
        failure_limit = int(raw_failure_limit)
    except (TypeError, ValueError):
        logger.warning(
            "kanban dispatcher: invalid kanban.failure_limit=%r; using default %d",
            raw_failure_limit,
            _kb.DEFAULT_FAILURE_LIMIT,
        )
        failure_limit = _kb.DEFAULT_FAILURE_LIMIT
    if failure_limit < 1:
        logger.warning(
            "kanban dispatcher: kanban.failure_limit=%r is below 1; using default %d",
            raw_failure_limit,
            _kb.DEFAULT_FAILURE_LIMIT,
        )
        failure_limit = _kb.DEFAULT_FAILURE_LIMIT

    # 0 disables stale detection.
    raw_stale = kanban_cfg.get("dispatch_stale_timeout_seconds", 0)
    try:
        stale_timeout_seconds = int(raw_stale or 0)
    except (TypeError, ValueError):
        logger.warning(
            "kanban dispatcher: invalid kanban.dispatch_stale_timeout_seconds=%r; "
            "disabling stale detection",
            raw_stale,
        )
        stale_timeout_seconds = 0

    # Fixed delay before the first tick.
    await asyncio.sleep(5)

    # Stuck detection: warn after HEALTH_WINDOW consecutive ticks with
    # spawnable work pending but nothing spawned, at most once per 300s.
    HEALTH_WINDOW = 6
    bad_ticks = 0
    last_warn_at = 0

    # slug -> fingerprint of the DB file at the time it was found corrupt.
    disabled_corrupt_boards: dict[str, tuple[str, int | None, int | None]] = {}

    def _board_db_fingerprint(slug: str) -> tuple[str, int | None, int | None]:
        """Return ``(resolved_path, mtime_ns, size)`` for a board's DB file.

        ``mtime_ns`` and ``size`` are ``None`` when the file cannot be
        stat'ed.
        """
        path = _kb.kanban_db_path(slug)
        try:
            resolved = str(path.expanduser().resolve())
        except Exception:
            resolved = str(path)
        try:
            stat = path.stat()
        except OSError:
            return (resolved, None, None)
        return (resolved, stat.st_mtime_ns, stat.st_size)

    def _is_corrupt_board_db_error(exc: Exception) -> bool:
        """Return True when *exc* is SQLite reporting an unusable DB file."""
        if not isinstance(exc, sqlite3.DatabaseError):
            return False
        msg = str(exc).lower()
        return (
            "file is not a database" in msg
            or "database disk image is malformed" in msg
        )

    def _tick_once_for_board(slug: str) -> "Optional[object]":
        """Run one dispatch_once for a specific board.

        Runs in a worker thread via `asyncio.to_thread`. `board=slug`
        is passed to both `connect` and `dispatch_once`, and the
        connection is opened and closed within this call.

        Returns the ``dispatch_once`` result, or ``None`` when the board
        is currently disabled as corrupt or the tick failed.
        """
        conn = None
        fingerprint = _board_db_fingerprint(slug)
        disabled_fingerprint = disabled_corrupt_boards.get(slug)
        if disabled_fingerprint == fingerprint:
            # Same corrupt file as last time: stay quiet and skip.
            return None
        if disabled_fingerprint is not None:
            logger.info(
                "kanban dispatcher: board %s database changed; retrying dispatch",
                slug,
            )
            disabled_corrupt_boards.pop(slug, None)
        try:
            conn = _kb.connect(board=slug)
            return _kb.dispatch_once(
                conn,
                board=slug,
                max_spawn=max_spawn,
                max_in_progress=max_in_progress,
                failure_limit=failure_limit,
                stale_timeout_seconds=stale_timeout_seconds,
            )
        except sqlite3.DatabaseError as exc:
            if _is_corrupt_board_db_error(exc):
                disabled_corrupt_boards[slug] = fingerprint
                logger.error(
                    "kanban dispatcher: board %s database %s is not a valid "
                    "SQLite database; disabling dispatch for this board "
                    "until the file changes or the gateway restarts. Move "
                    "or restore the file, then run `hermes kanban init` if "
                    "you need a fresh board.",
                    slug,
                    fingerprint[0],
                )
                return None
            logger.exception("kanban dispatcher: tick failed on board %s", slug)
            return None
        except Exception:
            logger.exception("kanban dispatcher: tick failed on board %s", slug)
            return None
        finally:
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    pass

    def _tick_once() -> "list[tuple[str, Optional[object]]]":
        """Run one dispatch_once per board. Returns (slug, result) pairs.

        Boards are enumerated on every tick, so a board created mid-run
        is picked up by the next tick without a restart. If enumeration
        fails, only the default board is dispatched.
        """
        try:
            boards = _kb.list_boards(include_archived=False)
        except Exception:
            boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
        out: list[tuple[str, "Optional[object]"]] = []
        for b in boards:
            slug = b.get("slug") or _kb.DEFAULT_BOARD
            out.append((slug, _tick_once_for_board(slug)))
        return out

    def _ready_nonempty() -> bool:
        """Cheap probe: is there at least one ready+assigned+unclaimed
        task on ANY board whose assignee maps to a real Hermes profile
        (i.e. one the dispatcher would actually spawn for)?

        Tasks assigned to control-plane lanes (e.g. ``orion-cc``,
        ``orion-research``) are pulled by terminals via
        ``claim_task`` directly and never spawnable, so a queue full
        of those is "correctly idle", not "stuck". Filtering them out
        here keeps the stuck-warn fire only on real failures (broken
        PATH, missing venv, credential loss for a real Hermes profile).

        Boards whose database cannot be opened or queried are skipped.
        """
        try:
            boards = _kb.list_boards(include_archived=False)
        except Exception:
            boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
        for b in boards:
            slug = b.get("slug") or _kb.DEFAULT_BOARD
            conn = None
            try:
                conn = _kb.connect(board=slug)
                if _kb.has_spawnable_ready(conn):
                    return True
                if _kb.has_spawnable_review(conn):
                    return True
            except Exception:
                continue
            finally:
                if conn is not None:
                    try:
                        conn.close()
                    except Exception:
                        pass
        return False

    auto_decompose_enabled = bool(kanban_cfg.get("auto_decompose", True))
    try:
        auto_decompose_per_tick = int(
            kanban_cfg.get("auto_decompose_per_tick", 3) or 3
        )
    except (TypeError, ValueError):
        auto_decompose_per_tick = 3
    if auto_decompose_per_tick < 1:
        auto_decompose_per_tick = 1

    def _auto_decompose_tick() -> int:
        """Run the auto-decomposer for up to N triage tasks across all
        boards. Returns the number of triage tasks that were
        successfully decomposed or specified this tick.

        The ``auto_decompose_per_tick`` budget counts attempts, not
        successes, and is shared across boards.
        """
        try:
            from hermes_cli import kanban_decompose as _decomp
        except Exception as exc:
            logger.warning(
                "kanban auto-decompose: import failed (%s); skipping", exc,
            )
            return 0
        try:
            boards = _kb.list_boards(include_archived=False)
        except Exception:
            boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
        attempted = 0
        successes = 0
        for b in boards:
            slug = b.get("slug") or _kb.DEFAULT_BOARD
            if attempted >= auto_decompose_per_tick:
                break
            # NOTE(review): the decomposer is pointed at a board through
            # the process-wide HERMES_KANBAN_BOARD variable. The previous
            # value is restored in ``finally``, but other threads reading
            # the variable during this window would see the temporary
            # value — confirm nothing else depends on it concurrently.
            prev_env = os.environ.get("HERMES_KANBAN_BOARD")
            try:
                os.environ["HERMES_KANBAN_BOARD"] = slug
                try:
                    triage_ids = _decomp.list_triage_ids()
                except Exception as exc:
                    logger.debug(
                        "kanban auto-decompose: list_triage_ids failed on board %s (%s)",
                        slug, exc,
                    )
                    triage_ids = []
                for tid in triage_ids:
                    if attempted >= auto_decompose_per_tick:
                        break
                    attempted += 1
                    try:
                        outcome = _decomp.decompose_task(
                            tid, author="auto-decomposer",
                        )
                    except Exception:
                        logger.exception(
                            "kanban auto-decompose: decompose_task crashed on %s",
                            tid,
                        )
                        continue
                    if outcome.ok:
                        successes += 1
                        if outcome.fanout and outcome.child_ids:
                            logger.info(
                                "kanban auto-decompose [%s]: %s → %d children",
                                slug, tid, len(outcome.child_ids),
                            )
                        else:
                            logger.info(
                                "kanban auto-decompose [%s]: %s → single task (no fanout)",
                                slug, tid,
                            )
                    else:
                        logger.debug(
                            "kanban auto-decompose [%s]: %s skipped: %s",
                            slug, tid, outcome.reason,
                        )
            finally:
                if prev_env is None:
                    os.environ.pop("HERMES_KANBAN_BOARD", None)
                else:
                    os.environ["HERMES_KANBAN_BOARD"] = prev_env
        return successes

    logger.info(
        "kanban dispatcher: embedded in gateway (interval=%.1fs)", interval
    )

    while self._running:
        try:
            if auto_decompose_enabled:
                await asyncio.to_thread(_auto_decompose_tick)
            results = await asyncio.to_thread(_tick_once)
            any_spawned = False
            for slug, res in (results or []):
                if res is not None and getattr(res, "spawned", None):
                    any_spawned = True
                    logger.info(
                        "kanban dispatcher [%s]: spawned=%d reclaimed=%d "
                        "crashed=%d timed_out=%d promoted=%d auto_blocked=%d",
                        slug,
                        len(res.spawned),
                        res.reclaimed,
                        len(res.crashed) if hasattr(res.crashed, "__len__") else 0,
                        len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0,
                        res.promoted,
                        len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0,
                    )
            ready_pending = await asyncio.to_thread(_ready_nonempty)
            if ready_pending and not any_spawned:
                bad_ticks += 1
            else:
                bad_ticks = 0
            if bad_ticks >= HEALTH_WINDOW:
                now = int(time.time())
                if now - last_warn_at >= 300:
                    logger.warning(
                        "kanban dispatcher stuck: ready queue non-empty for "
                        "%d consecutive ticks but 0 workers spawned. Check "
                        "profile health (venv, PATH, credentials) and "
                        "`hermes kanban list --status ready`.",
                        bad_ticks,
                    )
                    last_warn_at = now
        except asyncio.CancelledError:
            logger.debug("kanban dispatcher: cancelled")
            raise
        except Exception:
            logger.exception("kanban dispatcher: unexpected watcher error")

        # Sleep in <=1s slices so a stop request is noticed promptly.
        slept = 0.0
        while slept < interval and self._running:
            await asyncio.sleep(min(1.0, interval - slept))
            slept += 1.0
async def _platform_reconnect_watcher(self) -> None:
    """Background task that periodically retries connecting failed platforms.

    Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
    Retryable failures keep retrying at the backoff cap indefinitely
    — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
    without ever succeeding, it is *paused*: kept in the retry queue
    but no longer hammered. The user surfaces it with ``/platform list``
    and resumes it with ``/platform resume <name>``. Non-retryable
    failures (bad auth, etc.) still drop out of the queue immediately.

    The loop waits 10 seconds before its first pass, polls every 10
    seconds while failures are queued (every 30 seconds while the queue
    is empty), and returns as soon as ``self._running`` is false.
    Entries of ``self._failed_platforms`` may be removed by other
    coroutines while this task is awaiting a connect; such removals are
    tolerated rather than treated as errors.
    """
    _BACKOFF_CAP = 300
    _PAUSE_AFTER_FAILURES = 10

    def _schedule_retry(entry: dict, attempt_no: int) -> int:
        """Record a failed attempt on *entry* and return the backoff (seconds)."""
        delay = min(30 * (2 ** (attempt_no - 1)), _BACKOFF_CAP)
        entry["attempts"] = attempt_no
        entry["next_retry"] = time.monotonic() + delay
        return delay

    await asyncio.sleep(10)
    while self._running:
        if not self._failed_platforms:
            # Nothing to retry: idle for up to 30s, checking for shutdown.
            for _ in range(30):
                if not self._running:
                    return
                await asyncio.sleep(1)
            continue

        now = time.monotonic()
        for platform in list(self._failed_platforms.keys()):
            if not self._running:
                return
            # The key list is a snapshot; an earlier iteration awaited, so
            # the entry may have been removed in the meantime.
            info = self._failed_platforms.get(platform)
            if info is None:
                continue
            if info.get("paused"):
                continue
            if now < info["next_retry"]:
                continue

            platform_config = info["config"]
            attempt = info["attempts"] + 1
            logger.info(
                "Reconnecting %s (attempt %d)...",
                platform.value, attempt,
            )
            try:
                adapter = self._create_adapter(platform, platform_config)
                if not adapter:
                    logger.warning(
                        "Reconnect %s: adapter creation returned None, removing from retry queue",
                        platform.value,
                    )
                    self._failed_platforms.pop(platform, None)
                    continue

                adapter.set_message_handler(self._handle_message)
                adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
                adapter.set_session_store(self.session_store)
                adapter.set_busy_session_handler(self._handle_active_session_busy_message)

                success = await self._connect_adapter_with_timeout(adapter, platform)
                if success:
                    self.adapters[platform] = adapter
                    self._sync_voice_mode_state_to_adapter(adapter)
                    self.delivery_router.adapters = self.adapters
                    # pop() rather than del: a missing key here must not
                    # send a successful reconnect into the error handler.
                    self._failed_platforms.pop(platform, None)
                    self._update_platform_runtime_status(
                        platform.value,
                        platform_state="connected",
                        error_code=None,
                        error_message=None,
                    )
                    logger.info("✓ %s reconnected successfully", platform.value)
                    # Best-effort refresh of the channel directory.
                    try:
                        from gateway.channel_directory import build_channel_directory
                        await build_channel_directory(self.adapters)
                    except Exception as dir_exc:
                        logger.debug(
                            "Channel directory rebuild after %s reconnect failed: %s",
                            platform.value, dir_exc,
                        )
                elif adapter.has_fatal_error and not adapter.fatal_error_retryable:
                    self._update_platform_runtime_status(
                        platform.value,
                        platform_state="fatal",
                        error_code=adapter.fatal_error_code,
                        error_message=adapter.fatal_error_message,
                    )
                    logger.warning(
                        "Reconnect %s: non-retryable error (%s), removing from retry queue",
                        platform.value, adapter.fatal_error_message,
                    )
                    self._failed_platforms.pop(platform, None)
                else:
                    self._update_platform_runtime_status(
                        platform.value,
                        platform_state="retrying",
                        error_code=adapter.fatal_error_code,
                        error_message=adapter.fatal_error_message or "failed to reconnect",
                    )
                    backoff = _schedule_retry(info, attempt)
                    logger.info(
                        "Reconnect %s failed, next retry in %ds",
                        platform.value, backoff,
                    )
                    if attempt >= _PAUSE_AFTER_FAILURES:
                        self._pause_failed_platform(
                            platform,
                            reason=(
                                adapter.fatal_error_message
                                or "failed to reconnect"
                            ),
                        )
            except Exception as e:
                self._update_platform_runtime_status(
                    platform.value,
                    platform_state="retrying",
                    error_code=None,
                    error_message=str(e),
                )
                backoff = _schedule_retry(info, attempt)
                logger.warning(
                    "Reconnect %s error: %s, next retry in %ds",
                    platform.value, e, backoff,
                )
                if attempt >= _PAUSE_AFTER_FAILURES:
                    self._pause_failed_platform(platform, reason=str(e))

        # Pause 10s between passes, checking for shutdown every second.
        for _ in range(10):
            if not self._running:
                return
            await asyncio.sleep(1)
async def stop(
self,
*,
restart: bool = False,
detached_restart: bool = False,
service_restart: bool = False,
) -> None:
"""Stop the gateway and disconnect all adapters.

The teardown itself runs once, inside a task stored on
``self._stop_task``; a call made while that task exists simply awaits
it and returns.

Args:
restart: When true, record that a restart was requested, together
with the two flags below.
detached_restart: Stored as ``self._restart_detached``; when set
along with ``restart``, the detached restart command is launched
during teardown.
service_restart: Stored as ``self._restart_via_service``; when set
along with ``restart``, the exit code is set to
``GATEWAY_SERVICE_RESTART_EXIT_CODE``.
"""
# Restart flags are recorded before the in-flight check below.
if restart:
self._restart_requested = True
self._restart_detached = detached_restart
self._restart_via_service = service_restart
# A stop task already exists: wait for it instead of starting another.
if self._stop_task is not None:
await self._stop_task
return
async def _stop_impl() -> None:
def _kill_tool_subprocesses(phase: str) -> None:
"""Kill tool subprocesses + tear down terminal envs + browsers.
Called twice in the shutdown path: once eagerly after a
drain timeout forces agent interrupt (so we reclaim bash/
sleep children before systemd TimeoutStopSec escalates to
SIGKILL on the cgroup — #8202), and once as a final
catch-all at the end of _stop_impl() for the graceful
path or anything respawned mid-teardown.
All steps are best-effort; exceptions are swallowed so
one subsystem's failure doesn't block the rest.
"""
try:
from tools.process_registry import process_registry
_killed = process_registry.kill_all()
if _killed:
logger.info(
"Shutdown (%s): killed %d tool subprocess(es)",
phase, _killed,
)
except Exception as _e:
logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
try:
from tools.terminal_tool import cleanup_all_environments
cleanup_all_environments()
except Exception as _e:
logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception as _e:
logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
logger.info(
"Stopping gateway%s...",
" for restart" if self._restart_requested else "",
)
# Monotonic reference point for the "+N.NNs" phase timings logged below.
_stop_started_at = time.monotonic()
def _phase_elapsed() -> float:
return time.monotonic() - _stop_started_at
# Flip the run flag off and enter the draining state.
self._running = False
self._draining = True
await self._notify_active_sessions_of_shutdown()
logger.info(
"Shutdown phase: notify_active_sessions done at +%.2fs",
_phase_elapsed(),
)
timeout = self._restart_drain_timeout
# Before draining, flag every running agent (placeholder sentinel entries
# excluded) as resume-pending and remember which keys were flagged.
# NOTE(review): presumably so sessions stay resumable if the process is
# killed mid-drain — confirm against session_store.
_pre_drain_keys: list[str] = []
for _sk, _agent in list(self._running_agents.items()):
if _agent is _AGENT_PENDING_SENTINEL:
continue
try:
self.session_store.mark_resume_pending(
_sk,
"restart_timeout" if self._restart_requested else "shutdown_timeout",
)
_pre_drain_keys.append(_sk)
except Exception as _e:
logger.debug("pre-drain mark_resume_pending failed for %s: %s", _sk, _e)
_drain_started_at = time.monotonic()
active_agents, timed_out = await self._drain_active_agents(timeout)
logger.info(
"Shutdown phase: drain done at +%.2fs (drain took %.2fs, "
"timed_out=%s, active_at_start=%d, active_now=%d)",
_phase_elapsed(),
time.monotonic() - _drain_started_at,
timed_out,
len(active_agents),
self._running_agent_count(),
)
# Drain finished in time: drop the resume-pending flag for sessions that
# are no longer in the running-agents map.
if not timed_out:
for _sk in _pre_drain_keys:
if _sk not in self._running_agents:
try:
self.session_store.clear_resume_pending(_sk)
except Exception as _e:
logger.debug(
"clear_resume_pending after drain failed for %s: %s",
_sk, _e,
)
# Drain timed out: flag the agents still running, interrupt them, wait up
# to 5 seconds for the running-agents map to empty, then kill tool
# subprocesses.
if timed_out:
logger.warning(
"Gateway drain timed out after %.1fs with %d active agent(s); interrupting remaining work.",
timeout,
self._running_agent_count(),
)
_resume_reason = (
"restart_timeout" if self._restart_requested else "shutdown_timeout"
)
for _sk, _agent in list(self._running_agents.items()):
if _agent is _AGENT_PENDING_SENTINEL:
continue
try:
self.session_store.mark_resume_pending(_sk, _resume_reason)
except Exception as _e:
logger.debug(
"mark_resume_pending failed for %s: %s",
_sk, _e,
)
self._interrupt_running_agents(
_INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
)
interrupt_deadline = asyncio.get_running_loop().time() + 5.0
while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline:
self._update_runtime_status("draining")
await asyncio.sleep(0.1)
_kill_tool_subprocesses("post-interrupt")
logger.info(
"Shutdown phase: post-interrupt tool kill done at +%.2fs",
_phase_elapsed(),
)
# Detached restart: launch the replacement command; a failure is logged
# and teardown continues.
if self._restart_requested and self._restart_detached:
try:
await self._launch_detached_restart_command()
except Exception as e:
logger.error("Failed to launch detached gateway restart: %s", e)
self._finalize_shutdown_agents(active_agents)
# Idle agents held in the agent cache: snapshot and clear the cache while
# holding its lock, and release each cached agent's resources. The
# attributes are fetched with getattr, so their absence is tolerated.
_cache_lock = getattr(self, "_agent_cache_lock", None)
_cache = getattr(self, "_agent_cache", None)
if _cache_lock is not None and _cache is not None:
with _cache_lock:
_idle_agents = list(_cache.values())
_cache.clear()
for _entry in _idle_agents:
# Cache entries may be tuples whose first element is the agent.
_agent = (
_entry[0] if isinstance(_entry, tuple) else _entry
)
self._cleanup_agent_resources(_agent)
# Per adapter: cancel its background tasks, then disconnect. Both steps
# catch and log their own exceptions.
for platform, adapter in list(self.adapters.items()):
_adapter_started_at = time.monotonic()
try:
await adapter.cancel_background_tasks()
except Exception as e:
logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
try:
await adapter.disconnect()
logger.info(
"✓ %s disconnected (%.2fs)",
platform.value,
time.monotonic() - _adapter_started_at,
)
except Exception as e:
logger.error(
"✗ %s disconnect error after %.2fs: %s",
platform.value,
time.monotonic() - _adapter_started_at,
e,
)
logger.info(
"Shutdown phase: all adapters disconnected at +%.2fs",
_phase_elapsed(),
)
# Cancel every tracked background task except the stop task itself.
for _task in list(self._background_tasks):
if _task is self._stop_task:
continue
_task.cancel()
# Reset in-memory runtime state, then signal wait_for_shutdown().
self._background_tasks.clear()
self.adapters.clear()
self._running_agents.clear()
self._running_agents_ts.clear()
self._pending_messages.clear()
self._pending_approvals.clear()
if hasattr(self, '_busy_ack_ts'):
self._busy_ack_ts.clear()
self._shutdown_event.set()
# Second tool-subprocess sweep (see the helper's docstring).
_kill_tool_subprocesses("final-cleanup")
logger.info(
"Shutdown phase: final-cleanup tool kill done at +%.2fs",
_phase_elapsed(),
)
try:
from agent.auxiliary_client import shutdown_cached_clients
shutdown_cached_clients()
except Exception as _e:
logger.debug("shutdown_cached_clients error: %s", _e)
# Close the ``_db`` attribute of this object and of the session store,
# wherever one exists and exposes close().
for _db_holder in (self, getattr(self, "session_store", None)):
_db = getattr(_db_holder, "_db", None) if _db_holder else None
if _db is None or not hasattr(_db, "close"):
continue
try:
_db.close()
except Exception as _e:
logger.debug("SessionDB close error: %s", _e)
logger.info(
"Shutdown phase: SessionDB close done at +%.2fs",
_phase_elapsed(),
)
from gateway.status import remove_pid_file, release_gateway_runtime_lock
remove_pid_file()
release_gateway_runtime_lock()
# The .clean_shutdown marker is written only when the drain did not time
# out; a failure to write it is ignored.
if not timed_out:
try:
(_hermes_home / ".clean_shutdown").touch()
except Exception:
pass
else:
logger.info(
"Skipping .clean_shutdown marker — drain timed out with "
"interrupted agents; next startup will suspend recently "
"active sessions."
)
if active_agents:
self._increment_restart_failure_counts(set(active_agents.keys()))
# Service-managed restart: set the dedicated restart exit code.
if self._restart_requested and self._restart_via_service:
self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
self._exit_reason = self._exit_reason or "Gateway restart requested"
self._draining = False
self._update_runtime_status("stopped", self._exit_reason)
logger.info("Gateway stopped (total teardown %.2fs)", _phase_elapsed())
# Run the teardown as a task so later stop() calls can await the same one.
self._stop_task = asyncio.create_task(_stop_impl())
await self._stop_task
async def wait_for_shutdown(self) -> None:
    """Suspend the caller until the gateway's shutdown event is set."""
    shutdown_signal = self._shutdown_event
    await shutdown_signal.wait()
def _create_adapter(
    self,
    platform: Platform,
    config: Any
) -> Optional[BasePlatformAdapter]:
    """Create the appropriate adapter for a platform.

    Resolution order:

    1. ``platform_registry`` (plugin adapters). A registered platform
       whose factory returns ``None`` is reported and yields ``None``
       without consulting the built-ins. If the registry lookup itself
       raises, the error is logged at debug level and the built-ins are
       tried.
    2. The built-in adapters below. Each one is imported lazily and is
       only constructed when its ``check_*_requirements`` probe passes.

    Side effect: when ``config.extra`` is a dict, the runner-level
    ``group_sessions_per_user`` / ``thread_sessions_per_user`` settings
    are written into it as defaults (existing keys are kept).

    Args:
        platform: Platform to build an adapter for.
        config: Platform configuration object handed to the adapter.

    Returns:
        The adapter instance, or ``None`` when the platform is unknown,
        its requirements are not met, or a registered factory failed.
    """
    if hasattr(config, "extra") and isinstance(config.extra, dict):
        config.extra.setdefault(
            "group_sessions_per_user",
            self.config.group_sessions_per_user,
        )
        config.extra.setdefault(
            "thread_sessions_per_user",
            getattr(self.config, "thread_sessions_per_user", False),
        )

    try:
        from gateway.platform_registry import platform_registry
        if platform_registry.is_registered(platform.value):
            adapter = platform_registry.create_adapter(platform.value, config)
            if adapter is not None:
                return adapter
            logger.error(
                "Platform '%s' is registered but adapter creation failed "
                "(check dependencies and config)",
                platform.value,
            )
            return None
    except Exception as e:
        logger.debug("Platform registry lookup for '%s' failed: %s", platform.value, e)

    # Built-in platforms. Every branch returns, so plain ``if`` suffices.
    if platform == Platform.TELEGRAM:
        from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
        if not check_telegram_requirements():
            logger.warning("Telegram: python-telegram-bot not installed")
            return None
        adapter = TelegramAdapter(config)
        # Notification mode: the env var wins over config; both are
        # normalised (trimmed, lower-cased) before validation so that
        # e.g. "All" or "important " from the environment is accepted
        # the same way it would be from config.
        _notify_mode = os.getenv("HERMES_TELEGRAM_NOTIFICATIONS", "").strip().lower()
        if not _notify_mode:
            try:
                _gw_cfg = _load_gateway_config()
                _raw = cfg_get(_gw_cfg, "display", "platforms", "telegram", "notifications")
                if _raw not in {None, ""}:
                    _notify_mode = str(_raw).strip().lower()
            except Exception:
                # Config is optional here; fall back to the default mode.
                pass
        _notify_mode = _notify_mode or "important"
        if _notify_mode not in {"all", "important"}:
            logger.warning(
                "Unknown telegram notifications mode '%s', "
                "defaulting to 'important' (valid: all, important)",
                _notify_mode,
            )
            _notify_mode = "important"
        adapter._notifications_mode = _notify_mode
        return adapter

    if platform == Platform.DISCORD:
        from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
        if not check_discord_requirements():
            logger.warning("Discord: discord.py not installed")
            return None
        adapter = DiscordAdapter(config)
        adapter.gateway_runner = self
        return adapter

    if platform == Platform.WHATSAPP:
        from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
        if not check_whatsapp_requirements():
            logger.warning("WhatsApp: Node.js not installed or bridge not configured")
            return None
        return WhatsAppAdapter(config)

    if platform == Platform.SLACK:
        from gateway.platforms.slack import SlackAdapter, check_slack_requirements
        if not check_slack_requirements():
            logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
            return None
        return SlackAdapter(config)

    if platform == Platform.SIGNAL:
        from gateway.platforms.signal import SignalAdapter, check_signal_requirements
        if not check_signal_requirements():
            logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
            return None
        return SignalAdapter(config)

    if platform == Platform.HOMEASSISTANT:
        from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
        if not check_ha_requirements():
            logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
            return None
        return HomeAssistantAdapter(config)

    if platform == Platform.EMAIL:
        from gateway.platforms.email import EmailAdapter, check_email_requirements
        if not check_email_requirements():
            logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
            return None
        return EmailAdapter(config)

    if platform == Platform.SMS:
        from gateway.platforms.sms import SmsAdapter, check_sms_requirements
        if not check_sms_requirements():
            logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
            return None
        return SmsAdapter(config)

    if platform == Platform.DINGTALK:
        from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
        if not check_dingtalk_requirements():
            logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
            return None
        return DingTalkAdapter(config)

    if platform == Platform.FEISHU:
        from gateway.platforms.feishu import FeishuAdapter, check_feishu_requirements
        if not check_feishu_requirements():
            logger.warning("Feishu: lark-oapi not installed or FEISHU_APP_ID/SECRET not set")
            return None
        return FeishuAdapter(config)

    if platform == Platform.WECOM_CALLBACK:
        from gateway.platforms.wecom_callback import (
            WecomCallbackAdapter,
            check_wecom_callback_requirements,
        )
        if not check_wecom_callback_requirements():
            logger.warning("WeComCallback: aiohttp/httpx not installed")
            return None
        return WecomCallbackAdapter(config)

    if platform == Platform.WECOM:
        from gateway.platforms.wecom import WeComAdapter, check_wecom_requirements
        if not check_wecom_requirements():
            logger.warning("WeCom: aiohttp not installed or WECOM_BOT_ID/SECRET not set")
            return None
        return WeComAdapter(config)

    if platform == Platform.WEIXIN:
        from gateway.platforms.weixin import WeixinAdapter, check_weixin_requirements
        if not check_weixin_requirements():
            logger.warning("Weixin: aiohttp/cryptography not installed")
            return None
        return WeixinAdapter(config)

    if platform == Platform.MATTERMOST:
        from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
        if not check_mattermost_requirements():
            logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
            return None
        return MattermostAdapter(config)

    if platform == Platform.MATRIX:
        from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
        if not check_matrix_requirements():
            logger.warning("Matrix: mautrix not installed or credentials not set. Run: pip install 'mautrix[encryption]'")
            return None
        return MatrixAdapter(config)

    if platform == Platform.API_SERVER:
        from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
        if not check_api_server_requirements():
            logger.warning("API Server: aiohttp not installed")
            return None
        return APIServerAdapter(config)

    if platform == Platform.WEBHOOK:
        from gateway.platforms.webhook import WebhookAdapter, check_webhook_requirements
        if not check_webhook_requirements():
            logger.warning("Webhook: aiohttp not installed")
            return None
        adapter = WebhookAdapter(config)
        adapter.gateway_runner = self
        return adapter

    if platform == Platform.MSGRAPH_WEBHOOK:
        from gateway.platforms.msgraph_webhook import (
            MSGraphWebhookAdapter,
            check_msgraph_webhook_requirements,
        )
        if not check_msgraph_webhook_requirements():
            logger.warning("MSGraph webhook: aiohttp not installed")
            return None
        return MSGraphWebhookAdapter(config)

    if platform == Platform.BLUEBUBBLES:
        from gateway.platforms.bluebubbles import BlueBubblesAdapter, check_bluebubbles_requirements
        if not check_bluebubbles_requirements():
            logger.warning("BlueBubbles: aiohttp/httpx missing or BLUEBUBBLES_SERVER_URL/BLUEBUBBLES_PASSWORD not configured")
            return None
        return BlueBubblesAdapter(config)

    if platform == Platform.QQBOT:
        from gateway.platforms.qqbot import QQAdapter, check_qq_requirements
        if not check_qq_requirements():
            logger.warning("QQBot: aiohttp/httpx missing or QQ_APP_ID/QQ_CLIENT_SECRET not configured")
            return None
        return QQAdapter(config)

    if platform == Platform.YUANBAO:
        from gateway.platforms.yuanbao import YuanbaoAdapter, WEBSOCKETS_AVAILABLE
        if not WEBSOCKETS_AVAILABLE:
            logger.warning("Yuanbao: websockets not installed. Run: pip install websockets")
            return None
        return YuanbaoAdapter(config)

    # Unknown platform and no plugin registered for it.
    return None
def _is_user_authorized(self, source: SessionSource) -> bool:
    """
    Check if the sender described by ``source`` is authorized to use the bot.

    Checks, in the order they are evaluated:
    1. Home Assistant and webhook sources are always accepted.
    2. Group/forum/channel chat allowlist, matched on chat ID before any
       user check (TELEGRAM_GROUP_ALLOWED_CHATS, QQ_GROUP_ALLOWED_USERS).
    3. Sources without a user ID are rejected.
    4. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true).
    5. Bot senders, when the platform's *_ALLOW_BOTS is "mentions" or "all".
    6. Discord senders whenever DISCORD_ALLOWED_ROLES is non-empty
       (NOTE(review): presumably the adapter already enforced role
       membership before the message got here — confirm).
    7. DM pairing approved list.
    8. With no allowlist configured at all, GATEWAY_ALLOW_ALL_USERS decides.
    9. Legacy Telegram chat IDs stored in TELEGRAM_GROUP_ALLOWED_USERS.
    10. Membership in the platform / group-user / global allowlists
        ("*" matches everyone).
    11. Default: deny.

    Args:
        source: Origin of the message (platform, chat and sender identity).

    Returns:
        True when the sender may use the bot, False otherwise.
    """

    def _csv_ids(raw: str) -> set:
        """Split a comma-separated env value into non-empty, stripped IDs."""
        return {part.strip() for part in raw.split(",") if part.strip()}

    truthy = {"true", "1", "yes"}
    group_chat_types = {"group", "forum"}

    if source.platform in {Platform.HOMEASSISTANT, Platform.WEBHOOK}:
        return True

    user_id = source.user_id

    # Single source of truth for chat-ID allowlists; used by the pre-check
    # below and by the "is any allowlist configured?" decision further down.
    platform_group_chat_env_map = {
        Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_CHATS",
        Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
    }

    # Chat-level pre-check: an allowlisted chat authorizes every sender in
    # it, including messages that carry no user ID.
    if source.chat_type in {"group", "forum", "channel"} and source.chat_id:
        chat_allowlist_env = platform_group_chat_env_map.get(source.platform, "")
        if chat_allowlist_env:
            raw_chat_allowlist = os.getenv(chat_allowlist_env, "").strip()
            if raw_chat_allowlist:
                allowed_group_ids = _csv_ids(raw_chat_allowlist)
                if "*" in allowed_group_ids or source.chat_id in allowed_group_ids:
                    return True

    if not user_id:
        return False

    platform_env_map = {
        Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
        Platform.DISCORD: "DISCORD_ALLOWED_USERS",
        Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
        Platform.SLACK: "SLACK_ALLOWED_USERS",
        Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
        Platform.EMAIL: "EMAIL_ALLOWED_USERS",
        Platform.SMS: "SMS_ALLOWED_USERS",
        Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
        Platform.MATRIX: "MATRIX_ALLOWED_USERS",
        Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
        Platform.FEISHU: "FEISHU_ALLOWED_USERS",
        Platform.WECOM: "WECOM_ALLOWED_USERS",
        Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS",
        Platform.WEIXIN: "WEIXIN_ALLOWED_USERS",
        Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS",
        Platform.QQBOT: "QQ_ALLOWED_USERS",
        Platform.YUANBAO: "YUANBAO_ALLOWED_USERS",
    }
    platform_group_user_env_map = {
        Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
    }
    platform_allow_all_map = {
        Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
        Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
        Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
        Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
        Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
        Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
        Platform.SMS: "SMS_ALLOW_ALL_USERS",
        Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
        Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
        Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
        Platform.FEISHU: "FEISHU_ALLOW_ALL_USERS",
        Platform.WECOM: "WECOM_ALLOW_ALL_USERS",
        Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOW_ALL_USERS",
        Platform.WEIXIN: "WEIXIN_ALLOW_ALL_USERS",
        Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOW_ALL_USERS",
        Platform.QQBOT: "QQ_ALLOW_ALL_USERS",
        Platform.YUANBAO: "YUANBAO_ALLOW_ALL_USERS",
    }
    platform_allow_bots_map = {
        Platform.DISCORD: "DISCORD_ALLOW_BOTS",
        Platform.FEISHU: "FEISHU_ALLOW_BOTS",
    }

    # Platforms missing from the built-in tables may declare their env var
    # names through the platform registry.
    if source.platform not in platform_env_map:
        try:
            from gateway.platform_registry import platform_registry

            entry = platform_registry.get(source.platform.value)
            if entry:
                if entry.allowed_users_env:
                    platform_env_map[source.platform] = entry.allowed_users_env
                if entry.allow_all_env:
                    platform_allow_all_map[source.platform] = entry.allow_all_env
        except Exception:
            # Best-effort: a failed lookup only means no registry-provided
            # env vars are consulted. Record it instead of hiding it.
            logger.debug(
                "Platform registry lookup failed for %r; using built-in auth env vars only",
                source.platform,
                exc_info=True,
            )

    platform_allow_all_var = platform_allow_all_map.get(source.platform, "")
    if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in truthy:
        return True

    if getattr(source, "is_bot", False):
        allow_bots_var = platform_allow_bots_map.get(source.platform)
        if allow_bots_var and os.getenv(allow_bots_var, "none").lower().strip() in {"mentions", "all"}:
            return True

    if source.platform == Platform.DISCORD and os.getenv("DISCORD_ALLOWED_ROLES", "").strip():
        return True

    platform_name = source.platform.value if source.platform else ""
    if self.pairing_store.is_approved(platform_name, user_id):
        return True

    in_group_chat = source.chat_type in group_chat_types
    platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
    group_user_allowlist = ""
    group_chat_allowlist = ""
    if in_group_chat:
        group_user_allowlist = os.getenv(platform_group_user_env_map.get(source.platform, ""), "").strip()
        # A chat-ID match against this list was already attempted by the
        # pre-check at the top, so here the value only tells us whether
        # an allowlist has been configured.
        group_chat_allowlist = os.getenv(platform_group_chat_env_map.get(source.platform, ""), "").strip()
    global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

    if not (platform_allowlist or group_user_allowlist or group_chat_allowlist or global_allowlist):
        # Nothing is restricted, so the global allow-all flag decides.
        return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in truthy

    # Backward compatibility: values starting with "-" in
    # TELEGRAM_GROUP_ALLOWED_USERS are treated as chat IDs.
    if (
        source.platform == Platform.TELEGRAM
        and group_user_allowlist
        and in_group_chat
        and source.chat_id
    ):
        legacy_chat_ids = {
            value for value in _csv_ids(group_user_allowlist) if value.startswith("-")
        }
        if legacy_chat_ids:
            if not getattr(self, "_warned_telegram_group_users_legacy", False):
                logger.warning(
                    "TELEGRAM_GROUP_ALLOWED_USERS contains chat-ID-shaped values "
                    "(%s). Treating them as chat IDs for backward compatibility. "
                    "Move chat IDs to TELEGRAM_GROUP_ALLOWED_CHATS — the _USERS var "
                    "is now for sender user IDs.",
                    ",".join(sorted(legacy_chat_ids)),
                )
                # Warn once per runner instance.
                self._warned_telegram_group_users_legacy = True
            if source.chat_id in legacy_chat_ids:
                return True

    allowed_ids = set()
    for raw_allowlist in (platform_allowlist, group_user_allowlist, global_allowlist):
        if raw_allowlist:
            allowed_ids |= _csv_ids(raw_allowlist)

    if "*" in allowed_ids:
        return True

    # Also match on the part before "@" (e.g. "12345@host" -> "12345").
    check_ids = {user_id}
    if "@" in user_id:
        check_ids.add(user_id.split("@")[0])

    if source.platform == Platform.WHATSAPP:
        # Expand both sides through the WhatsApp alias helpers so different
        # spellings of the same identity can match.
        normalized_allowed_ids = set()
        for allowed_id in allowed_ids:
            normalized_allowed_ids.update(_expand_whatsapp_auth_aliases(allowed_id))
        if normalized_allowed_ids:
            allowed_ids = normalized_allowed_ids
        check_ids.update(_expand_whatsapp_auth_aliases(user_id))
        normalized_user_id = _normalize_whatsapp_identifier(user_id)
        if normalized_user_id:
            check_ids.add(normalized_user_id)

    return bool(check_ids & allowed_ids)
def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
    """Return how unauthorized DMs should be handled for a platform.

    Resolution order:

    1. Explicit per-platform ``unauthorized_dm_behavior`` in config — always wins.
    2. Global ``unauthorized_dm_behavior`` in config, when it is anything
       other than ``"pair"`` (a global ``"pair"`` cannot be told apart from
       the default here, so it falls through to the checks below).
    3. When an allowlist (``PLATFORM_ALLOWED_USERS``,
       ``PLATFORM_GROUP_ALLOWED_USERS`` / ``PLATFORM_GROUP_ALLOWED_CHATS``,
       a registry-declared allowlist env var, or ``GATEWAY_ALLOWED_USERS``)
       is configured, default to ``"ignore"`` — the allowlist signals that
       the owner has deliberately restricted access; spamming unknown
       contacts with pairing codes is both noisy and a potential
       info-leak. (#9337)
    4. No allowlist and no explicit config → ``"pair"`` (open-gateway default).

    Args:
        platform: Platform the DM arrived on, or ``None`` when unknown.

    Returns:
        The per-platform or global config value when one applies,
        otherwise ``"ignore"`` or ``"pair"``.
    """
    config = getattr(self, "config", None)

    if config and hasattr(config, "get_unauthorized_dm_behavior") and platform:
        platform_cfg = config.platforms.get(platform) if hasattr(config, "platforms") else None
        # ``extra`` may be missing or None; treat both as "no override".
        platform_extra = getattr(platform_cfg, "extra", None) or {}
        if platform_cfg and "unauthorized_dm_behavior" in platform_extra:
            return config.get_unauthorized_dm_behavior(platform)

    if config and hasattr(config, "unauthorized_dm_behavior"):
        if config.unauthorized_dm_behavior != "pair":
            return config.unauthorized_dm_behavior

    if platform:
        # Kept in step with the allowlist env vars that
        # ``_is_user_authorized`` consults.
        platform_env_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
            Platform.SLACK: "SLACK_ALLOWED_USERS",
            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
            Platform.SMS: "SMS_ALLOWED_USERS",
            Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
            Platform.MATRIX: "MATRIX_ALLOWED_USERS",
            Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
            Platform.FEISHU: "FEISHU_ALLOWED_USERS",
            Platform.WECOM: "WECOM_ALLOWED_USERS",
            Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS",
            Platform.WEIXIN: "WEIXIN_ALLOWED_USERS",
            Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS",
            Platform.QQBOT: "QQ_ALLOWED_USERS",
            Platform.YUANBAO: "YUANBAO_ALLOWED_USERS",
        }
        platform_group_env_map = {
            Platform.TELEGRAM: (
                "TELEGRAM_GROUP_ALLOWED_USERS",
                "TELEGRAM_GROUP_ALLOWED_CHATS",
            ),
            Platform.QQBOT: ("QQ_GROUP_ALLOWED_USERS",),
        }

        allowed_users_env = platform_env_map.get(platform, "")
        if not allowed_users_env:
            # Platforms outside the built-in table may declare their
            # allowlist env var through the platform registry.
            try:
                from gateway.platform_registry import platform_registry

                entry = platform_registry.get(platform.value)
                if entry and entry.allowed_users_env:
                    allowed_users_env = entry.allowed_users_env
            except Exception:
                # Best-effort: without the registry we only know the
                # built-in env vars.
                logger.debug(
                    "Platform registry lookup failed for %r; using built-in allowlist env vars only",
                    platform,
                    exc_info=True,
                )

        candidate_envs = (allowed_users_env, *platform_group_env_map.get(platform, ()))
        if any(env_key and os.getenv(env_key, "").strip() for env_key in candidate_envs):
            return "ignore"

    if os.getenv("GATEWAY_ALLOWED_USERS", "").strip():
        return "ignore"
    return "pair"
async def _deliver_platform_notice(self, source, content: str) -> None:
    """Send a setup/operational notice, privately when the platform config asks for it.

    Does nothing when no adapter is registered for ``source.platform``.
    When the configured notice delivery is ``"private"`` and the source has
    a user ID, a private notice is attempted first; if that attempt raises
    or does not report success, the notice is sent to the chat instead.

    Args:
        source: Origin of the triggering message (platform, chat, user).
        content: Text of the notice to deliver.
    """
    platform_adapter = self.adapters.get(source.platform)
    if not platform_adapter:
        return

    gateway_config = getattr(self, "config", None)
    if gateway_config and hasattr(gateway_config, "get_notice_delivery"):
        delivery_mode = gateway_config.get_notice_delivery(source.platform)
    else:
        delivery_mode = "public"

    thread_meta = self._thread_metadata_for_source(source)

    if delivery_mode == "private" and getattr(source, "user_id", None):
        delivered_privately = False
        try:
            outcome = await platform_adapter.send_private_notice(
                source.chat_id,
                source.user_id,
                content,
                metadata=thread_meta,
            )
            delivered_privately = bool(getattr(outcome, "success", False))
        except Exception:
            # Private delivery is best-effort; the public send below is the fallback.
            logger.debug(
                "[%s] send_private_notice failed, falling back to public",
                getattr(source, "platform", "?"),
                exc_info=True,
            )
        if delivered_privately:
            return

    await platform_adapter.send(source.chat_id, content, metadata=thread_meta)
async def _handle_message(self, event: MessageEvent) -> Optional[str]:
"""
Handle an incoming message from any platform.
This is the core message processing pipeline:
1. Check user authorization
2. Check for commands (/new, /reset, etc.)
3. Check for running agent and interrupt if needed
4. Get or create session
5. Build context for agent
6. Run agent conversation
7. Return response
"""
source = event.source
is_internal = bool(getattr(event, "internal", False))
if not is_internal:
try:
from hermes_cli.plugins import invoke_hook as _invoke_hook
_hook_results = _invoke_hook(
"pre_gateway_dispatch",
event=event,
gateway=self,
session_store=self.session_store,
)
except Exception as _hook_exc:
logger.warning("pre_gateway_dispatch invocation failed: %s", _hook_exc)
_hook_results = []
for _result in _hook_results:
if not isinstance(_result, dict):
continue
_action = _result.get("action")
if _action == "skip":
logger.info(
"pre_gateway_dispatch skip: reason=%s platform=%s chat=%s",
_result.get("reason"),
source.platform.value if source.platform else "unknown",
source.chat_id or "unknown",
)
return None
if _action == "rewrite":
_new_text = _result.get("text")
if isinstance(_new_text, str):
event = dataclasses.replace(event, text=_new_text)
source = event.source
break
if _action == "allow":
break
if is_internal:
pass
elif source.user_id is None:
if not self._is_user_authorized(source):
logger.debug("Ignoring message with no user_id from %s", source.platform.value)
return None
elif not self._is_user_authorized(source):
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
platform_name = source.platform.value if source.platform else "unknown"
if self.pairing_store._is_rate_limited(platform_name, source.user_id):
return None
code = self.pairing_store.generate_code(
platform_name, source.user_id, source.user_name or ""
)
if code:
adapter = self.adapters.get(source.platform)
if adapter:
await adapter.send(
source.chat_id,
f"Hi~ I don't recognize you yet!\n\n"
f"Here's your pairing code: `{code}`\n\n"
f"Ask the bot owner to run:\n"
f"`hermes pairing approve {platform_name} {code}`"
)
else:
adapter = self.adapters.get(source.platform)
if adapter:
await adapter.send(
source.chat_id,
"Too many pairing requests right now~ "
"Please try again later!"
)
self.pairing_store._record_rate_limit(platform_name, source.user_id)
return None
_quick_key = self._session_key_for_source(source)
_update_prompts = getattr(self, "_update_prompt_pending", {})
if _update_prompts.get(_quick_key):
raw = (event.text or "").strip()
cmd = event.get_command()
if cmd in {"approve", "yes"}:
response_text = "y"
elif cmd in {"deny", "no"}:
response_text = "n"
else:
_recognized_cmd = None
if cmd:
try:
from hermes_cli.commands import resolve_command as _resolve_update_cmd
except Exception:
_resolve_update_cmd = None
if _resolve_update_cmd is not None:
try:
_cmd_def = _resolve_update_cmd(cmd)
_recognized_cmd = _cmd_def.name if _cmd_def else None
except Exception:
_recognized_cmd = None
if _recognized_cmd:
response_text = ""
else:
response_text = raw
if response_text:
response_path = _hermes_home / ".update_response"
prompt_path = _hermes_home / ".update_prompt.json"
try:
tmp = response_path.with_suffix(".tmp")
tmp.write_text(response_text)
tmp.replace(response_path)
prompt_path.unlink(missing_ok=True)
except OSError as e:
logger.warning("Failed to write update response: %s", e)
return f"✗ Failed to send response to update process: {e}"
_update_prompts.pop(_quick_key, None)
label = response_text if len(response_text) <= 20 else response_text[:20] + "…"
return f"✓ Sent `{label}` to the update process."
if _recognized_cmd:
response_path = _hermes_home / ".update_response"
prompt_path = _hermes_home / ".update_prompt.json"
try:
tmp = response_path.with_suffix(".tmp")
tmp.write_text("")
tmp.replace(response_path)
prompt_path.unlink(missing_ok=True)
logger.info(
"Recognized /%s during pending update prompt for %s; "
"cancelled prompt with default and dispatching command",
_recognized_cmd,
_quick_key,
)
except OSError as e:
logger.warning(
"Failed to write cancel response for pending update prompt: %s",
e,
)
_update_prompts.pop(_quick_key, None)
try:
from tools import clarify_gateway as _clarify_mod
_pending_clarify = _clarify_mod.get_pending_for_session(_quick_key)
except Exception:
_pending_clarify = None
if _pending_clarify is not None:
_raw_clarify_reply = (event.text or "").strip()
if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"):
_resolved = _clarify_mod.resolve_gateway_clarify(
_pending_clarify.clarify_id, _raw_clarify_reply,
)
if _resolved:
logger.info(
"Gateway intercepted clarify text response (session=%s, id=%s)",
_quick_key, _pending_clarify.clarify_id,
)
return ""
from tools import slash_confirm as _slash_confirm_mod
_pending_confirm = _slash_confirm_mod.get_pending(_quick_key)
_tool_approval_live = False
try:
from tools.approval import has_blocking_approval
_tool_approval_live = has_blocking_approval(_quick_key)
except Exception:
_tool_approval_live = False
if _pending_confirm and not _tool_approval_live:
_raw_reply = (event.text or "").strip()
_cmd_reply = event.get_command()
_confirm_choice = None
if _cmd_reply in {"approve", "yes", "ok", "confirm"}:
_confirm_choice = "once"
elif _cmd_reply in {"always", "remember"}:
_confirm_choice = "always"
elif _cmd_reply in {"cancel", "no", "deny", "nevermind"}:
_confirm_choice = "cancel"
elif _raw_reply.lower() in {"approve", "approve once", "once"}:
_confirm_choice = "once"
elif _raw_reply.lower() in {"always", "always approve"}:
_confirm_choice = "always"
elif _raw_reply.lower() in {"cancel", "nevermind", "no"}:
_confirm_choice = "cancel"
if _confirm_choice is not None:
_resolved = await _slash_confirm_mod.resolve(
_quick_key, _pending_confirm.get("confirm_id"), _confirm_choice,
)
return _resolved or ""
_slash_confirm_mod.clear_if_stale(_quick_key)
_raw_stale_timeout = _float_env("HERMES_AGENT_TIMEOUT", 1800)
_stale_ts = self._running_agents_ts.get(_quick_key, 0)
if _quick_key in self._running_agents and _stale_ts:
_stale_age = time.time() - _stale_ts
_stale_agent = self._running_agents.get(_quick_key)
_stale_idle = float("inf")
_stale_detail = ""
if _stale_agent and hasattr(_stale_agent, "get_activity_summary"):
try:
_sa = _stale_agent.get_activity_summary()
_stale_idle = _sa.get("seconds_since_activity", float("inf"))
_stale_detail = (
f" | last_activity={_sa.get('last_activity_desc', 'unknown')} "
f"({_stale_idle:.0f}s ago) "
f"| iteration={_sa.get('api_call_count', 0)}/{_sa.get('max_iterations', 0)}"
)
except Exception:
pass
_wall_ttl = max(_raw_stale_timeout * 10, 7200) if _raw_stale_timeout > 0 else float("inf")
_should_evict = (
_stale_agent is not _AGENT_PENDING_SENTINEL
and (
(_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
or _stale_age > _wall_ttl
)
)
if _should_evict:
logger.warning(
"Evicting stale _running_agents entry for %s "
"(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
_quick_key, _stale_age, _stale_idle,
_raw_stale_timeout, _stale_detail,
)
self._invalidate_session_run_generation(
_quick_key,
reason="stale_running_agent_eviction",
)
self._release_running_agent_state(_quick_key)
if _quick_key in self._running_agents:
if event.get_command() == "status":
return await self._handle_status_command(event)
from hermes_cli.commands import (
ACTIVE_SESSION_BYPASS_COMMANDS as _DEDICATED_HANDLERS,
resolve_command as _resolve_cmd_inner,
)
_evt_cmd = event.get_command()
_cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
if _evt_cmd and _cmd_def_inner is not None:
_denied = self._check_slash_access(source, _cmd_def_inner.name)
if _denied is not None:
return _denied
if _cmd_def_inner and _cmd_def_inner.name == "restart":
return await self._handle_restart_command(event)
if _cmd_def_inner and _cmd_def_inner.name == "stop":
await self._interrupt_and_clear_session(
_quick_key,
source,
interrupt_reason=_INTERRUPT_REASON_STOP,
invalidation_reason="stop_command",
)
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
return EphemeralReply(t("gateway.stop.stopped"))
if _cmd_def_inner and _cmd_def_inner.name == "new":
await self._interrupt_and_clear_session(
_quick_key,
source,
interrupt_reason=_INTERRUPT_REASON_RESET,
invalidation_reason="new_command",
)
return await self._handle_reset_command(event)
if event.get_command() in {"queue", "q"}:
queued_text = event.get_command_args().strip()
if not queued_text:
return "Usage: /queue <prompt>"
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
text=queued_text,
message_type=MessageType.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
)
self._enqueue_fifo(_quick_key, queued_event, adapter)
depth = self._queue_depth(_quick_key, adapter=self.adapters.get(source.platform))
if depth <= 1:
return "Queued for the next turn."
return f"Queued for the next turn. ({depth} queued)"
if _cmd_def_inner and _cmd_def_inner.name == "steer":
steer_text = event.get_command_args().strip()
if not steer_text:
return "Usage: /steer <prompt>"
running_agent = self._running_agents.get(_quick_key)
if running_agent is _AGENT_PENDING_SENTINEL:
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
text=steer_text,
message_type=MessageType.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
)
adapter._pending_messages[_quick_key] = queued_event
return "Agent still starting — /steer queued for the next turn."
if running_agent and hasattr(running_agent, "steer"):
try:
accepted = running_agent.steer(steer_text)
except Exception as exc:
logger.warning("Steer failed for session %s: %s", _quick_key, exc)
return f"⚠️ Steer failed: {exc}"
if accepted:
preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
return f"⏩ Steer queued — arrives after the next tool call: '{preview}'"
return "Steer rejected (empty payload)."
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
text=steer_text,
message_type=MessageType.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
)
adapter._pending_messages[_quick_key] = queued_event
return "No active agent — /steer queued for the next turn."
if _cmd_def_inner and _cmd_def_inner.name == "model":
return "Agent is running — wait or /stop first, then switch models."
if _cmd_def_inner and _cmd_def_inner.name == "codex-runtime":
return ("Agent is running — wait or /stop first, then "
"change runtime.")
if _cmd_def_inner and _cmd_def_inner.name in {"approve", "deny"}:
if _cmd_def_inner.name == "approve":
return await self._handle_approve_command(event)
return await self._handle_deny_command(event)
if _cmd_def_inner and _cmd_def_inner.name == "agents":
return await self._handle_agents_command(event)
if _cmd_def_inner and _cmd_def_inner.name == "background":
return await self._handle_background_command(event)
if _cmd_def_inner and _cmd_def_inner.name == "kanban":
return await self._handle_kanban_command(event)
if _cmd_def_inner and _cmd_def_inner.name == "goal":
_goal_arg = (event.get_command_args() or "").strip().lower()
if not _goal_arg or _goal_arg in {"status", "pause", "resume", "clear", "stop", "done"}:
return await self._handle_goal_command(event)
return "Agent is running — use /goal status / pause / clear mid-run, or /stop before setting a new goal."
if _cmd_def_inner and _cmd_def_inner.name == "subgoal":
return await self._handle_subgoal_command(event)
if _cmd_def_inner and _cmd_def_inner.name in {"yolo", "verbose"}:
if _cmd_def_inner.name == "yolo":
return await self._handle_yolo_command(event)
if _cmd_def_inner.name == "verbose":
return await self._handle_verbose_command(event)
if _cmd_def_inner.name == "footer":
return await self._handle_footer_command(event)
if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS:
if _cmd_def_inner.name == "help":
return await self._handle_help_command(event)
if _cmd_def_inner.name == "commands":
return await self._handle_commands_command(event)
if _cmd_def_inner.name == "profile":
return await self._handle_profile_command(event)
if _cmd_def_inner.name == "update":
return await self._handle_update_command(event)
if _cmd_def_inner:
return (
f"⏳ Agent is running — `/{_cmd_def_inner.name}` can't run "
f"mid-turn. Wait for the current response or `/stop` first."
)
if event.message_type == MessageType.PHOTO:
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(adapter._pending_messages, _quick_key, event)
return None
_telegram_followup_grace = float(
os.getenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0")
)
_started_at = self._running_agents_ts.get(_quick_key, 0)
if (
source.platform == Platform.TELEGRAM
and event.message_type == MessageType.TEXT
and _telegram_followup_grace > 0
and _started_at
and (time.time() - _started_at) <= _telegram_followup_grace
):
logger.debug(
"Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
time.time() - _started_at,
_quick_key,
)
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
running_agent = self._running_agents.get(_quick_key)
if running_agent is _AGENT_PENDING_SENTINEL:
if event.get_command() == "stop":
self._release_running_agent_state(_quick_key)
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
return EphemeralReply("⚡ Force-stopped. The agent was still starting — session unlocked.")
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
if self._draining:
if self._queue_during_drain_enabled():
self._queue_or_replace_pending_event(_quick_key, event)
return (
f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
if self._queue_during_drain_enabled()
else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
)
if self._busy_input_mode == "queue":
logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
self._queue_or_replace_pending_event(_quick_key, event)
return None
if self._busy_input_mode == "steer":
steer_text = (event.text or "").strip()
steered = False
if steer_text and hasattr(running_agent, "steer"):
try:
steered = bool(running_agent.steer(steer_text))
except Exception as exc:
logger.warning("PRIORITY steer failed for session %s: %s", _quick_key, exc)
steered = False
if steered:
logger.debug("PRIORITY steer for session %s", _quick_key)
return None
logger.debug("PRIORITY steer-fallback-to-queue for session %s", _quick_key)
self._queue_or_replace_pending_event(_quick_key, event)
return None
logger.debug("PRIORITY interrupt for session %s", _quick_key)
running_agent.interrupt(event.text)
return None
command = event.get_command()
from hermes_cli.commands import (
GATEWAY_KNOWN_COMMANDS,
is_gateway_known_command,
resolve_command as _resolve_cmd,
)
_cmd_def = _resolve_cmd(command) if command else None
canonical = _cmd_def.name if _cmd_def else command
if command and _cmd_def is None:
if isinstance(self.config, dict):
quick_commands = self.config.get("quick_commands", {}) or {}
else:
quick_commands = getattr(self.config, "quick_commands", {}) or {}
if isinstance(quick_commands, dict) and command in quick_commands:
qcmd = quick_commands[command]
if qcmd.get("type") == "alias":
target = qcmd.get("target", "").strip()
if target:
target = target if target.startswith("/") else f"/{target}"
target_command = target.lstrip("/")
user_args = event.get_command_args().strip()
event.text = f"{target} {user_args}".strip()
command = target_command.split()[0] if target_command else target_command
_cmd_def = _resolve_cmd(command) if command else None
canonical = _cmd_def.name if _cmd_def else command
if command and canonical and is_gateway_known_command(canonical):
_denied = self._check_slash_access(source, canonical)
if _denied is not None:
return _denied
if command and is_gateway_known_command(canonical):
raw_args = event.get_command_args().strip()
hook_ctx = {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"command": canonical,
"raw_command": command,
"args": raw_args,
"raw_args": raw_args,
}
try:
hook_results = await self.hooks.emit_collect(
f"command:{canonical}", hook_ctx
)
except Exception as _hook_err:
logger.debug(
"command:%s hook dispatch failed (non-fatal): %s",
canonical, _hook_err,
)
hook_results = []
for hook_result in hook_results:
if not isinstance(hook_result, dict):
continue
decision = str(hook_result.get("decision", "")).strip().lower()
if not decision or decision == "allow":
continue
if decision == "deny":
message = hook_result.get("message")
if isinstance(message, str) and message:
return message
return f"Command `/{command}` was blocked by a hook."
if decision == "handled":
message = hook_result.get("message")
return message if isinstance(message, str) and message else None
if decision == "rewrite":
new_command = str(
hook_result.get("command_name", "")
).strip().lstrip("/")
if not new_command:
continue
new_args = str(hook_result.get("raw_args", "")).strip()
event.text = f"/{new_command} {new_args}".strip()
command = event.get_command()
_cmd_def = _resolve_cmd(command) if command else None
canonical = _cmd_def.name if _cmd_def else command
break
if canonical == "new":
if self._is_telegram_topic_root_lobby(source):
return self._telegram_topic_root_new_message()
async def _do_reset():
return await self._handle_reset_command(event)
return await self._maybe_confirm_destructive_slash(
event=event,
command="new",
title="/new",
detail=(
"This starts a fresh session and discards the current "
"conversation history."
),
execute=_do_reset,
)
if canonical == "topic":
return await self._handle_topic_command(event)
if canonical == "help":
return await self._handle_help_command(event)
if canonical == "commands":
return await self._handle_commands_command(event)
if canonical == "profile":
return await self._handle_profile_command(event)
if canonical == "whoami":
return await self._handle_whoami_command(event)
if canonical == "status":
return await self._handle_status_command(event)
if canonical == "agents":
return await self._handle_agents_command(event)
if canonical == "platform":
return await self._handle_platform_command(event)
if canonical == "restart":
return await self._handle_restart_command(event)
if canonical == "stop":
return await self._handle_stop_command(event)
if canonical == "reasoning":
return await self._handle_reasoning_command(event)
if canonical == "fast":
return await self._handle_fast_command(event)
if canonical == "verbose":
return await self._handle_verbose_command(event)
if canonical == "footer":
return await self._handle_footer_command(event)
if canonical == "yolo":
return await self._handle_yolo_command(event)
if canonical == "model":
return await self._handle_model_command(event)
if canonical == "codex-runtime":
return await self._handle_codex_runtime_command(event)
if canonical == "personality":
return await self._handle_personality_command(event)
if canonical == "kanban":
return await self._handle_kanban_command(event)
if canonical == "retry":
return await self._handle_retry_command(event)
if canonical == "undo":
async def _do_undo():
return await self._handle_undo_command(event)
return await self._maybe_confirm_destructive_slash(
event=event,
command="undo",
title="/undo",
detail="This removes the last user/assistant exchange from history.",
execute=_do_undo,
)
if canonical == "sethome":
return await self._handle_set_home_command(event)
if canonical == "compress":
return await self._handle_compress_command(event)
if canonical == "usage":
return await self._handle_usage_command(event)
if canonical == "insights":
return await self._handle_insights_command(event)
if canonical == "reload-mcp":
return await self._handle_reload_mcp_command(event)
if canonical == "reload-skills":
return await self._handle_reload_skills_command(event)
if canonical == "bundles":
return await self._handle_bundles_command(event)
if canonical == "approve":
return await self._handle_approve_command(event)
if canonical == "deny":
return await self._handle_deny_command(event)
if canonical == "update":
return await self._handle_update_command(event)
if canonical == "debug":
return await self._handle_debug_command(event)
if canonical == "title":
return await self._handle_title_command(event)
if canonical == "resume":
return await self._handle_resume_command(event)
if canonical == "branch":
return await self._handle_branch_command(event)
if canonical == "rollback":
return await self._handle_rollback_command(event)
if canonical == "background":
return await self._handle_background_command(event)
if canonical == "steer":
steer_payload = event.get_command_args().strip()
if not steer_payload:
return "Usage: /steer <prompt> (no agent is running; sending as a normal message)"
try:
event.text = steer_payload
except Exception:
pass
if canonical == "goal":
return await self._handle_goal_command(event)
if canonical == "subgoal":
return await self._handle_subgoal_command(event)
if canonical == "voice":
return await self._handle_voice_command(event)
if self._draining:
return f"⏳ Gateway is {self._status_action_gerund()} and is not accepting new work right now."
if command:
if isinstance(self.config, dict):
quick_commands = self.config.get("quick_commands", {}) or {}
else:
quick_commands = getattr(self.config, "quick_commands", {}) or {}
if not isinstance(quick_commands, dict):
quick_commands = {}
if command in quick_commands:
qcmd = quick_commands[command]
if qcmd.get("type") == "exec":
exec_cmd = qcmd.get("command", "")
if exec_cmd:
try:
from tools.environments.local import _sanitize_subprocess_env
sanitized_env = _sanitize_subprocess_env(os.environ.copy())
proc = await asyncio.create_subprocess_shell(
exec_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=sanitized_env,
)
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
output = (stdout or stderr).decode().strip()
if output:
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output)
return output if output else "Command returned no output."
except asyncio.TimeoutError:
return "Quick command timed out (30s)."
except Exception as e:
return f"Quick command error: {e}"
else:
return f"Quick command '/{command}' has no command defined."
elif qcmd.get("type") == "alias":
target = qcmd.get("target", "").strip()
if target:
target = target if target.startswith("/") else f"/{target}"
target_command = target.lstrip("/")
user_args = event.get_command_args().strip()
event.text = f"{target} {user_args}".strip()
command = target_command.split()[0] if target_command else target_command
else:
return f"Quick command '/{command}' has no target defined."
else:
return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."
if command:
try:
from hermes_cli.plugins import get_plugin_command_handler
plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
if plugin_handler:
user_args = event.get_command_args().strip()
result = plugin_handler(user_args)
if asyncio.iscoroutine(result):
result = await result
return str(result) if result else None
except Exception as e:
logger.debug("Plugin command dispatch failed (non-fatal): %s", e)
if command:
_bundle_handled = False
try:
from agent.skill_bundles import (
build_bundle_invocation_message,
resolve_bundle_command_key,
)
bundle_key = resolve_bundle_command_key(command)
if bundle_key is not None:
user_instruction = event.get_command_args().strip()
bundle_result = build_bundle_invocation_message(
bundle_key, user_instruction, task_id=_quick_key
)
if bundle_result:
msg, _loaded, missing = bundle_result
event.text = msg
_bundle_handled = True
if missing:
logger.info(
"Bundle %s skipped missing skills: %s",
bundle_key, ", ".join(missing),
)
except Exception as exc:
logger.debug("Bundle dispatch failed (non-fatal): %s", exc)
if command and not locals().get("_bundle_handled", False):
try:
from agent.skill_commands import (
get_skill_commands,
build_skill_invocation_message,
resolve_skill_command_key,
)
skill_cmds = get_skill_commands()
cmd_key = resolve_skill_command_key(command)
if cmd_key is not None:
_skill_name = skill_cmds[cmd_key].get("name", "")
_plat = source.platform.value if source.platform else None
if _plat and _skill_name:
from agent.skill_utils import get_disabled_skill_names as _get_plat_disabled
if _skill_name in _get_plat_disabled(platform=_plat):
return (
f"The **{_skill_name}** skill is disabled for {_plat}.\n"
f"Enable it with: `hermes skills config`"
)
user_instruction = event.get_command_args().strip()
msg = build_skill_invocation_message(
cmd_key, user_instruction, task_id=_quick_key
)
if msg:
event.text = msg
else:
_unavail_msg = _check_unavailable_skill(command)
if _unavail_msg:
return _unavail_msg
if command.replace("_", "-") not in GATEWAY_KNOWN_COMMANDS:
logger.warning(
"Unrecognized slash command /%s from %s — "
"replying with unknown-command notice",
command,
source.platform.value if source.platform else "?",
)
return (
f"Unknown command `/{command}`. "
f"Type /commands to see what's available, "
f"or resend without the leading slash to send "
f"as a regular message."
)
except Exception as e:
logger.debug("Skill command check failed (non-fatal): %s", e)
if self._is_telegram_topic_root_lobby(source):
if self._should_send_telegram_lobby_reminder(source):
return self._telegram_topic_root_lobby_message()
return None
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
self._running_agents_ts[_quick_key] = time.time()
_run_generation = self._begin_session_run_generation(_quick_key)
try:
_agent_result = await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
try:
_final_text = ""
if isinstance(_agent_result, dict):
_final_text = str(_agent_result.get("final_response") or "")
elif isinstance(_agent_result, str):
_final_text = _agent_result
if _final_text.strip():
try:
session_entry = self.session_store.get_or_create_session(source)
except Exception:
session_entry = None
if session_entry is not None:
await self._post_turn_goal_continuation(
session_entry=session_entry,
source=source,
final_response=_final_text,
)
except Exception as _goal_exc:
logger.debug("goal continuation hook failed: %s", _goal_exc)
return _agent_result
finally:
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
self._release_running_agent_state(_quick_key)
else:
self._running_agents_ts.pop(_quick_key, None)
if hasattr(self, "_busy_ack_ts"):
self._busy_ack_ts.pop(_quick_key, None)
async def _prepare_inbound_message_text(
    self,
    *,
    event: MessageEvent,
    source: SessionSource,
    history: List[Dict[str, Any]],
) -> Optional[str]:
    """Prepare inbound event text for the agent.

    Keep the normal inbound path and the queued follow-up path on the same
    preprocessing pipeline so sender attribution, image enrichment, STT,
    document notes, reply context, and @ references all behave the same.

    Side effect: buffers per-session native image paths when the active
    model supports native vision AND the user has images attached. The
    caller consumes and clears that session-scoped buffer at the
    ``run_conversation`` site to build a multimodal user turn. When the
    list is empty, the ``_enrich_message_with_vision`` text path has
    already run and images are represented in-text.

    Args:
        event: Inbound message. Reads ``text``, ``media_urls``,
            ``media_types``, ``message_type``, ``reply_to_message_id`` and
            the optional ``channel_context`` / ``reply_to_text`` attributes.
        source: Origin of the message (``platform``, ``chat_id``,
            ``user_name``); also used to derive the session key.
        history: Prior transcript. Currently unused by this method.

    Returns:
        The prepared message text, or ``None`` when ``@`` context-reference
        expansion was blocked, in which case the message must not be
        forwarded to the agent.
    """

    def _attachment_display_name(cached_path: str) -> str:
        """Derive a quote-safe display name from a cached media path."""
        base = os.path.basename(cached_path)
        # Presumably cache files are named "<a>_<b>_<original name>"; when
        # that shape is absent the whole basename is shown. TODO confirm
        # against the code that writes the cache.
        pieces = base.split("_", 2)
        shown = pieces[2] if len(pieces) >= 3 else base
        # Anything other than word chars, dot, hyphen, and space becomes
        # "_" so the name cannot break out of the quoted note it is
        # embedded in.
        return re.sub(r'[^\w.\- ]', '_', shown)

    message_text = event.text or ""

    _group_sessions_per_user = getattr(self.config, "group_sessions_per_user", True)
    _thread_sessions_per_user = getattr(self.config, "thread_sessions_per_user", False)
    session_key = self._session_key_for_source(source)
    # Discard whatever an earlier turn left buffered for this session so
    # only images from the current event can end up in the buffer.
    self._consume_pending_native_image_paths(session_key)

    # Sender attribution: prefix the author in sessions shared by several users.
    _is_shared_multi_user = is_shared_multi_user_session(
        source,
        group_sessions_per_user=_group_sessions_per_user,
        thread_sessions_per_user=_thread_sessions_per_user,
    )
    if _is_shared_multi_user and source.user_name:
        message_text = f"[{source.user_name}] {message_text}"

    if getattr(event, "channel_context", None):
        message_text = f"{event.channel_context}\n\n[New message]\n{message_text}"

    # Audio *file* attachments (MessageType.AUDIO) are collected separately:
    # they get a path note below instead of being transcribed.
    audio_file_paths: list[str] = []
    if event.media_urls:
        image_paths = []
        audio_paths = []
        for i, path in enumerate(event.media_urls):
            mtype = event.media_types[i] if i < len(event.media_types) else ""
            if mtype.startswith("image/") or event.message_type == MessageType.PHOTO:
                image_paths.append(path)
            if event.message_type == MessageType.AUDIO:
                audio_file_paths.append(path)
            elif event.message_type == MessageType.VOICE or (
                mtype.startswith("audio/")
                and event.message_type not in {MessageType.AUDIO, MessageType.DOCUMENT}
            ):
                audio_paths.append(path)

        if image_paths:
            _img_mode = self._decide_image_input_mode()
            if _img_mode == "native":
                # Native mode: leave the text alone and buffer the paths for
                # the caller to attach to the user turn.
                pending_native = getattr(self, "_pending_native_image_paths_by_session", None)
                if pending_native is None:
                    pending_native = {}
                    self._pending_native_image_paths_by_session = pending_native
                pending_native[session_key] = list(image_paths)
                logger.info(
                    "Image routing: native (model supports vision). %d image(s) will be attached inline.",
                    len(image_paths),
                )
            else:
                logger.info(
                    "Image routing: text (mode=%s). Pre-analyzing %d image(s) via vision_analyze.",
                    _img_mode, len(image_paths),
                )
                message_text = await self._enrich_message_with_vision(
                    message_text,
                    image_paths,
                )

        if audio_paths:
            message_text = await self._enrich_message_with_transcription(
                message_text,
                audio_paths,
            )
            # The transcription step reports a missing/disabled STT provider
            # in-band; detect those markers and tell the user how to enable it.
            _stt_fail_markers = (
                "No STT provider",
                "STT is disabled",
                "can't listen",
                "VOICE_TOOLS_OPENAI_KEY",
            )
            if any(marker in message_text for marker in _stt_fail_markers):
                _stt_adapter = self.adapters.get(source.platform)
                _stt_meta = self._thread_metadata_for_source(source, self._reply_anchor_for_event(event))
                if _stt_adapter:
                    try:
                        _stt_msg = (
                            "🎤 I received your voice message but can't transcribe it — "
                            "no speech-to-text provider is configured.\n\n"
                            "To enable voice: install faster-whisper "
                            "(`pip install faster-whisper` in the Hermes venv) "
                            "and set `stt.enabled: true` in config.yaml, "
                            "then /restart the gateway."
                        )
                        if self._has_setup_skill():
                            _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
                        await _stt_adapter.send(
                            source.chat_id,
                            _stt_msg,
                            metadata=_stt_meta,
                        )
                    except Exception:
                        # Best-effort hint: never fail the turn over it, but
                        # leave a trace instead of swallowing silently.
                        logger.debug("Failed to deliver STT setup notice", exc_info=True)

    if audio_file_paths:
        from tools.credential_files import to_agent_visible_cache_path as _to_agent_path
        for _apath in audio_file_paths:
            _display = _attachment_display_name(_apath)
            _agent_path = _to_agent_path(_apath)
            _note = (
                f"[The user sent an audio file attachment: '{_display}'. "
                f"It is saved at: {_agent_path}. "
                f"Ask the user what they'd like you to do with it, or pass the path to a transcription or media tool.]"
            )
            message_text = f"{_note}\n\n{message_text}"

    if event.media_urls and event.message_type == MessageType.DOCUMENT:
        import mimetypes as _mimetypes
        from tools.credential_files import to_agent_visible_cache_path
        _TEXT_EXTENSIONS = {".txt", ".md", ".csv", ".log", ".json", ".xml", ".yaml", ".yml", ".toml", ".ini", ".cfg"}
        for i, path in enumerate(event.media_urls):
            mtype = event.media_types[i] if i < len(event.media_types) else ""
            # Missing or generic MIME type: fall back to the file extension,
            # then to the stdlib guess.
            if mtype in {"", "application/octet-stream"}:
                _ext = os.path.splitext(path)[1].lower()
                if _ext in _TEXT_EXTENSIONS:
                    mtype = "text/plain"
                else:
                    guessed, _ = _mimetypes.guess_type(path)
                    if guessed:
                        mtype = guessed
            if not mtype.startswith(("application/", "text/")):
                continue
            display_name = _attachment_display_name(path)
            agent_path = to_agent_visible_cache_path(path)
            if mtype.startswith("text/"):
                # NOTE(review): this method does not read the file itself;
                # presumably the content was already appended to event.text
                # upstream. Confirm against the platform adapters.
                context_note = (
                    f"[The user sent a text document: '{display_name}'. "
                    f"Its content has been included below. "
                    f"The file is also saved at: {agent_path}]"
                )
            else:
                context_note = (
                    f"[The user sent a document: '{display_name}'. "
                    f"The file is saved at: {agent_path}. "
                    f"Ask the user what they'd like you to do with it.]"
                )
            message_text = f"{context_note}\n\n{message_text}"

    if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
        # Only the first 500 characters of the quoted message are included.
        reply_snippet = event.reply_to_text[:500]
        message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'

    if "@" in message_text:
        try:
            from agent.context_references import preprocess_context_references_async
            from agent.model_metadata import get_model_context_length
            _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
            _msg_runtime = _resolve_runtime_agent_kwargs()
            _msg_config_ctx = None
            try:
                _msg_cfg = _load_gateway_config()
                _msg_model_cfg = _msg_cfg.get("model", {})
                if isinstance(_msg_model_cfg, dict):
                    _msg_raw_ctx = _msg_model_cfg.get("context_length")
                    if _msg_raw_ctx is not None:
                        _msg_config_ctx = int(_msg_raw_ctx)
            except Exception:
                # Unreadable or malformed config: proceed without an explicit
                # context-length override.
                pass
            _msg_ctx_len = get_model_context_length(
                self._model,
                base_url=self._base_url or _msg_runtime.get("base_url") or "",
                api_key=_msg_runtime.get("api_key") or "",
                config_context_length=_msg_config_ctx,
            )
            _ctx_result = await preprocess_context_references_async(
                message_text,
                cwd=_msg_cwd,
                context_length=_msg_ctx_len,
                allowed_root=_msg_cwd,
            )
            if _ctx_result.blocked:
                # The refusal must hold even if the user cannot be notified.
                # Without this inner guard a failing send() would land in the
                # outer handler and the refused message would still be
                # returned to the caller.
                try:
                    _adapter = self.adapters.get(source.platform)
                    if _adapter:
                        await _adapter.send(
                            source.chat_id,
                            "\n".join(_ctx_result.warnings) or "Context injection refused.",
                        )
                except Exception as notify_exc:
                    logger.warning(
                        "Failed to deliver context-injection refusal notice: %s",
                        notify_exc,
                    )
                return None
            if _ctx_result.expanded:
                message_text = _ctx_result.message
        except Exception as exc:
            # Expansion itself is best-effort: on failure the unexpanded
            # text is passed through.
            logger.debug("@ context reference expansion failed: %s", exc)

    return message_text
def _consume_pending_native_image_paths(self, session_key: str) -> List[str]:
pending_native = getattr(self, "_pending_native_image_paths_by_session", None)
if not pending_native:
return []
return list(pending_native.pop(session_key, []) or [])
def _cache_session_source(self, session_key: str, source) -> None:
if not session_key or source is None:
return
cached_sources = getattr(self, "_session_sources", None)
if cached_sources is None:
cached_sources = OrderedDict()
self._session_sources = cached_sources
try:
cached_sources[session_key] = dataclasses.replace(source)
except Exception:
logger.debug("Failed to cache live session source for %s", session_key, exc_info=True)
return
try:
cached_sources.move_to_end(session_key)
max_size = getattr(self, "_session_sources_max", 512)
while len(cached_sources) > max_size:
cached_sources.popitem(last=False)
except Exception:
pass
def _get_cached_session_source(self, session_key: str):
if not session_key:
return None
cached_sources = getattr(self, "_session_sources", None)
if not cached_sources:
return None
source = cached_sources.get(session_key)
if source is not None:
try:
cached_sources.move_to_end(session_key)
except Exception:
pass
return source
async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int):
"""Inner handler that runs under the _running_agents sentinel guard."""
_msg_start_time = time.time()
_platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
_msg_preview = (event.text or "")[:80].replace("\n", " ")
logger.info(
"inbound message: platform=%s user=%s chat=%s msg=%r",
_platform_name, source.user_name or source.user_id or "unknown",
source.chat_id or "unknown", _msg_preview,
)
recovered = self._recover_telegram_topic_thread_id(source)
if recovered is not None:
logger.info(
"telegram topic recovery: chat=%s user=%s %r -> %s",
source.chat_id, source.user_id, source.thread_id, recovered,
)
source = dataclasses.replace(source, thread_id=recovered)
try:
event.source = source
except Exception:
pass
session_entry = self.session_store.get_or_create_session(source)
session_key = session_entry.session_key
self._cache_session_source(session_key, source)
if self._is_telegram_topic_lane(source):
try:
binding = self._session_db.get_telegram_topic_binding(
chat_id=str(source.chat_id),
thread_id=str(source.thread_id),
) if self._session_db else None
except Exception:
logger.debug("Failed to read Telegram topic binding", exc_info=True)
binding = None
if binding:
bound_session_id = str(binding.get("session_id") or "")
if bound_session_id and bound_session_id != session_entry.session_id:
switched = self.session_store.switch_session(session_key, bound_session_id)
if switched is not None:
session_entry = switched
else:
try:
self._record_telegram_topic_binding(source, session_entry)
except Exception:
logger.debug("Failed to record Telegram topic binding", exc_info=True)
if getattr(session_entry, "was_auto_reset", False):
self._session_model_overrides.pop(session_key, None)
self._set_session_reasoning_override(session_key, None)
if hasattr(self, "_pending_model_notes"):
self._pending_model_notes.pop(session_key, None)
_is_new_session = (
session_entry.created_at == session_entry.updated_at
or getattr(session_entry, "was_auto_reset", False)
or getattr(session_entry, "is_fresh_reset", False)
)
if getattr(session_entry, "is_fresh_reset", False):
session_entry.is_fresh_reset = False
if _is_new_session:
await self.hooks.emit("session:start", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_entry.session_id,
"session_key": session_key,
})
context = build_session_context(source, self.config, session_entry)
_session_env_tokens = self._set_session_env(context)
_redact_pii = False
try:
_pcfg = _load_gateway_config()
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
except Exception:
pass
context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
if getattr(session_entry, 'was_auto_reset', False):
reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
if reset_reason == "suspended":
context_note = "[System note: The user's previous session was stopped and suspended. This is a fresh conversation with no prior context.]"
elif reset_reason == "daily":
context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
else:
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
context_prompt = context_note + "\n\n" + context_prompt
try:
policy = self.session_store.config.get_reset_policy(
platform=source.platform,
session_type=getattr(source, 'chat_type', 'dm'),
)
platform_name = source.platform.value if source.platform else ""
had_activity = getattr(session_entry, 'reset_had_activity', False)
should_notify = reset_reason == "suspended" or (
policy.notify
and had_activity
and platform_name not in policy.notify_exclude_platforms
)
if should_notify:
adapter = self.adapters.get(source.platform)
if adapter:
if reset_reason == "suspended":
reason_text = "previous session was stopped or interrupted"
elif reset_reason == "daily":
reason_text = f"daily schedule at {policy.at_hour}:00"
else:
hours = policy.idle_minutes // 60
mins = policy.idle_minutes % 60
duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
reason_text = f"inactive for {duration}"
notice = (
f"◐ Session automatically reset ({reason_text}). "
f"Conversation history cleared.\n"
f"Use /resume to browse and restore a previous session.\n"
f"Adjust reset timing in config.yaml under session_reset."
)
try:
session_info = self._format_session_info()
if session_info:
notice = f"{notice}\n\n{session_info}"
except Exception:
pass
await adapter.send(
source.chat_id, notice,
metadata=self._thread_metadata_for_source(source),
)
except Exception as e:
logger.debug("Auto-reset notification failed (non-fatal): %s", e)
session_entry.was_auto_reset = False
session_entry.auto_reset_reason = None
_auto = getattr(event, "auto_skill", None)
if _is_new_session and _auto:
_skill_names = [_auto] if isinstance(_auto, str) else list(_auto)
try:
from agent.skill_commands import _load_skill_payload, _build_skill_message
_combined_parts: list[str] = []
_loaded_names: list[str] = []
for _sname in _skill_names:
_loaded = _load_skill_payload(_sname, task_id=_quick_key)
if _loaded:
_loaded_skill, _skill_dir, _display_name = _loaded
_note = (
f'[IMPORTANT: The "{_display_name}" skill is auto-loaded. '
f"Follow its instructions for this session.]"
)
_part = _build_skill_message(_loaded_skill, _skill_dir, _note)
if _part:
_combined_parts.append(_part)
_loaded_names.append(_sname)
else:
logger.warning("[Gateway] Auto-skill '%s' not found", _sname)
if _combined_parts:
_combined_parts.append(event.text)
event.text = "\n\n".join(_combined_parts)
logger.info(
"[Gateway] Auto-loaded skill(s) %s for session %s",
_loaded_names, session_key,
)
except Exception as e:
logger.warning("[Gateway] Failed to auto-load skill(s) %s: %s", _skill_names, e)
history = self.session_store.load_transcript(session_entry.session_id)
if history and len(history) >= 4:
from agent.model_metadata import (
estimate_messages_tokens_rough,
get_model_context_length,
)
_hyg_model = "anthropic/claude-sonnet-4.6"
_hyg_threshold_pct = 0.85
_hyg_compression_enabled = True
_hyg_hard_msg_limit = 400
_hyg_config_context_length = None
_hyg_provider = None
_hyg_base_url = None
_hyg_api_key = None
_hyg_data = {}
try:
_hyg_data = _load_gateway_config()
if _hyg_data:
_model_cfg = _hyg_data.get("model", {})
if isinstance(_model_cfg, str):
_hyg_model = _model_cfg
elif isinstance(_model_cfg, dict):
_hyg_model = _model_cfg.get("default") or _model_cfg.get("model") or _hyg_model
_raw_ctx = _model_cfg.get("context_length")
if _raw_ctx is not None:
try:
_hyg_config_context_length = int(_raw_ctx)
except (TypeError, ValueError):
pass
_hyg_provider = _model_cfg.get("provider") or None
_hyg_base_url = _model_cfg.get("base_url") or None
_comp_cfg = _hyg_data.get("compression", {})
if isinstance(_comp_cfg, dict):
_hyg_compression_enabled = str(
_comp_cfg.get("enabled", True)
).lower() in {"true", "1", "yes"}
_raw_hard_limit = _comp_cfg.get("hygiene_hard_message_limit")
if _raw_hard_limit is not None:
try:
_parsed = int(_raw_hard_limit)
if _parsed > 0:
_hyg_hard_msg_limit = _parsed
except (TypeError, ValueError):
pass
try:
_hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=_hyg_data if isinstance(_hyg_data, dict) else None,
)
_hyg_provider = _hyg_runtime.get("provider") or _hyg_provider
_hyg_base_url = _hyg_runtime.get("base_url") or _hyg_base_url
_hyg_api_key = _hyg_runtime.get("api_key") or _hyg_api_key
except Exception:
pass
if _hyg_config_context_length is None and _hyg_base_url:
try:
try:
from hermes_cli.config import get_compatible_custom_providers as _gw_gcp
_hyg_custom_providers = _gw_gcp(_hyg_data)
except Exception:
_hyg_custom_providers = _hyg_data.get("custom_providers")
if not isinstance(_hyg_custom_providers, list):
_hyg_custom_providers = []
for _cp in _hyg_custom_providers:
if not isinstance(_cp, dict):
continue
_cp_url = (_cp.get("base_url") or "").rstrip("/")
if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
_cp_models = _cp.get("models", {})
if isinstance(_cp_models, dict):
_cp_model_cfg = _cp_models.get(_hyg_model, {})
if isinstance(_cp_model_cfg, dict):
_cp_ctx = _cp_model_cfg.get("context_length")
if _cp_ctx is not None:
_hyg_config_context_length = int(_cp_ctx)
break
except (TypeError, ValueError):
pass
except Exception:
pass
if _hyg_compression_enabled:
_hyg_context_length = get_model_context_length(
_hyg_model,
base_url=_hyg_base_url or "",
api_key=_hyg_api_key or "",
config_context_length=_hyg_config_context_length,
provider=_hyg_provider or "",
)
_compress_token_threshold = int(
_hyg_context_length * _hyg_threshold_pct
)
_warn_token_threshold = int(_hyg_context_length * 0.95)
_msg_count = len(history)
_stored_tokens = session_entry.last_prompt_tokens
if _stored_tokens > 0:
_approx_tokens = _stored_tokens
_token_source = "actual"
else:
_approx_tokens = estimate_messages_tokens_rough(history)
_token_source = "estimated"
_HARD_MSG_LIMIT = _hyg_hard_msg_limit
_needs_compress = (
_approx_tokens >= _compress_token_threshold
or _msg_count >= _HARD_MSG_LIMIT
)
if _needs_compress:
logger.info(
"Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
"(threshold: %s%% of %s = %s tokens)",
_msg_count, f"{_approx_tokens:,}", _token_source,
int(_hyg_threshold_pct * 100),
f"{_hyg_context_length:,}",
f"{_compress_token_threshold:,}",
)
_hyg_meta = self._thread_metadata_for_source(source, self._reply_anchor_for_event(event))
try:
from run_agent import AIAgent
_hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=_hyg_data if isinstance(_hyg_data, dict) else None,
)
if _hyg_runtime.get("api_key"):
_hyg_msgs = [
{"role": m.get("role"), "content": m.get("content")}
for m in history
if m.get("role") in {"user", "assistant"}
and m.get("content")
]
if len(_hyg_msgs) >= 4:
_hyg_agent = AIAgent(
**_hyg_runtime,
model=_hyg_model,
max_iterations=4,
quiet_mode=True,
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
try:
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
_hyg_msgs, "",
approx_tokens=_approx_tokens,
),
)
_hyg_new_sid = _hyg_agent.session_id
if _hyg_new_sid != session_entry.session_id:
session_entry.session_id = _hyg_new_sid
self.session_store._save()
self.session_store.rewrite_transcript(
session_entry.session_id, _compressed
)
session_entry.last_prompt_tokens = 0
history = _compressed
_new_count = len(_compressed)
_new_tokens = estimate_messages_tokens_rough(
_compressed
)
logger.info(
"Session hygiene: compressed %s → %s msgs, "
"~%s → ~%s tokens",
_msg_count, _new_count,
f"{_approx_tokens:,}", f"{_new_tokens:,}",
)
if _new_tokens >= _warn_token_threshold:
logger.warning(
"Session hygiene: still ~%s tokens after "
"compression",
f"{_new_tokens:,}",
)
_comp = getattr(_hyg_agent, "context_compressor", None)
if _comp is not None and getattr(_comp, "_last_compress_aborted", False):
_err = getattr(_comp, "_last_summary_error", None) or "unknown error"
_warn_msg = (
"⚠️ Context compression aborted "
f"({_err}). No messages were dropped — "
"conversation is unchanged. Run /compress "
"to retry, /reset for a clean session, or "
"check your auxiliary.compression model "
"configuration."
)
try:
_adapter = self.adapters.get(source.platform)
if _adapter and source.chat_id:
await _adapter.send(source.chat_id, _warn_msg, metadata=_hyg_meta)
except Exception as _werr:
logger.warning(
"Failed to deliver compression-failure warning to user: %s",
_werr,
)
elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None):
_aux_model = getattr(_comp, "_last_aux_model_failure_model", "")
_aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error"
_aux_msg = (
f"ℹ️ Configured compression model `{_aux_model}` "
f"failed ({_aux_err}). Recovered using your main "
"model — context is intact — but you may want to "
"check `auxiliary.compression.model` in config.yaml."
)
try:
_adapter = self.adapters.get(source.platform)
if _adapter and source.chat_id:
await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta)
except Exception as _werr:
logger.warning(
"Failed to deliver aux-model-fallback notice to user: %s",
_werr,
)
finally:
self._evict_cached_agent(session_key)
self._cleanup_agent_resources(_hyg_agent)
except Exception as e:
logger.warning(
"Session hygiene auto-compress failed: %s", e
)
if not history and not self.session_store.has_any_sessions():
context_prompt += (
"\n\n[System note: This is the user's very first message ever. "
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
if not history and source.platform and source.platform != Platform.LOCAL and source.platform != Platform.WEBHOOK:
platform_name = source.platform.value
env_key = _home_target_env_var(platform_name)
if not os.getenv(env_key):
sethome_cmd = (
"/hermes sethome"
if source.platform == Platform.SLACK
else "/sethome"
)
notice = (
f"📬 No home channel is set for {platform_name.title()}. "
f"A home channel is where Hermes delivers cron job results "
f"and cross-platform messages.\n\n"
f"Type {sethome_cmd} to make this chat your home channel, "
f"or ignore to skip."
)
await self._deliver_platform_notice(source, notice)
if source.platform == Platform.DISCORD:
adapter = self.adapters.get(Platform.DISCORD)
guild_id = self._get_guild_id(event)
if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
vc_context = adapter.get_voice_channel_context(guild_id)
if vc_context:
context_prompt += f"\n\n{vc_context}"
message_text = await self._prepare_inbound_message_text(
event=event,
source=source,
history=history,
)
if message_text is None:
return
self._bind_adapter_run_generation(
self.adapters.get(source.platform),
session_key,
run_generation,
)
try:
hook_ctx = {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"chat_id": source.chat_id or "",
"session_id": session_entry.session_id,
"message": message_text[:500],
}
await self.hooks.emit("agent:start", hook_ctx)
agent_result = await self._run_agent(
message=message_text,
context_prompt=context_prompt,
history=history,
source=source,
session_id=session_entry.session_id,
session_key=session_key,
run_generation=run_generation,
event_message_id=self._reply_anchor_for_event(event),
channel_prompt=event.channel_prompt,
)
try:
_typing_adapter = self.adapters.get(source.platform)
if _typing_adapter and hasattr(_typing_adapter, "stop_typing"):
await _typing_adapter.stop_typing(source.chat_id)
except Exception:
pass
if not self._is_session_run_current(_quick_key, run_generation):
logger.info(
"Discarding stale agent result for %s — generation %d is no longer current",
_quick_key or "?",
run_generation,
)
_stale_adapter = self.adapters.get(source.platform)
if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None:
_stale_adapter.pop_post_delivery_callback(
_quick_key,
generation=run_generation,
)
elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
_stale_adapter._post_delivery_callbacks.pop(_quick_key, None)
return None
response = agent_result.get("final_response") or ""
if response == "(empty)":
response = (
"⚠️ The model returned no response after processing tool "
"results. This can happen with some models — try again or "
"rephrase your question."
)
agent_messages = agent_result.get("messages", [])
_response_time = time.time() - _msg_start_time
_api_calls = agent_result.get("api_calls", 0)
_resp_len = len(response)
logger.info(
"response ready: platform=%s chat=%s time=%.1fs api_calls=%d response=%d chars",
_platform_name, source.chat_id or "unknown",
_response_time, _api_calls, _resp_len,
)
if session_key and _should_clear_resume_pending_after_turn(agent_result):
self._clear_restart_failure_count(session_key)
try:
self.session_store.clear_resume_pending(session_key)
except Exception as _e:
logger.debug(
"clear_resume_pending failed for %s: %s",
session_key, _e,
)
response = _normalize_empty_agent_response(
agent_result, response, history_len=len(history),
)
response = _sanitize_gateway_final_response(source.platform, response)
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
session_entry.session_id = agent_result["session_id"]
try:
from gateway.display_config import resolve_display_setting as _rds
_show_reasoning_effective = _rds(
_load_gateway_config(),
_platform_config_key(source.platform),
"show_reasoning",
getattr(self, "_show_reasoning", False),
)
except Exception:
_show_reasoning_effective = getattr(self, "_show_reasoning", False)
if _show_reasoning_effective and response:
last_reasoning = agent_result.get("last_reasoning")
if last_reasoning:
lines = last_reasoning.strip().splitlines()
if len(lines) > 15:
display_reasoning = "\n".join(lines[:15])
display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
else:
display_reasoning = last_reasoning.strip()
response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
_footer_line = ""
try:
from gateway.runtime_footer import build_footer_line as _bfl
_footer_line = _bfl(
user_config=_load_gateway_config(),
platform_key=_platform_config_key(source.platform),
model=agent_result.get("model"),
context_tokens=agent_result.get("last_prompt_tokens", 0) or 0,
context_length=agent_result.get("context_length") or None,
cwd=os.environ.get("TERMINAL_CWD", ""),
)
except Exception as _footer_err:
logger.debug("runtime_footer build failed: %s", _footer_err)
_footer_line = ""
if _footer_line and response and not agent_result.get("already_sent"):
response = f"{response}\n\n{_footer_line}"
await self.hooks.emit("agent:end", {
**hook_ctx,
"response": (response or "")[:500],
})
try:
from tools.process_registry import process_registry
while process_registry.pending_watchers:
watcher = process_registry.pending_watchers.pop(0)
asyncio.create_task(self._run_process_watcher(watcher))
except Exception as e:
logger.error("Process watcher setup error: %s", e)
try:
from tools.process_registry import process_registry as _pr
_watch_events = []
while not _pr.completion_queue.empty():
evt = _pr.completion_queue.get_nowait()
evt_type = evt.get("type", "completion")
if evt_type in {"watch_match", "watch_disabled"}:
_watch_events.append(evt)
for evt in _watch_events:
synth_text = _format_gateway_process_notification(evt)
if synth_text:
try:
await self._inject_watch_notification(synth_text, evt)
except Exception as e2:
logger.error("Watch notification injection error: %s", e2)
except Exception as e:
logger.debug("Watch queue drain error: %s", e)
agent_failed_early = bool(agent_result.get("failed"))
_err_str_for_classify = str(agent_result.get("error", "")).lower()
is_context_overflow_failure = agent_failed_early and (
bool(agent_result.get("compression_exhausted"))
or any(p in _err_str_for_classify for p in (
"context length", "context size", "context window",
"maximum context", "token limit", "too many tokens",
"reduce the length", "exceeds the limit",
"request entity too large", "prompt is too long",
"payload too large", "input is too long",
))
or ("400" in _err_str_for_classify and len(history) > 50)
)
if is_context_overflow_failure:
logger.info(
"Skipping transcript persistence for context-overflow "
"failure in session %s to prevent session growth loop.",
session_entry.session_id,
)
elif agent_failed_early:
logger.info(
"Transient agent failure in session %s — persisting user "
"message so conversation context is preserved on retry.",
session_entry.session_id,
)
if agent_result.get("compression_exhausted") and session_entry and session_key:
logger.info(
"Auto-resetting session %s after compression exhaustion.",
session_entry.session_id,
)
self.session_store.reset_session(session_key)
self._evict_cached_agent(session_key)
self._session_model_overrides.pop(session_key, None)
self._set_session_reasoning_override(session_key, None)
if hasattr(self, "_pending_model_notes"):
self._pending_model_notes.pop(session_key, None)
response = (response or "") + (
"\n\n🔄 Session auto-reset — the conversation exceeded the "
"maximum context size and could not be compressed further. "
"Your next message will start a fresh session."
)
ts = datetime.now().isoformat()
if is_context_overflow_failure:
pass
elif not history:
tool_defs = agent_result.get("tools", [])
self.session_store.append_to_transcript(
session_entry.session_id,
{
"role": "session_meta",
"tools": tool_defs or [],
"model": _resolve_gateway_model(),
"platform": source.platform.value if source.platform else "",
"timestamp": ts,
}
)
if is_context_overflow_failure:
pass
elif agent_failed_early:
_user_entry = {"role": "user", "content": message_text, "timestamp": ts}
if event.message_id:
_user_entry["message_id"] = str(event.message_id)
self.session_store.append_to_transcript(
session_entry.session_id,
_user_entry,
)
else:
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
if not new_messages:
_user_entry = {"role": "user", "content": message_text, "timestamp": ts}
if event.message_id:
_user_entry["message_id"] = str(event.message_id)
self.session_store.append_to_transcript(
session_entry.session_id,
_user_entry,
)
if response:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
agent_persisted = self._session_db is not None
_user_msg_id_attached = False
for msg in new_messages:
if msg.get("role") == "system":
continue
entry = {**msg, "timestamp": ts}
if (
not _user_msg_id_attached
and msg.get("role") == "user"
and event.message_id
and "message_id" not in entry
):
entry["message_id"] = str(event.message_id)
_user_msg_id_attached = True
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
)
self.session_store.update_session(
session_entry.session_key,
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
)
_already_sent = bool(agent_result.get("already_sent"))
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
await self._send_voice_reply(event, response)
if agent_result.get("already_sent") and not agent_result.get("failed"):
if response:
_media_adapter = self.adapters.get(source.platform)
if _media_adapter:
await self._deliver_media_from_response(
response, event, _media_adapter,
)
if _footer_line:
try:
_foot_adapter = self.adapters.get(source.platform)
if _foot_adapter:
await _foot_adapter.send(
source.chat_id,
_footer_line,
metadata=self._thread_metadata_for_source(source, self._reply_anchor_for_event(event)),
)
except Exception as _e:
logger.debug("trailing footer send failed: %s", _e)
return None
return response
except Exception as e:
try:
_err_adapter = self.adapters.get(source.platform)
if _err_adapter and hasattr(_err_adapter, "stop_typing"):
await _err_adapter.stop_typing(source.chat_id)
except Exception:
pass
logger.exception("Agent error in session %s", session_key)
error_type = type(e).__name__
error_detail = str(e)[:300] if str(e) else "no details available"
status_hint = ""
status_code = getattr(e, "status_code", None)
_hist_len = len(history) if 'history' in locals() else 0
if status_code == 401:
status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
elif status_code == 402:
status_hint = " Your API balance or quota is exhausted. Check your provider dashboard."
elif status_code == 429:
_err_body = getattr(e, "response", None)
_err_json = {}
try:
if _err_body is not None:
_err_json = _err_body.json().get("error", {})
if not isinstance(_err_json, dict):
_err_json = {}
except Exception:
pass
if _err_json.get("type") == "usage_limit_reached":
_resets_in = _err_json.get("resets_in_seconds")
if _resets_in and _resets_in > 0:
import math
_hours = math.ceil(_resets_in / 3600)
status_hint = f" Your plan's usage limit has been reached. It resets in ~{_hours}h."
else:
status_hint = " Your plan's usage limit has been reached. Please wait until it resets."
else:
status_hint = " You are being rate-limited. Please wait a moment and try again."
elif status_code == 529:
status_hint = " The API is temporarily overloaded. Please try again shortly."
elif status_code in {400, 500}:
if _hist_len > 50:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
elif status_code == 400:
status_hint = " The request was rejected by the API."
return (
f"Sorry, I encountered an error ({error_type}).\n"
f"{error_detail}\n"
f"{status_hint}"
"Try again or use /reset to start a fresh session."
)
finally:
self._clear_session_env(_session_env_tokens)
def _format_session_info(self) -> str:
    """Resolve current model config and return a formatted info block.

    Surfaces model, provider, context length, and endpoint so gateway
    users can immediately see if context detection went wrong (e.g.
    local models falling to the 128K default).

    Returns:
        Newline-joined "◆ ..." lines for model, provider and context
        length, plus an endpoint line when ``base_url`` contains a
        localhost-style address.
    """
    from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT
    model = _resolve_gateway_model()
    config_context_length = None
    provider = None
    base_url = None
    api_key = None
    custom_provs = None
    data = None
    # Step 1: explicit overrides from the ``model`` section of the gateway
    # config. Any failure here is swallowed so the info block still renders.
    try:
        data = _load_gateway_config()
        if data:
            model_cfg = data.get("model", {})
            if isinstance(model_cfg, dict):
                raw_ctx = model_cfg.get("context_length")
                if raw_ctx is not None:
                    try:
                        config_context_length = int(raw_ctx)
                    except (TypeError, ValueError):
                        # Non-numeric value: treat as "not configured".
                        pass
                provider = model_cfg.get("provider") or None
                base_url = model_cfg.get("base_url") or None
            try:
                from hermes_cli.config import get_compatible_custom_providers
                custom_provs = get_compatible_custom_providers(data)
            except Exception:
                # Fall back to the raw config value when the helper is
                # unavailable or fails.
                custom_provs = data.get("custom_providers")
    except Exception:
        pass
    # Step 2: no model-level context length -> look for one on a
    # ``custom_providers`` entry that names this model.
    if config_context_length is None and data:
        try:
            custom_providers = data.get("custom_providers", [])
            if custom_providers:
                for cp in custom_providers:
                    if not isinstance(cp, dict):
                        continue
                    cp_model = cp.get("model") or ""
                    cp_models = cp.get("models") or {}
                    # Provider-level context_length applies when the
                    # provider's single ``model`` matches.
                    if cp_model and cp_model == model:
                        raw_cp_ctx = cp.get("context_length")
                        if raw_cp_ctx is not None:
                            try:
                                config_context_length = int(raw_cp_ctx)
                                break
                            except (TypeError, ValueError):
                                pass
                    # Per-model entry under ``models``: either a dict with
                    # ``context_length`` or the bare numeric value itself.
                    if isinstance(cp_models, dict):
                        model_entry = cp_models.get(model)
                        if isinstance(model_entry, dict):
                            model_ctx = model_entry.get("context_length")
                        else:
                            model_ctx = model_entry
                        if model_ctx is not None and isinstance(model_ctx, (int, float)):
                            try:
                                config_context_length = int(model_ctx)
                                break
                            except (TypeError, ValueError):
                                pass
        except Exception:
            pass
    # Step 3: runtime kwargs only fill provider/base_url when the config did
    # not set them; the API key always comes from the runtime resolution.
    try:
        runtime = _resolve_runtime_agent_kwargs()
        provider = provider or runtime.get("provider")
        base_url = base_url or runtime.get("base_url")
        api_key = runtime.get("api_key")
    except Exception:
        pass
    context_length = get_model_context_length(
        model,
        base_url=base_url or "",
        api_key=api_key or "",
        config_context_length=config_context_length,
        provider=provider or "",
        custom_providers=custom_provs,
    )
    # Label where the context length came from so users can spot a fallback.
    if config_context_length is not None:
        ctx_source = "config"
    elif context_length == DEFAULT_FALLBACK_CONTEXT:
        ctx_source = "default — set model.context_length in config to override"
    else:
        ctx_source = "detected"
    # Human-readable size: "1.0M", "128K", or the raw number below 1000.
    if context_length >= 1_000_000:
        ctx_display = f"{context_length / 1_000_000:.1f}M"
    elif context_length >= 1_000:
        ctx_display = f"{context_length // 1_000}K"
    else:
        ctx_display = str(context_length)
    lines = [
        f"◆ Model: `{model}`",
        f"◆ Provider: {provider or 'openrouter'}",
        f"◆ Context: {ctx_display} tokens ({ctx_source})",
    ]
    # The endpoint is only shown for local-looking base URLs.
    if base_url and ("localhost" in base_url or "127.0.0.1" in base_url or "0.0.0.0" in base_url):
        lines.append(f"◆ Endpoint: {base_url}")
    return "\n".join(lines)
async def _handle_reset_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
    """Handle /new or /reset command.

    Tears down per-session state for the source's session key (cached
    agent, queued events, model/reasoning overrides), resets the session
    in the session store, fires the finalize/end/reset hooks, optionally
    applies a title taken from the command arguments, and returns an
    ``EphemeralReply`` with a header, the session info block (when it
    could be built) and a tip line.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    self._invalidate_session_run_generation(session_key, reason="session_reset")
    # Captured before the reset so the finalize hook can report the old id.
    old_entry = self.session_store._entries.get(session_key)
    # getattr guard: the lock attribute may be absent on this instance.
    _cache_lock = getattr(self, "_agent_cache_lock", None)
    if _cache_lock is not None:
        with _cache_lock:
            _cached = self._agent_cache.get(session_key)
            # Cache entries are either a tuple whose first item is the agent
            # or the agent object itself.
            _old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
        if _old_agent is not None:
            self._cleanup_agent_resources(_old_agent)
    self._evict_cached_agent(session_key)
    # Drop any event queued for this session.
    _qe = getattr(self, "_queued_events", None)
    if _qe is not None:
        _qe.pop(session_key, None)
    # Best-effort clearing of env passthrough and credential-file state;
    # failures (including missing modules) are ignored.
    try:
        from tools.env_passthrough import clear_env_passthrough
        clear_env_passthrough()
    except Exception:
        pass
    try:
        from tools.credential_files import clear_credential_files
        clear_credential_files()
    except Exception:
        pass
    new_entry = self.session_store.reset_session(session_key)
    self._session_model_overrides.pop(session_key, None)
    self._set_session_reasoning_override(session_key, None)
    if hasattr(self, "_pending_model_notes"):
        self._pending_model_notes.pop(session_key, None)
    self._clear_session_boundary_security_state(session_key)
    # Plugin hook for the session that just ended (old session id).
    try:
        from hermes_cli.plugins import invoke_hook as _invoke_hook
        _old_sid = old_entry.session_id if old_entry else None
        _invoke_hook("on_session_finalize", session_id=_old_sid,
                     platform=source.platform.value if source.platform else "")
    except Exception:
        pass
    await self.hooks.emit("session:end", {
        "platform": source.platform.value if source.platform else "",
        "user_id": source.user_id,
        "session_key": session_key,
    })
    await self.hooks.emit("session:reset", {
        "platform": source.platform.value if source.platform else "",
        "user_id": source.user_id,
        "session_key": session_key,
    })
    try:
        session_info = self._format_session_info()
    except Exception:
        session_info = ""
    if new_entry:
        header = self._telegram_topic_new_header(source) or t("gateway.reset.header_default")
    else:
        # reset_session returned nothing: force-create a fresh session.
        new_entry = self.session_store.get_or_create_session(source, force_new=True)
        header = self._telegram_topic_new_header(source) or t("gateway.reset.header_new")
    # Optional title passed as the command argument.
    _title_arg = event.get_command_args().strip()
    _title_note = ""
    if _title_arg and self._session_db and new_entry:
        from hermes_state import SessionDB
        try:
            sanitized = SessionDB.sanitize_title(_title_arg)
        except ValueError as e:
            sanitized = None
            _title_note = t("gateway.reset.title_rejected", error=str(e))
        if sanitized:
            try:
                self._session_db.set_session_title(new_entry.session_id, sanitized)
                header = t("gateway.reset.header_titled", title=sanitized)
            except ValueError as e:
                _title_note = t("gateway.reset.title_error_untitled", error=str(e))
            except Exception:
                # Other failures leave the session untitled without a note.
                pass
        elif not _title_note:
            # Sanitizing produced an empty title without raising.
            _title_note = t("gateway.reset.title_empty_untitled")
        header = header + _title_note
    if self._is_telegram_topic_lane(source) and new_entry is not None:
        try:
            self._record_telegram_topic_binding(source, new_entry)
        except Exception:
            logger.debug("Failed to rebind Telegram topic after /new", exc_info=True)
    # Plugin hook for the newly created session (new session id).
    try:
        from hermes_cli.plugins import invoke_hook as _invoke_hook
        _new_sid = new_entry.session_id if new_entry else None
        _invoke_hook("on_session_reset", session_id=_new_sid,
                     platform=source.platform.value if source.platform else "")
    except Exception:
        pass
    try:
        from hermes_cli.tips import get_random_tip
        _tip_line = t("gateway.reset.tip", tip=get_random_tip())
    except Exception:
        _tip_line = ""
    if session_info:
        return EphemeralReply(f"{header}\n\n{session_info}{_tip_line}")
    return EphemeralReply(f"{header}{_tip_line}")
async def _handle_profile_command(self, event: MessageEvent) -> str:
    """Handle /profile: report the active profile and its home directory.

    Returns:
        Two translated lines (profile header, then home path) joined by a
        newline.
    """
    from hermes_constants import display_hermes_home
    from hermes_cli.profiles import get_active_profile_name

    home_display = display_hermes_home()
    active_profile = get_active_profile_name()
    header_line = t("gateway.profile.header", profile=active_profile)
    home_line = t("gateway.profile.home", home=home_display)
    return "\n".join((header_line, home_line))
def _check_slash_access(
    self, source: SessionSource, canonical_cmd: str
) -> Optional[str]:
    """Gate a slash command: return a denial message, or None when allowed.

    Shared by the cold and running-agent dispatch paths in
    ``_handle_message`` so an in-flight agent cannot be used to bypass
    admin/user gating.

    The backward-compatible defaults live in
    :func:`gateway.slash_access.policy_for_source`: when the operator has
    not set ``allow_admin_from`` for the scope, the policy comes back with
    ``enabled=False`` and this method always returns None.

    Args:
        source: Origin of the command (platform + user).
        canonical_cmd: Canonical command name without the leading slash;
            an empty value is never denied.

    Returns:
        A user-facing "admin-only" message, or None if the command may run.
    """
    from gateway.slash_access import policy_for_source as _policy_for_source

    if not canonical_cmd:
        return None

    access = _policy_for_source(self.config, source)
    if not access.enabled:
        return None
    if access.can_run(source.user_id, canonical_cmd):
        return None

    platform_label = source.platform.value if source.platform else "?"
    logger.info(
        "Slash command /%s denied for %s:%s (not admin, not in user_allowed_commands)",
        canonical_cmd,
        platform_label,
        source.user_id,
    )

    permitted = sorted(access.user_allowed_commands)
    if not permitted:
        suffix = (
            "No slash commands are enabled for non-admins on this "
            "platform. Ask an admin to add you to allow_admin_from "
            "or to set user_allowed_commands."
        )
    else:
        # Preview at most 12 commands; an ellipsis marks a longer list.
        shown = ", ".join(f"/{name}" for name in permitted[:12])
        overflow = "…" if len(permitted) > 12 else ""
        suffix = "You can run: " + shown + overflow + ". Use /whoami for the full list."
    return f"⛔ /{canonical_cmd} is admin-only here. {suffix}"
async def _handle_whoami_command(self, event: MessageEvent) -> str:
    """Handle /whoami: describe the caller's slash-command access here.

    Always works (it is in the always-allowed floor of slash_access).
    The reply names the platform, the scope (DM vs group/channel), the
    caller's tier (unrestricted / admin / user) and, for plain users, the
    slash commands they can actually run on this scope.
    """
    from gateway.slash_access import policy_for_source as _policy_for_source

    src = event.source
    access = _policy_for_source(self.config, src)

    platform = src.platform.value if src and src.platform else "?"
    chat_type = (src.chat_type if src else "") or "dm"
    if chat_type.lower() in {"dm", "direct", "private", ""}:
        scope = "DM"
    else:
        scope = "group/channel"
    user_id = (src.user_id if src else None) or "?"

    # The first two lines are identical for every tier.
    identity = f"**You** — {platform} ({scope})\nUser ID: `{user_id}`\n"

    if not access.enabled:
        return (
            identity
            + "Tier: unrestricted (no admin list configured for this scope)\n"
            + "Slash commands: all available"
        )
    if access.is_admin(user_id):
        return identity + "Tier: **admin**\n" + "Slash commands: all available"

    # Always-allowed floor first, then the configured commands, de-duplicated
    # while keeping first-seen order.
    candidates = ["help", "whoami", *sorted(access.user_allowed_commands)]
    runnable = list(dict.fromkeys(candidates))
    if runnable:
        runnable_str = ", ".join(f"/{name}" for name in runnable)
    else:
        runnable_str = "(none)"
    return identity + "Tier: user\n" + f"Slash commands you can run: {runnable_str}"
async def _handle_kanban_command(self, event: MessageEvent) -> str:
    """Handle /kanban — delegate to the shared kanban CLI.

    Run the potentially-blocking DB work in a thread pool so the
    gateway event loop stays responsive. Read operations (list,
    show, context, tail) are permitted while an agent is running;
    mutations are allowed too because the board is profile-agnostic
    and does not touch the running agent's state.

    For ``/kanban create`` invocations we also auto-subscribe the
    originating gateway source (platform + chat + thread) to the new
    task's terminal events, so the user hears back when the worker
    completes / blocks / auto-blocks / crashes without having to poll.

    Args:
        event: Incoming message; ``event.text`` holds the raw command and
            ``event.source`` identifies where to send notifications.

    Returns:
        The CLI output (truncated past 3800 characters), a translated
        error message if the CLI call raised, or a "no output" message.
    """
    import asyncio
    import re
    import shlex
    from hermes_cli.kanban import run_slash

    text = (event.text or "").strip()
    # Strip the leading "/kanban" (with or without slash), keep the rest.
    if text.startswith("/"):
        text = text.lstrip("/")
    if text.startswith("kanban"):
        text = text[len("kanban"):].lstrip()

    # Tokens are only used to sniff the action and --board value. Unbalanced
    # quotes make shlex raise ValueError; fall back to a plain whitespace
    # split so the handler does not crash and run_slash reports the problem.
    try:
        tokens = shlex.split(text) if text else []
    except ValueError:
        tokens = text.split()

    requested_board = None
    action = None
    i = 0
    while i < len(tokens):
        tok = tokens[i]
        if tok == "--board":
            if i + 1 >= len(tokens):
                # Dangling --board with no value: no action can follow.
                break
            requested_board = tokens[i + 1]
            i += 2
            continue
        if tok.startswith("--board="):
            requested_board = tok.split("=", 1)[1]
            i += 1
            continue
        # First token that is not a --board option is the action.
        action = tok
        break
    is_create = action == "create"

    try:
        output = await asyncio.to_thread(run_slash, text)
    except Exception as exc:
        return t("gateway.kanban.error_prefix", error=exc)

    if is_create and output:
        m = re.search(r"Created\s+(t_[0-9a-f]+)\b", output)
        if m:
            task_id = m.group(1)
            # Auto-subscribe is best-effort: a failure is logged and the
            # create output is still returned.
            try:
                source = event.source
                platform = getattr(source, "platform", None)
                platform_str = (
                    platform.value if hasattr(platform, "value") else str(platform or "")
                ).lower()
                chat_id = str(getattr(source, "chat_id", "") or "")
                thread_id = str(getattr(source, "thread_id", "") or "")
                user_id = str(getattr(source, "user_id", "") or "") or None
                if platform_str and chat_id:
                    def _sub():
                        from hermes_cli import kanban_db as _kb
                        conn = _kb.connect(board=requested_board)
                        try:
                            _kb.add_notify_sub(
                                conn, task_id=task_id,
                                platform=platform_str, chat_id=chat_id,
                                thread_id=thread_id or None,
                                user_id=user_id,
                                notifier_profile=getattr(self, "_kanban_notifier_profile", None) or self._active_profile_name(),
                            )
                        finally:
                            conn.close()
                    await asyncio.to_thread(_sub)
                    output = (
                        output.rstrip()
                        + "\n"
                        + t("gateway.kanban.subscribed_suffix", task_id=task_id)
                    )
            except Exception as exc:
                logger.warning("kanban create auto-subscribe failed: %s", exc)

    # ``output`` may be empty/None (handled by the fallback below), so only
    # measure it when there is something to truncate.
    if output and len(output) > 3800:
        output = output[:3800] + "\n" + t("gateway.kanban.truncated_suffix")
    return output or t("gateway.kanban.no_output")
async def _handle_status_command(self, event: MessageEvent) -> str:
    """Handle /status: summarize the current session and gateway state.

    The reply lists the session id, optional title, creation and
    last-activity timestamps, the total token count read from the session
    DB, whether an agent is running, the queue depth (when non-zero) and
    the connected platforms, followed by a session recap when one can be
    built.
    """
    src = event.source
    entry = self.session_store.get_or_create_session(src)
    platform_names = [p.value for p in self.adapters.keys()]
    key = entry.session_key
    agent_active = key in self._running_agents
    adapter = self.adapters.get(src.platform) if src else None
    pending = self._queue_depth(key, adapter=adapter)

    title = None
    total_tokens = 0
    if self._session_db:
        try:
            title = self._session_db.get_session_title(entry.session_id)
        except Exception:
            title = None
        try:
            row = self._session_db.get_session(entry.session_id)
            if row:
                token_columns = (
                    "input_tokens",
                    "output_tokens",
                    "cache_read_tokens",
                    "cache_write_tokens",
                    "reasoning_tokens",
                )
                # Missing or NULL columns count as zero.
                total_tokens = sum((row.get(col) or 0) for col in token_columns)
        except Exception:
            total_tokens = 0

    lines = [
        t("gateway.status.header"),
        "",
        t("gateway.status.session_id", session_id=entry.session_id),
    ]
    if title:
        lines.append(t("gateway.status.title", title=title))
    lines.append(t("gateway.status.created", timestamp=entry.created_at.strftime('%Y-%m-%d %H:%M')))
    lines.append(t("gateway.status.last_activity", timestamp=entry.updated_at.strftime('%Y-%m-%d %H:%M')))
    lines.append(t("gateway.status.tokens", tokens=f"{total_tokens:,}"))
    state_label = t("gateway.status.state_yes") if agent_active else t("gateway.status.state_no")
    lines.append(t("gateway.status.agent_running", state=state_label))
    if pending:
        lines.append(t("gateway.status.queued", count=pending))
    lines += ["", t("gateway.status.platforms", platforms=', '.join(platform_names))]

    # The recap is optional: any failure is logged at debug level and the
    # status block is returned without it.
    try:
        from hermes_cli.session_recap import build_recap
        transcript = self.session_store.load_transcript(entry.session_id)
        recap = build_recap(
            transcript,
            session_title=title,
            session_id=entry.session_id,
            platform=src.platform.value if src else None,
        )
        if recap:
            lines += ["", recap]
    except Exception as exc:
        logger.debug("build_recap failed in /status: %s", exc)
    return "\n".join(lines)
async def _handle_agents_command(self, event: MessageEvent) -> str:
    """Handle /agents command - list active agents and running tasks.

    Produces three sections: running agents (longest-running first, at
    most 12 shown), running processes from the process registry (at most
    12 shown) and a count of unfinished background tasks.
    """
    from tools.process_registry import format_uptime_short, process_registry

    now = time.time()
    this_chat_key = self._session_key_for_source(event.source)
    agents_by_key: dict = getattr(self, "_running_agents", {}) or {}
    started_by_key: dict = getattr(self, "_running_agents_ts", {}) or {}

    def _describe(key, agent) -> dict:
        # A pending sentinel has no session id/model to report yet.
        pending = agent is _AGENT_PENDING_SENTINEL
        began = float(started_by_key.get(key, now))
        return {
            "session_key": key,
            "elapsed": max(0, int(now - began)),
            "state": t("gateway.agents.state_starting") if pending else t("gateway.agents.state_running"),
            "session_id": "" if pending else str(getattr(agent, "session_id", "") or ""),
            "model": "" if pending else str(getattr(agent, "model", "") or ""),
        }

    agent_rows: list[dict] = sorted(
        (_describe(key, agent) for key, agent in agents_by_key.items()),
        key=lambda row: row["elapsed"],
        reverse=True,
    )

    running_processes: list[dict] = []
    try:
        running_processes = [
            proc for proc in process_registry.list_sessions()
            if proc.get("status") == "running"
        ]
    except Exception:
        running_processes = []

    # Only objects exposing done() that have not finished are counted.
    background_tasks = [
        task for task in (getattr(self, "_background_tasks", set()) or set())
        if hasattr(task, "done") and not task.done()
    ]

    lines = [
        t("gateway.agents.header"),
        "",
        t("gateway.agents.active_agents", count=len(agent_rows)),
    ]
    for idx, row in enumerate(agent_rows[:12], 1):
        marker = t("gateway.agents.this_chat") if row["session_key"] == this_chat_key else ""
        sid_part = f" · `{row['session_id']}`" if row["session_id"] else ""
        model_part = f" · `{row['model']}`" if row["model"] else ""
        lines.append(
            f"{idx}. `{row['session_key']}` · {row['state']} · "
            f"{format_uptime_short(row['elapsed'])}{sid_part}{model_part}{marker}"
        )
    if len(agent_rows) > 12:
        lines.append(t("gateway.agents.more", count=len(agent_rows) - 12))

    lines += ["", t("gateway.agents.running_processes", count=len(running_processes))]
    for proc in running_processes[:12]:
        # Collapse whitespace runs, then cap the command at 90 characters.
        cmd = " ".join(str(proc.get("command", "")).split())
        if len(cmd) > 90:
            cmd = cmd[:87] + "..."
        lines.append(
            f"- `{proc.get('session_id', '?')}` · "
            f"{format_uptime_short(int(proc.get('uptime_seconds', 0)))} · `{cmd}`"
        )
    if len(running_processes) > 12:
        lines.append(t("gateway.agents.more", count=len(running_processes) - 12))

    lines += ["", t("gateway.agents.async_jobs", count=len(background_tasks))]

    if not (agent_rows or running_processes or background_tasks):
        lines.append("")
        lines.append(t("gateway.agents.none"))
    return "\n".join(lines)
async def _handle_stop_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
    """Handle /stop command - interrupt a running agent.

    When an agent is truly hung (blocked thread that never checks
    _interrupt_requested), the early intercept in _handle_message()
    handles /stop before this method is reached. This handler fires
    only through normal command dispatch (no running agent) or as a
    fallback. Force-clean the session lock in all cases for safety.

    The session is preserved so the user can continue the conversation.

    Returns:
        An ``EphemeralReply`` when something was stopped (pending or
        running), otherwise the plain "no active agent" string.
    """
    source = event.source
    session_key = self.session_store.get_or_create_session(source).session_key
    current = self._running_agents.get(session_key)

    if current is _AGENT_PENDING_SENTINEL:
        still_pending = True
    elif current:
        still_pending = False
    else:
        return t("gateway.stop.no_active")

    await self._interrupt_and_clear_session(
        session_key,
        source,
        interrupt_reason=_INTERRUPT_REASON_STOP,
        invalidation_reason="stop_command_pending" if still_pending else "stop_command_handler",
    )
    if still_pending:
        logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
        return EphemeralReply(t("gateway.stop.stopped_pending"))
    return EphemeralReply(t("gateway.stop.stopped"))
async def _handle_platform_command(self, event: MessageEvent) -> str:
"""Handle ``/platform list|pause|resume [name]`` — surface and
manually control failed/paused gateway adapters.
Examples:
``/platform list`` — show connected + failed/paused platforms
``/platform pause whatsapp`` — stop the reconnect watcher hammering whatsapp
``/platform resume whatsapp`` — re-queue a paused platform for retry
"""
text = (getattr(event, "content", "") or "").strip()
parts = text.split(maxsplit=2)
if parts and parts[0].lower().lstrip("/").startswith("platform"):
parts = parts[1:]
action = (parts[0] if parts else "list").lower()
target = parts[1].lower() if len(parts) > 1 else ""
def _resolve_platform(name: str):
if not name:
return None
for p in Platform.__members__.values():
if p.value.lower() == name:
return p
return None
if action == "list":
lines = ["**Gateway platforms**"]
connected = sorted(p.value for p in self.adapters.keys())
if connected:
lines.append("Connected: " + ", ".join(connected))
else:
lines.append("Connected: (none)")
failed = getattr(self, "_failed_platforms", {}) or {}
if failed:
for p, info in failed.items():
if info.get("paused"):
reason = info.get("pause_reason") or "paused"
lines.append(
f" · {p.value} — PAUSED ({reason}). "
f"Resume with `/platform resume {p.value}`."
)
else:
attempts = info.get("attempts", 0)
lines.append(
f" · {p.value} — retrying (attempt {attempts})"
)
else:
lines.append("Failed/paused: (none)")
return "\n".join(lines)
if action in {"pause", "resume"}:
if not target:
return f"Usage: /platform {action} <name>"
platform = _resolve_platform(target)
if platform is None:
return f"Unknown platform: {target}"
failed = getattr(self, "_failed_platforms", {}) or {}
if action == "pause":
if platform not in failed:
return (
f"{platform.value} is not in the retry queue "
f"(it's either connected or not enabled)."
)
if failed[platform].get("paused"):
return f"{platform.value} is already paused."
self._pause_failed_platform(platform, reason="paused via /platform pause")
return (
f"✓ {platform.value} paused. "
f"Resume with `/platform resume {platform.value}` or "
f"`hermes gateway restart` to reset."
)
if platform not in failed:
return (
f"{platform.value} is not in the retry queue — "
f"nothing to resume."
)
if not failed[platform].get("paused"):
return (
f"{platform.value} is already retrying — "
f"no resume needed."
)
self._resume_paused_platform(platform)
return f"✓ {platform.value} resumed — retrying on next watcher tick."
return (
"Usage: /platform <list|pause|resume> [name]\n"
" /platform list — show platform status\n"
" /platform pause <name> — stop retrying a failing platform\n"
" /platform resume <name> — re-queue a paused platform"
)
async def _handle_restart_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
    """Handle /restart command - drain active work, then restart the gateway.

    Returns:
        An empty string for a stale re-delivered /restart, the "draining"
        message while agents are still running, or an ``EphemeralReply``
        announcing that a restart is in progress / starting.
    """
    if self._is_stale_restart_redelivery(event):
        if event.source and event.source.platform:
            platform_label = event.source.platform.value
        else:
            platform_label = "?"
        logger.info(
            "Ignoring redelivered /restart (platform=%s, update_id=%s) — "
            "already processed by a previous gateway instance.",
            platform_label,
            event.platform_update_id,
        )
        return ""

    # A restart is already underway: just report progress.
    if self._restart_requested or self._draining:
        busy = self._running_agent_count()
        if not busy:
            return EphemeralReply(t("gateway.restart.in_progress"))
        return t("gateway.draining", count=busy)

    # Record where the request came from (platform/chat/thread) in
    # .restart_notify.json. Best-effort: failures are only logged.
    try:
        origin = event.source
        notify_payload = {
            "platform": origin.platform.value if origin.platform else None,
            "chat_id": origin.chat_id,
        }
        if origin.thread_id:
            notify_payload["thread_id"] = origin.thread_id
        atomic_json_write(
            _hermes_home / ".restart_notify.json",
            notify_payload,
            indent=None,
        )
    except Exception as e:
        logger.debug("Failed to write restart notify file: %s", e)

    # Record platform, request time and update id in the marker file that
    # _is_stale_restart_redelivery reads back.
    try:
        origin = event.source
        dedup_payload = {
            "platform": origin.platform.value if origin.platform else None,
            "requested_at": time.time(),
        }
        if event.platform_update_id is not None:
            dedup_payload["update_id"] = event.platform_update_id
        atomic_json_write(
            _hermes_home / ".restart_last_processed.json",
            dedup_payload,
            indent=None,
        )
    except Exception as e:
        logger.debug("Failed to write restart dedup marker: %s", e)

    # Count before requesting the restart so the reply reflects the work
    # that was active when the command arrived.
    busy = self._running_agent_count()

    # INVOCATION_ID set, or a container marker file present -> request a
    # non-detached restart via the service; otherwise a detached one.
    under_service = bool(os.environ.get("INVOCATION_ID"))
    in_container = os.path.exists("/.dockerenv") or os.path.exists("/run/.containerenv")
    supervised = under_service or in_container
    self.request_restart(detached=not supervised, via_service=supervised)

    if not busy:
        return EphemeralReply(t("gateway.restart.restarting"))
    return t("gateway.draining", count=busy)
def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool:
"""Return True if this /restart is a Telegram re-delivery we already handled.
The previous gateway wrote ``.restart_last_processed.json`` with the
triggering platform + update_id when it processed the /restart. If
we now see a /restart on the same platform with an update_id <= that
recorded value AND the marker is recent (< 5 minutes), it's a
redelivery and should be ignored.
Only applies to Telegram today (the only platform that exposes a
numeric cross-session update ordering); other platforms return False.
"""
if event is None or event.source is None:
return False
if event.platform_update_id is None:
return False
if event.source.platform is None:
return False
try:
platform_value = event.source.platform.value
except Exception:
return False
if platform_value != "telegram":
return False
try:
marker_path = _hermes_home / ".restart_last_processed.json"
if not marker_path.exists():
return False
data = json.loads(marker_path.read_text())
except Exception:
return False
if data.get("platform") != platform_value:
return False
recorded_uid = data.get("update_id")
if not isinstance(recorded_uid, int):
return False
requested_at = data.get("requested_at")
if isinstance(requested_at, (int, float)):
if time.time() - requested_at > 300:
return False
return event.platform_update_id <= recorded_uid
async def _handle_help_command(self, event: MessageEvent) -> str:
    """Handle /help command - list available commands.

    The built-in command lines come first, followed by up to ten skill
    commands (sorted by name) and a "more" pointer when there are
    additional ones. The skill section is best-effort: if it cannot be
    built, whatever was collected so far is still returned.

    Returns:
        The help text, passed through ``_telegramize_command_mentions``
        for the event's platform.
    """
    from hermes_cli.commands import gateway_help_lines
    lines = [
        t("gateway.help.header"),
        *gateway_help_lines(),
    ]
    try:
        from agent.skill_commands import get_skill_commands
        skill_cmds = get_skill_commands()
        if skill_cmds:
            lines.append(t("gateway.help.skill_header", count=len(skill_cmds)))
            sorted_cmds = sorted(skill_cmds)
            for cmd in sorted_cmds[:10]:
                info = skill_cmds[cmd]
                # A skill without a description must not abort the rest of
                # the listing; use the same default text /commands shows.
                desc = (
                    info.get("description") if isinstance(info, dict) else None
                ) or t("gateway.commands.default_desc")
                lines.append(f"`{cmd}` — {desc}")
            if len(sorted_cmds) > 10:
                lines.append(t("gateway.help.more_use_commands", count=len(sorted_cmds) - 10))
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        logger.debug("skill command listing failed in /help: %s", exc)
    return _telegramize_command_mentions(
        "\n".join(lines),
        getattr(getattr(event, "source", None), "platform", None),
    )
async def _handle_commands_command(self, event: MessageEvent) -> str:
    """Handle /commands [page] - paginated listing of all commands.

    The optional argument is a page number; a non-integer argument yields
    the usage text. Entries are the built-in help lines followed (when skill
    commands exist) by a blank line, a skill header and one line per skill.
    Pages hold 15 entries on Telegram and 20 elsewhere; an out-of-range page
    is clamped and a note about the clamping is appended.

    Args:
        event: The incoming command event.

    Returns:
        The rendered page, passed through ``_telegramize_command_mentions``.
    """
    from hermes_cli.commands import gateway_help_lines

    page_arg = event.get_command_args().strip()
    requested_page = 1
    if page_arg:
        try:
            requested_page = int(page_arg)
        except ValueError:
            return t("gateway.commands.usage")

    entries = list(gateway_help_lines())
    # Skill commands are best-effort; whatever was appended before a failure
    # stays in the listing.
    try:
        from agent.skill_commands import get_skill_commands

        skills = get_skill_commands()
        if skills:
            entries.append("")
            entries.append(t("gateway.commands.skill_header"))
            for skill_name in sorted(skills):
                description = skills[skill_name].get("description", "").strip()
                if not description:
                    description = t("gateway.commands.default_desc")
                entries.append(f"`{skill_name}` — {description}")
    except Exception:
        pass

    if not entries:
        return t("gateway.commands.none")

    from gateway.config import Platform

    on_telegram = event.source.platform == Platform.TELEGRAM
    page_size = 15 if on_telegram else 20
    # Ceiling division, with a floor of one page.
    total_pages = max(1, -(-len(entries) // page_size))
    page = min(requested_page, total_pages)
    if page < 1:
        page = 1
    first = (page - 1) * page_size

    lines = [
        t("gateway.commands.header", total=len(entries), page=page, total_pages=total_pages),
        "",
    ]
    lines.extend(entries[first:first + page_size])

    if total_pages > 1:
        nav = []
        if page > 1:
            nav.append(t("gateway.commands.nav_prev", page=page - 1))
        if page < total_pages:
            nav.append(t("gateway.commands.nav_next", page=page + 1))
        lines.append("")
        lines.append(" | ".join(nav))
    if requested_page != page:
        lines.append(t("gateway.commands.out_of_range", requested=requested_page, page=page))

    platform = getattr(getattr(event, "source", None), "platform", None)
    return _telegramize_command_mentions("\n".join(lines), platform)
async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
    """Handle /model command — switch model for this session.

    Supports:
        /model                               — interactive picker (Telegram/Discord) or text list
        /model <name>                        — switch for this session only
        /model <name> --global               — switch and persist to config.yaml
        /model <name> --provider <provider>  — switch provider + model
        /model --provider <provider>         — switch to provider, auto-detect model

    Args:
        event: The incoming command event; its command arguments are parsed
            with ``parse_model_flags``.

    Returns:
        ``None`` when an interactive picker was sent successfully (the
        confirmation is then produced by the picker callback); otherwise the
        reply text: a provider/model listing, an error message, or the
        switch confirmation.
    """
    import yaml
    from hermes_cli.model_switch import (
        switch_model as _switch_model, parse_model_flags,
        list_authenticated_providers,
        list_picker_providers,
    )
    from hermes_cli.providers import get_label
    raw_args = event.get_command_args().strip()
    model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
    # Defaults used when the gateway config is missing or unreadable.
    current_model = ""
    current_provider = "openrouter"
    current_base_url = ""
    current_api_key = ""
    user_provs = None
    custom_provs = None
    config_path = _hermes_home / "config.yaml"
    try:
        cfg = _load_gateway_config()
        if cfg:
            model_cfg = cfg.get("model", {})
            if isinstance(model_cfg, dict):
                current_model = model_cfg.get("default", "")
                current_provider = model_cfg.get("provider", current_provider)
                current_base_url = model_cfg.get("base_url", "")
            user_provs = cfg.get("providers")
            try:
                from hermes_cli.config import get_compatible_custom_providers
                custom_provs = get_compatible_custom_providers(cfg)
            except Exception:
                # Fall back to the raw config entry when the helper is
                # unavailable or fails.
                custom_provs = cfg.get("custom_providers")
    except Exception:
        # Config problems are non-fatal here; the defaults above remain.
        pass
    source = event.source
    session_key = self._session_key_for_source(source)
    # A per-session override (set by an earlier /model) takes precedence
    # over the values read from the config.
    override = self._session_model_overrides.get(session_key, {})
    if override:
        current_model = override.get("model", current_model)
        current_provider = override.get("provider", current_provider)
        current_base_url = override.get("base_url", current_base_url)
        current_api_key = override.get("api_key", current_api_key)
    # ---- No model and no provider given: picker or text listing ----
    if not model_input and not explicit_provider:
        adapter = self.adapters.get(source.platform)
        # The picker is available when the adapter's class defines
        # send_model_picker.
        has_picker = (
            adapter is not None
            and getattr(type(adapter), "send_model_picker", None) is not None
        )
        if has_picker:
            try:
                providers = list_picker_providers(
                    current_provider=current_provider,
                    current_base_url=current_base_url,
                    current_model=current_model,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=50,
                )
            except Exception:
                providers = []
            if providers:
                # Snapshot the current state into dedicated names for the
                # callback below, which the adapter invokes later.
                _self = self
                _session_key = session_key
                _cur_model = current_model
                _cur_provider = current_provider
                _cur_base_url = current_base_url
                _cur_api_key = current_api_key
                async def _on_model_selected(
                    _chat_id: str, model_id: str, provider_slug: str
                ) -> str:
                    """Perform the model switch and return confirmation text."""
                    # Picker selections are always session-only (is_global=False).
                    result = _switch_model(
                        raw_input=model_id,
                        current_provider=_cur_provider,
                        current_model=_cur_model,
                        current_base_url=_cur_base_url,
                        current_api_key=_cur_api_key,
                        is_global=False,
                        explicit_provider=provider_slug,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                    )
                    if not result.success:
                        return t("gateway.model.error_prefix", error=result.error_message)
                    # Look up the cached agent entry under the cache lock
                    # (both attributes are optional on the runner).
                    cached_entry = None
                    _cache_lock = getattr(_self, "_agent_cache_lock", None)
                    _cache = getattr(_self, "_agent_cache", None)
                    if _cache_lock and _cache is not None:
                        with _cache_lock:
                            cached_entry = _cache.get(_session_key)
                    if cached_entry and cached_entry[0] is not None:
                        # Best-effort in-place switch; a failure is only logged.
                        try:
                            cached_entry[0].switch_model(
                                new_model=result.new_model,
                                new_provider=result.target_provider,
                                api_key=result.api_key,
                                base_url=result.base_url,
                                api_mode=result.api_mode,
                            )
                        except Exception as exc:
                            logger.warning("Picker model switch failed for cached agent: %s", exc)
                    # Record a note about the switch for this session
                    # (the attribute is created lazily).
                    if not hasattr(_self, "_pending_model_notes"):
                        _self._pending_model_notes = {}
                    _self._pending_model_notes[_session_key] = (
                        f"[Note: model was just switched from {_cur_model} to {result.new_model} "
                        f"via {result.provider_label or result.target_provider}. "
                        f"Adjust your self-identification accordingly.]"
                    )
                    # Store the session override, then evict the cached
                    # agent entry for this session.
                    _self._session_model_overrides[_session_key] = {
                        "model": result.new_model,
                        "provider": result.target_provider,
                        "api_key": result.api_key,
                        "base_url": result.base_url,
                        "api_mode": result.api_mode,
                    }
                    _self._evict_cached_agent(_session_key)
                    # Build the confirmation message.
                    plabel = result.provider_label or result.target_provider
                    lines = [t("gateway.model.switched", model=result.new_model)]
                    lines.append(t("gateway.model.provider_label", provider=plabel))
                    mi = result.model_info
                    from hermes_cli.model_switch import resolve_display_context_length
                    # Optional model.context_length from the config; any
                    # read/parse failure leaves it as None.
                    _sw_config_ctx = None
                    try:
                        _sw_cfg = _load_gateway_config()
                        _sw_model_cfg = _sw_cfg.get("model", {})
                        if isinstance(_sw_model_cfg, dict):
                            _sw_raw = _sw_model_cfg.get("context_length")
                            if _sw_raw is not None:
                                _sw_config_ctx = int(_sw_raw)
                    except Exception:
                        pass
                    ctx = resolve_display_context_length(
                        result.new_model,
                        result.target_provider,
                        base_url=result.base_url or current_base_url or "",
                        api_key=result.api_key or current_api_key or "",
                        model_info=mi,
                        custom_providers=custom_provs,
                        config_context_length=_sw_config_ctx,
                    )
                    if ctx:
                        lines.append(t("gateway.model.context_label", tokens=f"{ctx:,}"))
                    if mi:
                        if mi.max_output:
                            lines.append(t("gateway.model.max_output_label", tokens=f"{mi.max_output:,}"))
                        if mi.has_cost_data():
                            lines.append(t("gateway.model.cost_label", cost=mi.format_cost()))
                        lines.append(t("gateway.model.capabilities_label", capabilities=mi.format_capabilities()))
                    lines.append(t("gateway.model.session_only_hint"))
                    return "\n".join(lines)
                metadata = self._thread_metadata_for_source(source, self._reply_anchor_for_event(event))
                result = await adapter.send_model_picker(
                    chat_id=source.chat_id,
                    providers=providers,
                    current_model=current_model,
                    current_provider=current_provider,
                    session_key=session_key,
                    on_model_selected=_on_model_selected,
                    metadata=metadata,
                )
                if result.success:
                    return None
        # Text fallback: no picker, no providers for the picker, or the
        # picker could not be sent.
        provider_label = get_label(current_provider)
        lines = [t("gateway.model.current_label", model=current_model or "unknown", provider=provider_label), ""]
        try:
            providers = list_authenticated_providers(
                current_provider=current_provider,
                current_base_url=current_base_url,
                current_model=current_model,
                user_providers=user_provs,
                custom_providers=custom_provs,
                max_models=5,
            )
            for p in providers:
                tag = t("gateway.model.current_tag") if p["is_current"] else ""
                lines.append(f"**{p['name']}** `--provider {p['slug']}`{tag}:")
                if p["models"]:
                    model_strs = ", ".join(f"`{m}`" for m in p["models"])
                    extra = t("gateway.model.more_models_suffix", count=p["total_models"] - len(p["models"])) if p["total_models"] > len(p["models"]) else ""
                    lines.append(f"  {model_strs}{extra}")
                elif p.get("api_url"):
                    lines.append(f"  `{p['api_url']}`")
                lines.append("")
        except Exception:
            # The provider listing is best-effort; usage hints still follow.
            pass
        lines.append(t("gateway.model.usage_switch_model"))
        lines.append(t("gateway.model.usage_switch_provider"))
        lines.append(t("gateway.model.usage_persist"))
        return "\n".join(lines)
    # ---- Explicit switch: a model and/or provider was given ----
    result = _switch_model(
        raw_input=model_input,
        current_provider=current_provider,
        current_model=current_model,
        current_base_url=current_base_url,
        current_api_key=current_api_key,
        is_global=persist_global,
        explicit_provider=explicit_provider,
        user_providers=user_provs,
        custom_providers=custom_provs,
    )
    if not result.success:
        return t("gateway.model.error_prefix", error=result.error_message)
    # Look up the cached agent entry under the cache lock (both attributes
    # are optional on the runner).
    cached_entry = None
    _cache_lock = getattr(self, "_agent_cache_lock", None)
    _cache = getattr(self, "_agent_cache", None)
    if _cache_lock and _cache is not None:
        with _cache_lock:
            cached_entry = _cache.get(session_key)
    if cached_entry and cached_entry[0] is not None:
        # Best-effort in-place switch; a failure is only logged.
        try:
            cached_entry[0].switch_model(
                new_model=result.new_model,
                new_provider=result.target_provider,
                api_key=result.api_key,
                base_url=result.base_url,
                api_mode=result.api_mode,
            )
        except Exception as exc:
            logger.warning("In-place model switch failed for cached agent: %s", exc)
    # Record a note about the switch for this session (the attribute is
    # created lazily).
    if not hasattr(self, "_pending_model_notes"):
        self._pending_model_notes = {}
    self._pending_model_notes[session_key] = (
        f"[Note: model was just switched from {current_model} to {result.new_model} "
        f"via {result.provider_label or result.target_provider}. "
        f"Adjust your self-identification accordingly.]"
    )
    # Store the session override, then evict the cached agent entry.
    self._session_model_overrides[session_key] = {
        "model": result.new_model,
        "provider": result.target_provider,
        "api_key": result.api_key,
        "base_url": result.base_url,
        "api_mode": result.api_mode,
    }
    self._evict_cached_agent(session_key)
    if persist_global:
        # --global: re-read config.yaml from disk, update the model section
        # and save it. A failure is logged but does not undo the session
        # switch performed above.
        try:
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = yaml.safe_load(f) or {}
            else:
                cfg = {}
            model_cfg = cfg.setdefault("model", {})
            model_cfg["default"] = result.new_model
            model_cfg["provider"] = result.target_provider
            if result.base_url:
                model_cfg["base_url"] = result.base_url
            from hermes_cli.config import save_config
            save_config(cfg)
        except Exception as e:
            logger.warning("Failed to persist model switch: %s", e)
    # Build the confirmation message.
    provider_label = result.provider_label or result.target_provider
    lines = [t("gateway.model.switched", model=result.new_model)]
    lines.append(t("gateway.model.provider_label", provider=provider_label))
    mi = result.model_info
    from hermes_cli.model_switch import resolve_display_context_length
    # Optional model.context_length from the config; any read/parse failure
    # leaves it as None.
    _sw2_config_ctx = None
    try:
        _sw2_cfg = _load_gateway_config()
        _sw2_model_cfg = _sw2_cfg.get("model", {})
        if isinstance(_sw2_model_cfg, dict):
            _sw2_raw = _sw2_model_cfg.get("context_length")
            if _sw2_raw is not None:
                _sw2_config_ctx = int(_sw2_raw)
    except Exception:
        pass
    ctx = resolve_display_context_length(
        result.new_model,
        result.target_provider,
        base_url=result.base_url or current_base_url or "",
        api_key=result.api_key or current_api_key or "",
        model_info=mi,
        custom_providers=custom_provs,
        config_context_length=_sw2_config_ctx,
    )
    if ctx:
        lines.append(t("gateway.model.context_label", tokens=f"{ctx:,}"))
    if mi:
        if mi.max_output:
            lines.append(t("gateway.model.max_output_label", tokens=f"{mi.max_output:,}"))
        if mi.has_cost_data():
            lines.append(t("gateway.model.cost_label", cost=mi.format_cost()))
        lines.append(t("gateway.model.capabilities_label", capabilities=mi.format_capabilities()))
    # The prompt-caching line is shown for Claude models served from an
    # openrouter.ai base URL, or whenever the API mode is anthropic_messages.
    cache_enabled = (
        (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
        or result.api_mode == "anthropic_messages"
    )
    if cache_enabled:
        lines.append(t("gateway.model.prompt_caching_enabled"))
    if result.warning_message:
        lines.append(t("gateway.model.warning_prefix", warning=result.warning_message))
    if persist_global:
        lines.append(t("gateway.model.saved_global"))
    else:
        lines.append(t("gateway.model.session_only_hint"))
    return "\n".join(lines)
async def _handle_codex_runtime_command(self, event: MessageEvent) -> str:
    """Handle /codex-runtime command in the gateway.

    Same surface as the CLI handler in cli.py:
        /codex-runtime                   — show current state
        /codex-runtime auto              — Hermes default runtime
        /codex-runtime codex_app_server  — codex subprocess runtime
        /codex-runtime on / off          — synonyms

    On change, the cached agent for this session is evicted so the next
    message creates a fresh AIAgent with the new api_mode wired in
    (avoids prompt-cache invalidation mid-session).

    Args:
        event: The incoming command event; a falsy event is treated as
            "no arguments".

    Returns:
        The parse errors (one per line), a config-import failure message, or
        the result message prefixed with a success/failure mark.
    """
    from hermes_cli import codex_runtime_switch as crs

    arg_text = ""
    if event:
        arg_text = event.get_command_args().strip()

    requested, problems = crs.parse_args(arg_text)
    if problems:
        return "❌ " + "\n❌ ".join(problems)

    try:
        from hermes_cli.config import load_config, save_config
    except Exception as exc:
        return f"❌ Could not load config: {exc}"

    # Only persist when a new value was actually requested; a bare
    # /codex-runtime just reports the current state.
    is_change = requested is not None
    outcome = crs.apply(
        load_config(),
        requested,
        persist_callback=save_config if is_change else None,
    )

    if outcome.success and is_change and outcome.requires_new_session:
        # Eviction is best-effort: a failure is logged and ignored.
        try:
            self._evict_cached_agent(self._session_key_for_source(event.source))
        except Exception:
            logger.debug("could not evict cached agent after codex-runtime change",
                         exc_info=True)

    mark = "✓" if outcome.success else "✗"
    return f"{mark} {outcome.message}"
async def _handle_personality_command(self, event: MessageEvent) -> str:
    """Handle /personality command - list or set a personality.

    Without an argument, lists the personalities found under
    ``agent.personalities`` in the gateway config. ``none`` / ``default`` /
    ``neutral`` clear the system prompt. Any other argument is matched
    against the configured names, first exactly and then ignoring case (the
    argument itself is lower-cased, so an exact-only comparison would make
    names containing capitals unselectable). A change is written to
    ``config.yaml`` and mirrored into ``self._ephemeral_system_prompt``.

    Args:
        event: The incoming command event.

    Returns:
        The localized reply: the listing, a confirmation, a save-failure
        message, or an "unknown personality" message.
    """
    from hermes_constants import display_hermes_home

    args = event.get_command_args().strip().lower()
    config_path = _hermes_home / 'config.yaml'
    try:
        config = _load_gateway_config()
        personalities = cfg_get(config, "agent", "personalities", default={})
    except Exception:
        config = {}
        personalities = {}
    if not personalities:
        return t("gateway.personality.none_configured", path=display_hermes_home())

    if not args:
        lines = [t("gateway.personality.header")]
        lines.append(t("gateway.personality.none_option"))
        for name, prompt in personalities.items():
            if isinstance(prompt, dict):
                # Tolerate a missing/None system_prompt instead of slicing None.
                preview = prompt.get("description") or str(prompt.get("system_prompt") or "")[:50]
            else:
                # Non-string values are stringified, as _resolve_prompt does.
                text = str(prompt)
                preview = text[:50] + "..." if len(text) > 50 else text
            lines.append(t("gateway.personality.item", name=name, preview=preview))
        lines.append(t("gateway.personality.usage"))
        return "\n".join(lines)

    def _resolve_prompt(value):
        """Flatten a personality entry (dict or plain value) into prompt text."""
        if isinstance(value, dict):
            parts = [value.get("system_prompt", "")]
            if value.get("tone"):
                parts.append(f'Tone: {value["tone"]}')
            if value.get("style"):
                parts.append(f'Style: {value["style"]}')
            return "\n".join(p for p in parts if p)
        return str(value)

    def _save_system_prompt(prompt_text):
        """Persist agent.system_prompt; return an error reply, or None on success."""
        try:
            if "agent" not in config or not isinstance(config.get("agent"), dict):
                config["agent"] = {}
            config["agent"]["system_prompt"] = prompt_text
            atomic_yaml_write(config_path, config)
        except Exception as e:
            return t("gateway.personality.save_failed", error=str(e))
        return None

    if args in {"none", "default", "neutral"}:
        failure = _save_system_prompt("")
        if failure is not None:
            return failure
        self._ephemeral_system_prompt = ""
        return t("gateway.personality.cleared")

    # An exact key wins; otherwise fall back to a case-insensitive match.
    if args in personalities:
        chosen = args
    else:
        chosen = next(
            (n for n in personalities if isinstance(n, str) and n.lower() == args),
            None,
        )
    if chosen is not None:
        new_prompt = _resolve_prompt(personalities[chosen])
        failure = _save_system_prompt(new_prompt)
        if failure is not None:
            return failure
        self._ephemeral_system_prompt = new_prompt
        return t("gateway.personality.set_to", name=chosen)

    available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
    return t("gateway.personality.unknown", name=args, available=available)
async def _handle_retry_command(self, event: MessageEvent) -> str:
    """Handle /retry command - re-send the last user message.

    Finds the most recent transcript entry with role ``user``, cuts the
    transcript just before it, resets the session's ``last_prompt_tokens``
    and feeds the same text back through ``_handle_message`` as a new text
    event.

    Args:
        event: The incoming command event.

    Returns:
        The "no previous message" reply when no user entry with non-empty
        content exists, otherwise whatever ``_handle_message`` returns.
    """
    source = event.source
    entry = self.session_store.get_or_create_session(source)
    transcript = self.session_store.load_transcript(entry.session_id)

    # Index of the newest user entry, scanning from the end.
    user_idx = next(
        (i for i in reversed(range(len(transcript))) if transcript[i].get("role") == "user"),
        None,
    )
    prior_text = None
    if user_idx is not None:
        prior_text = transcript[user_idx].get("content", "")
    if not prior_text:
        return t("gateway.retry.no_previous")

    # Drop the user message and everything after it; the replay below
    # supplies the message again.
    self.session_store.rewrite_transcript(entry.session_id, transcript[:user_idx])
    entry.last_prompt_tokens = 0

    replay = MessageEvent(
        text=prior_text,
        message_type=MessageType.TEXT,
        source=source,
        raw_message=event.raw_message,
        channel_prompt=event.channel_prompt,
    )
    return await self._handle_message(replay)
def _goal_max_turns_from_config(self) -> int:
"""Resolve the configured /goal turn budget for gateway sessions.
GatewayRunner.config is a GatewayConfig dataclass, not the full
user config mapping. Top-level config blocks such as ``goals`` are
therefore only available through hermes_cli.config.load_config().
"""
try:
goals_cfg = (
(self.config or {}).get("goals", {})
if isinstance(self.config, dict)
else getattr(self.config, "goals", {}) or {}
)
if not goals_cfg:
from hermes_cli.config import load_config
goals_cfg = (load_config() or {}).get("goals") or {}
return int(goals_cfg.get("max_turns", 20) or 20)
except Exception:
return 20
def _get_goal_manager_for_event(self, event: "MessageEvent"):
    """Return a GoalManager bound to the session for this gateway event.

    Returns:
        ``(manager, session_entry)``, or ``(None, None)`` when the goals
        module cannot be imported, the session lookup fails, or the session
        has no id.
    """
    unavailable = (None, None)

    try:
        from hermes_cli.goals import GoalManager
    except Exception as exc:
        logger.debug("goal manager unavailable: %s", exc)
        return unavailable

    try:
        session_entry = self.session_store.get_or_create_session(event.source)
    except Exception as exc:
        logger.debug("goal manager: session lookup failed: %s", exc)
        return unavailable

    session_id = getattr(session_entry, "session_id", None) or ""
    if not session_id:
        return unavailable

    turn_budget = self._goal_max_turns_from_config()
    manager = GoalManager(session_id=session_id, default_max_turns=turn_budget)
    return manager, session_entry
async def _handle_goal_command(self, event: "MessageEvent") -> str:
    """Handle /goal for gateway platforms.

    Subcommands: ``/goal`` / ``/goal status`` / ``/goal pause`` /
    ``/goal resume`` / ``/goal clear`` (also ``stop`` / ``done``). Any
    other text becomes the new goal.

    Setting a new goal queues the goal text as the next turn so the
    agent starts working on it immediately — the post-turn
    continuation hook then takes over from there.

    Args:
        event: The incoming command event.

    Returns:
        The localized reply for the executed subcommand.
    """
    raw = (event.get_command_args() or "").strip()
    subcommand = raw.lower()

    manager, _session_entry = self._get_goal_manager_for_event(event)
    if manager is None:
        return t("gateway.goal.unavailable")

    def _drop_pending(failure_log: str) -> None:
        """Clear continuations queued for this session; failures are only logged."""
        try:
            adapter = self.adapters.get(event.source.platform) if event.source else None
            key = self._session_key_for_source(event.source) if event.source else None
            if adapter and key:
                self._clear_goal_pending_continuations(key, adapter)
        except Exception as exc:
            logger.debug(failure_log, exc)

    if subcommand in ("", "status"):
        return manager.status_line()

    if subcommand == "pause":
        paused = manager.pause(reason="user-paused")
        if paused is None:
            return t("gateway.goal.no_goal_set")
        _drop_pending("goal pause: pending continuation cleanup failed: %s")
        return t("gateway.goal.paused", goal=paused.goal)

    if subcommand == "resume":
        resumed = manager.resume()
        if resumed is None:
            return t("gateway.goal.no_resume")
        return t("gateway.goal.resumed", goal=resumed.goal)

    if subcommand in {"clear", "stop", "done"}:
        # Remember whether a goal existed before clearing so the reply can
        # distinguish the two cases.
        had_goal = manager.has_goal()
        manager.clear()
        _drop_pending("goal clear: pending continuation cleanup failed: %s")
        if had_goal:
            return t("gateway.goal_cleared")
        return t("gateway.no_active_goal")

    # Anything else is the text of a new goal.
    try:
        state = manager.set(raw)
    except ValueError as exc:
        return t("gateway.goal.invalid", error=str(exc))

    src = event.source
    adapter = self.adapters.get(src.platform) if src else None
    queue_key = self._session_key_for_source(src) if src else None
    if adapter and queue_key:
        # Queue the goal text as the next turn; a failure here is logged
        # and the goal stays set.
        try:
            kickoff = MessageEvent(
                text=state.goal,
                message_type=MessageType.TEXT,
                source=event.source,
                message_id=event.message_id,
                channel_prompt=event.channel_prompt,
            )
            self._enqueue_fifo(queue_key, kickoff, adapter)
        except Exception as exc:
            logger.debug("goal kickoff enqueue failed: %s", exc)

    return t("gateway.goal.set", budget=state.max_turns, goal=state.goal)
async def _handle_subgoal_command(self, event: "MessageEvent") -> str:
"""Handle /subgoal for gateway platforms (mirror of CLI handler).
Subgoals are extra criteria appended to the active goal mid-loop.
They modify state read at the next turn boundary, so this is safe
to invoke while the agent is running.
"""
args = (event.get_command_args() or "").strip()
mgr, _session_entry = self._get_goal_manager_for_event(event)
if mgr is None:
return t("gateway.goal.unavailable")
if not mgr.has_goal():
return "No active goal. Set one with /goal <text>."
if not args:
return f"{mgr.status_line()}\n{mgr.render_subgoals()}"
tokens = args.split(None, 1)
verb = tokens[0].lower()
rest = tokens[1].strip() if len(tokens) > 1 else ""
if verb == "remove":
if not rest:
return "Usage: /subgoal remove <n>"
try:
idx = int(rest.split()[0])
except ValueError:
return "/subgoal remove: <n> must be an integer (1-based index)."
try:
removed = mgr.remove_subgoal(idx)
except (IndexError, RuntimeError) as exc:
return f"/subgoal remove: {exc}"
return f"✓ Removed subgoal {idx}: {removed}"
if verb == "clear":
try:
prev = mgr.clear_subgoals()
except RuntimeError as exc:
return f"/subgoal clear: {exc}"
if prev:
return f"✓ Cleared {prev} subgoal{'s' if prev != 1 else ''}."
return "No subgoals to clear."
try:
text = mgr.add_subgoal(args)
except (ValueError, RuntimeError) as exc:
return f"/subgoal: {exc}"
idx = len(mgr.state.subgoals) if mgr.state else 0
return f"✓ Added subgoal {idx}: {text}"
async def _send_goal_status_notice(self, source: Any, message: str) -> None:
"""Send a /goal judge status line back to the originating chat/thread."""
adapter = self.adapters.get(source.platform)
if not adapter:
logger.debug("goal continuation: no adapter for %s", getattr(source, "platform", None))
return
try:
metadata = self._thread_metadata_for_source(source)
except Exception:
metadata = None
result = await adapter.send(source.chat_id, message, metadata=metadata)
if result is not None and not getattr(result, "success", True):
logger.warning(
"goal continuation: status send failed: %s",
getattr(result, "error", "unknown error"),
)
async def _defer_goal_status_notice_after_delivery(self, source: Any, message: str) -> None:
"""Send a /goal status line after the main response is delivered.
The gateway message handler returns the agent response to the platform
adapter, which sends it after this method's caller has returned. For a
natural Discord/Telegram reading order, goal status belongs after that
send. Platform adapters provide a one-shot post-delivery callback for
exactly this boundary; when unavailable, fall back to direct awaited
delivery rather than silently dropping the notice.
"""
adapter = self.adapters.get(source.platform)
if not adapter:
logger.debug("goal continuation: no adapter for %s", getattr(source, "platform", None))
return
async def _deliver() -> None:
try:
await self._send_goal_status_notice(source, message)
except Exception as exc:
logger.warning("goal continuation: status send failed: %s", exc, exc_info=True)
try:
session_key = self._session_key_for_source(source)
except Exception:
session_key = None
if session_key and hasattr(adapter, "register_post_delivery_callback"):
try:
generation = None
active = getattr(adapter, "_active_sessions", {}).get(session_key)
if active is not None:
generation = getattr(active, "_hermes_run_generation", None)
adapter.register_post_delivery_callback(
session_key,
_deliver,
generation=generation,
)
return
except Exception as exc:
logger.debug("goal continuation: post-delivery callback registration failed: %s", exc)
await _deliver()
async def _post_turn_goal_continuation(
    self,
    *,
    session_entry: Any,
    source: Any,
    final_response: str,
) -> None:
    """Run the goal judge after a gateway turn and, if still active,
    enqueue a continuation prompt for the same session.

    Called from ``_handle_message_with_agent`` at turn boundary, AFTER
    the response has been delivered. Safe when no goal is set.

    We use the adapter's pending-message / FIFO machinery so any real
    user message that arrives simultaneously is handled by the same
    queue and takes priority naturally.

    Args:
        session_entry: Session record; only its ``session_id`` is read.
        source: The message source of the finished turn (may be None).
        final_response: The agent's final response text for the turn.
    """
    try:
        from hermes_cli.goals import GoalManager
    except Exception as exc:
        logger.debug("goal continuation: goals module unavailable: %s", exc)
        return

    session_id = getattr(session_entry, "session_id", None) or ""
    if not session_id:
        return

    manager = GoalManager(
        session_id=session_id,
        default_max_turns=self._goal_max_turns_from_config(),
    )
    if not manager.is_active():
        return

    verdict = manager.evaluate_after_turn(final_response or "", user_initiated=True)

    # Report the judge's message (if any) before deciding on continuation.
    notice = verdict.get("message") or ""
    if notice and source is not None:
        await self._defer_goal_status_notice_after_delivery(source, notice)

    if not verdict.get("should_continue"):
        return
    follow_up = verdict.get("continuation_prompt") or ""
    if not follow_up or source is None:
        return

    # Enqueue the continuation as a synthetic text event; failures are
    # logged and otherwise ignored.
    try:
        adapter = self.adapters.get(source.platform)
        queue_key = self._session_key_for_source(source)
        if adapter and queue_key:
            continuation = MessageEvent(
                text=follow_up,
                message_type=MessageType.TEXT,
                source=source,
                message_id=None,
                channel_prompt=None,
            )
            self._enqueue_fifo(queue_key, continuation, adapter)
    except Exception as exc:
        logger.debug("goal continuation: enqueue failed: %s", exc)
async def _handle_undo_command(self, event: MessageEvent) -> str:
    """Handle /undo command - remove the last user/assistant exchange.

    Truncates the transcript at the most recent entry with role ``user``
    (that entry and everything after it is dropped) and resets the
    session's ``last_prompt_tokens``.

    Args:
        event: The incoming command event.

    Returns:
        The "nothing to undo" reply when the transcript has no user entry,
        otherwise a confirmation with the number of removed entries and a
        preview (at most 40 characters) of the removed user message.
    """
    entry = self.session_store.get_or_create_session(event.source)
    transcript = self.session_store.load_transcript(entry.session_id)

    # Newest user entry, scanning backwards.
    cut = next(
        (i for i in range(len(transcript) - 1, -1, -1) if transcript[i].get("role") == "user"),
        None,
    )
    if cut is None:
        return t("gateway.undo.nothing")

    removed_text = transcript[cut].get("content", "")
    dropped = len(transcript) - cut
    self.session_store.rewrite_transcript(entry.session_id, transcript[:cut])
    entry.last_prompt_tokens = 0

    if len(removed_text) > 40:
        preview = removed_text[:40] + "..."
    else:
        preview = removed_text
    return t("gateway.undo.removed", count=dropped, preview=preview)
async def _handle_set_home_command(self, event: MessageEvent) -> str:
    """Handle /sethome command -- set the current chat as the platform's home channel.

    Saves the chat id and thread id (empty string when there is none) via
    ``save_env_value`` and, when the source has a platform, updates the
    in-memory platform config with a matching ``HomeChannel``.

    Args:
        event: The incoming command event.

    Returns:
        A save-failure message if persisting fails, otherwise the success
        confirmation.
    """
    src = event.source
    platform = src.platform
    chat_id = src.chat_id
    thread_id = src.thread_id
    chat_name = src.chat_name or chat_id

    platform_name = platform.value if platform else "unknown"
    env_key = _home_target_env_var(platform_name)
    thread_env_key = _home_thread_env_var(platform_name)

    try:
        from hermes_cli.config import save_env_value

        save_env_value(env_key, str(chat_id))
        save_env_value(thread_env_key, str(thread_id or ""))
    except Exception as e:
        return t("gateway.set_home.save_failed", error=e)

    if src.platform:
        # Create the platform entry on demand so the home channel can be
        # attached to it.
        platform_config = self.config.platforms.setdefault(
            src.platform,
            PlatformConfig(enabled=True),
        )
        home_thread = str(thread_id) if thread_id else None
        platform_config.home_channel = HomeChannel(
            platform=src.platform,
            chat_id=str(chat_id),
            name=chat_name,
            thread_id=home_thread,
        )

    return t("gateway.set_home.success", name=chat_name, chat_id=chat_id)
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
"""Extract Discord guild_id from the raw message object."""
raw = getattr(event, "raw_message", None)
if raw is None:
return None
if hasattr(raw, "guild_id") and raw.guild_id:
return int(raw.guild_id)
if hasattr(raw, "guild") and raw.guild:
return raw.guild.id
return None
async def _handle_voice_command(self, event: MessageEvent) -> str:
    """Handle /voice [on|off|tts|channel|leave|status] command.

    ``on``/``enable`` select the ``voice_only`` mode, ``tts`` selects
    ``all``, ``off``/``disable`` select ``off``; ``channel``/``join`` and
    ``leave`` delegate to the voice-channel handlers; ``status`` reports the
    current mode (plus voice channel details when available). Any other
    argument, including none, toggles between ``off`` and ``voice_only``.

    Args:
        event: The incoming command event.

    Returns:
        The localized reply for the executed action.
    """
    arg = event.get_command_args().strip().lower()
    chat_id = event.source.chat_id
    platform = event.source.platform
    voice_key = self._voice_key(platform, chat_id)
    adapter = self.adapters.get(platform)

    def _switch_on(mode: str) -> None:
        """Store *mode*, persist it, and enable auto-TTS on the adapter."""
        self._voice_mode[voice_key] = mode
        self._save_voice_modes()
        if adapter:
            self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)

    def _switch_off() -> None:
        """Store the "off" mode, persist it, and disable auto-TTS on the adapter."""
        self._voice_mode[voice_key] = "off"
        self._save_voice_modes()
        if adapter:
            self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)

    if arg in {"on", "enable"}:
        _switch_on("voice_only")
        return t("gateway.voice.enabled_voice_only")

    if arg in {"off", "disable"}:
        _switch_off()
        return t("gateway.voice.disabled_text")

    if arg == "tts":
        _switch_on("all")
        return t("gateway.voice.tts_enabled")

    if arg in {"channel", "join"}:
        return await self._handle_voice_channel_join(event)

    if arg == "leave":
        return await self._handle_voice_channel_leave(event)

    if arg == "status":
        mode = self._voice_mode.get(voice_key, "off")
        labels = {
            "off": t("gateway.voice.label_off"),
            "voice_only": t("gateway.voice.label_voice_only"),
            "all": t("gateway.voice.label_all"),
        }
        status_adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)
        if guild_id and hasattr(status_adapter, "get_voice_channel_info"):
            info = status_adapter.get_voice_channel_info(guild_id)
            if info:
                report = [
                    t("gateway.voice.status_mode", label=labels.get(mode, mode)),
                    t("gateway.voice.status_channel", channel=info['channel_name']),
                    t("gateway.voice.status_participants", count=info['member_count']),
                ]
                for member in info["members"]:
                    speaking = t("gateway.voice.speaking") if member.get("is_speaking") else ""
                    report.append(
                        t("gateway.voice.status_member", name=member['display_name'], status=speaking)
                    )
                return "\n".join(report)
        return t("gateway.voice.status_mode", label=labels.get(mode, mode))

    # Unrecognized or empty argument: toggle between off and voice_only.
    if self._voice_mode.get(voice_key, "off") == "off":
        _switch_on("voice_only")
        return t("gateway.voice.enabled_short")
    _switch_off()
    return t("gateway.voice.disabled_short")
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
"""Join the user's current Discord voice channel."""
adapter = self.adapters.get(event.source.platform)
if not hasattr(adapter, "join_voice_channel"):
return "Voice channels are not supported on this platform."
guild_id = self._get_guild_id(event)
if not guild_id:
return "This command only works in a Discord server."
voice_channel = await adapter.get_user_voice_channel(
guild_id, event.source.user_id
)
if not voice_channel:
return "You need to be in a voice channel first."
if hasattr(adapter, "_voice_input_callback"):
adapter._voice_input_callback = self._handle_voice_channel_input
if hasattr(adapter, "_on_voice_disconnect"):
adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
try:
success = await adapter.join_voice_channel(voice_channel)
except Exception as e:
logger.warning("Failed to join voice channel: %s", e)
adapter._voice_input_callback = None
err_lower = str(e).lower()
if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
return (
"Voice dependencies are missing (PyNaCl / davey). "
f"Install with: `{sys.executable} -m pip install PyNaCl`"
)
return f"Failed to join voice channel: {e}"
if success:
adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
if hasattr(adapter, "_voice_sources"):
adapter._voice_sources[guild_id] = event.source.to_dict()
self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
self._save_voice_modes()
self._set_adapter_auto_tts_enabled(adapter, event.source.chat_id, enabled=True)
return (
f"Joined voice channel **{voice_channel.name}**.\n"
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
)
adapter._voice_input_callback = None
return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Disconnect from the guild's voice channel and switch voice mode off.

    Returns:
        ``"Left voice channel."`` after the runner-side state is reset, or
        ``"Not in a voice channel."`` when there is nothing to leave.
    """
    source = event.source
    adapter = self.adapters.get(source.platform)
    guild_id = self._get_guild_id(event)

    # All of these must hold for the bot to actually be connected.
    connected = (
        guild_id
        and hasattr(adapter, "leave_voice_channel")
        and hasattr(adapter, "is_in_voice_channel")
        and adapter.is_in_voice_channel(guild_id)
    )
    if not connected:
        return "Not in a voice channel."

    try:
        await adapter.leave_voice_channel(guild_id)
    except Exception as exc:
        # Still reset runner state below even if the adapter call failed.
        logger.warning("Error leaving voice channel: %s", exc)

    mode_key = self._voice_key(source.platform, source.chat_id)
    self._voice_mode[mode_key] = "off"
    self._save_voice_modes()
    self._set_adapter_auto_tts_disabled(adapter, source.chat_id, disabled=True)

    if hasattr(adapter, "_voice_input_callback"):
        adapter._voice_input_callback = None
    return "Left voice channel."
def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
    """Reset runner-side voice state after a voice channel timeout.

    Installed on the adapter as its ``_on_voice_disconnect`` hook by the
    voice-join handler. Marks the Discord chat's voice mode as ``"off"``,
    persists the voice modes, and disables adapter auto-TTS for the chat.
    """
    mode_key = self._voice_key(Platform.DISCORD, chat_id)
    self._voice_mode[mode_key] = "off"
    self._save_voice_modes()

    discord_adapter = self.adapters.get(Platform.DISCORD)
    self._set_adapter_auto_tts_disabled(discord_adapter, chat_id, disabled=True)
def _is_duplicate_voice_transcript(self, guild_id: int, user_id: int, transcript: str) -> bool:
    """Report whether *transcript* repeats a recent utterance by the same speaker.

    Voice capture can emit the same utterance twice a few seconds apart,
    which would queue a second agent run and overlapping spoken replies.
    Transcripts are compared per ``(guild_id, user_id)`` over a 12 second
    window: exact matches after normalization always count as duplicates,
    and for texts of at least 16 characters a ``SequenceMatcher`` ratio of
    0.95 or higher counts too.

    Args:
        guild_id: Guild the utterance was captured in.
        user_id: Speaker the utterance is attributed to.
        transcript: Raw speech-to-text output.

    Returns:
        True when the transcript should be suppressed; False when it is new
        (in which case it is recorded for later comparisons) or empty after
        normalization.
    """
    from difflib import SequenceMatcher

    # Drop punctuation FIRST, then collapse whitespace. Doing it the other
    # way round leaves stray spaces behind ("hello world ." -> "hello world ")
    # so trivially different punctuation would defeat the exact-match check.
    normalized = re.sub(r"[^\w\s]", "", transcript or "")
    normalized = re.sub(r"\s+", " ", normalized).strip().lower()
    if not normalized:
        return False

    now = time.monotonic()
    window_seconds = 12.0
    key = (guild_id, user_id)

    # The store is created lazily so instances built without __init__
    # (or older instances) still work.
    recent_store = getattr(self, "_recent_voice_transcripts", None)
    if not isinstance(recent_store, dict):
        recent_store = {}
        self._recent_voice_transcripts = recent_store

    # Keep only entries that are still inside the dedup window.
    recent = [
        (ts, txt)
        for ts, txt in recent_store.get(key, [])
        if now - ts <= window_seconds
    ]

    for _, prior in recent:
        is_exact = prior == normalized
        # Fuzzy matching is limited to longer texts; short phrases are too
        # likely to be similar by coincidence.
        is_near = (
            not is_exact
            and len(prior) >= 16
            and len(normalized) >= 16
            and SequenceMatcher(None, prior, normalized).ratio() >= 0.95
        )
        if is_exact or is_near:
            recent_store[key] = recent
            return True

    recent.append((now, normalized))
    # Bound per-speaker history to the five most recent utterances.
    recent_store[key] = recent[-5:]
    return False
async def _handle_voice_channel_input(
    self, guild_id: int, user_id: int, transcript: str
):
    """Route a transcribed voice-channel utterance through the message pipeline.

    Builds a synthetic ``MessageEvent`` of type ``MessageType.VOICE`` for
    the speaker and hands it to the Discord adapter's ``handle_message``.
    Before that, the transcript is echoed into the text channel linked to
    the guild (best effort).

    The utterance is dropped silently when no Discord adapter is registered,
    when the guild has no linked text channel, when the speaker is not
    authorized, or when the transcript is a recent duplicate.

    Args:
        guild_id: Guild whose voice channel produced the audio.
        user_id: Speaker the transcript is attributed to.
        transcript: Speech-to-text output for the utterance.
    """
    adapter = self.adapters.get(Platform.DISCORD)
    if not adapter:
        return

    # Read defensively, mirroring the _voice_sources lookup below, so an
    # adapter without the mapping drops the utterance instead of raising.
    text_ch_id = (getattr(adapter, "_voice_text_channels", None) or {}).get(guild_id)
    if not text_ch_id:
        return

    # Prefer the source captured at join time; only the speaker identity
    # is replaced. Otherwise fall back to a minimal channel source.
    source_data = (getattr(adapter, "_voice_sources", None) or {}).get(guild_id)
    if source_data:
        source = SessionSource.from_dict(source_data)
        source.user_id = str(user_id)
        source.user_name = str(user_id)
    else:
        source = SessionSource(
            platform=Platform.DISCORD,
            chat_id=str(text_ch_id),
            user_id=str(user_id),
            user_name=str(user_id),
            chat_type="channel",
        )

    if not self._is_user_authorized(source):
        # %s rather than %d: the ID is treated as a string everywhere else
        # and a non-int value must not break the log call.
        logger.debug("Unauthorized voice input from user %s, ignoring", user_id)
        return

    if self._is_duplicate_voice_transcript(guild_id, user_id, transcript):
        logger.info(
            "Suppressing duplicate voice transcript for guild=%s user=%s: %s",
            guild_id,
            user_id,
            transcript[:100],
        )
        return

    # Echo the transcript into the text channel. This is best effort: a
    # failure here must not stop the utterance from reaching the agent,
    # but it is logged so it can be diagnosed.
    try:
        channel = adapter._client.get_channel(text_ch_id)
        if channel:
            # Insert a zero-width space so the echo cannot trigger mass pings.
            safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
            await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
    except Exception:
        logger.debug(
            "Failed to echo voice transcript to text channel %s",
            text_ch_id,
            exc_info=True,
        )

    from types import SimpleNamespace

    event = MessageEvent(
        source=source,
        text=transcript,
        message_type=MessageType.VOICE,
        # Minimal stand-in for a platform message carrying only the guild id.
        raw_message=SimpleNamespace(guild_id=guild_id, guild=None),
    )
    await adapter.handle_message(event)
def _should_send_voice_reply(
    self,
    event: MessageEvent,
    response: str,
    agent_messages: list,
    already_sent: bool = False,
) -> bool:
    """Tell whether the runner itself must produce a spoken (TTS) reply.

    The answer is False when any of the following holds:

    - the response is empty or starts with ``"Error:"``;
    - the chat's voice mode does not cover this message (``"all"`` covers
      everything, ``"voice_only"`` covers voice input only);
    - an assistant message in *agent_messages* already invoked the
      ``text_to_speech`` tool;
    - the input was voice and the text has not been streamed yet
      (``already_sent`` is False). Per the original note, the base adapter's
      auto-TTS handles that case, so the runner skips it to avoid speaking
      twice.
    """
    if not response or response.startswith("Error:"):
        return False

    source = event.source
    mode = self._voice_mode.get(self._voice_key(source.platform, source.chat_id), "off")
    from_voice = event.message_type == MessageType.VOICE

    mode_applies = mode == "all" or (mode == "voice_only" and from_voice)
    if not mode_applies:
        return False

    # Skip when the agent already spoke through its own tool call.
    for message in agent_messages:
        if message.get("role") != "assistant":
            continue
        for call in message.get("tool_calls") or []:
            if call.get("function", {}).get("name") == "text_to_speech":
                return False

    # Voice input that has not been streamed is left to the base adapter.
    return not (from_voice and not already_sent)
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
    """Synthesize *text* to speech and deliver the audio for *event*.

    The first 4000 characters are stripped of markdown and rendered to an
    mp3 under ``<tmp>/hermes_voice``. If the bot is connected to a voice
    channel in the event's guild the audio is played there; otherwise it is
    sent with the adapter's ``send_voice``. Every failure is logged and
    swallowed, and the generated files are always removed afterwards.
    """
    import uuid as _uuid

    audio_path = None
    actual_path = None
    try:
        from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts

        spoken = _strip_markdown_for_tts(text[:4000])
        if not spoken:
            return

        voice_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
        audio_path = os.path.join(voice_dir, f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3")
        os.makedirs(voice_dir, exist_ok=True)

        # The TTS tool is synchronous; keep it off the event loop.
        raw_result = await asyncio.to_thread(
            text_to_speech_tool, text=spoken, output_path=audio_path
        )
        try:
            outcome = json.loads(raw_result)
        except (json.JSONDecodeError, TypeError):
            logger.warning(
                "Auto voice reply TTS returned invalid JSON: %s",
                raw_result[:200] if raw_result else raw_result,
            )
            return

        # The tool may report a different output file than the one requested.
        actual_path = outcome.get("file_path", audio_path)
        if not (outcome.get("success") and os.path.isfile(actual_path)):
            logger.warning("Auto voice reply TTS failed: %s", outcome.get("error"))
            return

        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)
        in_voice_channel = (
            guild_id
            and hasattr(adapter, "play_in_voice_channel")
            and hasattr(adapter, "is_in_voice_channel")
            and adapter.is_in_voice_channel(guild_id)
        )
        if in_voice_channel:
            await adapter.play_in_voice_channel(guild_id, actual_path)
        elif adapter and hasattr(adapter, "send_voice"):
            anchor = self._reply_anchor_for_event(event)
            base_meta = self._thread_metadata_for_source(event.source, anchor)
            # Copy so the shared thread metadata is not mutated.
            voice_meta = dict(base_meta) if base_meta is not None else {}
            voice_meta["notify"] = True
            await adapter.send_voice(
                chat_id=event.source.chat_id,
                audio_path=actual_path,
                reply_to=anchor,
                metadata=voice_meta,
            )
    except Exception as exc:
        logger.warning("Auto voice reply failed: %s", exc, exc_info=True)
    finally:
        # Remove both the requested and the reported file (deduplicated).
        leftovers = {p for p in (audio_path, actual_path) if p is not None}
        for leftover in leftovers:
            try:
                os.unlink(leftover)
            except OSError:
                pass
async def _deliver_media_from_response(
    self,
    response: str,
    event: MessageEvent,
    adapter,
) -> None:
    """Extract MEDIA: tags and local file paths from a response and deliver them.

    Called after streaming has already sent the text to the user, so the
    text itself is already delivered — this only handles file attachments
    that the normal _process_message_background path would have caught.

    Delivery order is: all images as one batch, then tagged media (voice /
    video / document), then remaining local files (video / document).
    Each delivery is best effort: a failure is logged as a warning and the
    remaining attachments are still attempted.

    Args:
        response: Full response text to scan for attachments.
        event: Originating event; supplies the chat id and thread metadata.
        adapter: Platform adapter used for extraction and sending.
    """
    from pathlib import Path
    from urllib.parse import quote as _quote
    try:
        # The literal marker "[[as_document]]" forces images to be sent as
        # documents instead of through the image batch.
        force_document_attachments = "[[as_document]]" in response
        media_files, _ = adapter.extract_media(response)
        # Local files are extracted from the text left after image extraction.
        _, cleaned = adapter.extract_images(response)
        local_files, _ = adapter.extract_local_files(cleaned)
        _thread_meta = self._thread_metadata_for_source(event.source, self._reply_anchor_for_event(event))
        from gateway.platforms.base import should_send_media_as_audio
        _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
        _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
        # Partition tagged media: plain images go to the batch, everything
        # else (including voice-flagged items) keeps its is_voice flag.
        image_paths: list = []
        non_image_media: list = []
        for media_path, is_voice in media_files:
            ext = Path(media_path).suffix.lower()
            if (ext in _IMAGE_EXTS
                    and not is_voice
                    and not force_document_attachments):
                image_paths.append(media_path)
            else:
                non_image_media.append((media_path, is_voice))
        # Same partitioning for bare local file paths.
        non_image_local: list = []
        for file_path in local_files:
            if (Path(file_path).suffix.lower() in _IMAGE_EXTS
                    and not force_document_attachments):
                image_paths.append(file_path)
            else:
                non_image_local.append(file_path)
        if image_paths:
            try:
                # Images are passed as percent-encoded file:// URLs with an
                # empty second element per entry.
                images = [(f"file://{_quote(p)}", "") for p in image_paths]
                await adapter.send_multiple_images(
                    chat_id=event.source.chat_id,
                    images=images,
                    metadata=_thread_meta,
                )
            except Exception as e:
                logger.warning("[%s] Post-stream image batch delivery failed: %s", adapter.name, e)
        for media_path, is_voice in non_image_media:
            try:
                ext = Path(media_path).suffix.lower()
                # The audio decision is delegated to the platform helper.
                if should_send_media_as_audio(event.source.platform, ext, is_voice=is_voice):
                    await adapter.send_voice(
                        chat_id=event.source.chat_id,
                        audio_path=media_path,
                        metadata=_thread_meta,
                    )
                elif ext in _VIDEO_EXTS:
                    await adapter.send_video(
                        chat_id=event.source.chat_id,
                        video_path=media_path,
                        metadata=_thread_meta,
                    )
                else:
                    await adapter.send_document(
                        chat_id=event.source.chat_id,
                        file_path=media_path,
                        metadata=_thread_meta,
                    )
            except Exception as e:
                logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)
        for file_path in non_image_local:
            try:
                ext = Path(file_path).suffix.lower()
                # Local files are never sent as voice: video or document only.
                if ext in _VIDEO_EXTS:
                    await adapter.send_video(
                        chat_id=event.source.chat_id,
                        video_path=file_path,
                        metadata=_thread_meta,
                    )
                else:
                    await adapter.send_document(
                        chat_id=event.source.chat_id,
                        file_path=file_path,
                        metadata=_thread_meta,
                    )
            except Exception as e:
                logger.warning("[%s] Post-stream file delivery failed: %s", adapter.name, e)
    except Exception as e:
        # Covers extraction and partitioning failures; nothing is raised
        # to the caller.
        logger.warning("Post-stream media extraction failed: %s", e)
async def _handle_rollback_command(self, event: MessageEvent) -> str:
    """Handle /rollback — list filesystem checkpoints or restore one.

    Without an argument the checkpoints for the working directory are
    listed. With an argument, an integer is treated as a 1-based index into
    that list and anything else as a checkpoint hash.

    The feature is controlled by the ``checkpoints`` section of
    ``config.yaml``, which may be a mapping or a bare boolean. A missing,
    null or otherwise malformed section is treated as "not enabled".

    Returns:
        A user-facing message: the checkpoint list, the restore result, or
        an explanation of why nothing was done.
    """
    from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list

    cp_cfg: dict = {}
    try:
        import yaml as _y

        _cfg_path = _hermes_home / "config.yaml"
        if _cfg_path.exists():
            with open(_cfg_path, encoding="utf-8") as _f:
                _data = _y.safe_load(_f) or {}
            raw_cfg = _data.get("checkpoints", {})
            if isinstance(raw_cfg, bool):
                # Shorthand: `checkpoints: true` / `checkpoints: false`.
                cp_cfg = {"enabled": raw_cfg}
            elif isinstance(raw_cfg, dict):
                cp_cfg = raw_cfg
            # Any other value (e.g. an empty `checkpoints:` key, which YAML
            # loads as None) leaves cp_cfg as {} so the .get() calls below
            # cannot raise.
    except Exception:
        # Unreadable config means the feature is reported as disabled;
        # keep a trace so the cause can be found.
        logger.debug("Failed to read checkpoint config for /rollback", exc_info=True)

    if not cp_cfg.get("enabled", False):
        return t("gateway.rollback.not_enabled")

    mgr = CheckpointManager(
        enabled=True,
        max_snapshots=cp_cfg.get("max_snapshots", 50),
        max_total_size_mb=cp_cfg.get("max_total_size_mb", 500),
        max_file_size_mb=cp_cfg.get("max_file_size_mb", 10),
    )
    cwd = os.getenv("TERMINAL_CWD", str(Path.home()))
    arg = event.get_command_args().strip()

    # Both the listing and the restore path need the checkpoint list.
    checkpoints = mgr.list_checkpoints(cwd)
    if not arg:
        return format_checkpoint_list(checkpoints, cwd)
    if not checkpoints:
        return t("gateway.rollback.none_found", cwd=cwd)

    try:
        idx = int(arg) - 1
    except ValueError:
        # Not a number: treat the argument as a checkpoint hash.
        target_hash = arg
    else:
        if not 0 <= idx < len(checkpoints):
            return t("gateway.rollback.invalid_number", max=len(checkpoints))
        target_hash = checkpoints[idx]["hash"]

    result = mgr.restore(cwd, target_hash)
    if result["success"]:
        return t(
            "gateway.rollback.restored",
            hash=result["restored_to"],
            reason=result["reason"],
        )
    return t("gateway.rollback.restore_failed", error=result["error"])
async def _handle_background_command(self, event: MessageEvent) -> str:
    """Handle /background <prompt> — start a prompt as a detached task.

    The prompt is scheduled as an asyncio task running
    ``_run_background_task`` under a freshly generated task id, and the
    handler returns immediately with a confirmation message. The task is
    tracked in ``self._background_tasks`` until it finishes.

    Returns:
        The usage text when no prompt was given, otherwise the "started"
        message containing a prompt preview and the task id.
    """
    prompt = event.get_command_args().strip()
    if not prompt:
        return t("gateway.background.usage")

    stamp = datetime.now().strftime('%H%M%S')
    task_id = f"bg_{stamp}_{os.urandom(3).hex()}"
    reply_anchor = self._reply_anchor_for_event(event)

    # Snapshot attachments so the task does not depend on the event object.
    attached_urls = list(event.media_urls) if event.media_urls else []
    attached_types = list(event.media_types) if event.media_types else []

    runner = self._run_background_task(
        prompt,
        event.source,
        task_id,
        event_message_id=reply_anchor,
        media_urls=attached_urls,
        media_types=attached_types,
    )
    task = asyncio.create_task(runner)
    # Hold a reference while the task runs; drop it when it completes.
    self._background_tasks.add(task)
    task.add_done_callback(self._background_tasks.discard)

    preview = prompt if len(prompt) <= 60 else prompt[:60] + "..."
    return t("gateway.background.started", preview=preview, task_id=task_id)
async def _run_background_task(
    self,
    prompt: str,
    source: "SessionSource",
    task_id: str,
    event_message_id: Optional[str] = None,
    media_urls: Optional[List[str]] = None,
    media_types: Optional[List[str]] = None,
) -> None:
    """Execute a background agent task and deliver the result to the chat.

    Builds a fresh ``AIAgent`` whose session id is *task_id*, runs the
    prompt through ``self._run_in_executor_with_context`` and sends the
    outcome (text, images, media files) to ``source.chat_id``. Any
    exception is logged and reported to the chat; nothing is raised.

    Args:
        prompt: The user prompt to run.
        source: Origin of the request; selects the adapter and chat.
        task_id: Identifier used as the agent's session id and task id.
        event_message_id: Passed to ``_thread_metadata_for_source`` to
            build the metadata attached to every outgoing message.
        media_urls: Attachments from the triggering message, if any.
        media_types: Types matched to ``media_urls`` by index; only
            entries starting with ``image/`` are used.
    """
    from run_agent import AIAgent
    media_urls = media_urls or []
    media_types = media_types or []
    adapter = self.adapters.get(source.platform)
    if not adapter:
        logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
        return
    _thread_metadata = self._thread_metadata_for_source(source, event_message_id)
    try:
        user_config = _load_gateway_config()
        model, runtime_kwargs = self._resolve_session_agent_runtime(
            source=source,
            user_config=user_config,
        )
        # Without an API key the agent cannot run; tell the user and stop.
        if not runtime_kwargs.get("api_key"):
            await adapter.send(
                source.chat_id,
                f"❌ Background task {task_id} failed: no provider credentials configured.",
                metadata=_thread_metadata,
            )
            return
        platform_key = _platform_config_key(source.platform)
        from hermes_cli.tools_config import _get_platform_tools
        enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
        agent_cfg = user_config.get("agent") or {}
        disabled_toolsets = agent_cfg.get("disabled_toolsets") or None
        pr = self._provider_routing
        max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
        reasoning_config = self._resolve_session_reasoning_config(source=source)
        # NOTE(review): the next two lines overwrite runner-level attributes
        # from inside a background task — confirm this is intended.
        self._reasoning_config = reasoning_config
        self._service_tier = self._load_service_tier()
        turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
        enriched_prompt = prompt
        if media_urls:
            # Only attachments with an image/* type are used for enrichment.
            image_paths = []
            for i, path in enumerate(media_urls):
                mtype = media_types[i] if i < len(media_types) else ""
                if mtype.startswith("image/"):
                    image_paths.append(path)
            if image_paths:
                try:
                    enriched_prompt = await self._enrich_message_with_vision(
                        prompt, image_paths,
                    )
                except Exception as e:
                    # Enrichment is optional: fall back to the plain prompt.
                    logger.warning("Background task vision enrichment failed: %s", e)
        def run_sync():
            # Synchronous body handed to the executor below. The agent is
            # created here and always cleaned up, even when the run raises.
            agent = AIAgent(
                model=turn_route["model"],
                **turn_route["runtime"],
                max_iterations=max_iterations,
                quiet_mode=True,
                verbose_logging=False,
                enabled_toolsets=enabled_toolsets,
                disabled_toolsets=disabled_toolsets,
                reasoning_config=reasoning_config,
                service_tier=self._service_tier,
                request_overrides=turn_route.get("request_overrides"),
                providers_allowed=pr.get("only"),
                providers_ignored=pr.get("ignore"),
                providers_order=pr.get("order"),
                provider_sort=pr.get("sort"),
                provider_require_parameters=pr.get("require_parameters", False),
                provider_data_collection=pr.get("data_collection"),
                session_id=task_id,
                platform=platform_key,
                user_id=source.user_id,
                user_name=source.user_name,
                chat_id=source.chat_id,
                chat_name=source.chat_name,
                chat_type=source.chat_type,
                thread_id=source.thread_id,
                session_db=self._session_db,
                fallback_model=self._fallback_model,
            )
            try:
                return agent.run_conversation(
                    user_message=enriched_prompt,
                    task_id=task_id,
                )
            finally:
                self._cleanup_agent_resources(agent)
        result = await self._run_in_executor_with_context(run_sync)
        response = result.get("final_response", "") if result else ""
        # No final text but an error was reported: surface the error.
        if not response and result and result.get("error"):
            response = f"Error: {result['error']}"
        if response:
            # Strip media tags first, then images, leaving the plain text.
            media_files, response = adapter.extract_media(response)
            images, text_content = adapter.extract_images(response)
            preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
            header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
            if text_content:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + text_content,
                    metadata=_thread_metadata,
                )
            elif not images and not media_files:
                # Nothing left after extraction and no attachments either.
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + "(No response generated)",
                    metadata=_thread_metadata,
                )
            # Attachment delivery is best effort; failures are ignored.
            for image_url, alt_text in (images or []):
                try:
                    await adapter.send_image(
                        chat_id=source.chat_id,
                        image_url=image_url,
                        caption=alt_text,
                        metadata=_thread_metadata,
                    )
                except Exception:
                    pass
            # Every tagged media file is sent as a document here; the
            # voice flag is not consulted.
            for media_path, _is_voice in (media_files or []):
                try:
                    await adapter.send_document(
                        chat_id=source.chat_id,
                        file_path=media_path,
                        metadata=_thread_metadata,
                    )
                except Exception:
                    pass
        else:
            preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
            await adapter.send(
                chat_id=source.chat_id,
                content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
                metadata=_thread_metadata,
            )
    except Exception as e:
        logger.exception("Background task %s failed", task_id)
        # Report the failure to the chat; if that fails too, give up quietly.
        try:
            await adapter.send(
                chat_id=source.chat_id,
                content=f"❌ Background task {task_id} failed: {e}",
                metadata=_thread_metadata,
            )
        except Exception:
            pass
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
    """Handle /reasoning command — manage reasoning effort and display toggle.

    Usage:
        /reasoning                    Show current effort level and display state
        /reasoning <level>            Set reasoning effort for this session only
        /reasoning <level> --global   Persist reasoning effort to config.yaml
        /reasoning reset              Clear this session's reasoning override
        /reasoning show|on            Show model reasoning in responses
        /reasoning hide|off           Hide model reasoning from responses

    Accepted levels are ``none``, ``minimal``, ``low``, ``medium``,
    ``high`` and ``xhigh``. Every path that changes the effort evicts the
    session's cached agent.

    Returns:
        A localized, user-facing status or confirmation message.
    """
    import yaml
    raw_args = event.get_command_args().strip()
    args, persist_global = self._parse_reasoning_command_args(raw_args)
    config_path = _hermes_home / "config.yaml"
    session_key = self._session_key_for_source(event.source)
    # Refresh runner state before reporting or changing anything.
    self._show_reasoning = self._load_show_reasoning()
    self._reasoning_config = self._resolve_session_reasoning_config(
        source=event.source,
        session_key=session_key,
    )
    def _save_config_key(key_path: str, value):
        """Save a dot-separated key to config.yaml.

        Returns True on success and False (after logging) on any failure.
        """
        try:
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            keys = key_path.split(".")
            current = user_config
            # Walk/create intermediate mappings; a non-dict value on the
            # path is replaced by an empty dict.
            for k in keys[:-1]:
                if k not in current or not isinstance(current[k], dict):
                    current[k] = {}
                current = current[k]
            current[keys[-1]] = value
            atomic_yaml_write(config_path, user_config)
            return True
        except Exception as e:
            logger.error("Failed to save config key %s: %s", key_path, e)
            return False
    # No arguments: report the current state without changing anything.
    if not raw_args:
        rc = self._reasoning_config
        if rc is None:
            level = t("gateway.reasoning.level_default")
        elif rc.get("enabled") is False:
            level = t("gateway.reasoning.level_disabled")
        else:
            level = rc.get("effort", "medium")
        display_state = (
            t("gateway.reasoning.display_on")
            if self._show_reasoning
            else t("gateway.reasoning.display_off")
        )
        has_session_override = session_key in (getattr(self, "_session_reasoning_overrides", {}) or {})
        scope = (
            t("gateway.reasoning.scope_session")
            if has_session_override
            else t("gateway.reasoning.scope_global")
        )
        return t(
            "gateway.reasoning.status",
            level=level,
            scope=scope,
            display=display_state,
        )
    platform_key = _platform_config_key(event.source.platform)
    # Display toggles are stored per platform; the save result is not
    # checked, so the confirmation is returned even if persisting failed.
    if args in {"show", "on"}:
        self._show_reasoning = True
        _save_config_key(f"display.platforms.{platform_key}.show_reasoning", True)
        return t("gateway.reasoning.display_set_on", platform=platform_key)
    if args in {"hide", "off"}:
        self._show_reasoning = False
        _save_config_key(f"display.platforms.{platform_key}.show_reasoning", False)
        return t("gateway.reasoning.display_set_off", platform=platform_key)
    effort = args.strip()
    if effort == "reset":
        # Reset only applies to the session override, never to config.yaml.
        if persist_global:
            return t("gateway.reasoning.reset_global_unsupported")
        self._set_session_reasoning_override(session_key, None)
        self._reasoning_config = self._load_reasoning_config()
        self._evict_cached_agent(session_key)
        return t("gateway.reasoning.reset_done")
    if effort == "none":
        parsed = {"enabled": False}
    elif effort in {"minimal", "low", "medium", "high", "xhigh"}:
        parsed = {"enabled": True, "effort": effort}
    else:
        return t(
            "gateway.reasoning.unknown_arg",
            arg=effort or raw_args.lower(),
        )
    self._reasoning_config = parsed
    if persist_global:
        if _save_config_key("agent.reasoning_effort", effort):
            # Saved globally: drop the session override so the global
            # value applies.
            self._set_session_reasoning_override(session_key, None)
            self._evict_cached_agent(session_key)
            return t("gateway.reasoning.set_global", effort=effort)
        # Saving failed: keep the choice as a session override instead.
        self._set_session_reasoning_override(session_key, parsed)
        self._evict_cached_agent(session_key)
        return t("gateway.reasoning.set_global_save_failed", effort=effort)
    self._set_session_reasoning_override(session_key, parsed)
    self._evict_cached_agent(session_key)
    return t("gateway.reasoning.set_session", effort=effort)
async def _handle_fast_command(self, event: MessageEvent) -> str:
    """Handle /fast — switch the service tier between priority and normal.

    With no argument or ``status`` the current mode is reported.
    ``fast``/``on`` select the ``"priority"`` tier and ``normal``/``off``
    clear it; the choice is written to ``agent.service_tier`` in
    config.yaml. Models without fast-mode support are rejected up front.

    Returns:
        A localized status, confirmation or error message. When the config
        could not be written the "session only" message is returned.
    """
    import yaml
    from hermes_cli.models import model_supports_fast_mode

    args = event.get_command_args().strip().lower()
    config_path = _hermes_home / "config.yaml"
    self._service_tier = self._load_service_tier()

    gateway_config = _load_gateway_config()
    active_model = _resolve_gateway_model(gateway_config)
    if not model_supports_fast_mode(active_model):
        return t("gateway.fast.not_supported")

    def _save_config_key(key_path: str, value):
        """Write *value* at the dotted *key_path* in config.yaml; True on success."""
        try:
            document = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as handle:
                    document = yaml.safe_load(handle) or {}
            *parents, leaf = key_path.split(".")
            node = document
            # Create (or replace non-dict) intermediate mappings on the way.
            for part in parents:
                if part not in node or not isinstance(node[part], dict):
                    node[part] = {}
                node = node[part]
            node[leaf] = value
            atomic_yaml_write(config_path, document)
            return True
        except Exception as exc:
            logger.error("Failed to save config key %s: %s", key_path, exc)
            return False

    if not args or args == "status":
        if self._service_tier == "priority":
            status = t("gateway.fast.status_fast")
        else:
            status = t("gateway.fast.status_normal")
        return t("gateway.fast.status", mode=status)

    if args in {"fast", "on"}:
        self._service_tier = "priority"
        saved_value, label_key = "fast", "gateway.fast.label_fast"
    elif args in {"normal", "off"}:
        self._service_tier = None
        saved_value, label_key = "normal", "gateway.fast.label_normal"
    else:
        return t("gateway.fast.unknown_arg", arg=args)
    label = t(label_key)

    persisted = _save_config_key("agent.service_tier", saved_value)
    reply_key = "gateway.fast.saved" if persisted else "gateway.fast.session_only"
    return t(reply_key, label=label)
async def _handle_yolo_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
    """Handle /yolo — flip the session's dangerous-command approval bypass.

    The bypass is keyed by the session key derived from the event source.
    If it is currently enabled it is disabled, and vice versa.

    Returns:
        An ``EphemeralReply`` carrying the localized new state.
    """
    from tools.approval import (
        disable_session_yolo,
        enable_session_yolo,
        is_session_yolo_enabled,
    )

    session_key = self._session_key_for_source(event.source)
    if is_session_yolo_enabled(session_key):
        toggle, reply_key = disable_session_yolo, "gateway.yolo.disabled"
    else:
        toggle, reply_key = enable_session_yolo, "gateway.yolo.enabled"

    toggle(session_key)
    return EphemeralReply(t(reply_key))
async def _handle_verbose_command(self, event: MessageEvent) -> str:
    """Handle /verbose — advance the tool progress display mode by one step.

    The command only works when ``display.tool_progress_command`` is truthy
    in config.yaml. Each call moves the *current platform's* mode one step
    along off → new → all → verbose → off and stores it under
    ``display.platforms.<platform>.tool_progress``, so platforms keep
    independent settings. An unrecognized current value is treated as
    ``all``.

    Returns:
        The new mode's description followed by a "saved" or "save failed"
        suffix, or the "not enabled" message when the gate is off.
    """
    config_path = _hermes_home / "config.yaml"
    platform_key = _platform_config_key(event.source.platform)

    try:
        user_config = _load_gateway_config()
        gate_value = cfg_get(user_config, "display", "tool_progress_command")
        gate_enabled = is_truthy_value(gate_value, default=False)
    except Exception:
        gate_enabled = False
    if not gate_enabled:
        return t("gateway.verbose.not_enabled")

    cycle = ["off", "new", "all", "verbose"]
    descriptions = {
        "off": t("gateway.verbose.mode_off"),
        "new": t("gateway.verbose.mode_new"),
        "all": t("gateway.verbose.mode_all"),
        "verbose": t("gateway.verbose.mode_verbose"),
    }

    from gateway.display_config import resolve_display_setting

    current = resolve_display_setting(user_config, platform_key, "tool_progress", "all")
    position = cycle.index(current if current in cycle else "all")
    new_mode = cycle[(position + 1) % len(cycle)]
    headline = f"{descriptions[new_mode]}\n"

    try:
        # Descend to display.platforms.<platform>, creating (or replacing
        # non-dict) levels along the way.
        node = user_config
        for level in ("display", "platforms", platform_key):
            if level not in node or not isinstance(node.get(level), dict):
                node[level] = {}
            node = node[level]
        node["tool_progress"] = new_mode
        atomic_yaml_write(config_path, user_config)
        return headline + t("gateway.verbose.saved_suffix", platform=platform_key)
    except Exception as exc:
        logger.warning("Failed to save tool_progress mode: %s", exc)
        return headline + t("gateway.verbose.save_failed", error=exc)
async def _handle_footer_command(self, event: MessageEvent) -> str:
    """Handle /footer command — toggle the runtime-metadata footer.

    Usage:
        /footer          toggle on/off
        /footer on       enable globally (also: enable, true, 1)
        /footer off      disable globally (also: disable, false, 0)
        /footer status   show current state + fields (also: ?)

    The setting is saved to ``display.runtime_footer.enabled`` (global).
    Per-platform overrides under ``display.platforms.<platform>.runtime_footer``
    are respected when computing the effective state but are not modified
    here — edit config.yaml directly for per-platform control.

    Returns:
        A localized status, confirmation, usage or error message.
    """
    from gateway.runtime_footer import resolve_footer_config

    config_path = _hermes_home / "config.yaml"
    platform_key = _platform_config_key(event.source.platform)

    # Read the argument the same way the other command handlers do. The
    # previous implementation only looked at ``event.message``; when that
    # attribute is absent the argument was always empty, so "on", "off"
    # and "status" all degraded to a blind toggle. The raw-text parse is
    # kept as a fallback for event objects without get_command_args().
    arg = ""
    try:
        get_args = getattr(event, "get_command_args", None)
        if callable(get_args):
            arg = (get_args() or "").strip().lower()
        else:
            text = (
                getattr(event, "message", None)
                or getattr(event, "text", None)
                or ""
            ).strip()
            if text.startswith("/"):
                parts = text.split(None, 1)
                if len(parts) > 1:
                    arg = parts[1].strip().lower()
    except Exception:
        arg = ""

    try:
        user_config: dict = _load_gateway_config()
    except Exception as e:
        return t("gateway.config_read_failed", error=e)

    # Effective state includes per-platform overrides.
    effective = resolve_footer_config(user_config, platform_key)

    if arg in {"status", "?"}:
        state = t("gateway.footer.state_on") if effective["enabled"] else t("gateway.footer.state_off")
        fields = ", ".join(effective.get("fields") or [])
        return t(
            "gateway.footer.status",
            state=state,
            fields=fields,
            platform=platform_key,
        )

    if arg in {"on", "enable", "true", "1"}:
        new_state = True
    elif arg in {"off", "disable", "false", "0"}:
        new_state = False
    elif arg == "":
        # Bare /footer flips whatever is currently in effect.
        new_state = not effective["enabled"]
    else:
        return t("gateway.footer.usage")

    try:
        if not isinstance(user_config.get("display"), dict):
            user_config["display"] = {}
        display = user_config["display"]
        if not isinstance(display.get("runtime_footer"), dict):
            display["runtime_footer"] = {}
        display["runtime_footer"]["enabled"] = new_state
        atomic_yaml_write(config_path, user_config)
    except Exception as e:
        logger.warning("Failed to save runtime_footer.enabled: %s", e)
        return t("gateway.config_save_failed", error=e)

    state = t("gateway.footer.state_on") if new_state else t("gateway.footer.state_off")
    example = ""
    if new_state:
        # Show a sample footer so the user sees what will be appended.
        from gateway.runtime_footer import format_runtime_footer

        preview = format_runtime_footer(
            model=_resolve_gateway_model(user_config) or None,
            context_tokens=0,
            context_length=None,
            fields=effective.get("fields") or ["model", "context_pct", "cwd"],
        )
        if preview:
            example = t("gateway.footer.example_line", preview=preview)
    return t("gateway.footer.saved", state=state, example=example)
async def _handle_compress_command(self, event: MessageEvent) -> str:
    """Handle /compress command -- manually compress conversation context.

    Accepts an optional focus topic: ``/compress <focus>`` guides the
    summariser to preserve information related to *focus* while being
    more aggressive about discarding everything else.

    Only ``user`` and ``assistant`` messages with content are compressed.
    A temporary agent performs the work; the stored transcript is rewritten
    with the result and the session's cached agent is evicted.

    Returns:
        A multi-line summary of the compression, or a localized message
        explaining why nothing was done or what failed.
    """
    source = event.source
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)
    # Fewer than four stored messages are not compressed.
    if not history or len(history) < 4:
        return t("gateway.compress.not_enough")
    focus_topic = (event.get_command_args() or "").strip() or None
    try:
        from run_agent import AIAgent
        from agent.manual_compression_feedback import summarize_manual_compression
        from agent.model_metadata import estimate_request_tokens_rough
        session_key = self._session_key_for_source(source)
        model, runtime_kwargs = self._resolve_session_agent_runtime(
            source=source,
            session_key=session_key,
        )
        if not runtime_kwargs.get("api_key"):
            return t("gateway.compress.no_provider")
        # Reduce the transcript to role/content pairs for user and
        # assistant turns that actually have content.
        msgs = [
            {"role": m.get("role"), "content": m.get("content")}
            for m in history
            if m.get("role") in {"user", "assistant"} and m.get("content")
        ]
        tmp_agent = AIAgent(
            **runtime_kwargs,
            model=model,
            max_iterations=4,
            quiet_mode=True,
            skip_memory=True,
            enabled_toolsets=["memory"],
            session_id=session_entry.session_id,
        )
        try:
            # Silence the temporary agent's printing.
            tmp_agent._print_fn = lambda *a, **kw: None
            _sys_prompt = getattr(tmp_agent, "_cached_system_prompt", "") or ""
            _tools = getattr(tmp_agent, "tools", None) or None
            approx_tokens = estimate_request_tokens_rough(
                msgs, system_prompt=_sys_prompt, tools=_tools
            )
            compressor = tmp_agent.context_compressor
            if not compressor.has_content_to_compress(msgs):
                return t("gateway.compress.nothing_to_do")
            loop = asyncio.get_running_loop()
            # The compression call is synchronous; run it in the default
            # executor so the event loop stays responsive.
            compressed, _ = await loop.run_in_executor(
                None,
                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True)
            )
            # If the agent's session id differs after compression, adopt
            # it on the stored session entry before rewriting the transcript.
            new_session_id = tmp_agent.session_id
            if new_session_id != session_entry.session_id:
                session_entry.session_id = new_session_id
                self.session_store._save()
            self.session_store.rewrite_transcript(new_session_id, compressed)
            self.session_store.update_session(
                session_entry.session_key, last_prompt_tokens=0
            )
            new_tokens = estimate_request_tokens_rough(
                compressed, system_prompt=_sys_prompt, tools=_tools
            )
            summary = summarize_manual_compression(
                msgs,
                compressed,
                approx_tokens,
                new_tokens,
            )
            # Capture diagnostics from the compressor for the reply below.
            _summary_aborted = bool(getattr(compressor, "_last_compress_aborted", False))
            _summary_err = getattr(compressor, "_last_summary_error", None)
            _aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None)
            _aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None)
        finally:
            # Runs on success, early return and failure alike.
            self._evict_cached_agent(session_key)
            self._cleanup_agent_resources(tmp_agent)
        lines = [f"🗜️ {summary['headline']}"]
        if focus_topic:
            lines.append(t("gateway.compress.focus_line", topic=focus_topic))
        lines.append(summary["token_line"])
        if summary["note"]:
            lines.append(summary["note"])
        # An aborted summary takes precedence over an auxiliary-model failure.
        if _summary_aborted:
            lines.append(
                t(
                    "gateway.compress.aborted",
                    error=(_summary_err or "unknown error"),
                )
            )
        elif _aux_fail_model:
            lines.append(
                t(
                    "gateway.compress.aux_failed",
                    model=_aux_fail_model,
                    error=(_aux_fail_err or "unknown error"),
                )
            )
        return "\n".join(lines)
    except Exception as e:
        logger.warning("Manual compress failed: %s", e)
        return t("gateway.compress.failed", error=e)
async def _get_telegram_topic_capabilities(self, source: SessionSource) -> dict:
    """Query the bot's topic capability flags through ``get_me``.

    Returns:
        ``{"checked": False}`` when no bot with ``get_me`` is available or
        the call fails. Otherwise ``checked`` is True and the dict carries
        ``has_topics_enabled`` and ``allows_users_to_create_topics``, each
        of which is None when the response does not include it.
    """
    adapters = getattr(self, "adapters", None)
    adapter = self.adapters.get(source.platform) if adapters else None
    bot = getattr(adapter, "_bot", None)
    if bot is None or not hasattr(bot, "get_me"):
        return {"checked": False}

    try:
        me = await bot.get_me()
    except Exception:
        logger.debug("Failed to fetch Telegram getMe topic capabilities", exc_info=True)
        return {"checked": False}

    def _field(name: str):
        # Look for the flag as an attribute, then in api_kwargs, then as a
        # plain dict key.
        if hasattr(me, name):
            return getattr(me, name)
        extras = getattr(me, "api_kwargs", None)
        if isinstance(extras, dict) and name in extras:
            return extras.get(name)
        return me.get(name) if isinstance(me, dict) else None

    capabilities = {"checked": True}
    for flag in ("has_topics_enabled", "allows_users_to_create_topics"):
        capabilities[flag] = _field(flag)
    return capabilities
async def _ensure_telegram_system_topic(self, source: SessionSource) -> None:
    """Best-effort: create a "System" topic, post an intro in it, and pin the intro.

    Each stage (create, send, pin) is attempted only when the previous one
    produced a usable ID; failures are logged at debug level and end the
    sequence without raising.
    """
    adapters = getattr(self, "adapters", None)
    adapter = self.adapters.get(source.platform) if adapters else None
    if adapter is None or not source.chat_id:
        return

    # Stage 1: create the topic through the adapter's helper, if it has one.
    topic_id = None
    topic_factory = getattr(adapter, "_create_dm_topic", None)
    if callable(topic_factory):
        try:
            topic_id = await topic_factory(int(source.chat_id), "System")
        except Exception:
            logger.debug("Failed to create Telegram System topic", exc_info=True)
    if not topic_id:
        return

    # Stage 2: post the intro message inside the new topic.
    intro_id = None
    try:
        sent = await adapter.send(
            source.chat_id,
            "System topic for Hermes commands and status.",
            metadata={"thread_id": str(topic_id)},
        )
        intro_id = getattr(sent, "message_id", None)
    except Exception:
        logger.debug("Failed to send Telegram System topic intro", exc_info=True)
    if not intro_id:
        return

    # Stage 3: pin the intro when the underlying bot supports pinning.
    bot = getattr(adapter, "_bot", None)
    if bot is None or not hasattr(bot, "pin_chat_message"):
        return
    try:
        await bot.pin_chat_message(
            chat_id=int(source.chat_id),
            message_id=int(intro_id),
            disable_notification=True,
        )
    except Exception:
        logger.debug("Failed to pin Telegram System topic intro", exc_info=True)
async def _send_telegram_topic_setup_image(self, source: SessionSource) -> None:
    """Send the bundled BotFather "Threads Settings" screenshot, if possible.

    Does nothing when there is no adapter, no chat ID, the adapter lacks
    ``send_image_file``, or the image file is missing next to this module.
    Send failures are logged at debug level and swallowed.
    """
    adapters = getattr(self, "adapters", None)
    adapter = self.adapters.get(source.platform) if adapters else None
    if adapter is None:
        return
    if not source.chat_id or not hasattr(adapter, "send_image_file"):
        return

    assets_dir = Path(__file__).resolve().parent / "assets"
    image_path = assets_dir / "telegram-botfather-threads-settings.jpg"
    if not image_path.exists():
        return

    try:
        # Reply inside the originating thread when the event came from one.
        await adapter.send_image_file(
            chat_id=source.chat_id,
            image_path=str(image_path),
            caption="BotFather → Bot Settings → Threads Settings",
            metadata={"thread_id": str(source.thread_id)} if source.thread_id else None,
        )
    except Exception:
        logger.debug("Failed to send Telegram topic setup image", exc_info=True)
def _sanitize_telegram_topic_title(self, title: str) -> str:
"""Return a Bot API-safe forum topic name from a generated session title."""
cleaned = re.sub(r"\s+", " ", str(title or "")).strip()
if not cleaned:
return "Hermes Chat"
if len(cleaned) > 120:
cleaned = cleaned[:117].rstrip() + "..."
return cleaned
async def _rename_telegram_topic_for_session_title(
    self,
    source: SessionSource,
    session_id: str,
    title: str,
) -> None:
    """Best-effort rename of the Telegram topic to match an auto-generated title.

    The rename is skipped when the source is not a topic lane with both a chat
    ID and thread ID, when auto-rename is disabled, when the adapter class's
    ``_get_dm_topic_info`` returns a dict for this topic, when the topic is
    bound to a different session (or the binding lookup fails), or when no
    adapter is available. All rename errors are logged at debug level.
    """
    if not (self._is_telegram_topic_lane(source) and source.chat_id and source.thread_id):
        return
    if self._telegram_topic_auto_rename_disabled(source):
        return

    chat_ref = str(source.chat_id)
    thread_ref = str(source.thread_id)

    adapters = getattr(self, "adapters", None)
    adapter = self.adapters.get(source.platform) if adapters else None
    if adapter is not None:
        # The helper is looked up on the adapter's class and invoked unbound.
        info_getter = getattr(type(adapter), "_get_dm_topic_info", None)
        if callable(info_getter):
            try:
                configured_topic = info_getter(adapter, chat_ref, thread_ref)
            except Exception:
                configured_topic = None
            if isinstance(configured_topic, dict):
                return

    db = getattr(self, "_session_db", None)
    if db is not None:
        try:
            binding = db.get_telegram_topic_binding(
                chat_id=chat_ref,
                thread_id=thread_ref,
            )
            bound_elsewhere = bool(binding) and (
                str(binding.get("session_id") or "") != str(session_id)
            )
            if bound_elsewhere:
                return
        except Exception:
            logger.debug("Failed to verify Telegram topic binding before rename", exc_info=True)
            return

    if adapter is None:
        return

    topic_name = self._sanitize_telegram_topic_title(title)
    try:
        # Preferred path: the adapter's own rename helper.
        adapter_rename = getattr(adapter, "rename_dm_topic", None)
        if adapter_rename is not None:
            await adapter_rename(
                chat_id=chat_ref,
                thread_id=thread_ref,
                name=topic_name,
            )
            return

        # Fallback: call the bot directly, accepting either method spelling.
        bot = getattr(adapter, "_bot", None)
        editor = None
        if bot is not None:
            editor = getattr(bot, "edit_forum_topic", None)
            if editor is None:
                editor = getattr(bot, "editForumTopic", None)
        if editor is None:
            return

        try:
            await editor(
                chat_id=int(source.chat_id),
                message_thread_id=int(source.thread_id),
                name=topic_name,
            )
        except (TypeError, ValueError):
            # Integer coercion (or the int-typed call) failed; retry with raw IDs.
            await editor(
                chat_id=source.chat_id,
                message_thread_id=source.thread_id,
                name=topic_name,
            )
    except Exception:
        logger.debug("Failed to rename Telegram topic for auto-generated title", exc_info=True)
def _telegram_topic_auto_rename_disabled(self, source: SessionSource) -> bool:
    """Tell whether topic auto-rename is switched off for the source's platform.

    Reads ``disable_topic_auto_rename`` from the platform config's ``extra``
    mapping. A missing config, missing key, or ``None`` value yields False
    (auto-rename stays on). Strings count as true only for
    ``1``/``true``/``yes``/``on`` (case-insensitive, trimmed); any other
    value goes through ``bool()``.
    """
    config = getattr(self, "config", None)
    if not (config and getattr(self.config, "platforms", None)):
        return False

    platform_cfg = self.config.platforms.get(source.platform)
    if platform_cfg is None:
        return False

    extra = getattr(platform_cfg, "extra", None) or {}
    flag = extra.get("disable_topic_auto_rename")
    if flag is None:
        return False
    if isinstance(flag, bool):
        return flag
    if isinstance(flag, str):
        normalized = flag.strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return bool(flag)
def _schedule_telegram_topic_title_rename(
    self,
    source: SessionSource,
    session_id: str,
    title: str,
) -> None:
    """Queue the topic rename coroutine onto an event loop without awaiting it.

    Uses the running loop when there is one, otherwise ``self._gateway_loop``.
    Nothing is scheduled for an empty title, a non-topic source, a disabled
    auto-rename, or a missing/closed loop. Failures of the scheduled rename
    are logged at debug level from a done-callback.
    """
    if not title or not self._is_telegram_topic_lane(source):
        return
    if self._telegram_topic_auto_rename_disabled(source):
        return

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread; fall back to the stored loop.
        loop = getattr(self, "_gateway_loop", None)
    if loop is None or loop.is_closed():
        return

    # Hand the coroutine its own copy of the source when it can be copied.
    try:
        source_snapshot = dataclasses.replace(source)
    except Exception:
        source_snapshot = source

    def _report_outcome(done) -> None:
        try:
            done.result()
        except Exception:
            logger.debug("Telegram topic title rename failed", exc_info=True)

    rename_coro = self._rename_telegram_topic_for_session_title(
        source_snapshot, session_id, title
    )
    future = safe_schedule_threadsafe(
        rename_coro,
        loop,
        logger=logger,
        log_message="Telegram topic title rename failed to schedule",
    )
    if future is not None:
        future.add_done_callback(_report_outcome)
# Minimum number of seconds between capability-hint screenshots per chat.
_TELEGRAM_CAPABILITY_HINT_COOLDOWN_S = 300.0

def _should_send_telegram_capability_hint(self, source: SessionSource) -> bool:
    """Rate-limit the BotFather Threads Settings screenshot.

    If a user sends /topic repeatedly while Threads Settings are still
    off, we shouldn't keep re-uploading the screenshot every time.

    Args:
        source: Message source; only ``source.chat_id`` is read.

    Returns:
        True when the hint should be sent now (and the send time has been
        recorded for the chat), False while the chat is still inside the
        cooldown window. A source without a chat ID is never rate-limited.
    """
    if not hasattr(self, "_telegram_capability_hint_ts"):
        self._telegram_capability_hint_ts = {}
    chat_id = str(source.chat_id or "")
    if not chat_id:
        return True
    now = time.monotonic()
    # ``None`` means "never sent". A numeric default such as 0.0 is wrong
    # here: the monotonic clock's reference point is undefined, so when it
    # reads less than the cooldown the very first hint would be suppressed.
    last = self._telegram_capability_hint_ts.get(chat_id)
    if last is not None and now - last < self._TELEGRAM_CAPABILITY_HINT_COOLDOWN_S:
        return False
    self._telegram_capability_hint_ts[chat_id] = now
    return True
def _telegram_topic_help_text(self) -> str:
return (
"/topic — enable multi-session DM mode (one bot, many parallel chats)\n"
"\n"
"Usage:\n"
" /topic Enable topic mode, or show status if already on\n"
" /topic help Show this message\n"
" /topic off Disable topic mode and clear topic bindings\n"
" /topic <id> Inside a topic: restore a previous session by ID\n"
"\n"
"How it works:\n"
"1. Run /topic once in this DM — Hermes checks BotFather Threads\n"
" Settings are enabled and flips on multi-session mode.\n"
"2. Tap All Messages at the top of the bot and send any message.\n"
" Telegram creates a new topic for that message; each topic is\n"
" an independent Hermes session (fresh history, fresh context).\n"
"3. The root DM becomes a system lobby — send /topic, /status,\n"
" /help, /usage there. Normal prompts go in a topic.\n"
"4. /new inside a topic resets just that topic's session.\n"
"5. /topic <id> inside a topic restores an old session into it."
)
def _disable_telegram_topic_mode_for_chat(self, source: SessionSource) -> str:
    """Turn topic mode off for the source chat (``/topic off``) and report the outcome.

    Returns a user-facing message for each case: session DB unavailable,
    unknown chat ID, topic mode not enabled (also used when the enabled-check
    raises), failure to disable, or success. On success the per-chat reminder
    and capability-hint timestamps held on this instance are dropped.
    """
    db = self._session_db
    if not db:
        from hermes_state import format_session_db_unavailable
        return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))

    chat_id = str(source.chat_id or "")
    if not chat_id:
        return "Could not determine chat ID."

    try:
        is_enabled = db.is_telegram_topic_mode_enabled(
            chat_id=chat_id,
            user_id=str(source.user_id or ""),
        )
    except Exception:
        # A failed lookup is treated the same as "not enabled".
        is_enabled = False
    if not is_enabled:
        return "Multi-session topic mode is not currently enabled for this chat."

    try:
        db.disable_telegram_topic_mode(chat_id=chat_id)
    except Exception as exc:
        logger.exception("Failed to disable Telegram topic mode")
        return f"Failed to disable topic mode: {exc}"

    # Forget per-chat rate-limit timestamps kept on this instance.
    for cache_name in ("_telegram_lobby_reminder_ts", "_telegram_capability_hint_ts"):
        cache = getattr(self, cache_name, None)
        if isinstance(cache, dict):
            cache.pop(chat_id, None)

    confirmation = (
        "Multi-session topic mode is now OFF for this chat.\n\n"
        "Existing topics in Telegram aren't removed — "
        "they'll just stop being gated as independent sessions. "
        "The root DM works as a normal Hermes chat again. "
        "Run /topic to re-enable later."
    )
    return confirmation
async def _handle_topic_command(self, event: MessageEvent, args: str = "") -> str:
    """Handle /topic for Telegram DM user-managed topic sessions.

    Dispatch, in order:
      * not a Telegram DM, or no session DB -> explanatory message;
      * ``help``/``?``/``-h``/``--help`` -> static help text;
      * ``off``/``disable``/``stop`` -> disable topic mode for the chat;
      * any other argument -> restore that session into the current topic
        (requires ``source.thread_id``);
      * no argument -> check bot capabilities, enable topic mode, then
        report status for the current topic or the root DM.

    Args:
        event: Incoming command event.
        args: Overwritten below from ``event.get_command_args()``; the
            passed-in value is not used.

    Returns:
        The user-facing reply text.
    """
    source = event.source
    if source.platform != Platform.TELEGRAM or source.chat_type != "dm":
        return t("gateway.topic.not_telegram_dm")
    if not self._session_db:
        from hermes_state import format_session_db_unavailable
        return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))
    auth_fn = getattr(self, "_is_user_authorized", None)
    if callable(auth_fn):
        try:
            if not auth_fn(source):
                return t("gateway.topic.unauthorized")
        except Exception:
            # NOTE(review): an exception from the auth check is only logged and
            # the command then proceeds (fail-open) — confirm this is intended.
            logger.debug("Topic auth check failed", exc_info=True)
    # The ``args`` parameter is replaced by the arguments parsed from the event.
    args = event.get_command_args().strip()
    if args.lower() in {"help", "?", "-h", "--help"}:
        return self._telegram_topic_help_text()
    if args.lower() in {"off", "disable", "stop"}:
        return self._disable_telegram_topic_mode_for_chat(source)
    if args:
        # Any other argument is treated as a session ID to restore.
        if not source.thread_id:
            return t("gateway.topic.restore_needs_topic")
        return await self._restore_telegram_topic_session(event, args)
    capabilities = await self._get_telegram_topic_capabilities(source)
    if capabilities.get("checked"):
        # Only an explicit False blocks activation; None (flag absent) passes.
        if capabilities.get("has_topics_enabled") is False:
            if self._should_send_telegram_capability_hint(source):
                await self._send_telegram_topic_setup_image(source)
            return t("gateway.topic.topics_disabled")
        if capabilities.get("allows_users_to_create_topics") is False:
            if self._should_send_telegram_capability_hint(source):
                await self._send_telegram_topic_setup_image(source)
            return t("gateway.topic.topics_user_disallowed")
    try:
        self._session_db.enable_telegram_topic_mode(
            chat_id=str(source.chat_id),
            user_id=str(source.user_id),
            has_topics_enabled=capabilities.get("has_topics_enabled"),
            allows_users_to_create_topics=capabilities.get("allows_users_to_create_topics"),
        )
    except Exception as exc:
        logger.exception("Failed to enable Telegram topic mode")
        return t("gateway.topic.enable_failed", error=exc)
    if not source.thread_id:
        # Invoked from the root DM: try to set up the managed System topic.
        await self._ensure_telegram_system_topic(source)
    if source.thread_id:
        # Invoked inside a topic: report which session (if any) it is bound to.
        try:
            binding = self._session_db.get_telegram_topic_binding(
                chat_id=str(source.chat_id),
                thread_id=str(source.thread_id),
            )
        except Exception:
            logger.debug("Failed to read Telegram topic binding", exc_info=True)
            binding = None
        if binding:
            session_id = str(binding.get("session_id") or "")
            title = None
            try:
                title = self._session_db.get_session_title(session_id)
            except Exception:
                title = None
            session_label = title or t("gateway.topic.untitled_session")
            return t(
                "gateway.topic.bound_status",
                label=session_label,
                session_id=session_id,
            )
        return t("gateway.topic.thread_ready")
    return self._telegram_topic_root_status_message(source)
def _telegram_topic_root_status_message(self, source: SessionSource) -> str:
    """Build the root-DM status text shown once topic mode is enabled.

    Lists up to ten unlinked sessions for the chat/user with restore
    instructions, or a shorter "none found" variant. A failed session
    lookup is logged at debug level and treated as an empty list.
    """
    step_open_topic = (
        "1. Create or open a topic. To create a new one, open All Messages and send any message there."
    )
    step_send_command = "2. Send /topic <session-id> inside that topic."

    lines = [
        "Telegram multi-session topics are enabled.",
        "",
        "To create a new Hermes chat, open All Messages at the top of this "
        "bot interface and send any message there. Telegram will create a "
        "new topic for it.",
        "",
    ]

    try:
        sessions = self._session_db.list_unlinked_telegram_sessions_for_user(
            chat_id=str(source.chat_id),
            user_id=str(source.user_id),
            limit=10,
        )
    except Exception:
        logger.debug("Failed to list unlinked Telegram sessions", exc_info=True)
        sessions = []

    if not sessions:
        lines.extend([
            "No previous unlinked Telegram sessions found.",
            "",
            "To restore a previous session later:",
            step_open_topic,
            step_send_command,
        ])
        return "\n".join(lines)

    lines.append("Previous unlinked sessions:")
    for record in sessions:
        record_id = str(record.get("id") or "")
        record_title = str(record.get("title") or "Untitled session")
        record_preview = str(record.get("preview") or "").strip()
        entry = f"- {record_title} — `{record_id}`"
        if record_preview:
            entry += f" — {record_preview}"
        lines.append(entry)
    lines.extend([
        "",
        "To restore one:",
        step_open_topic,
        step_send_command,
        f"Example: Send /topic {sessions[0].get('id')} inside a topic.",
    ])
    return "\n".join(lines)
async def _restore_telegram_topic_session(self, event: MessageEvent, raw_session_id: str) -> str:
    """Bind an existing Telegram session owned by this user to the current topic.

    Refuses when the session cannot be resolved, is not a Telegram session,
    belongs to another user, or is already linked to a different topic.
    On success returns a confirmation that includes the session's last
    assistant message when one can be read.

    Raises:
        ValueError: Re-raised from ``bind_telegram_topic`` unless its message
            contains "already linked".
    """
    already_linked_reply = "That session is already linked to another Telegram topic."

    source = event.source
    db = self._session_db
    requested = raw_session_id.strip()

    session_id = db.resolve_session_id(requested)
    if not session_id:
        return f"Session not found: {requested}"
    session = db.get_session(session_id)
    if not session:
        return f"Session not found: {requested}"

    # Ownership checks: only this user's Telegram sessions may be restored.
    if str(session.get("source") or "") != "telegram":
        return "That session is not a Telegram session and cannot be restored into this topic."
    if str(session.get("user_id") or "") != str(source.user_id):
        return "That session does not belong to this Telegram user."

    linked = db.is_telegram_session_linked_to_topic(session_id=session_id)
    current_binding = db.get_telegram_topic_binding(
        chat_id=str(source.chat_id),
        thread_id=str(source.thread_id),
    )
    # A linked session is acceptable only when the link is to this very topic.
    if linked and (not current_binding or current_binding.get("session_id") != session_id):
        return already_linked_reply

    session_key = self._session_key_for_source(source)
    try:
        db.bind_telegram_topic(
            chat_id=str(source.chat_id),
            thread_id=str(source.thread_id),
            user_id=str(source.user_id),
            session_key=session_key,
            session_id=session_id,
            managed_mode="restored",
        )
    except ValueError as exc:
        if "already linked" in str(exc):
            return already_linked_reply
        raise

    title = db.get_session_title(session_id) or session_id

    # Newest-first scan for the last non-empty assistant message.
    try:
        last_assistant = next(
            (
                str(entry.get("content"))
                for entry in reversed(db.get_messages(session_id))
                if entry.get("role") == "assistant" and entry.get("content")
            ),
            None,
        )
    except Exception:
        last_assistant = None

    response = f"Session restored: {title}"
    if last_assistant:
        response += f"\n\nLast Hermes message:\n{last_assistant}"
    return response
async def _handle_title_command(self, event: MessageEvent) -> str:
    """Handle /title — show the current session's title, or set it when given one.

    When the session has no title yet, a ``create_session`` call is attempted
    first (errors ignored). A supplied title is sanitized before it is stored;
    ``ValueError`` from sanitizing or storing is passed back to the user as a
    warning message.
    """
    source = event.source
    session_id = self.session_store.get_or_create_session(source).session_id

    db = self._session_db
    if not db:
        from hermes_state import format_session_db_unavailable
        return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))

    if db.get_session_title(session_id) is None:
        try:
            db.create_session(
                session_id=session_id,
                source=source.platform.value if source.platform else "unknown",
                user_id=source.user_id,
            )
        except Exception:
            # Creation is best-effort; any error is ignored here.
            pass

    requested_title = event.get_command_args().strip()

    # No argument: report the current title.
    if not requested_title:
        current = db.get_session_title(session_id)
        if current:
            return t("gateway.title.current_with_title", session_id=session_id, title=current)
        return t("gateway.title.current_no_title", session_id=session_id)

    # Argument given: sanitize, then store.
    try:
        cleaned = db.sanitize_title(requested_title)
    except ValueError as e:
        return t("gateway.shared.warn_passthrough", error=e)
    if not cleaned:
        return t("gateway.title.empty_after_clean")

    try:
        stored = db.set_session_title(session_id, cleaned)
    except ValueError as e:
        return t("gateway.shared.warn_passthrough", error=e)
    if stored:
        return t("gateway.title.set_to", title=cleaned)
    return t("gateway.title.not_found")
async def _handle_resume_command(self, event: MessageEvent) -> str:
    """Handle /resume command — switch to a previously-named session.

    With no argument, lists up to ten titled sessions for the caller's
    platform. With a name, resolves it to a session ID (following
    ``resolve_resume_session_id`` when that call succeeds), re-points the
    current session key at the target, and drops cached agent state.

    Args:
        event: Incoming command event; ``event.source`` identifies the chat
            and ``event.get_command_args()`` supplies the session name.

    Returns:
        A localized, user-facing status or listing message.
    """
    if not self._session_db:
        from hermes_state import format_session_db_unavailable
        return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))
    source = event.source
    session_key = self._session_key_for_source(source)
    name = event.get_command_args().strip()
    if not name:
        # No name given: list recent titled sessions for this platform.
        try:
            user_source = source.platform.value if source.platform else None
            sessions = self._session_db.list_sessions_rich(
                source=user_source, limit=10
            )
            titled = [s for s in sessions if s.get("title")]
            if not titled:
                return t("gateway.resume.no_named_sessions")
            lines = [t("gateway.resume.list_header")]
            for s in titled[:10]:
                title = s["title"]
                # ``preview`` may be present but None; coerce before slicing so
                # one such row does not turn the whole listing into an error.
                preview = str(s.get("preview") or "")[:40]
                preview_part = t("gateway.resume.list_preview_suffix", preview=preview) if preview else ""
                lines.append(t("gateway.resume.list_item", title=title, preview_part=preview_part))
            lines.append(t("gateway.resume.list_footer"))
            return "\n".join(lines)
        except Exception as e:
            logger.debug("Failed to list titled sessions: %s", e)
            return t("gateway.resume.list_failed", error=e)
    target_id = self._session_db.resolve_session_by_title(name)
    if not target_id:
        return t("gateway.resume.not_found", name=name)
    try:
        target_id = self._session_db.resolve_resume_session_id(target_id)
    except Exception as e:
        # Keep the originally resolved ID when continuation lookup fails.
        logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)
    current_entry = self.session_store.get_or_create_session(source)
    if current_entry.session_id == target_id:
        return t("gateway.resume.already_on", name=name)
    self._release_running_agent_state(session_key)
    new_entry = self.session_store.switch_session(session_key, target_id)
    if not new_entry:
        return t("gateway.resume.switch_failed")
    self._clear_session_boundary_security_state(session_key)
    self._evict_cached_agent(session_key)
    title = self._session_db.get_session_title(target_id) or name
    history = self.session_store.load_transcript(target_id)
    # Count only user-authored messages for the summary line.
    msg_count = sum(1 for m in history if m.get("role") == "user") if history else 0
    if not msg_count:
        return t("gateway.resume.resumed_no_count", title=title)
    if msg_count == 1:
        return t("gateway.resume.resumed_one", title=title, count=msg_count)
    return t("gateway.resume.resumed_many", title=title, count=msg_count)
async def _handle_branch_command(self, event: MessageEvent) -> str:
    """Handle /branch [name] — fork the current session into a new independent copy.

    Copies conversation history to a new session so the user can explore
    a different approach without losing the original.
    Inspired by Claude Code's /branch command.

    Args:
        event: Incoming command event; the optional argument is used as the
            branch title. Without it, the title comes from
            ``get_next_title_in_lineage`` on the current title (or "branch").

    Returns:
        A localized message describing the new branch, or an error message
        when there is no history, the session DB is unavailable, or the
        branch session cannot be created or switched to.
    """
    import uuid as _uuid
    if not self._session_db:
        from hermes_state import format_session_db_unavailable
        return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))
    source = event.source
    session_key = self._session_key_for_source(source)
    current_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(current_entry.session_id)
    if not history:
        return t("gateway.branch.no_conversation")
    branch_name = event.get_command_args().strip()
    # New session ID: local timestamp plus a short random suffix.
    from datetime import datetime as _dt
    now = _dt.now()
    timestamp_str = now.strftime("%Y%m%d_%H%M%S")
    short_uuid = _uuid.uuid4().hex[:6]
    new_session_id = f"{timestamp_str}_{short_uuid}"
    if branch_name:
        branch_title = branch_name
    else:
        current_title = self._session_db.get_session_title(current_entry.session_id)
        base = current_title or "branch"
        branch_title = self._session_db.get_next_title_in_lineage(base)
    parent_session_id = current_entry.session_id
    try:
        self._session_db.create_session(
            session_id=new_session_id,
            source=source.platform.value if source.platform else "gateway",
            model=(self.config.get("model", {}) or {}).get("default") if isinstance(self.config, dict) else None,
            parent_session_id=parent_session_id,
        )
    except Exception as e:
        logger.error("Failed to create branch session: %s", e)
        return t("gateway.branch.create_failed", error=e)
    # Copy is still best-effort per message, but failures are now recorded
    # instead of silently producing a branch with missing history.
    copy_failures = 0
    for msg in history:
        try:
            self._session_db.append_message(
                session_id=new_session_id,
                role=msg.get("role", "user"),
                content=msg.get("content"),
                tool_name=msg.get("tool_name") or msg.get("name"),
                tool_calls=msg.get("tool_calls"),
                tool_call_id=msg.get("tool_call_id"),
                finish_reason=msg.get("finish_reason"),
                reasoning=msg.get("reasoning"),
                reasoning_content=msg.get("reasoning_content"),
                reasoning_details=msg.get("reasoning_details"),
                codex_reasoning_items=msg.get("codex_reasoning_items"),
                codex_message_items=msg.get("codex_message_items"),
            )
        except Exception:
            copy_failures += 1
            logger.debug(
                "Failed to copy a message into branch session %s",
                new_session_id,
                exc_info=True,
            )
    if copy_failures:
        logger.warning(
            "Branch session %s is missing %d of %d message(s) from parent %s",
            new_session_id,
            copy_failures,
            len(history),
            parent_session_id,
        )
    try:
        self._session_db.set_session_title(new_session_id, branch_title)
    except Exception:
        # The branch remains usable without a stored title; record why it failed.
        logger.debug(
            "Failed to set title for branch session %s",
            new_session_id,
            exc_info=True,
        )
    new_entry = self.session_store.switch_session(session_key, new_session_id)
    if not new_entry:
        return t("gateway.branch.switch_failed")
    self._clear_session_boundary_security_state(session_key)
    self._evict_cached_agent(session_key)
    msg_count = sum(1 for m in history if m.get("role") == "user")
    key = "gateway.branch.branched_one" if msg_count == 1 else "gateway.branch.branched_many"
    return t(key, title=branch_title, count=msg_count, parent=parent_session_id, new=new_session_id)
async def _handle_usage_command(self, event: MessageEvent) -> str:
    """Handle /usage command -- show token usage for the current session.

    Checks both _running_agents (mid-turn) and _agent_cache (between turns)
    so that rate limits, cost estimates, and detailed token breakdowns are
    available whenever the user asks, not only while the agent is running.

    Output falls through three tiers:
      1. A live or cached agent with at least one API call: detailed token,
         cost, and context lines.
      2. Otherwise, a non-empty transcript: message count and a rough token
         estimate.
      3. Otherwise, account usage lines alone, or a "no data" message.
    Account usage lines, when fetched, are appended to tiers 1 and 2.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    agent = self._running_agents.get(session_key)
    if not agent or agent is _AGENT_PENDING_SENTINEL:
        # No usable running agent: fall back to the agent cache, if present.
        _cache_lock = getattr(self, "_agent_cache_lock", None)
        _cache = getattr(self, "_agent_cache", None)
        if _cache_lock and _cache is not None:
            with _cache_lock:
                cached = _cache.get(session_key)
                if cached:
                    # The agent is the first element of the cache entry.
                    agent = cached[0]
    # Credentials for the account-usage lookup come from the agent when one exists.
    provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
    base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
    api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
    if not provider and getattr(self, "_session_db", None) is not None:
        # No provider from an agent: use billing fields persisted on the session row.
        try:
            _entry_for_billing = self.session_store.get_or_create_session(source)
            persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
        except Exception:
            persisted = {}
        provider = provider or persisted.get("billing_provider")
        base_url = base_url or persisted.get("billing_base_url")
    account_lines: list[str] = []
    if provider:
        try:
            # fetch_account_usage runs in a worker thread via asyncio.to_thread.
            account_snapshot = await asyncio.to_thread(
                fetch_account_usage,
                provider,
                base_url=base_url,
                api_key=api_key,
            )
        except Exception:
            account_snapshot = None
        if account_snapshot:
            account_lines = render_account_usage_lines(account_snapshot, markdown=True)
    # Tier 1: detailed stats from an agent that has made at least one API call.
    if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
        lines = []
        rl_state = agent.get_rate_limit_state()
        if rl_state and rl_state.has_data:
            from agent.rate_limit_tracker import format_rate_limit_compact
            lines.append(t("gateway.usage.rate_limits", state=format_rate_limit_compact(rl_state)))
            lines.append("")
        # Missing or None counters are treated as zero.
        input_tokens = getattr(agent, "session_input_tokens", 0) or 0
        output_tokens = getattr(agent, "session_output_tokens", 0) or 0
        cache_read = getattr(agent, "session_cache_read_tokens", 0) or 0
        cache_write = getattr(agent, "session_cache_write_tokens", 0) or 0
        lines.append(t("gateway.usage.header_session"))
        lines.append(t("gateway.usage.label_model", model=agent.model))
        lines.append(t("gateway.usage.label_input_tokens", count=f"{input_tokens:,}"))
        if cache_read:
            lines.append(t("gateway.usage.label_cache_read", count=f"{cache_read:,}"))
        if cache_write:
            lines.append(t("gateway.usage.label_cache_write", count=f"{cache_write:,}"))
        lines.append(t("gateway.usage.label_output_tokens", count=f"{output_tokens:,}"))
        lines.append(t("gateway.usage.label_total", count=f"{agent.session_total_tokens:,}"))
        lines.append(t("gateway.usage.label_api_calls", count=agent.session_api_calls))
        try:
            from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
            cost_result = estimate_usage_cost(
                agent.model,
                CanonicalUsage(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    cache_read_tokens=cache_read,
                    cache_write_tokens=cache_write,
                ),
                provider=getattr(agent, "provider", None),
                base_url=getattr(agent, "base_url", None),
            )
            if cost_result.amount_usd is not None:
                # "~" marks an estimated (rather than exact) amount.
                prefix = "~" if cost_result.status == "estimated" else ""
                lines.append(t("gateway.usage.label_cost", prefix=prefix, amount=f"{float(cost_result.amount_usd):.4f}"))
            elif cost_result.status == "included":
                lines.append(t("gateway.usage.label_cost_included"))
        except Exception:
            # Cost estimation is optional; any failure just omits the cost line.
            pass
        ctx = agent.context_compressor
        if ctx.last_prompt_tokens:
            # Percentage is capped at 100 and is 0 when context_length is falsy.
            pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0
            lines.append(t("gateway.usage.label_context", used=f"{ctx.last_prompt_tokens:,}", total=f"{ctx.context_length:,}", pct=f"{pct:.0f}"))
        if ctx.compression_count:
            lines.append(t("gateway.usage.label_compressions", count=ctx.compression_count))
        if account_lines:
            lines.append("")
            lines.extend(account_lines)
        return "\n".join(lines)
    # Tier 2: no agent stats; estimate from the stored transcript.
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)
    if history:
        from agent.model_metadata import estimate_messages_tokens_rough
        # Only user/assistant messages with content feed the estimate.
        msgs = [m for m in history if m.get("role") in {"user", "assistant"} and m.get("content")]
        approx = estimate_messages_tokens_rough(msgs)
        lines = [
            t("gateway.usage.header_session_info"),
            t("gateway.usage.label_messages", count=len(msgs)),
            t("gateway.usage.label_estimated_context", count=f"{approx:,}"),
            t("gateway.usage.detailed_after_first"),
        ]
        if account_lines:
            lines.append("")
            lines.extend(account_lines)
        return "\n".join(lines)
    # Tier 3: account usage only, or nothing at all.
    if account_lines:
        return "\n".join(account_lines)
    return t("gateway.usage.no_data")
async def _handle_insights_command(self, event: MessageEvent) -> str:
    """Handle /insights command -- show usage insights and analytics.

    Accepted arguments: ``--days N``, ``--source NAME``, or a bare integer
    as the number of days (default 30). Unrecognized tokens are skipped.

    Args:
        event: Incoming command event supplying the argument string.

    Returns:
        The formatted insights report, or a localized error message when the
        day count is invalid or report generation fails.
    """
    args = event.get_command_args().strip()
    # A figure/en/em dash or horizontal bar directly before "days"/"source"
    # is rewritten to "--" so the flag still parses.
    args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
    days = 30
    source = None
    if args:
        parts = args.split()
        i = 0
        while i < len(parts):
            if parts[i] == "--days" and i + 1 < len(parts):
                try:
                    days = int(parts[i + 1])
                except ValueError:
                    return t("gateway.insights.invalid_days", value=parts[i + 1])
                i += 2
            elif parts[i] == "--source" and i + 1 < len(parts):
                source = parts[i + 1]
                i += 2
            elif parts[i].isdecimal():
                # isdecimal() matches exactly what int() can parse; isdigit()
                # also accepts characters such as "²" that make int() raise.
                days = int(parts[i])
                i += 1
            else:
                i += 1
    try:
        from hermes_state import SessionDB
        from agent.insights import InsightsEngine
        loop = asyncio.get_running_loop()

        def _run_insights():
            db = SessionDB()
            try:
                engine = InsightsEngine(db)
                report = engine.generate(days=days, source=source)
                return engine.format_gateway(report)
            finally:
                # Close the DB handle even when generation or formatting raises.
                db.close()

        # The report is built in the default executor, off the event loop.
        return await loop.run_in_executor(None, _run_insights)
    except Exception as e:
        logger.error("Insights command error: %s", e, exc_info=True)
        return t("gateway.insights.error", error=e)
async def _handle_reload_mcp_command(self, event: MessageEvent) -> Optional[str]:
    """Handle /reload-mcp — run the MCP reload, behind a confirmation by default.

    The reload runs immediately when the user config has
    ``approvals.mcp_reload_confirm`` set to a falsy value. Otherwise the
    request goes through ``_request_slash_confirm``: choosing "cancel"
    aborts, and choosing "always" additionally tries to persist
    ``approvals.mcp_reload_confirm = False`` before reloading and appends a
    follow-up note to the result.
    """
    session_key = self._session_key_for_source(event.source)

    user_config = self._read_user_config()
    approvals = user_config.get("approvals") if isinstance(user_config, dict) else None
    if isinstance(approvals, dict):
        needs_confirmation = bool(approvals.get("mcp_reload_confirm", True))
    else:
        needs_confirmation = True

    if not needs_confirmation:
        return await self._execute_mcp_reload(event)

    async def _on_confirm(choice: str) -> Optional[str]:
        if choice == "cancel":
            return t("gateway.reload_mcp.cancelled")

        remember_choice = choice == "always"
        if remember_choice:
            # Persisting the opt-out is best-effort; the reload runs regardless.
            try:
                from cli import save_config_value
                save_config_value("approvals.mcp_reload_confirm", False)
                logger.info(
                    "User opted out of /reload-mcp confirmation (session=%s)",
                    session_key,
                )
            except Exception as exc:
                logger.warning("Failed to persist mcp_reload_confirm=false: %s", exc)

        outcome = await self._execute_mcp_reload(event)
        if remember_choice:
            return f"{outcome}\n\n" + t("gateway.reload_mcp.always_followup")
        return outcome

    return await self._request_slash_confirm(
        event=event,
        command="reload-mcp",
        title="/reload-mcp",
        message=t("gateway.reload_mcp.confirm_prompt"),
        handler=_on_confirm,
    )
async def _execute_mcp_reload(self, event: MessageEvent) -> str:
    """Shut down and rediscover MCP servers, then report what changed.

    Server names are snapshotted before and after the reload to classify
    them as reconnected, added, or removed. A bracketed notice describing
    the change is appended to the session transcript (errors ignored).
    Any failure of the reload itself is logged and returned as a localized
    error message.
    """
    loop = asyncio.get_running_loop()

    def _names(group) -> str:
        return ", ".join(sorted(group))

    try:
        from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock

        with _lock:
            before = set(_servers.keys())
        # Shutdown and discovery both run in the default executor.
        await loop.run_in_executor(None, shutdown_mcp_servers)
        new_tools = await loop.run_in_executor(None, discover_mcp_tools)
        with _lock:
            after = set(_servers.keys())

        added = after - before
        removed = before - after
        reconnected = after & before

        # User-facing summary.
        lines = [t("gateway.reload_mcp.header")]
        if reconnected:
            lines.append(t("gateway.reload_mcp.reconnected", names=_names(reconnected)))
        if added:
            lines.append(t("gateway.reload_mcp.added", names=_names(added)))
        if removed:
            lines.append(t("gateway.reload_mcp.removed", names=_names(removed)))
        if after:
            lines.append(t("gateway.reload_mcp.tools_available", tools=len(new_tools), servers=len(after)))
        else:
            lines.append(t("gateway.reload_mcp.none_connected"))

        # Notice written into the conversation transcript.
        change_parts = [
            f"{label} servers: {_names(group)}"
            for label, group in (
                ("Added", added),
                ("Removed", removed),
                ("Reconnected", reconnected),
            )
            if group
        ]
        if new_tools:
            tool_summary = f"{len(new_tools)} MCP tool(s) now available"
        else:
            tool_summary = "No MCP tools available"
        change_detail = ". ".join(change_parts) + ". " if change_parts else ""
        reload_msg = {
            "role": "user",
            "content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
        }
        try:
            entry = self.session_store.get_or_create_session(event.source)
            self.session_store.append_to_transcript(entry.session_id, reload_msg)
        except Exception:
            # Recording the notice is best-effort.
            pass

        return "\n".join(lines)
    except Exception as e:
        logger.warning("MCP reload failed: %s", e)
        return t("gateway.reload_mcp.failed", error=e)
async def _handle_reload_skills_command(self, event: MessageEvent) -> str:
    """Handle /reload-skills — rescan the skills directory and report changes.

    ``reload_skills`` runs in the default executor. Afterwards every
    adapter exposing a callable ``refresh_skill_group`` is asked to refresh
    (awaited when it returns an awaitable); failures there are logged and
    do not abort the command.

    When skills were added or removed, a one-shot note describing the
    change is stored in ``self._pending_skills_reload_notes[session_key]``.
    Nothing is appended to the session transcript by this handler; per the
    original author's note the stored text is prepended to the next user
    message of the session by a consumer elsewhere in the gateway.

    Args:
        event: The incoming command event; only ``event.source`` is read.

    Returns:
        The localized summary shown to the user, or the localized failure
        message when anything in the reload path raises.
    """
    loop = asyncio.get_running_loop()
    try:
        from agent.skill_commands import reload_skills

        outcome = await loop.run_in_executor(None, reload_skills)
        added = outcome.get("added", [])
        removed = outcome.get("removed", [])
        total = outcome.get("total", 0)

        # Best-effort fan-out: one misbehaving adapter must not fail the reload.
        for adapter in list(self.adapters.values()):
            refresh = getattr(adapter, "refresh_skill_group", None)
            if callable(refresh):
                try:
                    pending = refresh()
                    if inspect.isawaitable(pending):
                        await pending
                except Exception as exc:
                    logger.warning(
                        "Adapter %s refresh_skill_group raised: %s",
                        getattr(adapter, "name", adapter), exc,
                    )

        summary = [t("gateway.reload_skills.header")]
        if not (added or removed):
            summary.append(t("gateway.reload_skills.no_new"))
            summary.append(t("gateway.reload_skills.total", count=total))
            return "\n".join(summary)

        def _fmt_line(item: dict) -> str:
            label = item.get("name", "")
            blurb = item.get("description", "")
            if not blurb:
                return t("gateway.reload_skills.item_no_desc", name=label)
            return t("gateway.reload_skills.item_with_desc", name=label, desc=blurb)

        # User-facing summary.
        if added:
            summary.append(t("gateway.reload_skills.added_header"))
            summary.extend(_fmt_line(entry) for entry in added)
        if removed:
            summary.append(t("gateway.reload_skills.removed_header"))
            summary.extend(_fmt_line(entry) for entry in removed)
        summary.append(t("gateway.reload_skills.total", count=total))

        # Model-facing note, queued for the next turn of this session.
        note_parts = ["[USER INITIATED SKILLS RELOAD:"]
        for heading, entries in (("Added Skills:", added), ("Removed Skills:", removed)):
            if entries:
                note_parts.append("")
                note_parts.append(heading)
                note_parts.extend(_fmt_line(entry) for entry in entries)
        note_parts.append("")
        note_parts.append("Use skills_list to see the updated catalog.]")
        note = "\n".join(note_parts)

        session_key = self._session_key_for_source(event.source)
        if not hasattr(self, "_pending_skills_reload_notes"):
            self._pending_skills_reload_notes = {}
        if session_key:
            self._pending_skills_reload_notes[session_key] = note
        return "\n".join(summary)
    except Exception as e:
        logger.warning("Skills reload failed: %s", e)
        return t("gateway.reload_skills.failed", error=e)
async def _handle_bundles_command(self, event: MessageEvent) -> str:
    """Handle /bundles — list installed skill bundles.

    Produces one plain-text/markdown message. This command only lists
    bundles; the footer it emits tells the user to invoke a bundle through
    its own ``/<slug>`` command.

    Args:
        event: The incoming command event (not inspected here).

    Returns:
        The bundle listing, a "none installed" hint that includes the
        bundles directory, or an "unavailable" message when the bundles
        module cannot be imported.
    """
    try:
        from agent.skill_bundles import list_bundles, _bundles_dir
    except Exception as exc:
        logger.warning("Bundles command unavailable: %s", exc)
        return f"Bundles subsystem unavailable: {exc}"

    installed = list_bundles()
    if not installed:
        hint = [
            "No skill bundles installed.\n",
            "Create one on the host with:\n",
            " `hermes bundles create <name> --skill <s1> --skill <s2>`\n",
            f"Directory: `{_bundles_dir()}`",
        ]
        return "".join(hint)

    out = [f"**Skill Bundles** ({len(installed)} installed):", ""]
    for bundle in installed:
        n_skills = len(bundle.get("skills", []))
        summary = bundle.get("description") or f"Load {n_skills} skills"
        out.append(f"• `/{bundle['slug']}` — {summary} _({n_skills} skills)_")
        out.extend(f" · {skill}" for skill in bundle.get("skills", []))
    out.extend(["", "Invoke a bundle with `/<slug>` to load all its skills."])
    return "\n".join(out)
async def _maybe_confirm_destructive_slash(
    self,
    *,
    event: MessageEvent,
    command: str,
    title: str,
    detail: str,
    execute,
) -> Union[str, "EphemeralReply", None]:
    """Gate a destructive session slash command behind a confirmation.

    The gate is ``approvals.destructive_slash_confirm`` in the user config
    (default: on; any failure reading it leaves the gate on). With the gate
    off, ``execute`` is awaited immediately and its result returned.
    Otherwise the request is delegated to ``_request_slash_confirm`` with a
    handler that resolves the user's choice:

    - ``cancel`` — return a "cancelled" message without running ``execute``
    - ``always`` — try to persist ``approvals.destructive_slash_confirm``
      as ``False`` (failure is logged, not fatal), run ``execute`` and, when
      the result is a string, append a note explaining how to re-enable
    - any other choice (``once``) — run ``execute`` and return its result

    Args:
        event: The originating command event.
        command: Slash command name without the leading ``/``.
        title: Title forwarded to the confirmation prompt.
        detail: Explanatory text embedded in the prompt body.
        execute: Zero-argument async callable performing the action.

    Returns:
        Whatever ``execute`` or ``_request_slash_confirm`` returns.
    """
    gate_enabled = True
    try:
        user_cfg = self._read_user_config()
        approvals = user_cfg.get("approvals") if isinstance(user_cfg, dict) else None
        if isinstance(approvals, dict):
            gate_enabled = bool(approvals.get("destructive_slash_confirm", True))
    except Exception:
        # Unreadable config: stay on the safe side and keep asking.
        pass

    if not gate_enabled:
        return await execute()

    session_key = self._session_key_for_source(event.source)

    async def _on_confirm(choice: str):
        if choice == "cancel":
            return f"🟡 /{command} cancelled. Conversation unchanged."

        silence_future_prompts = choice == "always"
        if silence_future_prompts:
            try:
                from cli import save_config_value
                save_config_value("approvals.destructive_slash_confirm", False)
                logger.info(
                    "User opted out of destructive slash confirm (session=%s)",
                    session_key,
                )
            except Exception as exc:
                logger.warning(
                    "Failed to persist destructive_slash_confirm=false: %s", exc,
                )

        outcome = await execute()
        # Only plain-string results can carry the extra note.
        if silence_future_prompts and isinstance(outcome, str):
            return outcome + (
                "\n\nℹ️ Future /clear, /new, /reset, and /undo will run "
                "without confirmation. Re-enable via "
                "`approvals.destructive_slash_confirm: true` in config.yaml."
            )
        return outcome

    prompt_sections = [
        f"⚠️ **Confirm /{command}**\n\n",
        f"{detail}\n\n",
        "Choose:\n",
        "• **Approve Once** — proceed this time only\n",
        "• **Always Approve** — proceed and silence this prompt permanently\n",
        "• **Cancel** — keep current conversation\n\n",
        "_Text fallback: reply `/approve`, `/always`, or `/cancel`._",
    ]
    return await self._request_slash_confirm(
        event=event,
        command=command,
        title=title,
        message="".join(prompt_sections),
        handler=_on_confirm,
    )
async def _request_slash_confirm(
    self,
    *,
    event: MessageEvent,
    command: str,
    title: str,
    message: str,
    handler,
) -> Optional[str]:
    """Register a pending slash-command confirmation and prompt the user.

    ``handler`` is registered with ``tools.slash_confirm`` under this
    session's key and a fresh confirm id drawn from a counter that is
    created lazily on ``self``. The platform adapter is then asked to send
    the prompt via ``send_slash_confirm``.

    Args:
        event: The originating command event.
        command: Slash command name being confirmed.
        title: Prompt title handed to the adapter.
        message: Prompt body; also used as the text fallback.
        handler: Async callable taking the user's choice string.

    Returns:
        ``None`` when the adapter reports a successful send (the prompt is
        already on screen); otherwise ``message`` itself, so the caller can
        deliver it as plain text.
    """
    from tools import slash_confirm as _slash_confirm_mod

    source = event.source
    session_key = self._session_key_for_source(source)

    id_source = getattr(self, "_slash_confirm_counter", None)
    if id_source is None:
        import itertools as _itertools
        id_source = _itertools.count(1)
        self._slash_confirm_counter = id_source
    confirm_id = f"{next(id_source)}"
    _slash_confirm_mod.register(session_key, confirm_id, command, handler)

    adapter = self.adapters.get(source.platform)
    metadata = self._thread_metadata_for_source(source, self._reply_anchor_for_event(event))
    if adapter is None:
        return message

    try:
        sent = await adapter.send_slash_confirm(
            chat_id=source.chat_id,
            title=title,
            message=message,
            session_key=session_key,
            confirm_id=confirm_id,
            metadata=metadata,
        )
        if sent and getattr(sent, "success", False):
            return None
    except Exception as exc:
        # Any adapter failure degrades to the text prompt below.
        logger.debug(
            "send_slash_confirm failed for %s on %s: %s",
            command, source.platform, exc,
        )
    return message
def _read_user_config(self) -> Dict[str, Any]:
    """Load the user's config through ``hermes_cli.config.load_config``.

    Used by the slash-confirm gate so that config changes made at runtime
    (e.g. an earlier "Always Approve" choice) can be observed without
    restarting the gateway.

    Returns:
        The loaded config when it is a dict; an empty dict when it is not,
        or when importing or loading raises.
    """
    try:
        from hermes_cli.config import load_config
        loaded = load_config()
    except Exception:
        return {}
    if isinstance(loaded, dict):
        return loaded
    return {}
def _thread_metadata_for_source(
    self,
    source,
    reply_to_message_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Build the metadata dict platforms need for thread-aware replies.

    Args:
        source: Message source; ``thread_id``, ``platform``, ``chat_type``
            and ``message_id`` are read with ``getattr`` defaults.
        reply_to_message_id: Preferred reply anchor; falls back to
            ``source.message_id`` when falsy.

    Returns:
        ``None`` when the source has no ``thread_id``. Otherwise a dict
        holding ``thread_id`` and, for Telegram DMs only, the DM-topic
        fallback flag, the topic id (unless it is ``""`` or ``"1"``) and
        the reply anchor as a string when one is available.
    """
    thread_id = getattr(source, "thread_id", None)
    if thread_id is None:
        return None

    meta: Dict[str, Any] = {"thread_id": thread_id}
    telegram_dm = (
        getattr(source, "platform", None) == Platform.TELEGRAM
        and getattr(source, "chat_type", None) == "dm"
    )
    if not telegram_dm:
        return meta

    meta["telegram_dm_topic_reply_fallback"] = True
    topic = str(thread_id)
    if topic not in ("", "1"):
        meta["direct_messages_topic_id"] = topic
    anchor = reply_to_message_id or getattr(source, "message_id", None)
    if anchor is not None:
        meta["telegram_reply_to_message_id"] = str(anchor)
    return meta
@staticmethod
def _reply_anchor_for_event(event: MessageEvent) -> Optional[str]:
    """Return the platform-specific reply anchor for GatewayRunner sends.

    Thin static wrapper that forwards ``event`` to a function of the same
    name and returns its result unchanged.
    """
    # The bare name below is resolved through module globals (class scope is
    # not visible from a method body), so this presumably reaches a
    # module-level helper defined elsewhere in this file rather than recursing.
    return _reply_anchor_for_event(event)
# Approval timeout in seconds (300 s = 5 minutes).
# NOTE(review): not referenced in the nearby handlers — confirm where it is consumed.
_APPROVAL_TIMEOUT_SECONDS = 300
async def _handle_approve_command(self, event: MessageEvent) -> Optional[str]:
    """Handle /approve — release agent thread(s) waiting on an approval.

    Pending approvals are resolved through
    ``tools.approval.resolve_gateway_approval``. Without ``all`` a single
    pending command is resolved (the oldest, per the original usage
    notes); with ``all`` every pending command is resolved.

    Usage:
        /approve              — approve one pending command once
        /approve all          — approve all pending commands
        /approve session      — approve + remember for the session
        /approve all session  — approve all + remember for the session
        /approve always       — approve + remember permanently
        /approve all always   — approve all + remember permanently

    ``permanent`` / ``permanently`` are accepted as aliases of ``always``
    and ``ses`` as an alias of ``session``.

    Args:
        event: The incoming command event.

    Returns:
        A localized status message.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    from tools.approval import (
        resolve_gateway_approval, has_blocking_approval,
    )

    if not has_blocking_approval(session_key):
        # Nothing is blocked; a leftover bookkeeping entry means it expired.
        if session_key not in self._pending_approvals:
            return t("gateway.approve.no_pending")
        self._pending_approvals.pop(session_key)
        return t("gateway.approval_expired")

    tokens = event.get_command_args().strip().lower().split()
    resolve_all = "all" in tokens
    modifiers = {tok for tok in tokens if tok != "all"}
    if modifiers & {"always", "permanent", "permanently"}:
        choice = "always"
    elif modifiers & {"session", "ses"}:
        choice = "session"
    else:
        choice = "once"

    count = resolve_gateway_approval(session_key, choice, resolve_all=resolve_all)
    if not count:
        return t("gateway.approve.no_pending")

    platform_adapter = self.adapters.get(source.platform)
    if platform_adapter:
        platform_adapter.resume_typing_for_chat(source.chat_id)
    logger.info("User approved %d dangerous command(s) via /approve (%s)", count, choice)

    form = "singular" if count <= 1 else "plural"
    return t(f"gateway.approve.{choice}_{form}", count=count)
async def _handle_deny_command(self, event: MessageEvent) -> str:
    """Handle /deny command — reject pending dangerous command(s).

    Resolves blocked approval(s) for this session with a ``"deny"`` choice
    via ``tools.approval.resolve_gateway_approval``. ``/deny`` resolves a
    single pending command; ``/deny all`` resolves every pending command.

    Args:
        event: The incoming command event.

    Returns:
        A localized status message: "stale" when only a leftover
        ``_pending_approvals`` entry existed, "no pending" when nothing
        was waiting, otherwise a singular/plural "denied" confirmation.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    from tools.approval import (
        resolve_gateway_approval, has_blocking_approval,
    )
    if not has_blocking_approval(session_key):
        if session_key in self._pending_approvals:
            self._pending_approvals.pop(session_key)
            return t("gateway.deny.stale")
        return t("gateway.deny.no_pending")

    # Split into whitespace-separated tokens so "all" must be a whole word.
    # A substring test on the raw string would treat e.g. "/deny install"
    # or "/deny call" as "/deny all". This mirrors _handle_approve_command.
    args = event.get_command_args().strip().lower().split()
    resolve_all = "all" in args
    count = resolve_gateway_approval(session_key, "deny", resolve_all=resolve_all)
    if not count:
        return t("gateway.deny.no_pending")

    _adapter = self.adapters.get(source.platform)
    if _adapter:
        _adapter.resume_typing_for_chat(source.chat_id)
    logger.info("User denied %d dangerous command(s) via /deny", count)
    if count > 1:
        return t("gateway.deny.denied_plural", count=count)
    return t("gateway.deny.denied_singular")
# Platforms from which /update is accepted outright. ``_handle_update_command``
# checks this set first; for any platform not listed here it falls back to
# the platform registry entry's ``allow_update_command`` flag.
_UPDATE_ALLOWED_PLATFORMS = frozenset({
    Platform.TELEGRAM, Platform.DISCORD, Platform.SLACK, Platform.WHATSAPP,
    Platform.SIGNAL, Platform.MATTERMOST, Platform.MATRIX,
    Platform.HOMEASSISTANT, Platform.EMAIL, Platform.SMS, Platform.DINGTALK,
    Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.QQBOT, Platform.LOCAL,
})
async def _handle_debug_command(self, event: MessageEvent) -> str:
    """Handle /debug — upload a summary debug report and return its URL.

    Only the report produced by ``collect_debug_report`` (requested with
    200 log lines plus the captured dump) is uploaded. The reply itself
    points users to the CLI for full log sharing. All blocking work runs
    in the default executor.

    Args:
        event: The incoming command event (not inspected here).

    Returns:
        The privacy notice followed by the labelled paste URL and hints,
        or the localized upload-failure message.
    """
    from hermes_cli.debug import (
        _capture_dump, collect_debug_report,
        upload_to_pastebin, _schedule_auto_delete,
        _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
    )

    def _collect_and_upload():
        _best_effort_sweep_expired_pastes()
        report = collect_debug_report(log_lines=200, dump_text=_capture_dump())
        try:
            urls = {"Report": upload_to_pastebin(report)}
        except Exception as exc:
            return t("gateway.debug.upload_failed", error=exc)
        _schedule_auto_delete(list(urls.values()))

        out = [_GATEWAY_PRIVACY_NOTICE, "", t("gateway.debug.header"), ""]
        # Pad labels to a common width so the URLs line up.
        width = max(map(len, urls))
        out.extend(f"`{label:<{width}}` {url}" for label, url in urls.items())
        out.append("")
        for hint_key in (
            "gateway.debug.auto_delete",
            "gateway.debug.full_logs_hint",
            "gateway.debug.share_hint",
        ):
            out.append(t(hint_key))
        return "\n".join(out)

    return await asyncio.get_running_loop().run_in_executor(None, _collect_and_upload)
async def _handle_update_command(self, event: MessageEvent) -> str:
    """Handle /update command — update Hermes Agent to the latest version.

    Spawns ``hermes update --gateway`` as a detached process so it can
    survive the gateway restart that the update may trigger. Marker files
    are written under ``_hermes_home`` so either the current gateway
    process or the next one can notify the user when the update finishes:

    - ``.update_pending.json`` — who asked (platform, chat, user, session,
      timestamp, optional thread id)
    - ``.update_output.txt``   — combined stdout/stderr of the update
    - ``.update_exit_code``    — exit status, written when the update ends

    Args:
        event: The incoming command event; ``event.source`` identifies the
            requesting chat.

    Returns:
        A localized status string: the "starting" message on success, or
        the reason the update was refused or could not be started.
    """
    import json
    import shutil
    import subprocess
    from datetime import datetime
    from hermes_cli.config import is_managed, format_managed_message

    platform = event.source.platform
    _allowed = self._UPDATE_ALLOWED_PLATFORMS
    if platform not in _allowed:
        # Not in the static allow-list: consult the platform registry.
        # Any lookup failure is treated as "not allowed".
        try:
            from gateway.platform_registry import platform_registry
            entry = platform_registry.get(platform.value)
            if not entry or not entry.allow_update_command:
                return t("gateway.update.platform_not_messaging")
        except Exception:
            return t("gateway.update.platform_not_messaging")

    if is_managed():
        return f"✗ {format_managed_message('update Hermes Agent')}"

    # The update is refused unless the project root (two levels above this
    # file) contains a .git entry.
    project_root = Path(__file__).parent.parent.resolve()
    git_dir = project_root / '.git'
    if not git_dir.exists():
        return t("gateway.update.not_git_repo")

    hermes_cmd = _resolve_hermes_bin()
    if not hermes_cmd:
        return t("gateway.update.hermes_cmd_not_found")

    pending_path = _hermes_home / ".update_pending.json"
    output_path = _hermes_home / ".update_output.txt"
    exit_code_path = _hermes_home / ".update_exit_code"
    session_key = self._session_key_for_source(event.source)
    pending = {
        "platform": event.source.platform.value,
        "chat_id": event.source.chat_id,
        "user_id": event.source.user_id,
        "session_key": session_key,
        "timestamp": datetime.now().isoformat(),
    }
    if event.source.thread_id:
        pending["thread_id"] = event.source.thread_id

    # Write to a temp file, then rename, so a reader never sees a
    # half-written marker.
    _tmp_pending = pending_path.with_suffix(".tmp")
    _tmp_pending.write_text(json.dumps(pending))
    _tmp_pending.replace(pending_path)
    # Drop any exit code left over from a previous run; its presence is what
    # the watcher treats as "update finished".
    exit_code_path.unlink(missing_ok=True)

    try:
        if sys.platform == "win32":
            import textwrap
            from hermes_cli._subprocess_compat import windows_detach_popen_kwargs
            # Small Python helper that runs the update, captures its output
            # to output_path and records the exit status in exit_code_path.
            helper = textwrap.dedent(
                """
                import os, subprocess, sys
                output_path = sys.argv[1]
                exit_code_path = sys.argv[2]
                cmd = sys.argv[3:]
                env = dict(os.environ)
                env["PYTHONUNBUFFERED"] = "1"
                with open(output_path, "wb") as f:
                    proc = subprocess.Popen(cmd, stdout=f, stderr=subprocess.STDOUT, env=env)
                    rc = proc.wait()
                with open(exit_code_path, "w") as f:
                    f.write(str(rc))
                """
            ).strip()
            subprocess.Popen(
                [
                    sys.executable, "-c", helper,
                    str(output_path), str(exit_code_path),
                    *hermes_cmd, "update", "--gateway",
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                **windows_detach_popen_kwargs(),
            )
        else:
            # Every interpolated part is shell-quoted because the command is
            # handed to ``bash -c`` as a single string.
            hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
            update_cmd = (
                f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
                f" > {shlex.quote(str(output_path))} 2>&1; "
                f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
            )
            # Use setsid when it is installed; otherwise rely on
            # start_new_session alone.
            setsid_bin = shutil.which("setsid")
            if setsid_bin:
                subprocess.Popen(
                    [setsid_bin, "bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
            else:
                subprocess.Popen(
                    ["bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
    except Exception as e:
        # Spawn failed: remove the markers so no watcher waits for an update
        # that never started.
        pending_path.unlink(missing_ok=True)
        exit_code_path.unlink(missing_ok=True)
        return t("gateway.update.start_failed", error=e)

    self._schedule_update_notification_watch()
    return t("gateway.update.starting")
def _schedule_update_notification_watch(self) -> None:
    """Start the update-progress watcher unless one is already running.

    A task stored in ``self._update_notification_task`` that is not yet
    done is left alone. Otherwise a new task wrapping
    ``_watch_update_progress`` is created and stored; if task creation
    raises ``RuntimeError`` (logged as "no running event loop"), the
    watcher is skipped.
    """
    task = getattr(self, "_update_notification_task", None)
    watcher_alive = bool(task) and not task.done()
    if not watcher_alive:
        try:
            self._update_notification_task = asyncio.create_task(
                self._watch_update_progress()
            )
        except RuntimeError:
            logger.debug("Skipping update notification watcher: no running event loop")
async def _watch_update_progress(
    self,
    poll_interval: float = 2.0,
    stream_interval: float = 4.0,
    timeout: float = 1800.0,
) -> None:
    """Watch ``hermes update --gateway``, streaming output + forwarding prompts.

    Polls ``.update_output.txt`` for new content and sends chunks to the
    user periodically. Detects ``.update_prompt.json`` (written by the
    update process when it needs user input) and forwards the prompt to
    the messenger. The user's next message is intercepted by
    ``_handle_message`` and written to ``.update_response``.

    When the requesting adapter/chat cannot be resolved from the pending
    marker, the watcher degrades to waiting for the exit-code file and
    delegating to ``_send_update_notification``.

    Args:
        poll_interval: Seconds to sleep between polls.
        stream_interval: Minimum seconds between two streamed output sends.
        timeout: Seconds after which the update is considered timed out.
    """
    pending_path = _hermes_home / ".update_pending.json"
    claimed_path = _hermes_home / ".update_pending.claimed.json"
    output_path = _hermes_home / ".update_output.txt"
    exit_code_path = _hermes_home / ".update_exit_code"
    prompt_path = _hermes_home / ".update_prompt.json"
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    # Resolve who asked for the update from whichever marker file exists
    # (the claimed marker is tried first).
    adapter = None
    chat_id = None
    session_key = None
    metadata = None
    for path in (claimed_path, pending_path):
        if path.exists():
            try:
                pending = json.loads(path.read_text())
                platform_str = pending.get("platform")
                chat_id = pending.get("chat_id")
                session_key = pending.get("session_key")
                thread_id = pending.get("thread_id")
                metadata = {"thread_id": thread_id} if thread_id else None
                if platform_str and chat_id:
                    platform = Platform(platform_str)
                    adapter = self.adapters.get(platform)
                    # Marker without a session key: derive one from
                    # platform and chat.
                    if not session_key:
                        session_key = f"{platform_str}:{chat_id}"
                break
            except Exception:
                # Unreadable marker: try the next candidate.
                pass

    if not adapter or not chat_id:
        logger.warning("Update watcher: cannot resolve adapter/chat_id, falling back to completion-only")
        # Completion-only mode: wait for the exit-code file, then hand over
        # to the non-streaming notification path.
        while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
            if exit_code_path.exists():
                await self._send_update_notification()
                return
            await asyncio.sleep(poll_interval)
        if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
            # Timed out: record exit code 124 (presumably mirroring
            # timeout(1)) so the notification path reports a failure.
            exit_code_path.write_text("124")
            await self._send_update_notification()
        return

    def _strip_ansi(text: str) -> str:
        # Remove CSI escape sequences (ESC [ params final-letter).
        return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)

    # NOTE: despite the name this counts characters of the decoded text
    # (len() of read_text()), not bytes.
    bytes_sent = 0
    last_stream_time = loop.time()
    buffer = ""

    async def _flush_buffer() -> None:
        """Send buffered output to the user."""
        nonlocal buffer, last_stream_time
        if not buffer.strip():
            buffer = ""
            return
        clean = _strip_ansi(buffer).strip()
        buffer = ""
        last_stream_time = loop.time()
        if not clean:
            return
        # Split into chunks of at most 3500 characters — presumably to stay
        # under platform message-size limits (confirm).
        max_chunk = 3500
        chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
        for chunk in chunks:
            try:
                await adapter.send(chat_id, f"```\n{chunk}\n```", metadata=metadata)
            except Exception as e:
                logger.debug("Update stream send failed: %s", e)

    while loop.time() < deadline:
        if exit_code_path.exists():
            # Update finished: drain any remaining output first.
            if output_path.exists():
                try:
                    content = output_path.read_text()
                    if len(content) > bytes_sent:
                        buffer += content[bytes_sent:]
                        bytes_sent = len(content)
                except OSError:
                    pass
            await _flush_buffer()
            try:
                # An empty exit-code file is treated as failure ("1").
                exit_code_raw = exit_code_path.read_text().strip() or "1"
                exit_code = int(exit_code_raw)
                if exit_code == 0:
                    await adapter.send(chat_id, "✅ Hermes update finished.", metadata=metadata)
                else:
                    await adapter.send(
                        chat_id,
                        "❌ Hermes update failed (exit code {}).".format(exit_code),
                        metadata=metadata,
                    )
                logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
            except Exception as e:
                logger.warning("Update final notification failed: %s", e)
            # Remove every marker so the update is not reported twice.
            for p in (pending_path, claimed_path, output_path,
                      exit_code_path, prompt_path):
                p.unlink(missing_ok=True)
            (_hermes_home / ".update_response").unlink(missing_ok=True)
            self._update_prompt_pending.pop(session_key, None)
            return

        # Still running: collect new output since the last read.
        if output_path.exists():
            try:
                content = output_path.read_text()
                if len(content) > bytes_sent:
                    buffer += content[bytes_sent:]
                    bytes_sent = len(content)
            except OSError:
                pass

        # Rate-limit streamed output to one send per stream_interval.
        if buffer.strip() and (loop.time() - last_stream_time) >= stream_interval:
            await _flush_buffer()

        # Forward an interactive prompt once per session (guarded by the
        # _update_prompt_pending flag).
        if (prompt_path.exists() and session_key
                and not self._update_prompt_pending.get(session_key)):
            try:
                prompt_data = json.loads(prompt_path.read_text())
                prompt_text = prompt_data.get("prompt", "")
                default = prompt_data.get("default", "")
                if prompt_text:
                    # Flush first so the prompt appears after the output
                    # that led up to it.
                    await _flush_buffer()
                    sent_buttons = False
                    # Prefer the adapter's button-based prompt when its
                    # class provides one; fall back to plain text on error.
                    if getattr(type(adapter), "send_update_prompt", None) is not None:
                        try:
                            await adapter.send_update_prompt(
                                chat_id=chat_id,
                                prompt=prompt_text,
                                default=default,
                                session_key=session_key,
                                metadata=metadata,
                            )
                            sent_buttons = True
                        except Exception as btn_err:
                            logger.debug("Button-based update prompt failed: %s", btn_err)
                    if not sent_buttons:
                        default_hint = f" (default: {default})" if default else ""
                        await adapter.send(
                            chat_id,
                            f"⚕ **Update needs your input:**\n\n"
                            f"{prompt_text}{default_hint}\n\n"
                            f"Reply `/approve` (yes) or `/deny` (no), "
                            f"or type your answer directly.",
                            metadata=metadata,
                        )
                    self._update_prompt_pending[session_key] = True
                    logger.info("Forwarded update prompt to %s: %s", session_key, prompt_text[:80])
            except (json.JSONDecodeError, OSError) as e:
                logger.debug("Failed to read update prompt: %s", e)

        await asyncio.sleep(poll_interval)

    # Deadline reached without an exit code: report a timeout and clean up.
    if not exit_code_path.exists():
        logger.warning("Update watcher timed out after %.0fs", timeout)
        exit_code_path.write_text("124")
        await _flush_buffer()
        try:
            # NOTE(review): the message hard-codes "30 minutes", which only
            # matches the default timeout of 1800 s.
            await adapter.send(
                chat_id,
                "❌ Hermes update timed out after 30 minutes.",
                metadata=metadata,
            )
        except Exception:
            pass
        for p in (pending_path, claimed_path, output_path,
                  exit_code_path, prompt_path):
            p.unlink(missing_ok=True)
        (_hermes_home / ".update_response").unlink(missing_ok=True)
        self._update_prompt_pending.pop(session_key, None)
async def _send_update_notification(self) -> bool:
    """If an update finished, notify the user.

    Returns False when no pending/claimed marker exists, or when the update
    is still running (no exit-code file yet) so a caller can retry later.
    Returns True after a definitive send/skip decision, including when the
    notification attempt itself raised (the error is logged).

    This is the legacy notification path used when the streaming watcher
    cannot resolve the adapter (e.g. after a gateway restart where the
    platform hasn't reconnected yet).
    """
    pending_path = _hermes_home / ".update_pending.json"
    claimed_path = _hermes_home / ".update_pending.claimed.json"
    output_path = _hermes_home / ".update_output.txt"
    exit_code_path = _hermes_home / ".update_exit_code"
    if not pending_path.exists() and not claimed_path.exists():
        return False

    # ``cleanup`` decides whether the ``finally`` block deletes the marker
    # files; it is switched off only when the update is still running.
    cleanup = True
    active_pending_path = claimed_path
    try:
        # Claim the pending marker by renaming it.
        if pending_path.exists():
            try:
                pending_path.replace(claimed_path)
            except FileNotFoundError:
                # The pending file vanished between exists() and replace().
                # Without a claimed file there is nothing left to do.
                if not claimed_path.exists():
                    return True
        elif not claimed_path.exists():
            return True

        pending = json.loads(claimed_path.read_text())
        platform_str = pending.get("platform")
        chat_id = pending.get("chat_id")
        thread_id = pending.get("thread_id")

        if not exit_code_path.exists():
            logger.info("Update notification deferred: update still running")
            # Un-claim: put the marker back and keep all files for a retry.
            cleanup = False
            active_pending_path = pending_path
            claimed_path.replace(pending_path)
            return False

        # An empty exit-code file is treated as failure ("1").
        exit_code_raw = exit_code_path.read_text().strip() or "1"
        exit_code = int(exit_code_raw)
        output = ""
        if output_path.exists():
            output = output_path.read_text()

        platform = Platform(platform_str)
        adapter = self.adapters.get(platform)
        if adapter and chat_id:
            metadata = {"thread_id": thread_id} if thread_id else None
            # Strip ANSI colour (SGR) sequences before quoting the output.
            output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
            if output:
                # Keep only the last 3500 characters of long output.
                if len(output) > 3500:
                    output = "…" + output[-3500:]
                if exit_code == 0:
                    msg = f"✅ Hermes update finished.\n\n```\n{output}\n```"
                else:
                    msg = f"❌ Hermes update failed.\n\n```\n{output}\n```"
            elif exit_code == 0:
                msg = "✅ Hermes update finished successfully."
            else:
                msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
            await adapter.send(chat_id, msg, metadata=metadata)
            logger.info(
                "Sent post-update notification to %s:%s (exit=%s)",
                platform_str,
                chat_id,
                exit_code,
            )
    except Exception as e:
        logger.warning("Post-update notification failed: %s", e)
    finally:
        if cleanup:
            active_pending_path.unlink(missing_ok=True)
            claimed_path.unlink(missing_ok=True)
            output_path.unlink(missing_ok=True)
            exit_code_path.unlink(missing_ok=True)
    return True
async def _send_restart_notification(self) -> Optional[tuple[str, str, Optional[str]]]:
    """Tell the chat recorded in ``.restart_notify.json`` the gateway is back.

    The marker file is deleted on every path once it has been found, so
    the notification is attempted at most once.

    Returns:
        ``(platform, chat_id, thread_id_or_None)`` as strings when the
        message was sent and not reported as failed; ``None`` when there is
        no marker, the marker lacks platform/chat, the adapter is not
        connected, the platform has ``gateway_restart_notification``
        disabled, the send reported ``success=False``, or anything raised.
    """
    marker = _hermes_home / ".restart_notify.json"
    if not marker.exists():
        return None
    try:
        payload = json.loads(marker.read_text())
        platform_str = payload.get("platform")
        chat_id = payload.get("chat_id")
        thread_id = payload.get("thread_id")
        if not (platform_str and chat_id):
            return None

        platform = Platform(platform_str)
        adapter = self.adapters.get(platform)
        if not adapter:
            logger.debug(
                "Restart notification skipped: %s adapter not connected",
                platform_str,
            )
            return None

        platform_cfg = self.config.platforms.get(platform)
        opted_out = (
            platform_cfg is not None
            and not platform_cfg.gateway_restart_notification
        )
        if opted_out:
            logger.info(
                "Restart notification suppressed: %s has gateway_restart_notification=false",
                platform_str,
            )
            return None

        send_result = await adapter.send(
            str(chat_id),
            "♻ Gateway restarted successfully. Your session continues.",
            metadata={"thread_id": thread_id} if thread_id else None,
        )
        # Only an explicit success=False counts as a failed delivery.
        rejected = (
            send_result is not None
            and getattr(send_result, "success", True) is False
        )
        if rejected:
            logger.warning(
                "Restart notification to %s:%s was not delivered: %s",
                platform_str,
                chat_id,
                getattr(send_result, "error", "send returned success=False"),
            )
            return None

        logger.info(
            "Sent restart notification to %s:%s",
            platform_str,
            chat_id,
        )
        thread_part = str(thread_id) if thread_id else None
        return (str(platform_str), str(chat_id), thread_part)
    except Exception as e:
        logger.warning("Restart notification failed: %s", e)
        return None
    finally:
        marker.unlink(missing_ok=True)
async def _send_home_channel_startup_notifications(
    self,
    *,
    skip_targets: Optional[set[tuple[str, str, Optional[str]]]] = None,
) -> set[tuple[str, str, Optional[str]]]:
    """Announce "gateway online" in each connected platform's home channel.

    Best-effort: a failed or raising send is logged and the loop moves on.
    Platforms without a home channel, and platforms whose config disables
    ``gateway_restart_notification``, are skipped.

    Args:
        skip_targets: ``(platform, chat_id, thread_id_or_None)`` tuples
            that must not be messaged (e.g. already notified elsewhere).

    Returns:
        The set of targets that were actually notified, in the same tuple
        shape as ``skip_targets``.
    """
    notified: set[tuple[str, str, Optional[str]]] = set()
    excluded = skip_targets or set()
    message = "♻️ Gateway online — Hermes is back and ready."

    for platform, adapter in self.adapters.items():
        home = self.config.get_home_channel(platform)
        if not (home and home.chat_id):
            continue

        platform_cfg = self.config.platforms.get(platform)
        if platform_cfg is not None and not platform_cfg.gateway_restart_notification:
            logger.info(
                "Home-channel startup notification suppressed: %s has gateway_restart_notification=false",
                platform.value,
            )
            continue

        thread_part = str(home.thread_id) if home.thread_id else None
        target = (platform.value, str(home.chat_id), thread_part)
        if target in excluded or target in notified:
            continue

        try:
            # Pass ``metadata`` only when a thread id exists, so adapters
            # see the same call shape as a plain send otherwise.
            extra = {"metadata": {"thread_id": home.thread_id}} if home.thread_id else {}
            outcome = await adapter.send(str(home.chat_id), message, **extra)
            if outcome is not None and getattr(outcome, "success", True) is False:
                logger.warning(
                    "Home-channel startup notification failed for %s:%s: %s",
                    platform.value,
                    home.chat_id,
                    getattr(outcome, "error", "send returned success=False"),
                )
                continue
            notified.add(target)
            logger.info(
                "Sent home-channel startup notification to %s:%s",
                platform.value,
                home.chat_id,
            )
        except Exception as exc:
            logger.warning(
                "Home-channel startup notification failed for %s:%s: %s",
                platform.value,
                home.chat_id,
                exc,
            )
    return notified
def _set_session_env(self, context: SessionContext) -> list:
    """Publish the session's identity via ``set_session_vars``.

    Per the original author's note, the values live in ``contextvars``
    rather than ``os.environ`` so concurrent gateway messages cannot
    overwrite each other's session state.

    Args:
        context: Session context; ``context.source`` and
            ``context.session_key`` are read.

    Returns:
        Whatever ``set_session_vars`` returns — reset tokens to hand to
        ``_clear_session_env`` in a ``finally`` block.
    """
    from gateway.session_context import set_session_vars

    src = context.source

    def _as_text(value) -> str:
        # Falsy values (None, "", 0) are published as the empty string.
        return str(value) if value else ""

    return set_session_vars(
        platform=src.platform.value,
        chat_id=src.chat_id,
        chat_name=src.chat_name or "",
        thread_id=_as_text(src.thread_id),
        user_id=_as_text(src.user_id),
        user_name=_as_text(src.user_name),
        session_key=context.session_key,
        message_id=_as_text(src.message_id),
    )
def _clear_session_env(self, tokens: list) -> None:
    """Restore session context variables to their pre-handler values.

    Args:
        tokens: The reset tokens previously returned by
            ``_set_session_env``; forwarded unchanged to
            ``gateway.session_context.clear_session_vars``.
    """
    from gateway.session_context import clear_session_vars
    clear_session_vars(tokens)
async def _run_in_executor_with_context(self, func, *args):
    """Run blocking ``func(*args)`` in the default executor with contextvars.

    A copy of the caller's current context is taken and ``func`` is run
    inside that copy on the worker thread, so context variables set by the
    caller are visible to ``func``.

    Args:
        func: Blocking callable to run off the event loop.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    snapshot = copy_context()
    running_loop = asyncio.get_running_loop()
    return await running_loop.run_in_executor(None, snapshot.run, func, *args)
def _decide_image_input_mode(self) -> str:
    """Pick how image attachments reach the currently active model.

    Delegates to ``agent.image_routing.decide_image_input_mode`` with the
    main provider, the main model and the loaded config. Per the original
    author's note the result is ``"native"`` (attach pixels on the user
    turn) or ``"text"`` (pre-analyze and prepend a description); the
    provider and model are looked up on every call so that ``/model``
    switches take effect on the next message.

    Returns:
        The routing decision, or ``"text"`` when anything in the lookup
        raises (the failure is logged at debug level).
    """
    try:
        from agent.image_routing import decide_image_input_mode
        from agent.auxiliary_client import _read_main_model, _read_main_provider
        from hermes_cli.config import load_config

        config_snapshot = load_config()
        active_provider = _read_main_provider()
        active_model = _read_main_model()
        return decide_image_input_mode(active_provider, active_model, config_snapshot)
    except Exception as exc:
        logger.debug("image_routing: decision failed, falling back to text — %s", exc)
        return "text"
async def _enrich_message_with_vision(
    self,
    user_text: str,
    image_paths: List[str],
) -> str:
    """
    Describe each attached image with the vision tool and prepend the
    results to the user's message.

    Every image yields one bracketed note: the sanitized description plus
    a pointer back to the cached file on success, or a note telling the
    model to inspect the file itself when analysis fails or raises.

    Args:
        user_text: The user's original caption / message text.
        image_paths: List of local file paths to cached images.

    Returns:
        The notes joined by blank lines, followed by ``user_text`` when it
        is non-empty; ``user_text`` unchanged when there were no images.
    """
    from tools.vision_tools import vision_analyze_tool
    from agent.memory_manager import sanitize_context

    analysis_prompt = (
        "Describe everything visible in this image in thorough detail. "
        "Include any text, code, data, objects, people, layout, colors, "
        "and any other notable visual information."
    )

    notes = []
    for path in image_paths:
        try:
            logger.debug("Auto-analyzing user image: %s", path)
            raw = await vision_analyze_tool(
                image_url=path,
                user_prompt=analysis_prompt,
            )
            outcome = json.loads(raw)
            if not outcome.get("success"):
                note = (
                    "[The user sent an image but I couldn't quite see it "
                    "this time (>_<) You can try looking at it yourself "
                    f"with vision_analyze using image_url: {path}]"
                )
            else:
                description = sanitize_context(outcome.get("analysis", ""))
                note = (
                    f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                    f"[If you need a closer look, use vision_analyze with "
                    f"image_url: {path} ~]"
                )
        except Exception as e:
            logger.error("Vision auto-analysis error: %s", e)
            note = (
                "[The user sent an image but something went wrong when I "
                "tried to look at it~ You can try examining it yourself "
                f"with vision_analyze using image_url: {path}]"
            )
        notes.append(note)

    if not notes:
        return user_text
    prefix = "\n\n".join(notes)
    return f"{prefix}\n\n{user_text}" if user_text else prefix
async def _enrich_message_with_transcription(
self,
user_text: str,
audio_paths: List[str],
) -> str:
"""
Auto-transcribe user voice/audio messages using the configured STT provider
and prepend the transcript to the message text.
Args:
user_text: The user's original caption / message text.
audio_paths: List of local file paths to cached audio files.
Returns:
The enriched message string with transcriptions prepended.
"""
if not getattr(self.config, "stt_enabled", True):
notes = []
for path in audio_paths:
abs_path = os.path.abspath(path)
duration_str = await _probe_audio_duration(abs_path)
if duration_str:
notes.append(
f"[The user sent a voice message: {abs_path} (duration: {duration_str})]"
)
else:
notes.append(f"[The user sent a voice message: {abs_path}]")
if not notes:
return user_text
prefix = "\n\n".join(notes)
_placeholder = "(The user sent a message with no text content)"
if user_text and user_text.strip() == _placeholder:
return prefix
if user_text:
return f"{prefix}\n\n{user_text}"
return prefix
from tools.transcription_tools import transcribe_audio
enriched_parts = []
for path in audio_paths:
try:
logger.debug("Transcribing user voice: %s", path)
result = await asyncio.to_thread(transcribe_audio, path)
if result["success"]:
transcript = result["transcript"]
enriched_parts.append(
f'[The user sent a voice message~ '
f'Here\'s what they said: "{transcript}"]'
)
else:
error = result.get("error", "unknown error")
if (
"No STT provider" in error
or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
):
_no_stt_note = (
"[The user sent a voice message but I can't listen "
"to it right now — no STT provider is configured. "
"A direct message has already been sent to the user "
"with setup instructions."
)
if self._has_setup_skill():
_no_stt_note += (
" You have a skill called hermes-agent-setup "
"that can help users configure Hermes features "
"including voice, tools, and more."
)
_no_stt_note += "]"
enriched_parts.append(_no_stt_note)
else:
enriched_parts.append(
"[The user sent a voice message but I had trouble "
f"transcribing it~ ({error})]"
)
except Exception as e:
logger.error("Transcription error: %s", e)
enriched_parts.append(
"[The user sent a voice message but something went wrong "
"when I tried to listen to it~ Let them know!]"
)
if enriched_parts:
prefix = "\n\n".join(enriched_parts)
_placeholder = "(The user sent a message with no text content)"
if user_text and user_text.strip() == _placeholder:
return prefix
if user_text:
return f"{prefix}\n\n{user_text}"
return prefix
return user_text
def _build_process_event_source(self, evt: dict):
    """Work out where a synthetic background-process event should be routed.

    Resolution order:
      1. the ``origin`` stored on the session-store entry for the event's
         session key,
      2. the cached session source for that key,
      3. a new ``SessionSource`` assembled from the event's own fields,
         with platform / chat type / chat id falling back to the values
         parsed out of the session key.

    The currently active foreground event is deliberately never consulted;
    per the original design note that is what causes cross-topic bleed.

    Args:
        evt: Event dict; the keys read are ``session_key``, ``platform``,
            ``chat_type``, ``chat_id``, ``thread_id``, ``user_id`` and
            ``user_name``.

    Returns:
        A session source, or ``None`` when platform, chat type or chat id
        cannot be determined or the platform name is not recognised.
    """
    from gateway.session import SessionSource

    def _text(field: str, fallback: str = "") -> str:
        # Normalise an event field (or its fallback) to a stripped string.
        return str(evt.get(field) or fallback or "").strip()

    session_key = _text("session_key")
    derived_platform = derived_chat_type = derived_chat_id = ""

    if session_key:
        try:
            self.session_store._ensure_loaded()
            stored = self.session_store._entries.get(session_key)
            if stored and getattr(stored, "origin", None):
                return stored.origin
        except Exception as exc:
            logger.debug(
                "Synthetic process-event session-store lookup failed for %s: %s",
                session_key,
                exc,
            )

        cached_source = self._get_cached_session_source(session_key)
        if cached_source is not None:
            return cached_source

        key_parts = _parse_session_key(session_key)
        if key_parts:
            derived_platform = key_parts["platform"]
            derived_chat_type = key_parts["chat_type"]
            derived_chat_id = key_parts["chat_id"]

    platform_name = _text("platform", derived_platform).lower()
    chat_type = _text("chat_type", derived_chat_type).lower()
    chat_id = _text("chat_id", derived_chat_id)
    if not (platform_name and chat_type and chat_id):
        return None

    try:
        platform = Platform(platform_name)
        if platform.value not in _BUILTIN_PLATFORM_VALUES:
            # Non-builtin platforms are only accepted when the registry knows
            # them; an import or lookup failure counts as "unknown".
            from gateway.platform_registry import platform_registry

            if not platform_registry.is_registered(platform.value):
                raise ValueError(platform_name)
    except Exception:
        logger.warning(
            "Synthetic process event has invalid platform metadata: %r",
            platform_name,
        )
        return None

    return SessionSource(
        platform=platform,
        chat_id=chat_id,
        chat_type=chat_type,
        thread_id=_text("thread_id") or None,
        user_id=_text("user_id") or None,
        user_name=_text("user_name") or None,
    )
async def _inject_watch_notification(self, synth_text: str, evt: dict) -> None:
    """Deliver a watch-pattern notification as an internal synthetic message.

    Routing is derived from the queued watch event itself through
    ``_build_process_event_source``, never from whichever foreground
    message was active when the queue was drained.

    Args:
        synth_text: Text of the synthetic message.
        evt: The queued watch event; ``message_id`` is forwarded when set
            and ``session_id`` is used for the drop warning.

    The notification is dropped with a warning when no source can be
    built, and dropped silently when no adapter matches the source's
    platform. Errors raised while injecting are logged, not propagated.
    """
    source = self._build_process_event_source(evt)
    if not source:
        logger.warning(
            "Dropping watch notification with no routing metadata for process %s",
            evt.get("session_id", "unknown"),
        )
        return

    platform_obj = source.platform
    if hasattr(platform_obj, "value"):
        platform_name = platform_obj.value
    else:
        platform_name = str(platform_obj)

    # First adapter whose platform value matches wins.
    adapter = next(
        (candidate for plat, candidate in self.adapters.items() if plat.value == platform_name),
        None,
    )
    if not adapter:
        return

    try:
        synth_event = MessageEvent(
            text=synth_text,
            message_type=MessageType.TEXT,
            source=source,
            internal=True,
            message_id=str(evt.get("message_id") or "").strip() or None,
        )
        logger.info(
            "Watch pattern notification — injecting for %s chat=%s thread=%s",
            platform_name,
            source.chat_id,
            source.thread_id,
        )
        await adapter.handle_message(synth_event)
    except Exception as e:
        logger.error("Watch notification injection error: %s", e)
async def _run_process_watcher(self, watcher: dict) -> None:
    """
    Periodically check a background process and push updates to the user.

    Runs as an asyncio task. Stays silent when nothing changed.
    Auto-removes when the process exits or is killed.

    Notification mode (from ``display.background_process_notifications``):
    - ``all`` — running-output updates + final message
    - ``result`` — final completion message only
    - ``error`` — final message only when exit code != 0
    - ``off`` — no messages at all

    Args:
        watcher: Watcher description. ``session_id`` and ``check_interval``
            are required; ``session_key``, ``platform``, ``chat_id``,
            ``thread_id``, ``user_id``, ``user_name``, ``message_id`` and
            ``notify_on_complete`` are optional.

    When ``notify_on_complete`` is set and the completion has not already
    been consumed, the result is injected as an internal synthetic message
    for the agent instead of being sent as plain text.
    """
    from tools.process_registry import process_registry

    session_id = watcher["session_id"]
    interval = watcher["check_interval"]
    session_key = watcher.get("session_key", "")
    platform_name = watcher.get("platform", "")
    chat_id = watcher.get("chat_id", "")
    thread_id = watcher.get("thread_id", "")
    user_id = watcher.get("user_id", "")
    user_name = watcher.get("user_name", "")
    # Empty / whitespace-only message ids collapse to None.
    message_id = str(watcher.get("message_id") or "").strip() or None
    agent_notify = watcher.get("notify_on_complete", False)
    notify_mode = self._load_background_notifications_mode()
    logger.debug("Process watcher started: %s (every %ss, notify=%s, agent_notify=%s)",
                 session_id, interval, notify_mode, agent_notify)

    # Nothing will ever be sent: just poll until the process is gone or exited.
    if notify_mode == "off" and not agent_notify:
        while True:
            await asyncio.sleep(interval)
            session = process_registry.get(session_id)
            if session is None or session.exited:
                break
        logger.debug("Process watcher ended (silent): %s", session_id)
        return

    # Buffer length seen on the previous poll; growth means new output.
    last_output_len = 0
    while True:
        await asyncio.sleep(interval)
        session = process_registry.get(session_id)
        if session is None:
            # Process no longer registered: stop watching.
            break
        current_output_len = len(session.output_buffer)
        has_new_output = current_output_len > last_output_len
        last_output_len = current_output_len

        if session.exited:
            from tools.process_registry import process_registry as _pr_check
            # Agent-facing completion: skipped when the completion was already consumed.
            if agent_notify and not _pr_check.is_completion_consumed(session_id):
                from tools.ansi_strip import strip_ansi
                _raw = strip_ansi(session.output_buffer) if session.output_buffer else ""
                _LIMIT = 2000
                if len(_raw) > _LIMIT:
                    # Keep the tail, then drop everything up to the first
                    # newline so the excerpt starts on a line boundary.
                    _tail = _raw[-_LIMIT:]
                    _nl = _tail.find("\n")
                    _tail = _tail[_nl + 1:] if _nl != -1 else _tail
                    _out = f"[… output truncated — showing last {len(_tail)} chars]\n{_tail}"
                else:
                    _out = _raw
                synth_text = (
                    f"[IMPORTANT: Background process {session_id} completed "
                    f"(exit code {session.exit_code}).\n"
                    f"Command: {session.command}\n"
                    f"Output:\n{_out}]"
                )
                # Routing comes from the watcher's own metadata.
                source = self._build_process_event_source({
                    "session_id": session_id,
                    "session_key": session_key,
                    "platform": platform_name,
                    "chat_id": chat_id,
                    "thread_id": thread_id,
                    "user_id": user_id,
                    "user_name": user_name,
                })
                if not source:
                    logger.warning(
                        "Dropping completion notification with no routing metadata for process %s",
                        session_id,
                    )
                    break
                adapter = None
                for p, a in self.adapters.items():
                    if p == source.platform:
                        adapter = a
                        break
                if adapter and source.chat_id:
                    try:
                        synth_event = MessageEvent(
                            text=synth_text,
                            message_type=MessageType.TEXT,
                            source=source,
                            internal=True,
                            message_id=message_id,
                        )
                        logger.info(
                            "Process %s finished — injecting agent notification for session %s chat=%s thread=%s",
                            session_id,
                            session_key,
                            source.chat_id,
                            source.thread_id,
                        )
                        await adapter.handle_message(synth_event)
                    except Exception as e:
                        logger.error("Agent notify injection error: %s", e)
                # Agent path handled: do not also send the plain-text notice below.
                break

            # Plain-text completion notice, gated by the notification mode.
            should_notify = (
                notify_mode in {"all", "result"}
                or (notify_mode == "error" and session.exit_code not in {0, None})
            )
            if should_notify:
                # Last 1000 characters of the raw buffer (not ANSI-stripped here).
                new_output = session.output_buffer[-1000:] if session.output_buffer else ""
                message_text = (
                    f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                    f"Here's the final output:\n{new_output}]"
                )
                adapter = None
                for p, a in self.adapters.items():
                    if p.value == platform_name:
                        adapter = a
                        break
                if adapter and chat_id:
                    try:
                        send_meta = {"thread_id": thread_id} if thread_id else None
                        await adapter.send(chat_id, message_text, metadata=send_meta)
                    except Exception as e:
                        logger.error("Watcher delivery error: %s", e)
            # Process has exited: the watcher is done either way.
            break

        elif has_new_output and notify_mode == "all" and not agent_notify:
            # Still running with fresh output: send the last 500 characters.
            new_output = session.output_buffer[-500:] if session.output_buffer else ""
            message_text = (
                f"[Background process {session_id} is still running~ "
                f"New output:\n{new_output}]"
            )
            adapter = None
            for p, a in self.adapters.items():
                if p.value == platform_name:
                    adapter = a
                    break
            if adapter and chat_id:
                try:
                    send_meta = {"thread_id": thread_id} if thread_id else None
                    await adapter.send(chat_id, message_text, metadata=send_meta)
                except Exception as e:
                    logger.error("Watcher delivery error: %s", e)

    logger.debug("Process watcher ended: %s", session_id)
# Upper bound related to interrupt nesting depth.
# NOTE(review): the code that consumes this limit is outside this chunk —
# confirm the exact semantics against its caller.
_MAX_INTERRUPT_DEPTH = 3
# (section, key) pairs looked up in the user config by
# _extract_cache_busting_config(); each value is reported under the flat
# name "section.key" and folded into the cached-agent signature, so a change
# to any of them causes the cached agent to be rebuilt.
_CACHE_BUSTING_CONFIG_KEYS: tuple = (
    ("model", "context_length"),
    ("model", "max_tokens"),
    ("compression", "enabled"),
    ("compression", "threshold"),
    ("compression", "target_ratio"),
    ("compression", "protect_last_n"),
    ("agent", "disabled_toolsets"),
)
@classmethod
def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
"""Pull values that must bust the cached agent.
Returns a flat dict keyed by 'section.key'. Missing config keys and
non-dict sections yield None values, which still contribute to the
signature (so 'absent' vs 'present-and-null' differ).
The live tool registry generation is included too. MCP reloads and
dynamic MCP tool-list changes mutate the registry without necessarily
changing config.yaml. Cached AIAgent instances freeze their tool
schemas at construction time, so a registry generation change must
rebuild the agent before the next turn.
"""
out: Dict[str, Any] = {}
cfg = user_config if isinstance(user_config, dict) else {}
for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
section_val = cfg.get(section)
if isinstance(section_val, dict):
out[f"{section}.{key}"] = section_val.get(key)
else:
out[f"{section}.{key}"] = None
try:
from tools.registry import registry
out["tools.registry_generation"] = getattr(registry, "_generation", None)
except Exception:
out["tools.registry_generation"] = None
return out
@staticmethod
def _agent_config_signature(
model: str,
runtime: dict,
enabled_toolsets: list,
ephemeral_prompt: str,
cache_keys: dict | None = None,
) -> str:
"""Compute a stable string key from agent config values.
When this signature changes between messages, the cached AIAgent is
discarded and rebuilt. When it stays the same, the cached agent is
reused — preserving the frozen system prompt and tool schemas for
prompt cache hits.
``cache_keys`` is an optional flat dict of additional config values
that should invalidate the cache when they change. Callers pass
the output of ``_extract_cache_busting_config(user_config)`` so
edits to model.context_length / compression.* in config.yaml are
picked up on the next gateway message without a manual restart.
"""
import hashlib, json as _j
_api_key = str(runtime.get("api_key", "") or "")
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
_cache_keys_sorted = sorted((cache_keys or {}).items())
blob = _j.dumps(
[
model,
_api_key_fingerprint,
runtime.get("base_url", ""),
runtime.get("provider", ""),
runtime.get("api_mode", ""),
sorted(enabled_toolsets) if enabled_toolsets else [],
ephemeral_prompt or "",
_cache_keys_sorted,
],
sort_keys=True,
default=str,
)
return hashlib.sha256(blob.encode()).hexdigest()[:16]
def _apply_session_model_override(
self, session_key: str, model: str, runtime_kwargs: dict
) -> tuple:
"""Apply /model session overrides if present, returning (model, runtime_kwargs).
The gateway /model command stores per-session overrides in
``_session_model_overrides``. These must take precedence over
config.yaml defaults so the switched model is actually used for
subsequent messages. Fields with ``None`` values are skipped so
partial overrides don't clobber valid config defaults.
"""
override = self._session_model_overrides.get(session_key)
if not override:
return model, runtime_kwargs
model = override.get("model", model)
for key in ("provider", "api_key", "base_url", "api_mode"):
val = override.get(key)
if val is not None:
runtime_kwargs[key] = val
return model, runtime_kwargs
def _is_intentional_model_switch(self, session_key: str, agent_model: str) -> bool:
"""Return True if *agent_model* matches an active /model session override."""
override = self._session_model_overrides.get(session_key)
return override is not None and override.get("model") == agent_model
def _release_running_agent_state(
self,
session_key: str,
*,
run_generation: Optional[int] = None,
) -> bool:
"""Pop ALL per-running-agent state entries for ``session_key``.
Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
across the gateway. Those sites had drifted: some popped only
``_running_agents``; some also ``_running_agents_ts``; only one
path also cleared ``_busy_ack_ts``. Each missed entry was a
small, persistent leak — a (str_key → float) tuple per session
per gateway lifetime.
Use this at every site that ends a running turn, regardless of
cause (normal completion, /stop, /reset, /resume, sentinel
cleanup, stale-eviction). Per-session state that PERSISTS
across turns (``_session_model_overrides``, ``_voice_mode``,
``_pending_approvals``, ``_update_prompt_pending``) is NOT
touched here — those have their own lifecycles.
When ``run_generation`` is provided, only clear the slot if that
generation is still current for the session. This prevents an
older async run whose generation was bumped by /stop or /new from
clobbering a newer run's state during its own unwind. Returns
True when the slot was cleared, False when an ownership guard
blocked it.
"""
if not session_key:
return False
if run_generation is not None and not self._is_session_run_current(
session_key, run_generation
):
return False
self._running_agents.pop(session_key, None)
self._running_agents_ts.pop(session_key, None)
if hasattr(self, "_busy_ack_ts"):
self._busy_ack_ts.pop(session_key, None)
return True
def _clear_session_boundary_security_state(self, session_key: str) -> None:
"""Clear per-session control state that must not survive a boundary switch."""
if not session_key:
return
pending_skills_reload_notes = getattr(
self, "_pending_skills_reload_notes", None
)
if isinstance(pending_skills_reload_notes, dict):
pending_skills_reload_notes.pop(session_key, None)
pending_approvals = getattr(self, "_pending_approvals", None)
if isinstance(pending_approvals, dict):
pending_approvals.pop(session_key, None)
update_prompt_pending = getattr(self, "_update_prompt_pending", None)
if isinstance(update_prompt_pending, dict):
update_prompt_pending.pop(session_key, None)
try:
from tools import slash_confirm as _slash_confirm_mod
except Exception:
_slash_confirm_mod = None
if _slash_confirm_mod is not None:
try:
_slash_confirm_mod.clear(session_key)
except Exception as e:
logger.debug(
"Failed to clear slash-confirm state for session boundary %s: %s",
session_key,
e,
)
try:
from tools.approval import clear_session as _clear_approval_session
except Exception:
return
try:
_clear_approval_session(session_key)
except Exception as e:
logger.debug(
"Failed to clear approval state for session boundary %s: %s",
session_key,
e,
)
def _begin_session_run_generation(self, session_key: str) -> int:
"""Claim a fresh run generation token for ``session_key``.
Every top-level gateway turn gets a monotonically increasing token.
If a later command like /stop or /new invalidates that token while the
old worker is still unwinding, the late result can be recognized and
dropped instead of bleeding into the fresh session.
"""
if not session_key:
return 0
generations = self.__dict__.get("_session_run_generation")
if generations is None:
generations = {}
self._session_run_generation = generations
next_generation = int(generations.get(session_key, 0)) + 1
generations[session_key] = next_generation
return next_generation
def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int:
"""Invalidate any in-flight run token for ``session_key``."""
generation = self._begin_session_run_generation(session_key)
if reason:
logger.info(
"Invalidated run generation for %s → %d (%s)",
session_key,
generation,
reason,
)
return generation
def _is_session_run_current(self, session_key: str, generation: int) -> bool:
"""Return True when ``generation`` is still current for ``session_key``."""
if not session_key:
return True
generations = self.__dict__.get("_session_run_generation") or {}
return int(generations.get(session_key, 0)) == int(generation)
def _bind_adapter_run_generation(
self,
adapter: Any,
session_key: str,
generation: int | None,
) -> None:
"""Bind a gateway run generation to the adapter's active-session event."""
if not adapter or not session_key or generation is None:
return
try:
interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key)
if interrupt_event is not None:
setattr(interrupt_event, "_hermes_run_generation", int(generation))
except Exception:
pass
async def _interrupt_and_clear_session(
    self,
    session_key: str,
    source: SessionSource,
    *,
    interrupt_reason: str,
    invalidation_reason: str,
    release_running_state: bool = True,
) -> None:
    """Stop the session's current run and discard its queued state in one place.

    Steps, in order:
      1. interrupt the running agent (unless the slot only holds the
         pending sentinel),
      2. invalidate the session's run generation,
      3. let the platform adapter interrupt its own session activity and
         drain its pending message, where it supports those hooks,
      4. drop the gateway's pending message for the session,
      5. release the running-agent bookkeeping unless told otherwise.

    Args:
        session_key: Session to interrupt; an empty key is a no-op.
        source: Supplies the platform (to find the adapter) and chat id.
        interrupt_reason: Passed to the agent's ``interrupt``.
        invalidation_reason: Logged with the generation bump.
        release_running_state: Set to ``False`` to skip step 5.
    """
    if not session_key:
        return

    active_agent = self._running_agents.get(session_key)
    if active_agent and active_agent is not _AGENT_PENDING_SENTINEL:
        active_agent.interrupt(interrupt_reason)

    self._invalidate_session_run_generation(session_key, reason=invalidation_reason)

    adapter = self.adapters.get(source.platform)
    if adapter:
        if hasattr(adapter, "interrupt_session_activity"):
            await adapter.interrupt_session_activity(session_key, source.chat_id)
        if hasattr(adapter, "get_pending_message"):
            # Called for its effect only; the returned message is discarded.
            adapter.get_pending_message(session_key)

    self._pending_messages.pop(session_key, None)

    if release_running_state:
        self._release_running_agent_state(session_key)
def _evict_cached_agent(self, session_key: str) -> None:
"""Remove a cached agent for a session (called on /new, /model, etc)."""
_lock = getattr(self, "_agent_cache_lock", None)
if _lock:
with _lock:
self._agent_cache.pop(session_key, None)
@staticmethod
def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
"""Reset per-turn state on a cached agent before a new turn starts.
Both _last_activity_ts and _last_activity_desc are only reset for
fresh external turns (depth 0); they are semantically paired —
desc describes the activity *at* ts, so updating one without the
other would make get_activity_summary() misleading.
For interrupt-recursive turns both are preserved so the inactivity
watchdog can accumulate stuck-turn idle time and fire the 30-min
timeout (#15654). The depth-0 reset is still needed: a session
idle for 29 min would otherwise trip the watchdog before the new
turn makes its first API call (#9051).
"""
if interrupt_depth == 0:
agent._last_activity_ts = time.time()
agent._last_activity_desc = "starting new turn (cached)"
agent._api_call_count = 0
def _release_evicted_agent_soft(self, agent: Any) -> None:
"""Soft cleanup for cache-evicted agents — preserves session tool state.
Called from _enforce_agent_cache_cap and _sweep_idle_cached_agents.
Distinct from _cleanup_agent_resources (full teardown) because a
cache-evicted session may resume at any time — its terminal
sandbox, browser daemon, and tracked bg processes must outlive
the Python AIAgent instance so the next agent built for the
same task_id inherits them.
"""
if agent is None:
return
try:
if hasattr(agent, "release_clients"):
agent.release_clients()
else:
self._cleanup_agent_resources(agent)
except Exception:
pass
def _enforce_agent_cache_cap(self) -> None:
    """Trim the agent cache back towards ``_AGENT_CACHE_MAX_SIZE``.

    The caller must already hold ``_agent_cache_lock``. Only the oldest
    ``len(cache) - cap`` entries are considered. Any of them whose agent is
    currently in ``_running_agents`` is left in place, because evicting it
    would tear down clients and sandboxes a live turn is using. In that
    case the cache stays over the cap, a warning is logged, and the cap is
    re-checked on the next insert.

    Cleanup of each evicted agent runs on a daemon thread so the caller
    does not block on slow teardown while holding the lock.

    Does nothing when the cache is missing or is not an ordered mapping
    (no ``move_to_end``).
    """
    cache = getattr(self, "_agent_cache", None)
    if cache is None or not hasattr(cache, "move_to_end"):
        return

    busy_agent_ids = {
        id(running)
        for running in getattr(self, "_running_agents", {}).values()
        if running is not None and running is not _AGENT_PENDING_SENTINEL
    }

    overflow = max(0, len(cache) - _AGENT_CACHE_MAX_SIZE)
    victims: List[tuple] = []
    # Oldest entries come first in the ordered cache.
    for session_key in list(cache)[:overflow]:
        cached_entry = cache.get(session_key)
        if isinstance(cached_entry, tuple) and cached_entry:
            cached_agent = cached_entry[0]
        else:
            cached_agent = None
        if cached_agent is not None and id(cached_agent) in busy_agent_ids:
            continue
        victims.append((session_key, cached_agent))

    for session_key, _unused in victims:
        cache.pop(session_key, None)

    still_over = len(cache) - _AGENT_CACHE_MAX_SIZE
    if still_over > 0:
        logger.warning(
            "Agent cache over cap (%d > %d); %d excess slot(s) held by "
            "mid-turn agents — will re-check on next insert.",
            len(cache), _AGENT_CACHE_MAX_SIZE, still_over,
        )

    for session_key, cached_agent in victims:
        logger.info(
            "Agent cache at cap; evicting LRU session=%s (cache_size=%d)",
            session_key, len(cache),
        )
        if cached_agent is None:
            continue
        cleanup_thread = threading.Thread(
            target=self._release_evicted_agent_soft,
            args=(cached_agent,),
            daemon=True,
            name=f"agent-cache-evict-{session_key[:24]}",
        )
        cleanup_thread.start()
def _sweep_idle_cached_agents(self) -> int:
    """Evict cached agents idle for longer than ``_AGENT_CACHE_IDLE_TTL_SECS``.

    The cache lock is taken internally, so callers must not hold it. An
    entry is skipped when it has no agent, when its agent is currently in
    ``_running_agents`` (tearing down an active turn's clients would crash
    the request), or when the agent has no ``_last_activity_ts``.

    Cleanup of each evicted agent runs on a daemon thread.

    Returns:
        Number of entries evicted; ``0`` when the cache or its lock is
        missing.
    """
    cache = getattr(self, "_agent_cache", None)
    cache_lock = getattr(self, "_agent_cache_lock", None)
    if cache is None or cache_lock is None:
        return 0

    now = time.time()
    busy_agent_ids = {
        id(running)
        for running in getattr(self, "_running_agents", {}).values()
        if running is not None and running is not _AGENT_PENDING_SENTINEL
    }

    expired: List[tuple] = []
    with cache_lock:
        for session_key, cached_entry in list(cache.items()):
            cached_agent = (
                cached_entry[0] if isinstance(cached_entry, tuple) and cached_entry else None
            )
            if cached_agent is None or id(cached_agent) in busy_agent_ids:
                continue
            last_seen = getattr(cached_agent, "_last_activity_ts", None)
            if last_seen is not None and (now - last_seen) > _AGENT_CACHE_IDLE_TTL_SECS:
                expired.append((session_key, cached_agent))
        for session_key, _unused in expired:
            cache.pop(session_key, None)

    # Logging and teardown happen after the lock has been released.
    for session_key, cached_agent in expired:
        logger.info(
            "Agent cache idle-TTL evict: session=%s (idle=%.0fs)",
            session_key, now - getattr(cached_agent, "_last_activity_ts", now),
        )
        cleanup_thread = threading.Thread(
            target=self._release_evicted_agent_soft,
            args=(cached_agent,),
            daemon=True,
            name=f"agent-cache-idle-{session_key[:24]}",
        )
        cleanup_thread.start()
    return len(expired)
def _get_proxy_url(self) -> Optional[str]:
"""Return the proxy URL if proxy mode is configured, else None.
Checks GATEWAY_PROXY_URL env var first (convenient for Docker),
then ``gateway.proxy_url`` in config.yaml.
"""
url = os.getenv("GATEWAY_PROXY_URL", "").strip()
if url:
return url.rstrip("/")
cfg = _load_gateway_config()
url = (cfg.get("gateway") or {}).get("proxy_url", "").strip()
if url:
return url.rstrip("/")
return None
async def _run_agent_via_proxy(
self,
message: str,
context_prompt: str,
history: List[Dict[str, Any]],
source: "SessionSource",
session_id: str,
session_key: str = None,
run_generation: Optional[int] = None,
event_message_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Forward the message to a remote Hermes API server instead of
running a local AIAgent.
When ``GATEWAY_PROXY_URL`` (or ``gateway.proxy_url`` in config.yaml)
is set, the gateway becomes a thin relay: it handles platform I/O
(encryption, threading, media) and delegates all agent work to the
remote server via ``POST /v1/chat/completions`` with SSE streaming.
This lets a Docker container handle Matrix E2EE while the actual
agent runs on the host with full access to local files, memory,
skills, and a unified session store.
"""
try:
from aiohttp import ClientSession as _AioClientSession, ClientTimeout
except ImportError:
return {
"final_response": "⚠️ Proxy mode requires aiohttp. Install with: pip install aiohttp",
"messages": [],
"api_calls": 0,
"tools": [],
}
proxy_url = self._get_proxy_url()
if not proxy_url:
return {
"final_response": "⚠️ Proxy URL not configured (GATEWAY_PROXY_URL or gateway.proxy_url)",
"messages": [],
"api_calls": 0,
"tools": [],
}
proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip()
def _run_still_current() -> bool:
if run_generation is None or not session_key:
return True
return self._is_session_run_current(session_key, run_generation)
api_messages: List[Dict[str, str]] = []
if context_prompt:
api_messages.append({"role": "system", "content": context_prompt})
for msg in history:
role = msg.get("role")
content = msg.get("content")
if role in {"user", "assistant"} and content:
api_messages.append({"role": role, "content": content})
api_messages.append({"role": "user", "content": message})
headers: Dict[str, str] = {"Content-Type": "application/json"}
if proxy_key:
headers["Authorization"] = f"Bearer {proxy_key}"
if session_id:
headers["X-Hermes-Session-Id"] = session_id
body = {
"model": "hermes-agent",
"messages": api_messages,
"stream": True,
}
_stream_consumer = None
_scfg = getattr(getattr(self, "config", None), "streaming", None)
if _scfg is None:
from gateway.config import StreamingConfig
_scfg = StreamingConfig()
platform_key = _platform_config_key(source.platform)
user_config = _load_gateway_config()
from gateway.display_config import resolve_display_setting
_plat_streaming = resolve_display_setting(
user_config, platform_key, "streaming"
)
_streaming_enabled = (
_scfg.enabled and _scfg.transport != "off"
if _plat_streaming is None
else bool(_plat_streaming)
)
_thread_metadata: Optional[Dict[str, Any]] = self._thread_metadata_for_source(source, event_message_id)
if _streaming_enabled:
try:
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
_effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
_buffer_only = False
if source.platform == Platform.MATRIX:
_effective_cursor = ""
_buffer_only = True
_fresh_final_secs = (
float(getattr(_scfg, "fresh_final_after_seconds", 0.0) or 0.0)
if source.platform == Platform.TELEGRAM
else 0.0
)
_consumer_cfg = StreamConsumerConfig(
edit_interval=_scfg.edit_interval,
buffer_threshold=_scfg.buffer_threshold,
cursor=_effective_cursor,
buffer_only=_buffer_only,
fresh_final_after_seconds=_fresh_final_secs,
transport=_scfg.transport or "edit",
chat_type=getattr(source, "chat_type", "") or "",
)
_stream_consumer = GatewayStreamConsumer(
adapter=_adapter,
chat_id=source.chat_id,
config=_consumer_cfg,
metadata=_thread_metadata,
initial_reply_to_id=event_message_id,
)
except Exception as _sc_err:
logger.debug("Proxy: could not set up stream consumer: %s", _sc_err)
stream_task = None
if _stream_consumer:
stream_task = asyncio.create_task(_stream_consumer.run())
_adapter = self.adapters.get(source.platform)
if _adapter:
try:
await _adapter.send_typing(source.chat_id, metadata=_thread_metadata)
except Exception:
pass
full_response = ""
_start = time.time()
try:
_timeout = ClientTimeout(total=0, sock_read=1800)
async with _AioClientSession(timeout=_timeout) as session:
async with session.post(
f"{proxy_url}/v1/chat/completions",
json=body,
headers=headers,
) as resp:
if resp.status != 200:
error_text = await resp.text()
logger.warning(
"Proxy error (%d) from %s: %s",
resp.status, proxy_url, error_text[:500],
)
return {
"final_response": f"⚠️ Proxy error ({resp.status}): {error_text[:300]}",
"messages": [],
"api_calls": 0,
"tools": [],
}
buffer = ""
async for chunk in resp.content.iter_any():
if not _run_still_current():
logger.info(
"Discarding stale proxy stream for %s — generation %d is no longer current",
session_key or "?",
run_generation or 0,
)
return {
"final_response": "",
"messages": [],
"api_calls": 0,
"tools": [],
"history_offset": len(history),
"session_id": session_id,
"response_previewed": False,
}
text = chunk.decode("utf-8", errors="replace")
buffer += text
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.strip()
if not line:
continue
if line.startswith("data: "):
data = line[6:]
if data.strip() == "[DONE]":
break
try:
obj = json.loads(data)
choices = obj.get("choices", [])
if choices:
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
full_response += content
if _stream_consumer:
_stream_consumer.on_delta(content)
except json.JSONDecodeError:
pass
except asyncio.CancelledError:
raise
except Exception as e:
logger.error("Proxy connection error to %s: %s", proxy_url, e)
if not full_response:
return {
"final_response": f"⚠️ Proxy connection error: {e}",
"messages": [],
"api_calls": 0,
"tools": [],
}
finally:
if _stream_consumer:
_stream_consumer.finish()
if stream_task:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
_elapsed = time.time() - _start
if not _run_still_current():
logger.info(
"Discarding stale proxy result for %s — generation %d is no longer current",
session_key or "?",
run_generation or 0,
)
return {
"final_response": "",
"messages": [],
"api_calls": 0,
"tools": [],
"history_offset": len(history),
"session_id": session_id,
"response_previewed": False,
}
logger.info(
"proxy response: url=%s session=%s time=%.1fs response=%d chars",
proxy_url, (session_id or "")[:20], _elapsed, len(full_response),
)
return {
"final_response": full_response or "(No response from remote agent)",
"messages": [
{"role": "user", "content": message},
{"role": "assistant", "content": full_response},
],
"api_calls": 1,
"tools": [],
"history_offset": len(history),
"session_id": session_id,
"response_previewed": _stream_consumer is not None and bool(full_response),
}
async def _run_agent(
self,
message: str,
context_prompt: str,
history: List[Dict[str, Any]],
source: SessionSource,
session_id: str,
session_key: str = None,
run_generation: Optional[int] = None,
_interrupt_depth: int = 0,
event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
) -> Dict[str, Any]:
"""
Run the agent with the given message and context.
Returns the full result dict from run_conversation, including:
- "final_response": str (the text to send back)
- "messages": list (full conversation including tool calls)
- "api_calls": int
- "completed": bool
This is run in a thread pool to not block the event loop.
Supports interruption via new messages.
"""
if self._get_proxy_url():
return await self._run_agent_via_proxy(
message=message,
context_prompt=context_prompt,
history=history,
source=source,
session_id=session_id,
session_key=session_key,
run_generation=run_generation,
event_message_id=event_message_id,
)
from run_agent import AIAgent
import queue
def _run_still_current() -> bool:
    """Return whether this run is still the current one for its session.

    A run with no ``run_generation`` or no ``session_key`` is always treated
    as current; otherwise the answer comes from
    ``self._is_session_run_current``.
    """
    if run_generation is not None and session_key:
        return self._is_session_run_current(session_key, run_generation)
    return True
user_config = _load_gateway_config()
platform_key = _platform_config_key(source.platform)
from hermes_cli.tools_config import _get_platform_tools
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
agent_cfg_local = user_config.get("agent") or {}
disabled_toolsets = agent_cfg_local.get("disabled_toolsets") or None
display_config = user_config.get("display", {})
if not isinstance(display_config, dict):
display_config = {}
from gateway.display_config import resolve_display_setting
try:
from agent.display import set_tool_preview_max_len
_tpl = resolve_display_setting(user_config, platform_key, "tool_preview_length", 0)
set_tool_preview_max_len(int(_tpl) if _tpl else 0)
except Exception:
pass
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
_env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
_display_cfg = display_config if isinstance(display_config, dict) else {}
_platforms_cfg = _display_cfg.get("platforms") or {}
_platform_cfg = _platforms_cfg.get(platform_key) or {}
_legacy_tp_overrides = _display_cfg.get("tool_progress_overrides") or {}
_tool_progress_configured = (
"tool_progress" in _display_cfg
or (
isinstance(_platform_cfg, dict)
and "tool_progress" in _platform_cfg
)
or (
isinstance(_legacy_tp_overrides, dict)
and platform_key in _legacy_tp_overrides
)
)
progress_mode = (
_env_tp
if _env_tp and not _tool_progress_configured
else (_resolved_tp or _env_tp or "all")
)
from gateway.config import Platform
tool_progress_enabled = progress_mode != "off" and source.platform != Platform.WEBHOOK
interim_assistant_messages_enabled = (
source.platform != Platform.WEBHOOK
and is_truthy_value(
display_config.get("interim_assistant_messages"),
default=True,
)
)
progress_queue = queue.Queue() if tool_progress_enabled else None
last_tool = [None]
last_progress_msg = [None]
repeat_count = [0]
_cleanup_progress = bool(
resolve_display_setting(user_config, platform_key, "cleanup_progress")
)
_cleanup_adapter = self.adapters.get(source.platform) if _cleanup_progress else None
if _cleanup_adapter is not None and (
type(_cleanup_adapter).delete_message is BasePlatformAdapter.delete_message
):
_cleanup_progress = False
_cleanup_adapter = None
_cleanup_msg_ids: List[str] = []
long_tool_hint_fired = [False]
_LONG_TOOL_THRESHOLD_S = 30.0
def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
    """Callback invoked by agent on tool lifecycle events.

    Queues one progress line on ``progress_queue`` for each ``tool.started``
    event.  A ``tool.completed`` event never produces a progress line; it is
    only inspected to decide whether the one-time tool-progress onboarding
    hint should be queued.  Nothing happens when there is no queue or the run
    is no longer current.

    Args:
        event_type: Lifecycle event name; only ``"tool.started"`` and
            ``"tool.completed"`` are acted on.
        tool_name: Name of the tool the event refers to.
        preview: Short preview text for the call, if any.
        args: Tool arguments; rendered only when ``progress_mode`` is
            ``"verbose"``.
        **kwargs: Extra event data; ``duration`` is read for
            ``tool.completed`` events.
    """
    if not progress_queue or not _run_still_current():
        return

    def _clip(text: str, limit: int) -> str:
        """Cut *text* down to *limit* characters, ending in "..." when it fits."""
        if len(text) <= limit:
            return text
        if limit < 3:
            # No room for the ellipsis.  ``text[:limit - 3]`` would have a
            # negative end index here and keep almost the whole string.
            return text[:limit]
        return text[:limit - 3] + "..."

    if event_type == "tool.completed" and not long_tool_hint_fired[0]:
        try:
            duration = kwargs.get("duration") or 0
            if duration >= _LONG_TOOL_THRESHOLD_S and progress_mode == "all":
                from agent.onboarding import (
                    TOOL_PROGRESS_FLAG,
                    is_seen,
                    mark_seen,
                    tool_progress_hint_gateway,
                )
                _cfg = _load_gateway_config()
                gate_on = is_truthy_value(
                    cfg_get(_cfg, "display", "tool_progress_command"),
                    default=False,
                )
                if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
                    # Set the in-memory flag first so the hint is queued at
                    # most once per run even if persisting the flag fails.
                    long_tool_hint_fired[0] = True
                    progress_queue.put(tool_progress_hint_gateway())
                    mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
        except Exception as _hint_err:
            # The hint is best-effort; never let it break the agent callback.
            logger.debug("tool-progress onboarding hint failed: %s", _hint_err)
        return
    if event_type not in {"tool.started",}:
        return
    # Stay quiet once the agent reports it has been interrupted.
    try:
        _agent_for_interrupt = agent_holder[0] if agent_holder else None
        if _agent_for_interrupt is not None and getattr(
            _agent_for_interrupt, "is_interrupted", False
        ):
            return
    except Exception:
        pass
    # "new" mode: skip consecutive calls to the same tool.
    if progress_mode == "new" and tool_name == last_tool[0]:
        return
    last_tool[0] = tool_name
    from agent.display import get_tool_emoji
    emoji = get_tool_emoji(tool_name, default="⚙️")
    if progress_mode == "verbose":
        if args:
            from agent.display import get_tool_preview_max_len
            _pl = get_tool_preview_max_len()
            args_str = json.dumps(args, ensure_ascii=False, default=str)
            # A non-positive limit means "do not truncate" in verbose mode.
            if _pl > 0:
                args_str = _clip(args_str, _pl)
            msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
        elif preview:
            msg = f"{emoji} {tool_name}: \"{preview}\""
        else:
            msg = f"{emoji} {tool_name}..."
        progress_queue.put(msg)
        return
    if preview:
        from agent.display import get_tool_preview_max_len
        _pl = get_tool_preview_max_len()
        # Outside verbose mode a non-positive limit falls back to 40 characters.
        _cap = _pl if _pl > 0 else 40
        preview = _clip(preview, _cap)
        msg = f"{emoji} {tool_name}: \"{preview}\""
    else:
        msg = f"{emoji} {tool_name}..."
    # Identical consecutive lines are sent as a dedup marker carrying the
    # repeat count instead of a new line.
    if msg == last_progress_msg[0]:
        repeat_count[0] += 1
        progress_queue.put(("__dedup__", msg, repeat_count[0]))
        return
    last_progress_msg[0] = msg
    repeat_count[0] = 0
    progress_queue.put(msg)
if source.platform == Platform.SLACK:
_progress_thread_id = source.thread_id or event_message_id
else:
_progress_thread_id = source.thread_id
_progress_metadata = (
self._thread_metadata_for_source(source, event_message_id)
if _progress_thread_id == source.thread_id
else {"thread_id": _progress_thread_id}
) if _progress_thread_id else None
_progress_reply_to = (
event_message_id
if source.platform in (Platform.FEISHU, Platform.MATTERMOST) and source.thread_id and event_message_id
else None
)
async def send_progress_messages():
"""Drain ``progress_queue`` and relay tool-progress lines to the chat.

Returns immediately when there is no queue or no adapter for
``source.platform``.  When the adapter class has not overridden
``BasePlatformAdapter.edit_message`` the queue is emptied once and nothing
is sent.  Otherwise the coroutine loops until the run is no longer current
or the task is cancelled; on cancellation the remaining queue items are
folded into the buffer and a last edit of the current message is attempted.
"""
if not progress_queue:
return
adapter = self.adapters.get(source.platform)
if not adapter:
return
# The adapter class still uses BasePlatformAdapter.edit_message (not
# overridden): discard whatever is queued and stop.
if type(adapter).edit_message is BasePlatformAdapter.edit_message:
while not progress_queue.empty():
try:
progress_queue.get_nowait()
except Exception:
break
return
# progress_lines: lines making up the current progress message.
# progress_msg_id: id of the message being edited (None until one is sent).
# can_edit: cleared after a non-retryable, non-flood edit failure.
progress_lines = []
progress_msg_id = None
can_edit = True
_last_edit_ts = 0.0
# Minimum number of seconds between sends/edits; compared against
# time.monotonic() deltas in the main loop.
_PROGRESS_EDIT_INTERVAL = 1.5
_progress_len_fn = (
adapter.message_len_fn
if isinstance(adapter, BasePlatformAdapter)
else len
)
try:
_raw_progress_limit = int(getattr(adapter, "MAX_MESSAGE_LENGTH", 4000) or 4000)
except Exception:
_raw_progress_limit = 4000
# Keep 64 units of headroom under the adapter limit when the limit is
# larger than 128; never let the limit drop below 1.
_PROGRESS_TEXT_LIMIT = max(
1,
_raw_progress_limit - (64 if _raw_progress_limit > 128 else 0),
)
# Only pass ``metadata`` to edit_message when its signature accepts it,
# either as a named parameter or through **kwargs.
_edit_accepts_metadata = False
if _progress_metadata:
try:
_edit_params = inspect.signature(adapter.edit_message).parameters
_edit_accepts_metadata = (
"metadata" in _edit_params
or any(
param.kind is inspect.Parameter.VAR_KEYWORD
for param in _edit_params.values()
)
)
except (TypeError, ValueError):
_edit_accepts_metadata = False
# Edit an existing progress message, adding metadata only when supported.
async def _edit_progress_message(message_id: str, content: str):
kwargs = {
"chat_id": source.chat_id,
"message_id": message_id,
"content": content,
}
if _edit_accepts_metadata:
kwargs["metadata"] = _progress_metadata
return await adapter.edit_message(**kwargs)
# Join buffered lines into the text of a single message.
def _progress_text(lines: list) -> str:
return "\n".join(str(line) for line in lines)
def _split_progress_groups(lines: list) -> list[list]:
"""Partition progress lines into platform-sized editable bubbles."""
groups: list[list] = []
current: list = []
for line in lines:
candidate = current + [line]
# A group is closed only when it already holds a line, so a single
# over-long line still gets a group of its own.
if current and _progress_len_fn(_progress_text(candidate)) > _PROGRESS_TEXT_LIMIT:
groups.append(current)
current = [line]
else:
current = candidate
if current:
groups.append(current)
return groups
# Remember the id of a successfully sent message when cleanup is enabled.
def _track_progress_result(result) -> None:
if (
_cleanup_progress
and getattr(result, "success", False)
and getattr(result, "message_id", None)
):
_cleanup_msg_ids.append(str(result.message_id))
# Send a new progress message and record it for cleanup.
async def _send_progress_text(text: str):
result = await adapter.send(
chat_id=source.chat_id,
content=text,
reply_to=_progress_reply_to,
metadata=_progress_metadata,
)
_track_progress_result(result)
return result
async def _roll_progress_overflow_if_needed() -> bool:
"""Start fresh editable progress bubbles before a bubble exceeds limit.
Returns True when it delivered/split the current buffer and the
caller should skip the normal send/edit path for this tick.
"""
nonlocal progress_msg_id, progress_lines, can_edit
if not progress_lines or not can_edit:
return False
groups = _split_progress_groups(progress_lines)
if len(groups) <= 1:
return False
# The first group goes into the existing message (or a new one when
# nothing has been sent yet); every later group becomes a new message.
first_text = _progress_text(groups[0])
if progress_msg_id is not None:
result = await _edit_progress_message(progress_msg_id, first_text)
if not result.success:
can_edit = False
return False
else:
result = await _send_progress_text(first_text)
if result.success and result.message_id:
progress_msg_id = result.message_id
for group in groups[1:]:
result = await _send_progress_text(_progress_text(group))
if result.success and result.message_id:
progress_msg_id = result.message_id
# Later edits target the last message sent, so keep only its lines.
progress_lines = groups[-1]
return True
while True:
try:
# Run superseded: discard whatever is queued and stop.
if not _run_still_current():
while not progress_queue.empty():
try:
progress_queue.get_nowait()
except Exception:
break
return
# Raises queue.Empty when nothing is queued (handled below).
raw = progress_queue.get_nowait()
# Drop the item when the agent reports it has been interrupted.
try:
_agent_for_interrupt = agent_holder[0] if agent_holder else None
if _agent_for_interrupt is not None and getattr(
_agent_for_interrupt, "is_interrupted", False
):
await asyncio.sleep(0)
continue
except Exception:
pass
# ("__dedup__", msg, count): rewrite the last buffered line with a
# repeat suffix instead of appending a new one.
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
_, base_msg, count = raw
if progress_lines:
progress_lines[-1] = f"{base_msg} (×{count + 1})"
msg = progress_lines[-1] if progress_lines else base_msg
# ("__reset__", ...): forget the current message and dedup state so the
# next line starts a new message.
elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
progress_msg_id = None
progress_lines = []
last_progress_msg[0] = None
repeat_count[0] = 0
continue
else:
msg = raw
progress_lines.append(msg)
# When the buffer had to be split across messages, the overflow helper
# has already delivered it; skip the normal send/edit path.
if await _roll_progress_overflow_if_needed():
_last_edit_ts = time.monotonic()
await asyncio.sleep(0.3)
if _run_still_current():
await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
continue
# Throttle: if the last send/edit was less than _PROGRESS_EDIT_INTERVAL
# seconds ago, sleep for the remainder and go back to the queue without
# sending; the line stays in progress_lines.
_now = time.monotonic()
_remaining = _PROGRESS_EDIT_INTERVAL - (_now - _last_edit_ts)
if _remaining > 0:
await asyncio.sleep(_remaining)
continue
if not _run_still_current():
return
if can_edit and progress_msg_id is not None:
# Update the existing progress message with the whole buffer.
full_text = "\n".join(progress_lines)
result = await _edit_progress_message(progress_msg_id, full_text)
if not result.success:
_err = (getattr(result, "error", "") or "").lower()
# A failure flagged retryable leaves editing enabled.
if getattr(result, "retryable", False):
logger.debug(
"[%s] Transient edit failure — keeping can_edit=True",
adapter.name,
)
continue
# Flood control: restart the throttle clock and keep editing enabled.
if "flood" in _err or "retry after" in _err:
logger.info(
"[%s] Progress edit flood control, backing off",
adapter.name,
)
_last_edit_ts = time.monotonic()
else:
can_edit = False
# NOTE(review): indentation is not visible in this view — confirm
# which failure branch(es) this standalone send of ``msg`` belongs to.
_flood_result = await adapter.send(
chat_id=source.chat_id,
content=msg,
reply_to=_progress_reply_to,
metadata=_progress_metadata,
)
if (
_cleanup_progress
and getattr(_flood_result, "success", False)
and getattr(_flood_result, "message_id", None)
):
_cleanup_msg_ids.append(str(_flood_result.message_id))
else:
# No message to edit yet, or editing is disabled: send the whole
# buffer while editing is possible, otherwise just the newest line.
if can_edit:
full_text = "\n".join(progress_lines)
result = await adapter.send(
chat_id=source.chat_id,
content=full_text,
reply_to=_progress_reply_to,
metadata=_progress_metadata,
)
else:
result = await adapter.send(
chat_id=source.chat_id,
content=msg,
reply_to=_progress_reply_to,
metadata=_progress_metadata,
)
if result.success and result.message_id:
progress_msg_id = result.message_id
if _cleanup_progress:
_cleanup_msg_ids.append(str(result.message_id))
_last_edit_ts = time.monotonic()
await asyncio.sleep(0.3)
if _run_still_current():
await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
except queue.Empty:
# Nothing queued: poll again after 0.3 seconds.
await asyncio.sleep(0.3)
except asyncio.CancelledError:
# Cancelled: fold everything still queued into the buffer, using the
# same item types as the main loop, then attempt one final edit.
while not progress_queue.empty():
try:
raw = progress_queue.get_nowait()
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
_, base_msg, count = raw
if progress_lines:
progress_lines[-1] = f"{base_msg} (×{count + 1})"
await _roll_progress_overflow_if_needed()
elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
# Flush pending lines into the current message before
# forgetting it.
await _roll_progress_overflow_if_needed()
if can_edit and progress_lines and progress_msg_id:
_pending_text = _progress_text(progress_lines)
try:
await _edit_progress_message(progress_msg_id, _pending_text)
except Exception:
pass
progress_msg_id = None
progress_lines = []
last_progress_msg[0] = None
repeat_count[0] = 0
else:
progress_lines.append(raw)
await _roll_progress_overflow_if_needed()
except Exception:
break
if can_edit and progress_lines and progress_msg_id:
await _roll_progress_overflow_if_needed()
if can_edit and progress_lines and progress_msg_id:
full_text = _progress_text(progress_lines)
try:
await _edit_progress_message(progress_msg_id, full_text)
except Exception:
pass
return
except Exception as e:
# Any other error is logged; the loop resumes after a one-second pause.
logger.error("Progress message error: %s", e)
await asyncio.sleep(1)
agent_holder = [None]
result_holder = [None]
tools_holder = [None]
stream_consumer_holder = [None]
_loop_for_step = asyncio.get_running_loop()
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
    """Schedule an ``agent:step`` hook emission on ``_loop_for_step``.

    Does nothing once this run is no longer current.  Each entry of
    ``prev_tools`` contributes one item to ``tool_names``: a dict supplies
    its ``"name"`` value (empty string when missing or falsy); anything else
    is converted with ``str()``.
    """
    if not _run_still_current():
        return
    tool_labels: list[str] = [
        (entry.get("name") or "") if isinstance(entry, dict) else str(entry)
        for entry in (prev_tools or [])
    ]
    step_payload = {
        "platform": source.platform.value if source.platform else "",
        "user_id": source.user_id,
        "session_id": session_id,
        "iteration": iteration,
        "tool_names": tool_labels,
        "tools": prev_tools,
    }
    safe_schedule_threadsafe(
        _hooks_ref.emit("agent:step", step_payload),
        _loop_for_step,
        logger=logger,
        log_message="agent:step hook scheduling error",
    )
_status_adapter = self.adapters.get(source.platform)
_status_chat_id = source.chat_id
if source.platform == Platform.FEISHU and source.thread_id and event_message_id:
_status_thread_metadata: Optional[Dict[str, Any]] = {
"thread_id": _progress_thread_id,
"reply_to_message_id": event_message_id,
}
else:
_status_thread_metadata = self._thread_metadata_for_source(source, event_message_id) if _progress_thread_id else None
def _status_callback_sync(event_type: str, message: str) -> None:
    """Schedule delivery of an agent status message to the originating chat.

    The message first goes through ``_prepare_gateway_status_message``; a
    ``None`` result means it is suppressed and only a redacted, truncated
    debug log entry is written.  When progress cleanup is enabled, the id of
    a successfully sent message is appended to ``_cleanup_msg_ids``.
    """
    if not (_status_adapter and _run_still_current()):
        return
    outgoing = _prepare_gateway_status_message(
        source.platform,
        event_type,
        message,
    )
    if outgoing is None:
        platform_label = source.platform.value if source.platform else "unknown"
        redacted = _redact_gateway_user_facing_secrets(str(message or ""))
        logger.debug(
            "status_callback suppressed for %s/%s: %s",
            platform_label,
            event_type,
            redacted[:160],
        )
        return
    send_future = safe_schedule_threadsafe(
        _status_adapter.send(
            _status_chat_id,
            outgoing,
            metadata=_status_thread_metadata,
        ),
        _loop_for_step,
        logger=logger,
        log_message=f"status_callback ({event_type}) scheduling error",
    )
    # Nothing left to do when scheduling failed or cleanup is off.
    if send_future is None or not _cleanup_progress:
        return

    def _track_status_id(done) -> None:
        """Record the sent message id once the send has finished."""
        try:
            outcome = done.result()
        except Exception:
            return
        sent_id = getattr(outcome, "message_id", None)
        if getattr(outcome, "success", False) and sent_id:
            _cleanup_msg_ids.append(str(sent_id))

    send_future.add_done_callback(_track_status_id)
def run_sync():
nonlocal message
os.environ["HERMES_SESSION_KEY"] = session_key or ""
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
combined_ephemeral = context_prompt or ""
event_channel_prompt = (channel_prompt or "").strip()
if event_channel_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip()
if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
_reload_runtime_env_preserving_config_authority()
try:
model, runtime_kwargs = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=user_config,
)
logger.debug(
"run_agent resolved: model=%s provider=%s session=%s",
model, runtime_kwargs.get("provider"), session_key or "",
)
except Exception as exc:
return {
"final_response": f"⚠️ Provider authentication failed: {exc}",
"messages": [],
"api_calls": 0,
"tools": [],
}
pr = self._provider_routing
reasoning_config = self._resolve_session_reasoning_config(
source=source,
session_key=session_key,
)
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
_stream_consumer = None
_stream_delta_cb = None
_scfg = getattr(getattr(self, 'config', None), 'streaming', None)
if _scfg is None:
from gateway.config import StreamingConfig
_scfg = StreamingConfig()
_plat_streaming = resolve_display_setting(
user_config, platform_key, "streaming"
)
_streaming_enabled = (
_scfg.enabled and _scfg.transport != "off"
if _plat_streaming is None
else bool(_plat_streaming)
)
_want_stream_deltas = _streaming_enabled
_want_interim_messages = interim_assistant_messages_enabled
_want_interim_consumer = _want_interim_messages
if _want_stream_deltas or _want_interim_consumer:
try:
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
if not _adapter_supports_edit:
raise RuntimeError("skip streaming for non-editable platform")
_effective_cursor = _scfg.cursor
_buffer_only = False
if source.platform == Platform.MATRIX:
_effective_cursor = ""
_buffer_only = True
_fresh_final_secs = (
float(getattr(_scfg, "fresh_final_after_seconds", 0.0) or 0.0)
if source.platform == Platform.TELEGRAM
else 0.0
)
_consumer_cfg = StreamConsumerConfig(
edit_interval=_scfg.edit_interval,
buffer_threshold=_scfg.buffer_threshold,
cursor=_effective_cursor,
buffer_only=_buffer_only,
fresh_final_after_seconds=_fresh_final_secs,
transport=_scfg.transport or "edit",
chat_type=getattr(source, "chat_type", "") or "",
)
_stream_consumer = GatewayStreamConsumer(
adapter=_adapter,
chat_id=source.chat_id,
config=_consumer_cfg,
metadata=_status_thread_metadata,
on_new_message=(
(lambda: progress_queue.put(("__reset__",)))
if progress_queue is not None
else None
),
initial_reply_to_id=event_message_id,
)
if _want_stream_deltas:
def _stream_delta_cb(text: str) -> None:
    """Pass a streamed text delta to the stream consumer while this run is current."""
    if not _run_still_current():
        return
    _stream_consumer.on_delta(text)
stream_consumer_holder[0] = _stream_consumer
except Exception as _sc_err:
logger.debug("Could not set up stream consumer: %s", _sc_err)
def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None:
    """Deliver an interim assistant message produced mid-turn.

    With a stream consumer present, text that was already streamed only
    triggers ``on_segment_break()``; other text is handed to
    ``on_commentary()``.  Without a consumer, non-blank text that was not
    already streamed is sent through the status adapter.  Nothing happens
    once the run is no longer current.
    """
    if not _run_still_current():
        return
    if _stream_consumer is None:
        deliverable = (
            not already_streamed
            and _status_adapter
            and str(text or "").strip()
        )
        if deliverable:
            safe_schedule_threadsafe(
                _status_adapter.send(
                    _status_chat_id,
                    text,
                    metadata=_status_thread_metadata,
                ),
                _loop_for_step,
                logger=logger,
                log_message="interim_assistant_callback scheduling error",
            )
        return
    if already_streamed:
        _stream_consumer.on_segment_break()
        return
    _stream_consumer.on_commentary(text)
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
_sig = self._agent_config_signature(
turn_route["model"],
turn_route["runtime"],
enabled_toolsets,
combined_ephemeral,
cache_keys=self._extract_cache_busting_config(user_config),
)
agent = None
_cache_lock = getattr(self, "_agent_cache_lock", None)
_cache = getattr(self, "_agent_cache", None)
if _cache_lock and _cache is not None:
with _cache_lock:
cached = _cache.get(session_key)
if cached and cached[1] == _sig:
agent = cached[0]
if hasattr(_cache, "move_to_end"):
try:
_cache.move_to_end(session_key)
except KeyError:
pass
self._init_cached_agent_for_turn(agent, _interrupt_depth)
logger.debug("Reusing cached agent for session %s", session_key)
if agent is None:
agent = AIAgent(
model=turn_route["model"],
**turn_route["runtime"],
max_iterations=max_iterations,
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=enabled_toolsets,
disabled_toolsets=disabled_toolsets,
ephemeral_system_prompt=combined_ephemeral or None,
prefill_messages=self._prefill_messages or None,
reasoning_config=reasoning_config,
service_tier=self._service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=pr.get("only"),
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
provider_require_parameters=pr.get("require_parameters", False),
provider_data_collection=pr.get("data_collection"),
session_id=session_id,
platform=platform_key,
user_id=source.user_id,
user_name=source.user_name,
chat_id=source.chat_id,
chat_name=source.chat_name,
chat_type=source.chat_type,
thread_id=source.thread_id,
gateway_session_key=session_key,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
if _cache_lock and _cache is not None:
with _cache_lock:
_cache[session_key] = (agent, _sig)
self._enforce_agent_cache_cap()
logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)
agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
agent.step_callback = _step_callback_sync if _hooks_ref.loaded_hooks else None
agent.stream_delta_callback = _stream_delta_cb
agent.interim_assistant_callback = _interim_assistant_cb if _want_interim_messages else None
agent.status_callback = _status_callback_sync
agent.reasoning_config = reasoning_config
agent.service_tier = self._service_tier
agent.request_overrides = turn_route.get("request_overrides") or {}
_bg_review_release = threading.Event()
_bg_review_pending: list[str] = []
_bg_review_pending_lock = threading.Lock()
def _deliver_bg_review_message(message: str) -> None:
    """Schedule a background-review message for sending via the status adapter.

    Skipped when there is no status adapter or the run is no longer current.
    """
    if _status_adapter and _run_still_current():
        send_coro = _status_adapter.send(
            _status_chat_id,
            message,
            metadata=_status_thread_metadata,
        )
        safe_schedule_threadsafe(
            send_coro,
            _loop_for_step,
            logger=logger,
            log_message="background_review_callback scheduling error",
        )
def _release_bg_review_messages() -> None:
    """Open the release gate and deliver every background-review message held so far."""
    _bg_review_release.set()
    # Take a snapshot and empty the shared list under the lock; deliver
    # outside it.
    with _bg_review_pending_lock:
        held = _bg_review_pending[:]
        del _bg_review_pending[:]
    for note in held:
        _deliver_bg_review_message(note)
def _bg_review_send(message: str) -> None:
    """Deliver a background-review message now, or hold it until release.

    While ``_bg_review_release`` is unset the message is appended to
    ``_bg_review_pending``; once it is set the message is delivered
    directly.  Skipped when there is no status adapter or the run is no
    longer current.
    """
    if not (_status_adapter and _run_still_current()):
        return
    if _bg_review_release.is_set():
        _deliver_bg_review_message(message)
        return
    with _bg_review_pending_lock:
        # Re-check under the lock: the gate may have opened since the
        # unlocked check above.
        if not _bg_review_release.is_set():
            _bg_review_pending.append(message)
            return
    _deliver_bg_review_message(message)
agent.background_review_callback = _bg_review_send
if _status_adapter and session_key:
if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None:
_status_adapter.register_post_delivery_callback(
session_key,
_release_bg_review_messages,
generation=run_generation,
)
else:
_pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
if _pdc is not None:
_pdc[session_key] = _release_bg_review_messages
def _clarify_callback_sync(question: str, choices) -> str:
    """Send a clarify prompt to the chat and wait for the user's reply.

    The prompt is registered with ``tools.clarify_gateway``, sent through the
    status adapter's ``send_clarify``, and then awaited with
    ``wait_for_response`` using the module's configured timeout.

    Returns:
        The user's response; ``""`` when there is no status adapter; a
        bracketed note when the prompt could not be delivered or no response
        arrived within the timeout.
    """
    from tools import clarify_gateway as _clarify_mod
    import uuid as _uuid

    if not _status_adapter:
        return ""
    prompt_id = _uuid.uuid4().hex[:10]
    session_ref = session_key or ""
    _clarify_mod.register(
        clarify_id=prompt_id,
        session_key=session_ref,
        question=question,
        choices=list(choices) if choices else None,
    )
    # Pausing the typing indicator is best-effort.
    try:
        _status_adapter.pause_typing_for_chat(_status_chat_id)
    except Exception:
        pass
    pending_send = safe_schedule_threadsafe(
        _status_adapter.send_clarify(
            chat_id=_status_chat_id,
            question=question,
            choices=list(choices) if choices else None,
            clarify_id=prompt_id,
            session_key=session_ref,
            metadata=_status_thread_metadata,
        ),
        _loop_for_step,
        logger=logger,
        log_message="Clarify send failed to schedule",
    )
    delivered = False
    if pending_send is not None:
        try:
            outcome = pending_send.result(timeout=15)
            delivered = bool(getattr(outcome, "success", False))
        except Exception as exc:
            logger.warning("Clarify send failed: %s", exc)
    if not delivered:
        _clarify_mod.clear_session(session_ref)
        return "[clarify prompt could not be delivered]"
    wait_secs = _clarify_mod.get_clarify_timeout()
    answer = _clarify_mod.wait_for_response(prompt_id, timeout=float(wait_secs))
    if answer is None or answer == "":
        return f"[user did not respond within {int(wait_secs / 60)}m]"
    return answer
agent.clarify_callback = _clarify_callback_sync
agent_holder[0] = agent
tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
agent_history = []
for msg in history:
role = msg.get("role")
if not role:
continue
if role in {"session_meta",}:
continue
if role == "system":
continue
has_tool_calls = "tool_calls" in msg
has_tool_call_id = "tool_call_id" in msg
is_tool_message = role == "tool"
if has_tool_calls or has_tool_call_id or is_tool_message:
clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
agent_history.append(clean_msg)
else:
content = msg.get("content")
if content:
if msg.get("mirror"):
mirror_src = msg.get("mirror_source", "another session")
content = f"[Delivered from {mirror_src}] {content}"
entry = _build_replay_entry(role, content, msg)
agent_history.append(entry)
_history_media_paths: set = set()
for _hm in agent_history:
if _hm.get("role") in {"tool", "function"}:
_hc = _hm.get("content", "")
if "MEDIA:" in _hc:
_TOOL_MEDIA_RE = re.compile(
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
r'txt|csv|apk|ipa))',
re.IGNORECASE
)
for _match in _TOOL_MEDIA_RE.finditer(_hc):
_p = _match.group(1).strip().rstrip('",}')
if _p:
_history_media_paths.add(_p)
from tools.approval import (
register_gateway_notify,
reset_current_session_key,
set_current_session_key,
unregister_gateway_notify,
)
def _approval_notify_sync(approval_data: dict) -> None:
    """Send the approval request to the user from the agent thread.

    If the adapter supports interactive button-based approvals
    (e.g. Discord's ``send_exec_approval``), use that for a richer
    UX. Otherwise fall back to a plain text message with
    ``/approve`` instructions.

    Args:
        approval_data: Approval details; ``"command"`` and ``"description"``
            are read.

    Raises:
        RuntimeError: When there is no adapter for the source platform, so
            the caller still learns the request could not be delivered
            (previously this surfaced as an ``AttributeError``).
    """
    if _status_adapter is None:
        raise RuntimeError("approval request: no adapter for source platform")
    # Pausing the typing indicator is best-effort, as in the clarify
    # callback: a failure here must not stop the approval prompt from
    # being delivered.
    try:
        _status_adapter.pause_typing_for_chat(_status_chat_id)
    except Exception as _pause_err:
        logger.debug("Could not pause typing before approval prompt: %s", _pause_err)
    cmd = approval_data.get("command", "")
    desc = approval_data.get("description", "dangerous command")
    if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
        try:
            _approval_fut = safe_schedule_threadsafe(
                _status_adapter.send_exec_approval(
                    chat_id=_status_chat_id,
                    command=cmd,
                    session_key=_approval_session_key,
                    description=desc,
                    metadata=_status_thread_metadata,
                ),
                _loop_for_step,
                logger=logger,
                log_message="send_exec_approval scheduling error",
            )
            if _approval_fut is None:
                raise RuntimeError("send_exec_approval: loop unavailable")
            _approval_result = _approval_fut.result(timeout=15)
            if _approval_result.success:
                return
            logger.warning(
                "Button-based approval failed (send returned error), falling back to text: %s",
                _approval_result.error,
            )
        except Exception as _e:
            logger.warning(
                "Button-based approval failed, falling back to text: %s", _e
            )
    # Text fallback: show at most 200 characters of the command.
    cmd_preview = cmd[:200] + "..." if len(cmd) > 200 else cmd
    msg = (
        f"⚠️ **Dangerous command requires approval:**\n"
        f"```\n{cmd_preview}\n```\n"
        f"Reason: {desc}\n\n"
        f"Reply `/approve` to execute, `/approve session` to approve this pattern "
        f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
    )
    try:
        _approval_send_fut = safe_schedule_threadsafe(
            _status_adapter.send(
                _status_chat_id,
                msg,
                metadata=_status_thread_metadata,
            ),
            _loop_for_step,
            logger=logger,
            log_message="Approval text-send scheduling error",
        )
        if _approval_send_fut is not None:
            _approval_send_fut.result(timeout=15)
    except Exception as _e:
        logger.error("Failed to send approval request: %s", _e)
_pending_notes = getattr(self, '_pending_model_notes', {})
_msn = _pending_notes.pop(session_key, None) if session_key else None
if _msn:
message = _msn + "\n\n" + message
_freshness_window = _auto_continue_freshness_window()
_interruption_is_fresh = _is_fresh_gateway_interruption(
_last_transcript_timestamp(history),
window_secs=_freshness_window,
)
_resume_entry = None
if session_key:
try:
_resume_entry = self.session_store._entries.get(session_key)
except Exception:
_resume_entry = None
_is_resume_pending = bool(
_resume_entry is not None
and getattr(_resume_entry, "resume_pending", False)
and _interruption_is_fresh
)
_has_fresh_tool_tail = bool(
agent_history
and agent_history[-1].get("role") == "tool"
and _interruption_is_fresh
)
if _is_resume_pending:
_reason = getattr(_resume_entry, "resume_reason", None) or "restart_timeout"
_reason_phrase = (
"a gateway restart"
if _reason == "restart_timeout"
else "a gateway shutdown"
if _reason == "shutdown_timeout"
else "a gateway interruption"
)
message = (
f"[System note: Your previous turn in this session was interrupted "
f"by {_reason_phrase}. The conversation history below is intact. "
f"If it contains unfinished tool result(s), process them first and "
f"summarize what was accomplished, then address the user's new "
f"message below.]\n\n"
+ message
)
elif _has_fresh_tool_tail:
message = (
"[System note: Your previous turn was interrupted before you could "
"process the last tool result(s). The conversation history contains "
"tool outputs you haven't responded to yet. Please finish processing "
"those results and summarize what was accomplished, then address the "
"user's new message below.]\n\n"
+ message
)
_pending_notes = getattr(self, "_pending_skills_reload_notes", None)
if _pending_notes and session_key and session_key in _pending_notes:
_srn = _pending_notes.pop(session_key, None)
if _srn:
message = _srn + "\n\n" + message
_approval_session_key = session_key or ""
_approval_session_token = set_current_session_key(_approval_session_key)
register_gateway_notify(_approval_session_key, _approval_notify_sync)
try:
_native_imgs = self._consume_pending_native_image_paths(session_key)
if _native_imgs:
try:
from agent.image_routing import build_native_content_parts
_parts, _skipped = build_native_content_parts(
message,
_native_imgs,
)
if _skipped:
logger.warning(
"Native image attachment: skipped %d unreadable path(s): %s",
len(_skipped), _skipped,
)
if any(p.get("type") == "image_url" for p in _parts):
_run_message: Any = _parts
else:
_run_message = message
except Exception as _img_exc:
logger.warning(
"Native image attachment failed, falling back to text: %s",
_img_exc,
)
_run_message = message
else:
_run_message = message
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
finally:
unregister_gateway_notify(_approval_session_key)
try:
from tools.clarify_gateway import clear_session as _clear_clarify_session
_clear_clarify_session(_approval_session_key)
except Exception:
pass
reset_current_session_key(_approval_session_token)
result_holder[0] = result
if _stream_consumer is not None:
_stream_consumer.finish()
final_response = result.get("final_response")
_last_prompt_toks = 0
_input_toks = 0
_output_toks = 0
_context_length = 0
_agent = agent_holder[0]
if _agent and hasattr(_agent, "context_compressor"):
_last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
_input_toks = getattr(_agent, "session_prompt_tokens", 0)
_output_toks = getattr(_agent, "session_completion_tokens", 0)
_context_length = getattr(_agent.context_compressor, "context_length", 0) or 0
_resolved_model = getattr(_agent, "model", None) if _agent else None
if not final_response:
error_msg = f"⚠️ {result['error']}" if result.get("error") else ""
return {
"final_response": error_msg,
"messages": result.get("messages", []),
"api_calls": result.get("api_calls", 0),
"failed": result.get("failed", False),
"partial": result.get("partial", False),
"completed": result.get("completed"),
"interrupted": result.get("interrupted", False),
"interrupt_message": result.get("interrupt_message"),
"error": result.get("error"),
"compression_exhausted": result.get("compression_exhausted", False),
"tools": tools_holder[0] or [],
"history_offset": len(agent_history),
"last_prompt_tokens": _last_prompt_toks,
"input_tokens": _input_toks,
"output_tokens": _output_toks,
"model": _resolved_model,
"context_length": _context_length,
}
if "MEDIA:" not in final_response:
media_tags = []
has_voice_directive = False
for msg in result.get("messages", []):
if msg.get("role") in {"tool", "function"}:
content = msg.get("content", "")
if "MEDIA:" in content:
_TOOL_MEDIA_RE = re.compile(
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
r'txt|csv|apk|ipa))',
re.IGNORECASE
)
for match in _TOOL_MEDIA_RE.finditer(content):
path = match.group(1).strip().rstrip('",}')
if path and path not in _history_media_paths:
media_tags.append(f"MEDIA:{path}")
if "[[audio_as_voice]]" in content:
has_voice_directive = True
if media_tags:
seen = set()
unique_tags = []
for tag in media_tags:
if tag not in seen:
seen.add(tag)
unique_tags.append(tag)
if has_voice_directive:
unique_tags.insert(0, "[[audio_as_voice]]")
final_response = final_response + "\n" + "\n".join(unique_tags)
agent = agent_holder[0]
_session_was_split = False
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
_session_was_split = True
logger.info(
"Session split detected: %s → %s (compression)",
session_id, agent.session_id,
)
entry = self.session_store._entries.get(session_key)
if entry:
entry.session_id = agent.session_id
self.session_store._save()
if (
getattr(source, "platform", None) == Platform.TELEGRAM
and getattr(source, "chat_type", None) == "dm"
and getattr(source, "thread_id", None) is None
and self._session_db is not None
):
try:
_binding = self._session_db.get_telegram_topic_binding_by_session(
session_id=agent.session_id,
)
if _binding and _binding.get("thread_id"):
source.thread_id = str(_binding["thread_id"])
logger.debug(
"Restored source.thread_id=%s from binding after session split %s → %s",
source.thread_id,
session_id,
agent.session_id,
)
except Exception:
logger.debug(
"Failed to restore thread_id from binding after session split",
exc_info=True,
)
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
_effective_history_offset = 0 if _session_was_split else len(agent_history)
if final_response and self._session_db:
try:
from agent.title_generator import maybe_auto_title
all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
def _title_failure_cb(task: str, exc: BaseException) -> None:
logger.debug(
"Gateway auto-title failure suppressed (not user-visible): %s: %s",
task, exc,
)
maybe_auto_title_kwargs = {
"failure_callback": _title_failure_cb,
"main_runtime": {
"model": getattr(agent, "model", None),
"provider": getattr(agent, "provider", None),
"base_url": getattr(agent, "base_url", None),
"api_key": getattr(agent, "api_key", None),
"api_mode": getattr(agent, "api_mode", None),
} if agent else None,
}
if self._is_telegram_topic_lane(source):
maybe_auto_title_kwargs["title_callback"] = lambda title: self._schedule_telegram_topic_title_rename(
source,
effective_session_id,
title,
)
maybe_auto_title(
self._session_db,
effective_session_id,
message,
final_response,
all_msgs,
**maybe_auto_title_kwargs,
)
except Exception:
pass
return {
"final_response": final_response,
"last_reasoning": result.get("last_reasoning"),
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
"completed": result_holder[0].get("completed") if result_holder[0] else None,
"interrupted": result_holder[0].get("interrupted", False) if result_holder[0] else False,
"partial": result_holder[0].get("partial", False) if result_holder[0] else False,
"error": result_holder[0].get("error") if result_holder[0] else None,
"interrupt_message": result_holder[0].get("interrupt_message") if result_holder[0] else None,
"tools": tools_holder[0] or [],
"history_offset": _effective_history_offset,
"last_prompt_tokens": _last_prompt_toks,
"input_tokens": _input_toks,
"output_tokens": _output_toks,
"model": _resolved_model,
"context_length": _context_length,
"session_id": effective_session_id,
"response_previewed": result.get("response_previewed", False),
}
progress_task = None
if tool_progress_enabled:
progress_task = asyncio.create_task(send_progress_messages())
stream_task = None
async def _start_stream_consumer():
"""Wait for the stream consumer to be created, then run it."""
for _ in range(200):
if stream_consumer_holder[0] is not None:
await stream_consumer_holder[0].run()
return
await asyncio.sleep(0.05)
stream_task = asyncio.create_task(_start_stream_consumer())
async def track_agent():
while agent_holder[0] is None:
await asyncio.sleep(0.05)
if not session_key:
return
if run_generation is not None and not self._is_session_run_current(
session_key, run_generation
):
logger.info(
"Skipping stale agent promotion for %s — generation %s is no longer current",
session_key or "",
run_generation,
)
return
self._running_agents[session_key] = agent_holder[0]
if self._draining:
self._update_runtime_status("draining")
tracking_task = asyncio.create_task(track_agent())
_interrupt_detected = asyncio.Event()
async def monitor_for_interrupt():
if not session_key:
return
while True:
await asyncio.sleep(0.2)
try:
_adapter = self.adapters.get(source.platform)
if not _adapter:
continue
if hasattr(_adapter, 'has_pending_interrupt') and _adapter.has_pending_interrupt(session_key):
agent = agent_holder[0]
if agent:
_peek_event = _adapter._pending_messages.get(session_key)
pending_text = _peek_event.text if _peek_event else None
logger.debug("Interrupt detected from adapter, signaling agent...")
agent.interrupt(pending_text)
_interrupt_detected.set()
break
except asyncio.CancelledError:
raise
except Exception as _mon_err:
logger.debug("monitor_for_interrupt error (will retry): %s", _mon_err)
interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
_NOTIFY_INTERVAL_RAW = _float_env("HERMES_AGENT_NOTIFY_INTERVAL", 180)
_NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
_notify_start = time.time()
async def _notify_long_running():
if _NOTIFY_INTERVAL is None:
return
_notify_adapter = self.adapters.get(source.platform)
if not _notify_adapter:
return
while True:
await asyncio.sleep(_NOTIFY_INTERVAL)
_elapsed_mins = int((time.time() - _notify_start) // 60)
_agent_ref = agent_holder[0]
_status_detail = ""
if _agent_ref and hasattr(_agent_ref, "get_activity_summary"):
try:
_a = _agent_ref.get_activity_summary()
_parts = [f"iteration {_a['api_call_count']}/{_a['max_iterations']}"]
if _a.get("current_tool"):
_parts.append(f"running: {_a['current_tool']}")
else:
_parts.append(_a.get("last_activity_desc", ""))
_status_detail = " — " + ", ".join(_parts)
except Exception:
pass
try:
_notify_res = await _notify_adapter.send(
source.chat_id,
f"⏳ Still working... ({_elapsed_mins} min elapsed{_status_detail})",
metadata=_status_thread_metadata,
)
if (
_cleanup_progress
and getattr(_notify_res, "success", False)
and getattr(_notify_res, "message_id", None)
):
_cleanup_msg_ids.append(str(_notify_res.message_id))
except Exception as _ne:
logger.debug("Long-running notification error: %s", _ne)
_notify_task = asyncio.create_task(_notify_long_running())
try:
_agent_timeout_raw = _float_env("HERMES_AGENT_TIMEOUT", 1800)
_agent_timeout = _agent_timeout_raw if _agent_timeout_raw > 0 else None
_agent_warning_raw = _float_env("HERMES_AGENT_TIMEOUT_WARNING", 900)
_agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None
_warning_fired = False
_executor_task = asyncio.ensure_future(
self._run_in_executor_with_context(run_sync)
)
_inactivity_timeout = False
_POLL_INTERVAL = 5.0
if _agent_timeout is None:
response = None
while True:
done, _ = await asyncio.wait(
{_executor_task}, timeout=_POLL_INTERVAL
)
if done:
response = _executor_task.result()
break
if not _interrupt_detected.is_set() and session_key:
_backup_adapter = self.adapters.get(source.platform)
_backup_agent = agent_holder[0]
if (_backup_adapter and _backup_agent
and hasattr(_backup_adapter, 'has_pending_interrupt')
and _backup_adapter.has_pending_interrupt(session_key)):
_bp_event = _backup_adapter._pending_messages.get(session_key)
_bp_text = _bp_event.text if _bp_event else None
logger.info(
"Backup interrupt detected for session %s "
"(monitor task state: %s)",
session_key,
"done" if interrupt_monitor.done() else "running",
)
_backup_agent.interrupt(_bp_text)
_interrupt_detected.set()
else:
response = None
while True:
done, _ = await asyncio.wait(
{_executor_task}, timeout=_POLL_INTERVAL
)
if done:
response = _executor_task.result()
break
_agent_ref = agent_holder[0]
_idle_secs = 0.0
if _agent_ref and hasattr(_agent_ref, "get_activity_summary"):
try:
_act = _agent_ref.get_activity_summary()
_idle_secs = _act.get("seconds_since_activity", 0.0)
except Exception:
pass
if (not _warning_fired and _agent_warning is not None
and _idle_secs >= _agent_warning):
_warning_fired = True
_warn_adapter = self.adapters.get(source.platform)
if _warn_adapter:
_elapsed_warn = int(_agent_warning // 60) or 1
_remaining_mins = int((_agent_timeout - _agent_warning) // 60) or 1
try:
await _warn_adapter.send(
source.chat_id,
f"⚠️ No activity for {_elapsed_warn} min. "
f"If the agent does not respond soon, it will "
f"be timed out in {_remaining_mins} min. "
f"You can continue waiting or use /reset.",
metadata=_status_thread_metadata,
)
except Exception as _warn_err:
logger.debug("Inactivity warning send error: %s", _warn_err)
if _idle_secs >= _agent_timeout:
_inactivity_timeout = True
break
if not _interrupt_detected.is_set() and session_key:
_backup_adapter = self.adapters.get(source.platform)
_backup_agent = agent_holder[0]
if (_backup_adapter and _backup_agent
and hasattr(_backup_adapter, 'has_pending_interrupt')
and _backup_adapter.has_pending_interrupt(session_key)):
_bp_event = _backup_adapter._pending_messages.get(session_key)
_bp_text = _bp_event.text if _bp_event else None
logger.info(
"Backup interrupt detected for session %s "
"(monitor task state: %s)",
session_key,
"done" if interrupt_monitor.done() else "running",
)
_backup_agent.interrupt(_bp_text)
_interrupt_detected.set()
if _inactivity_timeout:
_timed_out_agent = agent_holder[0]
_activity = {}
if _timed_out_agent and hasattr(_timed_out_agent, "get_activity_summary"):
try:
_activity = _timed_out_agent.get_activity_summary()
except Exception:
pass
_last_desc = _activity.get("last_activity_desc", "unknown")
_secs_ago = _activity.get("seconds_since_activity", 0)
_cur_tool = _activity.get("current_tool")
_iter_n = _activity.get("api_call_count", 0)
_iter_max = _activity.get("max_iterations", 0)
logger.error(
"Agent idle for %.0fs (timeout %.0fs) in session %s "
"| last_activity=%s | iteration=%s/%s | tool=%s",
_secs_ago, _agent_timeout, session_key,
_last_desc, _iter_n, _iter_max,
_cur_tool or "none",
)
if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
_timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)
_timeout_mins = int(_agent_timeout // 60) or 1
_diag_lines = [
f"⏱️ Agent inactive for {_timeout_mins} min — no tool calls "
f"or API responses."
]
if _cur_tool:
_diag_lines.append(
f"The agent appears stuck on tool `{_cur_tool}` "
f"({_secs_ago:.0f}s since last activity, "
f"iteration {_iter_n}/{_iter_max})."
)
else:
_diag_lines.append(
f"Last activity: {_last_desc} ({_secs_ago:.0f}s ago, "
f"iteration {_iter_n}/{_iter_max}). "
"The agent may have been waiting on an API response."
)
_diag_lines.append(
"To increase the limit, set agent.gateway_timeout in config.yaml "
"(value in seconds, 0 = no limit) and restart the gateway.\n"
"Try again, or use /reset to start fresh."
)
response = {
"final_response": "\n".join(_diag_lines),
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
"api_calls": _iter_n,
"tools": tools_holder[0] or [],
"history_offset": 0,
"failed": True,
}
_agent = agent_holder[0]
_result_for_fb = result_holder[0]
_run_failed = _result_for_fb.get("failed") if _result_for_fb else False
if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
_cfg_model = _resolve_gateway_model()
if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
self._evict_cached_agent(session_key)
result = result_holder[0]
adapter = self.adapters.get(source.platform)
pending_event = None
pending = None
if result and adapter and session_key:
pending_event = _dequeue_pending_event(adapter, session_key)
pending_event = self._promote_queued_event(session_key, adapter, pending_event)
if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
interrupt_message = result.get("interrupt_message")
if _is_control_interrupt_message(interrupt_message):
logger.info(
"Ignoring control interrupt message for session %s: %s",
session_key or "?",
interrupt_message,
)
else:
pending = interrupt_message
elif pending_event:
pending = pending_event.text or _build_media_placeholder(pending_event)
logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
if result and not pending and not pending_event:
_leftover_steer = result.get("pending_steer")
if _leftover_steer:
pending = _leftover_steer
logger.debug("Delivering leftover /steer as next turn: '%s...'", pending[:40])
if pending and pending.strip().startswith("/"):
_pending_parts = pending.strip().split(None, 1)
_pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else ""
if _pending_cmd_word:
try:
from hermes_cli.commands import resolve_command as _rc_pending
if _rc_pending(_pending_cmd_word):
logger.info(
"Discarding command '/%s' from pending queue — "
"commands must not be passed as agent input",
_pending_cmd_word,
)
pending_event = None
pending = None
except Exception:
pass
if self._draining and (pending_event or pending):
logger.info(
"Discarding pending follow-up for session %s during gateway %s",
session_key or "?",
self._status_action_label(),
)
pending_event = None
pending = None
if pending_event or pending:
logger.debug("Processing pending message: '%s...'", pending[:40])
if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
adapter._active_sessions[session_key].clear()
if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
logger.warning(
"Interrupt recursion depth %d reached for session %s — "
"queueing message instead of recursing.",
_interrupt_depth, session_key,
)
adapter = self.adapters.get(source.platform)
if adapter and pending_event:
merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
elif adapter and hasattr(adapter, 'queue_message'):
adapter.queue_message(session_key, pending)
return result_holder[0] or {"final_response": response, "messages": history}
was_interrupted = result.get("interrupted")
if not was_interrupted:
_sc = stream_consumer_holder[0]
if _sc and stream_task:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
try:
await stream_task
except asyncio.CancelledError:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or _previewed
or (_sc and getattr(_sc, "final_content_delivered", False))
)
first_response = result.get("final_response", "")
if first_response and not _already_streamed:
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
session_key or "?",
)
await adapter.send(
source.chat_id,
first_response,
metadata=_status_thread_metadata,
)
except Exception as e:
logger.warning("Failed to send first response before queued message: %s", e)
elif first_response:
logger.info(
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
session_key or "?",
)
if getattr(type(adapter), "pop_post_delivery_callback", None) is not None:
_bg_cb = adapter.pop_post_delivery_callback(
session_key,
generation=run_generation,
)
if callable(_bg_cb):
try:
_bg_result = _bg_cb()
if inspect.isawaitable(_bg_result):
await _bg_result
except Exception:
pass
elif adapter and hasattr(adapter, "_post_delivery_callbacks"):
_bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
if callable(_bg_cb):
try:
_bg_result = _bg_cb()
if inspect.isawaitable(_bg_result):
await _bg_result
except Exception:
pass
updated_history = result.get("messages", history)
next_source = source
next_message = pending
next_message_id = None
next_channel_prompt = None
if pending_event is not None:
next_source = getattr(pending_event, "source", None) or source
if self._is_goal_continuation_event(pending_event) and not self._goal_still_active_for_session(session_id):
logger.info(
"Discarding stale goal continuation for session %s — goal is no longer active",
session_key or "?",
)
return result
next_message = await self._prepare_inbound_message_text(
event=pending_event,
source=next_source,
history=updated_history,
)
if next_message is None:
return result
next_message_id = self._reply_anchor_for_event(pending_event)
next_channel_prompt = getattr(pending_event, "channel_prompt", None)
_followup_adapter = self.adapters.get(source.platform)
if _followup_adapter:
try:
await _followup_adapter.send_typing(
source.chat_id,
metadata=_status_thread_metadata,
)
except Exception:
pass
followup_result = await self._run_agent(
message=next_message,
context_prompt=context_prompt,
history=updated_history,
source=next_source,
session_id=session_id,
session_key=session_key,
run_generation=run_generation,
_interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id,
channel_prompt=next_channel_prompt,
)
return _preserve_queued_followup_history_offset(result, followup_result)
finally:
if progress_task:
progress_task.cancel()
interrupt_monitor.cancel()
_notify_task.cancel()
if stream_task:
_has_stream_consumer = (
stream_consumer_holder
and stream_consumer_holder[0] is not None
)
if not _has_stream_consumer:
stream_task.cancel()
try:
await stream_task
except asyncio.CancelledError:
pass
else:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
try:
await stream_task
except asyncio.CancelledError:
pass
tracking_task.cancel()
if session_key:
self._release_running_agent_state(
session_key, run_generation=run_generation
)
if self._draining:
self._update_runtime_status("draining")
for task in [progress_task, interrupt_monitor, tracking_task, _notify_task]:
if task:
try:
await task
except asyncio.CancelledError:
pass
_sc = stream_consumer_holder[0]
if isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(
_sc and getattr(_sc, "final_response_sent", False)
)
_previewed = bool(response.get("response_previewed"))
_content_delivered = bool(
_sc and getattr(_sc, "final_content_delivered", False)
)
if not _is_empty_sentinel and (_streamed or _previewed or _content_delivered):
logger.info(
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s content_delivered=%s).",
session_key or "?",
_streamed,
_previewed,
_content_delivered,
)
response["already_sent"] = True
if (
_cleanup_progress
and _cleanup_adapter is not None
and _cleanup_msg_ids
and session_key
and isinstance(response, dict)
and not response.get("failed")
and hasattr(_cleanup_adapter, "register_post_delivery_callback")
):
_ids_snapshot = list(_cleanup_msg_ids)
_chat_id_snapshot = source.chat_id
_adapter_snapshot = _cleanup_adapter
_loop_snapshot = asyncio.get_running_loop()
def _cleanup_temp_bubbles() -> None:
async def _delete_all() -> None:
for _mid in _ids_snapshot:
try:
await _adapter_snapshot.delete_message(
_chat_id_snapshot, _mid
)
except Exception:
pass
try:
safe_schedule_threadsafe(
_delete_all(), _loop_snapshot,
logger=logger,
log_message="Temp bubble cleanup scheduling error",
)
except Exception:
pass
try:
_cleanup_adapter.register_post_delivery_callback(
session_key,
_cleanup_temp_bubbles,
generation=run_generation,
)
except Exception as _rpe:
logger.debug("Post-delivery cleanup registration failed: %s", _rpe)
return response
def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
    """
    Drive the cron scheduler from a background thread inside the gateway.

    Each pass calls the cron ``tick`` once and then waits on ``stop_event``
    for ``interval`` seconds, so scheduled jobs fire without a separate
    ``hermes cron daemon`` or system cron entry. The loop exits once
    ``stop_event`` is set.

    ``adapters`` and ``loop`` are forwarded to the cron tick so the delivery
    path can use the gateway's live adapters (e.g. for E2EE rooms).

    Housekeeping piggybacks on the tick counter. Cadences are counted in
    ticks, so they correspond to minutes only at the default 60-second
    interval:

    - every 5 ticks: refresh the channel directory (only when ``adapters``
      is non-empty; the refresh is scheduled only when ``loop`` is given)
    - every 60 ticks: prune the image and document caches
      (``max_age_hours=24``)
    - every 60 ticks: sweep expired ``hermes debug share`` pastes
    - every 60 ticks: call ``maybe_run_curator``

    Every step is best-effort: a failure is logged at debug level and the
    ticker keeps running.
    """
    from cron.scheduler import tick as cron_tick
    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
    from hermes_cli.debug import _sweep_expired_pastes

    # Housekeeping cadences, expressed in ticks (not seconds).
    IMAGE_CACHE_EVERY = 60
    CHANNEL_DIR_EVERY = 5
    PASTE_SWEEP_EVERY = 60
    CURATOR_EVERY = 60

    # (cleanup callable, success log format, failure log format) — run in order.
    cache_jobs = (
        (
            cleanup_image_cache,
            "Image cache cleanup: removed %d stale file(s)",
            "Image cache cleanup error: %s",
        ),
        (
            cleanup_document_cache,
            "Document cache cleanup: removed %d stale file(s)",
            "Document cache cleanup error: %s",
        ),
    )

    def _log_curator_summary(msg):
        # Summary callback handed to the curator.
        logger.info("curator: %s", msg)

    logger.info("Cron ticker started (interval=%ds)", interval)

    ticks = 0
    while True:
        if stop_event.is_set():
            break

        # 1. The cron tick itself.
        try:
            cron_tick(verbose=False, adapters=adapters, loop=loop)
        except Exception as exc:
            logger.debug("Cron tick error: %s", exc)

        ticks += 1

        # 2. Channel directory refresh.
        if not ticks % CHANNEL_DIR_EVERY and adapters:
            try:
                from gateway.channel_directory import build_channel_directory

                if loop is not None:
                    # The coroutine is handed to the gateway's event loop; this
                    # thread blocks for at most 30 s waiting for the result.
                    pending = safe_schedule_threadsafe(
                        build_channel_directory(adapters),
                        loop,
                        logger=logger,
                        log_message="Channel directory refresh scheduling error",
                    )
                    if pending is not None:
                        pending.result(timeout=30)
            except Exception as exc:
                logger.debug("Channel directory refresh error: %s", exc)

        # 3. Cache pruning — each cache is handled independently so one
        #    failure does not prevent the other cleanup.
        if not ticks % IMAGE_CACHE_EVERY:
            for prune, removed_fmt, error_fmt in cache_jobs:
                try:
                    removed = prune(max_age_hours=24)
                    if removed:
                        logger.info(removed_fmt, removed)
                except Exception as exc:
                    logger.debug(error_fmt, exc)

        # 4. Expired debug-share paste sweep.
        if not ticks % PASTE_SWEEP_EVERY:
            try:
                deleted, remaining = _sweep_expired_pastes()
                if deleted:
                    logger.info(
                        "Paste sweep: deleted %d expired paste(s), %d pending",
                        deleted,
                        remaining,
                    )
            except Exception as exc:
                logger.debug("Paste sweep error: %s", exc)

        # 5. Curator hook.
        if not ticks % CURATOR_EVERY:
            try:
                from agent.curator import maybe_run_curator

                maybe_run_curator(
                    idle_for_seconds=float("inf"),
                    on_summary=_log_curator_summary,
                )
            except Exception as exc:
                logger.debug("Curator tick error: %s", exc)

        # Wait on the event rather than time.sleep() so a stop request wakes
        # the thread immediately.
        stop_event.wait(timeout=interval)

    logger.info("Cron ticker stopped")
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False, verbosity: Optional[int] = 0) -> bool:
"""
Start the gateway and run until interrupted.
This is the main entry point for running the gateway.
Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart.
Args:
config: Optional gateway configuration override.
replace: If True, kill any existing gateway instance before starting.
Useful for systemd services to avoid restart-loop deadlocks
when the previous process hasn't fully exited yet.
"""
from gateway.status import (
acquire_gateway_runtime_lock,
get_running_pid,
get_process_start_time,
release_gateway_runtime_lock,
remove_pid_file,
terminate_pid,
)
existing_pid = get_running_pid()
if existing_pid is not None and existing_pid != os.getpid():
if replace:
existing_start_time = get_process_start_time(existing_pid)
logger.info(
"Replacing existing gateway instance (PID %d) with --replace.",
existing_pid,
)
try:
from gateway.status import write_takeover_marker
write_takeover_marker(existing_pid)
except Exception as e:
logger.debug("Could not write takeover marker: %s", e)
try:
terminate_pid(existing_pid, force=False)
except ProcessLookupError:
pass
except (PermissionError, OSError):
logger.error(
"Permission denied killing PID %d. Cannot replace.",
existing_pid,
)
try:
from gateway.status import clear_takeover_marker
clear_takeover_marker()
except Exception:
pass
return False
from gateway.status import _pid_exists
for _ in range(20):
if not _pid_exists(existing_pid):
break
time.sleep(0.5)
else:
logger.warning(
"Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
existing_pid,
)
try:
terminate_pid(existing_pid, force=True)
time.sleep(0.5)
except (ProcessLookupError, PermissionError, OSError):
pass
remove_pid_file()
try:
(get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
except Exception:
pass
try:
from gateway.status import clear_takeover_marker
clear_takeover_marker()
except Exception:
pass
try:
from gateway.status import release_all_scoped_locks
_released = release_all_scoped_locks(
owner_pid=existing_pid,
owner_start_time=existing_start_time,
)
if _released:
logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
except Exception:
pass
else:
hermes_home = str(get_hermes_home())
logger.error(
"Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
"Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
existing_pid, hermes_home,
)
print(
f"\n❌ Gateway already running (PID {existing_pid}).\n"
f" Use 'hermes gateway restart' to replace it,\n"
f" or 'hermes gateway stop' to kill it first.\n"
f" Or use 'hermes gateway run --replace' to auto-replace.\n"
)
return False
try:
from tools.skills_sync import sync_skills
sync_skills(quiet=True)
except Exception:
pass
from hermes_logging import setup_logging
setup_logging(hermes_home=_hermes_home, mode="gateway")
try:
from gateway import memory_monitor as _memory_monitor
_mm_cfg = {}
try:
from hermes_cli.config import load_config as _load_cli_config
_mm_cfg = (_load_cli_config() or {}).get("logging", {}).get("memory_monitor", {}) or {}
except Exception:
_mm_cfg = {}
if _mm_cfg.get("enabled", True):
try:
_mm_interval = float(_mm_cfg.get("interval_seconds", 300))
except (TypeError, ValueError):
_mm_interval = 300.0
_memory_monitor.start_memory_monitoring(interval_seconds=_mm_interval)
except Exception as _mm_exc:
logger.debug("Failed to start memory monitor: %s", _mm_exc)
if verbosity is not None:
from agent.redact import RedactingFormatter
_stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
_stderr_handler = logging.StreamHandler()
_stderr_handler.setLevel(_stderr_level)
_stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(_stderr_handler)
if _stderr_level < logging.getLogger().level:
logging.getLogger().setLevel(_stderr_level)
runner = GatewayRunner(config)
_signal_initiated_shutdown = False
def shutdown_signal_handler(received_signal=None):
nonlocal _signal_initiated_shutdown
planned_takeover = False
try:
from gateway.status import consume_takeover_marker_for_self
planned_takeover = consume_takeover_marker_for_self()
except Exception as e:
logger.debug("Takeover marker check failed: %s", e)
planned_stop = False
if received_signal == signal.SIGINT:
planned_stop = True
elif not planned_takeover:
try:
from gateway.status import consume_planned_stop_marker_for_self
planned_stop = consume_planned_stop_marker_for_self()
except Exception as e:
logger.debug("Planned stop marker check failed: %s", e)
try:
from gateway.shutdown_forensics import (
format_context_for_log,
snapshot_shutdown_context,
spawn_async_diagnostic,
)
_shutdown_ctx = snapshot_shutdown_context(received_signal)
except Exception as _e:
_shutdown_ctx = None
logger.debug("snapshot_shutdown_context failed: %s", _e)
if planned_takeover:
logger.info(
"Received %s as a planned --replace takeover — exiting cleanly",
_shutdown_ctx["signal"] if _shutdown_ctx else "SIGTERM",
)
elif planned_stop:
logger.info(
"Received %s as a planned gateway stop — exiting cleanly",
_shutdown_ctx["signal"] if _shutdown_ctx else "SIGTERM/SIGINT",
)
else:
_signal_initiated_shutdown = True
logger.info(
"Received %s — initiating shutdown",
_shutdown_ctx["signal"] if _shutdown_ctx else "SIGTERM/SIGINT",
)
if _shutdown_ctx is not None:
try:
logger.warning(
"Shutdown context: %s", format_context_for_log(_shutdown_ctx)
)
except Exception as _e:
logger.debug("format_context_for_log failed: %s", _e)
try:
_diag_log = _hermes_home / "logs" / "gateway-shutdown-diag.log"
spawn_async_diagnostic(
_diag_log, _shutdown_ctx["signal"], timeout_seconds=5.0
)
except Exception as _e:
logger.debug("spawn_async_diagnostic failed: %s", _e)
asyncio.create_task(runner.stop())
def restart_signal_handler():
runner.request_restart(detached=False, via_service=True)
loop = asyncio.get_running_loop()
if threading.current_thread() is threading.main_thread():
for sig in (signal.SIGINT, signal.SIGTERM):
try:
loop.add_signal_handler(sig, shutdown_signal_handler, sig)
except NotImplementedError:
pass
if hasattr(signal, "SIGUSR1"):
try:
loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
except NotImplementedError:
pass
else:
logger.info("Skipping signal handlers (not running in main thread).")
import atexit
from gateway.status import write_pid_file, remove_pid_file, get_running_pid
_current_pid = get_running_pid()
if _current_pid is not None and _current_pid != os.getpid():
logger.error(
"Another gateway instance (PID %d) started during our startup. "
"Exiting to avoid double-running.", _current_pid
)
return False
if not acquire_gateway_runtime_lock():
logger.error(
"Gateway runtime lock is already held by another instance. Exiting."
)
return False
try:
write_pid_file()
except FileExistsError:
release_gateway_runtime_lock()
logger.error(
"PID file race lost to another gateway instance. Exiting."
)
return False
atexit.register(remove_pid_file)
atexit.register(release_gateway_runtime_lock)
try:
from tools.mcp_tool import discover_mcp_tools
_loop = asyncio.get_running_loop()
await _loop.run_in_executor(None, discover_mcp_tools)
except Exception as e:
logger.debug("MCP tool discovery failed: %s", e)
success = await runner.start()
if not success:
return False
if runner.should_exit_cleanly:
if runner.exit_reason:
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
return True
cron_stop = threading.Event()
cron_thread = threading.Thread(
target=_start_cron_ticker,
args=(cron_stop,),
kwargs={"adapters": runner.adapters, "loop": asyncio.get_running_loop()},
daemon=True,
name="cron-ticker",
)
cron_thread.start()
await runner.wait_for_shutdown()
if runner.should_exit_with_failure:
if runner.exit_reason:
logger.error("Gateway exiting with failure: %s", runner.exit_reason)
return False
cron_stop.set()
cron_thread.join(timeout=5)
try:
from tools.mcp_tool import shutdown_mcp_servers
shutdown_mcp_servers()
except Exception:
pass
try:
from gateway import memory_monitor as _memory_monitor
_memory_monitor.stop_memory_monitoring(timeout=2.0)
except Exception:
pass
if runner.exit_code is not None:
raise SystemExit(runner.exit_code)
if _signal_initiated_shutdown and not runner._restart_requested:
logger.info(
"Exiting with code 1 (signal-initiated shutdown without restart "
"request) so systemd Restart=on-failure can revive the gateway."
)
return False
if runner._restart_via_service:
logger.info(
"Exiting with code 75 (service-restart requested) so "
"launchd KeepAlive relaunches the gateway."
)
raise SystemExit(75)
return True
def main():
    """CLI entry point for the gateway.

    Parses ``--config``/``-c`` and ``--verbose``/``-v``. When ``--config`` is
    given, the YAML file is loaded and turned into a ``GatewayConfig`` via
    ``GatewayConfig.from_dict``; otherwise ``None`` is passed so that
    ``start_gateway`` decides on its own configuration. ``start_gateway`` is
    then run to completion with ``asyncio.run``.

    Exit behaviour:
        * status 1 when ``start_gateway`` returns a falsy result;
        * argparse's usage-error status (2), with a one-line message, when
          the config file cannot be read, is not valid YAML, or does not
          hold a mapping at its top level.
    """
    # Best-effort stdio setup: any failure (including the helper module not
    # being importable) is deliberately ignored so startup can continue.
    try:
        from hermes_cli.stdio import configure_windows_stdio
        configure_windows_stdio()
    except Exception:
        pass

    import argparse

    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
    parser.add_argument("--config", "-c", help="Path to gateway config file")
    # NOTE(review): --verbose is accepted but not read anywhere in this
    # function — confirm whether it should be forwarded to start_gateway.
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    args = parser.parse_args()

    config = None
    if args.config:
        # Read the file first so a bad path is reported cleanly instead of
        # surfacing as a traceback. parser.error() prints the message and
        # exits, so control never continues past it.
        try:
            with open(args.config, encoding="utf-8") as f:
                raw_text = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            parser.error(f"cannot read config file {args.config!r}: {exc}")

        import yaml

        try:
            # An empty document parses to None; treat it as an empty mapping.
            data = yaml.safe_load(raw_text) or {}
        except yaml.YAMLError as exc:
            parser.error(f"invalid YAML in config file {args.config!r}: {exc}")

        if not isinstance(data, dict):
            parser.error(
                f"config file {args.config!r} must contain a mapping at the "
                f"top level, got {type(data).__name__}"
            )
        config = GatewayConfig.from_dict(data)

    success = asyncio.run(start_gateway(config))
    if not success:
        sys.exit(1)
# Run the CLI entry point only when this file is executed as a script
# (e.g. ``python -m gateway.run``), not when it is imported as a module.
if __name__ == "__main__":
    main()