/*
 * packages_ai_agent/include/llm/llm_cache.h
 * Part of packages_ai_agent, an AI Agent framework project based on the
 * openvela system (code preview captured from AtomGit).
 */

/*
 * Copyright (C) 2025 Xiaomi Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Simple LLM response cache — 8-slot LRU with TTL.
 *
 * Key: djb2 hash of the first LLM_CACHE_KEY_LEN (64) chars of the prompt.
 * Value: cached response text (strdup'd).
 * TTL: LLM_CACHE_TTL_SEC (300) seconds.
 *
 * NOTE(review): thread-safety is not stated anywhere in this header —
 * confirm against the implementation before using the cache from more
 * than one thread.
 */

/* Number of cache slots. */
#define LLM_CACHE_SLOTS 8
/* Time-to-live of a cached entry, in seconds. */
#define LLM_CACHE_TTL_SEC 300
/* Number of leading prompt characters used to derive the cache key. */
#define LLM_CACHE_KEY_LEN 64

/**
 * Initialize the cache.
 *
 * Call once at startup, before any other llm_cache_* function.
 * NOTE(review): behavior when called more than once is not specified
 * here — confirm against the implementation.
 */
void llm_cache_init(void);

/**
 * Look up a cached response for the given prompt.
 *
 * NOTE(review): the header overview states the key is a hash of only the
 * first LLM_CACHE_KEY_LEN characters of the prompt, so distinct prompts
 * sharing that prefix presumably map to the same entry — confirm whether
 * the implementation disambiguates them.
 *
 * @param prompt  User prompt text
 * @param prompt_len  Length of prompt (presumably in bytes, excluding any
 *                    terminator — TODO confirm)
 * @return Cached response (caller must free), or NULL on miss
 */
char* llm_cache_get(const char* prompt, size_t prompt_len);

/**
 * Store a response in the cache.
 *
 * This function takes no token information; to associate a token count
 * with the entry, call llm_cache_put_tokens() afterwards for the same
 * prompt.
 *
 * NOTE(review): the header overview says the cached value is strdup'd, so
 * the caller presumably keeps ownership of response — confirm.
 *
 * @param prompt  User prompt text
 * @param prompt_len  Length of prompt
 * @param response  Response text to cache
 */
void llm_cache_put(const char* prompt, size_t prompt_len,
    const char* response);

/**
 * Store token count alongside cached response.
 * Call after llm_cache_put for the same prompt.
 *
 * NOTE(review): total_tokens is a signed int while the saved-token
 * counter returned by llm_cache_tokens_saved() is uint32_t; how negative
 * values are handled is not visible here — confirm.
 *
 * @param prompt  User prompt text
 * @param prompt_len  Length of prompt
 * @param total_tokens  Token count to associate
 */
void llm_cache_put_tokens(const char* prompt, size_t prompt_len,
    int total_tokens);

/**
 * Get cumulative tokens saved by cache hits.
 *
 * @return Running total of tokens saved by cache hits
 */
uint32_t llm_cache_tokens_saved(void);

/**
 * Get total cache hit count.
 *
 * @return Number of cache hits recorded so far
 */
uint32_t llm_cache_hit_count(void);

/**
 * Free all cached responses.
 *
 * Call during shutdown.
 * NOTE(review): whether the cache may be used again after cleanup without
 * another llm_cache_init() is not specified here — confirm.
 */
void llm_cache_cleanup(void);

#ifdef __cplusplus
}
#endif