/*
* Copyright (C) 2026 Xiaomi Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file contains code derived from MimiClaw (https://github.com/memovai/mimiclaw)
* Copyright (c) 2026 Ziboyan Wang, licensed under the MIT License.
* See NOTICE file for the original MIT License terms.
*/
#include "tools/tool_fetch_url.h"
#include "agent_config.h"
#include "agent_compat.h"
#include "infra/url_parse.h"
#include "infra/vela_tls.h"
#include "infra/http_proxy.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include "cJSON.h"
static const char *TAG = "tool_fetch";
#define FETCH_RESP_SIZE (32 * 1024)
/* Number of leading bytes inspected by is_binary_content(). */
#define BINARY_CHECK_BYTES 512

/*
 * Heuristic check for non-text payloads.
 *
 * Inspects at most the first BINARY_CHECK_BYTES bytes of buf and returns
 * true as soon as it finds a byte that is neither TAB, LF, CR, printable
 * ASCII (0x20..0x7E), nor part of a well-formed UTF-8 multi-byte sequence
 * (RFC 3629: no overlong forms, no surrogates, nothing above U+10FFFF).
 * Returns false when every inspected byte is acceptable, including when
 * len is 0.
 *
 * A multi-byte sequence that starts inside the inspected window may be
 * completed by bytes beyond it (up to len). A sequence that is cut off by
 * the end of the buffer itself is tolerated, so that text truncated at a
 * buffer boundary is not misreported as binary.
 *
 * buf  Data to inspect; need not be NUL-terminated.
 * len  Number of valid bytes in buf.
 */
static bool is_binary_content(const char *buf, size_t len)
{
    const unsigned char *p = (const unsigned char *)buf;
    size_t check = (len < BINARY_CHECK_BYTES) ? len : BINARY_CHECK_BYTES;
    size_t i = 0;

    while (i < check) {
        unsigned char c = p[i];

        if (c == '\t' || c == '\n' || c == '\r' || (c >= 0x20 && c <= 0x7E)) {
            i++;
            continue;
        }

        /* Allowed range of the FIRST continuation byte depends on the lead
         * byte; this is what excludes overlongs, surrogates and > U+10FFFF. */
        size_t need;
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        if (c >= 0xC2 && c <= 0xDF) {
            need = 1;
        } else if (c >= 0xE0 && c <= 0xEF) {
            need = 2;
            if (c == 0xE0) {
                lo = 0xA0;          /* reject overlong 3-byte forms */
            } else if (c == 0xED) {
                hi = 0x9F;          /* reject UTF-16 surrogates */
            }
        } else if (c >= 0xF0 && c <= 0xF4) {
            need = 3;
            if (c == 0xF0) {
                lo = 0x90;          /* reject overlong 4-byte forms */
            } else if (c == 0xF4) {
                hi = 0x8F;          /* reject code points above U+10FFFF */
            }
        } else {
            /* Control byte, DEL, stray continuation byte, or a byte that
             * can never start a UTF-8 sequence (0xC0, 0xC1, 0xF5..0xFF). */
            return true;
        }

        i++;
        for (size_t k = 0; k < need; k++, i++) {
            if (i >= len) {
                return false;       /* sequence cut off by end of buffer */
            }
            if (p[i] < lo || p[i] > hi) {
                return true;
            }
            lo = 0x80;              /* later continuation bytes: full range */
            hi = 0xBF;
        }
    }

    return false;
}
/*
 * Returns true if ip (IPv4, host byte order) lies in a range that must not
 * be reachable through the fetch tool: "this network", private, loopback
 * or link-local.
 */
static bool is_private_ipv4(uint32_t ip)
{
    static const struct {
        uint32_t net;
        uint32_t mask;
    } blocked[] = {
        { 0x00000000u, 0xFF000000u },   /* 0.0.0.0/8      "this network" */
        { 0x0A000000u, 0xFF000000u },   /* 10.0.0.0/8     private        */
        { 0x7F000000u, 0xFF000000u },   /* 127.0.0.0/8    loopback       */
        { 0xA9FE0000u, 0xFFFF0000u },   /* 169.254.0.0/16 link-local     */
        { 0xAC100000u, 0xFFF00000u },   /* 172.16.0.0/12  private        */
        { 0xC0A80000u, 0xFFFF0000u },   /* 192.168.0.0/16 private        */
    };

    for (size_t i = 0; i < sizeof blocked / sizeof blocked[0]; i++) {
        if ((ip & blocked[i].mask) == blocked[i].net) {
            return true;
        }
    }
    return false;
}

/*
 * SSRF filter: returns true if host names a loopback/private/internal
 * destination and must therefore be refused.
 *
 * Recognised forms:
 *   - "localhost" and any "*.localhost" name, case-insensitively, with or
 *     without a trailing root dot;
 *   - dotted-quad IPv4 literals in the ranges listed in is_private_ipv4();
 *   - IPv6 literals (optionally wrapped in '[' ']') that are link-local
 *     (fe80::/10), unique-local (fc00::/7), unspecified/loopback, or that
 *     embed a blocked IPv4 address (::ffff:a.b.c.d or ::a.b.c.d).
 *
 * Only the literal text is examined; no name resolution is performed, so a
 * public-looking DNS name that resolves to a private address is not caught
 * here.
 *
 * host  NUL-terminated host component of the URL; must not be NULL.
 */
static bool is_private_host(const char *host)
{
    static const char local_name[] = "localhost";
    const size_t local_len = sizeof local_name - 1;
    const size_t host_len = strlen(host);

    /* Compare ignoring one trailing root dot ("localhost."). */
    size_t name_len = host_len;
    if (name_len > 0 && host[name_len - 1] == '.') {
        name_len--;
    }
    if (name_len >= local_len &&
        strncasecmp(host + name_len - local_len, local_name, local_len) == 0 &&
        (name_len == local_len || host[name_len - local_len - 1] == '.')) {
        return true;
    }

    struct in_addr v4;
    if (inet_pton(AF_INET, host, &v4) == 1) {
        return is_private_ipv4(ntohl(v4.s_addr));
    }

    /* Accept both "::1" and the bracketed URL form "[::1]". */
    char unbracketed[64];
    const char *v6_text = host;
    if (host_len >= 2 && host[0] == '[' && host[host_len - 1] == ']' &&
        host_len - 2 < sizeof unbracketed) {
        memcpy(unbracketed, host + 1, host_len - 2);
        unbracketed[host_len - 2] = '\0';
        v6_text = unbracketed;
    }

    struct in6_addr v6;
    if (inet_pton(AF_INET6, v6_text, &v6) == 1) {
        static const uint8_t zeros[12] = { 0 };
        const uint8_t *b = v6.s6_addr;

        if (b[0] == 0xFE && (b[1] & 0xC0) == 0x80) {
            return true;                        /* fe80::/10 link-local   */
        }
        if ((b[0] & 0xFE) == 0xFC) {
            return true;                        /* fc00::/7 unique-local  */
        }

        bool v4_mapped = memcmp(b, zeros, 10) == 0 &&
                         b[10] == 0xFF && b[11] == 0xFF;
        bool v4_compat = memcmp(b, zeros, 12) == 0;
        if (v4_mapped || v4_compat) {
            /* "::" and "::1" fall under v4_compat with an embedded address
             * in 0.0.0.0/8, so they are rejected by the IPv4 table too. */
            uint32_t ip = ((uint32_t)b[12] << 24) | ((uint32_t)b[13] << 16) |
                          ((uint32_t)b[14] << 8) | (uint32_t)b[15];
            return is_private_ipv4(ip);
        }
    }

    return false;
}
/*
 * Tool entry point: fetch an https:// URL and place the response text in
 * output.
 *
 * input_json   JSON object text containing a string field "url".
 * output       Caller-provided buffer. On return it holds the fetched text,
 *              a "[binary data: ...]" placeholder, or an "Error: ..."
 *              message, always NUL-terminated.
 * output_size  Capacity of output in bytes, including the terminator.
 *
 * Returns OK when response text or the binary placeholder was written, and
 * ERROR for invalid arguments, malformed input, non-https URLs, blocked
 * (private/internal) hosts, allocation failure, or any HTTP status other
 * than 200. Responses longer than output are truncated.
 */
int tool_fetch_url_execute(const char *input_json, char *output, size_t output_size)
{
    /* With output_size == 0 the truncation below would compute
     * output_size - 1 == SIZE_MAX and memcpy far past the buffer. */
    if (output == NULL || output_size == 0) {
        return ERROR;
    }

    cJSON *root = cJSON_Parse(input_json);
    if (!root) {
        snprintf(output, output_size, "Error: invalid JSON input");
        return ERROR;
    }

    int rc = ERROR;
    char *resp = NULL;
    parsed_url_t pu;

    /* url points into the cJSON tree, so root is released only at cleanup. */
    const char *url = cJSON_GetStringValue(cJSON_GetObjectItem(root, "url"));
    if (!url) {
        snprintf(output, output_size, "Error: missing 'url' field");
        goto cleanup;
    }

    if (url_parse(url, &pu) != 0 || !pu.use_tls
        || strncmp(url, "https://", 8) != 0) {
        snprintf(output, output_size, "Error: only https:// URLs supported");
        goto cleanup;
    }

    if (is_private_host(pu.host)) {
        syslog(LOG_WARNING, "[%s] Blocked SSRF attempt to private host: %s\n", TAG, pu.host);
        snprintf(output, output_size, "Error: access to private/internal addresses is not allowed");
        goto cleanup;
    }

    syslog(LOG_INFO, "[%s] Fetching: %s (host=%s port=%s)\n", TAG, url, pu.host, pu.port);

    /* Zero-filled so the strlen() below is bounded even if the helper does
     * not terminate what it writes. */
    resp = calloc(1, FETCH_RESP_SIZE);
    if (!resp) {
        snprintf(output, output_size, "Error: out of memory");
        goto cleanup;
    }

    int status = vela_https_get(pu.host, pu.port, pu.path, resp, FETCH_RESP_SIZE);
    if (status != 200) {
        snprintf(output, output_size, "Error: HTTP %d from %s", status, pu.host);
        goto cleanup;
    }

    /* NOTE(review): vela_https_get() presumably NUL-terminates resp; force
     * a terminator in the last slot in case it filled the whole buffer. */
    resp[FETCH_RESP_SIZE - 1] = '\0';
    size_t resp_len = strlen(resp);

    /* Binary content would corrupt the JSON request body that is later
     * sent to the LLM API (observed: fetching .tar.gz returns gzip
     * magic bytes which cause "400: error parsing body"). */
    if (resp_len < 4 || is_binary_content(resp, resp_len)) {
        syslog(LOG_WARNING, "[%s] Binary content detected (%d bytes), "
               "returning placeholder\n", TAG, (int)resp_len);
        snprintf(output, output_size,
                 "[binary data: %d bytes from %s, "
                 "not displayable as text]", (int)resp_len, pu.host);
        rc = OK;
        goto cleanup;
    }

    if (resp_len >= output_size) {
        resp_len = output_size - 1;

        /* Do not cut through a multi-byte UTF-8 character: if the first
         * dropped byte is a continuation byte (10xxxxxx), back up so the
         * whole character is dropped. At most 3 steps are ever needed. */
        for (int step = 0;
             step < 3 && resp_len > 0 &&
             ((unsigned char)resp[resp_len] & 0xC0) == 0x80;
             step++) {
            resp_len--;
        }
    }

    memcpy(output, resp, resp_len);
    output[resp_len] = '\0';
    syslog(LOG_INFO, "[%s] Fetched %d bytes from %s\n", TAG, (int)resp_len, pu.host);
    rc = OK;

cleanup:
    free(resp);             /* free(NULL) is a no-op */
    cJSON_Delete(root);
    return rc;
}