/*
* Copyright (C) 2026 Xiaomi Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* tool_camera.c — Camera capture tool for AI Agent
*
* Captures a photo from the device camera via NuttX V4L2 interface,
* then sends the JPEG data to a Vision LLM for analysis.
*
* Designed for BES1700/BES2800 AON camera which outputs hardware-
* compressed JPEG via V4L2_PIX_FMT_ENTROPY.
*/
#include "tools/tool_camera.h"
#include "llm/llm_proxy.h"
#include "agent_compat.h"
#include "agent_config.h"
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
#include <unistd.h>
#include "cJSON.h"
#ifdef CONFIG_AI_AGENT_CAMERA
#include <nuttx/video/video.h>
static const char *TAG = "camera";
#ifndef AGENT_CAMERA_DEV
#define AGENT_CAMERA_DEV "/dev/video0"
#endif
#define CAM_WIDTH_HIGH 1280
#define CAM_HEIGHT_HIGH 720
#define CAM_WIDTH_LOW 320
#define CAM_HEIGHT_LOW 180
#define CAM_BUF_SIZE (160 * 1024)
#define CAM_NUM_BUFFERS 2
#define CAM_DQBUF_TIMEOUT_MS 5000
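/* Fallback FourCC for V4L2_PIX_FMT_ENTROPY, used only when the included
 * headers do not already define it.
 * Verify this value against your BSP's aoncam_v4l2.h if capture fails. */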
#ifndef V4L2_PIX_FMT_ENTROPY
#define V4L2_PIX_FMT_ENTROPY v4l2_fourcc('G', 'R', 'E', 'P')
#endif
/* Prompt sent to the vision LLM when the tool input does not carry a
 * non-empty "prompt" string. */
#define CAM_DEFAULT_PROMPT \
"Describe what you see in this image in detail. " \
"If there is text, read it. If there are objects, identify them."
* Capture one JPEG frame from the camera via V4L2.
*
* @param width Desired width (320 or 1280)
* @param height Desired height (180 or 720)
* @param out_data Pointer to receive malloc'd JPEG data (caller frees)
* @param out_size Receives the JPEG data size in bytes
* @return OK on success, ERROR on failure
*/
static int camera_v4l2_capture(int width, int height,
uint8_t **out_data, size_t *out_size)
{
int fd = -1;
int ret = ERROR;
void *buffers[CAM_NUM_BUFFERS] = { NULL };
size_t buf_sizes[CAM_NUM_BUFFERS] = { 0 };
uint32_t nbuffers = 0;
*out_data = NULL;
*out_size = 0;
fd = open(AGENT_CAMERA_DEV, O_RDWR);
if (fd < 0) {
syslog(LOG_ERR, "[%s] Cannot open %s: %d\n",
TAG, AGENT_CAMERA_DEV, errno);
return ERROR;
}
struct v4l2_capability cap;
memset(&cap, 0, sizeof(cap));
if (ioctl(fd, VIDIOC_QUERYCAP, (uintptr_t)&cap) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_QUERYCAP failed: %d\n", TAG, errno);
goto cleanup;
}
if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
syslog(LOG_ERR, "[%s] Device does not support video capture\n", TAG);
goto cleanup;
}
struct v4l2_format fmt;
memset(&fmt, 0, sizeof(fmt));
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
fmt.fmt.pix.width = width;
fmt.fmt.pix.height = height;
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_JPEG;
fmt.fmt.pix.sizeimage = CAM_BUF_SIZE;
fmt.fmt.pix.field = V4L2_FIELD_NONE;
if (ioctl(fd, VIDIOC_S_FMT, (uintptr_t)&fmt) < 0) {
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_ENTROPY;
if (ioctl(fd, VIDIOC_S_FMT, (uintptr_t)&fmt) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_S_FMT failed: %d\n", TAG, errno);
goto cleanup;
}
}
syslog(LOG_INFO, "[%s] Format set: %dx%d\n", TAG, width, height);
struct v4l2_requestbuffers req;
memset(&req, 0, sizeof(req));
req.count = CAM_NUM_BUFFERS;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (ioctl(fd, VIDIOC_REQBUFS, (uintptr_t)&req) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_REQBUFS failed: %d\n", TAG, errno);
goto cleanup;
}
nbuffers = req.count;
for (uint32_t i = 0; i < nbuffers; i++) {
struct v4l2_buffer buf;
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (ioctl(fd, VIDIOC_QUERYBUF, (uintptr_t)&buf) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_QUERYBUF %u failed\n", TAG, i);
goto cleanup;
}
buf_sizes[i] = buf.length;
buffers[i] = mmap(NULL, buf.length, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, buf.m.offset);
if (buffers[i] == MAP_FAILED) {
buffers[i] = NULL;
syslog(LOG_ERR, "[%s] mmap buffer %u failed\n", TAG, i);
goto cleanup;
}
}
for (uint32_t i = 0; i < nbuffers; i++) {
struct v4l2_buffer buf;
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (ioctl(fd, VIDIOC_QBUF, (uintptr_t)&buf) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_QBUF %u failed\n", TAG, i);
goto cleanup;
}
}
enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (ioctl(fd, VIDIOC_STREAMON, (uintptr_t)&type) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_STREAMON failed: %d\n", TAG, errno);
goto cleanup;
}
struct pollfd pfd = { .fd = fd, .events = POLLIN };
int poll_ret = poll(&pfd, 1, CAM_DQBUF_TIMEOUT_MS);
if (poll_ret <= 0) {
syslog(LOG_ERR, "[%s] Frame capture timeout (%d ms)\n",
TAG, CAM_DQBUF_TIMEOUT_MS);
ioctl(fd, VIDIOC_STREAMOFF, (uintptr_t)&type);
goto cleanup;
}
struct v4l2_buffer dqbuf;
memset(&dqbuf, 0, sizeof(dqbuf));
dqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
dqbuf.memory = V4L2_MEMORY_MMAP;
if (ioctl(fd, VIDIOC_DQBUF, (uintptr_t)&dqbuf) < 0) {
syslog(LOG_ERR, "[%s] VIDIOC_DQBUF failed: %d\n", TAG, errno);
ioctl(fd, VIDIOC_STREAMOFF, (uintptr_t)&type);
goto cleanup;
}
syslog(LOG_INFO, "[%s] Captured frame: %u bytes from buffer %u\n",
TAG, dqbuf.bytesused, dqbuf.index);
if (dqbuf.bytesused > 0 && dqbuf.index < nbuffers) {
*out_data = malloc(dqbuf.bytesused);
if (*out_data) {
memcpy(*out_data, buffers[dqbuf.index], dqbuf.bytesused);
*out_size = dqbuf.bytesused;
ret = OK;
} else {
syslog(LOG_ERR, "[%s] OOM copying frame (%u bytes)\n",
TAG, dqbuf.bytesused);
}
}
ioctl(fd, VIDIOC_STREAMOFF, (uintptr_t)&type);
cleanup:
for (uint32_t i = 0; i < nbuffers; i++) {
if (buffers[i] && buffers[i] != MAP_FAILED) {
munmap(buffers[i], buf_sizes[i]);
}
}
if (fd >= 0) {
struct v4l2_requestbuffers rel;
memset(&rel, 0, sizeof(rel));
rel.count = 0;
rel.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
rel.memory = V4L2_MEMORY_MMAP;
ioctl(fd, VIDIOC_REQBUFS, (uintptr_t)&rel);
close(fd);
}
return ret;
}
/**
 * Write the JSON document {"<key>":"<text>"} into output.
 *
 * The text is escaped by cJSON. When the encoded document does not fit in
 * output_size bytes (including the terminator), text is shortened in place
 * and the document re-encoded, so the caller always gets a complete JSON
 * object rather than one cut off mid-string.
 *
 * @param output      Destination buffer
 * @param output_size Size of output in bytes
 * @param key         Object member name
 * @param text        Member value; may be truncated in place
 * @return OK when a complete document was written, ERROR otherwise
 */
static int camera_write_json(char *output, size_t output_size,
                             const char *key, char *text)
{
    if (output == NULL || output_size == 0 || key == NULL || text == NULL) {
        return ERROR;
    }

    output[0] = '\0';

    for (;;) {
        char *json = NULL;
        cJSON *obj = cJSON_CreateObject();

        if (obj != NULL && cJSON_AddStringToObject(obj, key, text) != NULL) {
            json = cJSON_PrintUnformatted(obj);
        }

        cJSON_Delete(obj);
        if (json == NULL) {
            return ERROR;
        }

        size_t json_len = strlen(json);
        if (json_len < output_size) {
            memcpy(output, json, json_len + 1);
            free(json);
            return OK;
        }

        free(json);

        size_t text_len = strlen(text);
        if (text_len == 0) {
            /* Even the empty wrapper does not fit. */
            return ERROR;
        }

        /* Every raw byte dropped removes at least one encoded byte, so
         * dropping the overflow amount guarantees the next pass fits. */
        size_t excess = json_len - output_size + 1;
        size_t keep = excess < text_len ? text_len - excess : 0;

        /* Do not cut in the middle of a UTF-8 multi-byte sequence. */
        while (keep > 0 && ((unsigned char)text[keep] & 0xC0) == 0x80) {
            keep--;
        }

        text[keep] = '\0';
    }
}

/**
 * Tool entry point: capture a photo and have the vision LLM analyse it.
 *
 * Recognised members of input_json (both optional):
 *   "prompt"     - non-empty string replacing CAM_DEFAULT_PROMPT
 *   "resolution" - "high" selects CAM_WIDTH_HIGH x CAM_HEIGHT_HIGH; any
 *                  other value keeps CAM_WIDTH_LOW x CAM_HEIGHT_LOW
 * Unparseable input is treated as an empty object.
 *
 * @param input_json  Tool arguments as JSON text (may be NULL)
 * @param output      Receives {"analysis":"..."} on success or
 *                    {"error":"..."} on failure
 * @param output_size Size of output in bytes
 * @return OK on success, ERROR on failure
 */
int tool_camera_capture_execute(const char *input_json,
                                char *output, size_t output_size)
{
    const char *prompt = CAM_DEFAULT_PROMPT;
    char *prompt_copy = NULL;
    int width = CAM_WIDTH_LOW;
    int height = CAM_HEIGHT_LOW;

    /* Nothing can be reported without a destination buffer. */
    if (output == NULL || output_size == 0) {
        return ERROR;
    }

    output[0] = '\0';

    cJSON *root = input_json != NULL ? cJSON_Parse(input_json) : NULL;
    if (root != NULL) {
        const cJSON *p = cJSON_GetObjectItem(root, "prompt");
        if (cJSON_IsString(p) && p->valuestring != NULL &&
            p->valuestring[0] != '\0') {
            /* Copy: the parsed tree is deleted before the prompt is used. */
            prompt_copy = strdup(p->valuestring);
            if (prompt_copy != NULL) {
                prompt = prompt_copy;
            }
        }

        const cJSON *r = cJSON_GetObjectItem(root, "resolution");
        if (cJSON_IsString(r) && r->valuestring != NULL &&
            strcmp(r->valuestring, "high") == 0) {
            width = CAM_WIDTH_HIGH;
            height = CAM_HEIGHT_HIGH;
        }
    }

    cJSON_Delete(root);

    syslog(LOG_INFO, "[%s] Capturing %dx%d\n", TAG, width, height);

    uint8_t *jpeg_data = NULL;
    size_t jpeg_size = 0;
    int ret = camera_v4l2_capture(width, height, &jpeg_data, &jpeg_size);
    if (ret != OK || jpeg_data == NULL || jpeg_size == 0) {
        snprintf(output, output_size,
                 "{\"error\":\"Camera capture failed. "
                 "Is %s available?\"}", AGENT_CAMERA_DEV);
        free(jpeg_data);
        free(prompt_copy);
        return ERROR;
    }

    syslog(LOG_INFO, "[%s] Captured %zu bytes JPEG, sending to Vision LLM\n",
           TAG, jpeg_size);

    /* Label the payload as PNG when it starts with the PNG signature bytes;
     * everything else is sent as JPEG. */
    const char *mime = "image/jpeg";
    if (jpeg_size >= 4 && jpeg_data[0] == 0x89 && jpeg_data[1] == 'P') {
        mime = "image/png";
    }

    char *resp_buf = calloc(1, output_size);
    if (resp_buf == NULL) {
        free(jpeg_data);
        free(prompt_copy);
        snprintf(output, output_size, "{\"error\":\"OOM for LLM response\"}");
        return ERROR;
    }

    ret = llm_chat_vision_raw(prompt, jpeg_data, jpeg_size, mime,
                              resp_buf, output_size);
    free(jpeg_data);
    free(prompt_copy);

    /* Defensive: guarantee termination whatever the callee wrote. */
    resp_buf[output_size - 1] = '\0';

    if (ret != OK) {
        /* NOTE(review): resp_buf is assumed to carry the failure text from
         * llm_chat_vision_raw - confirm against llm_proxy. It is escaped
         * here so quotes/newlines cannot break the JSON document. */
        if (resp_buf[0] == '\0') {
            snprintf(resp_buf, output_size, "%s", "Vision LLM request failed");
        }

        if (camera_write_json(output, output_size, "error", resp_buf) != OK) {
            snprintf(output, output_size,
                     "{\"error\":\"Vision LLM request failed\"}");
        }

        free(resp_buf);
        return ERROR;
    }

    ret = camera_write_json(output, output_size, "analysis", resp_buf);
    free(resp_buf);
    if (ret != OK) {
        /* Never report success with an unwritten output buffer. */
        snprintf(output, output_size,
                 "{\"error\":\"Failed to encode analysis\"}");
        return ERROR;
    }

    return OK;
}
#else
/**
 * Stub compiled when CONFIG_AI_AGENT_CAMERA is disabled.
 *
 * Ignores input_json, writes a fixed JSON error object into output
 * (truncated to output_size by snprintf) and always returns ERROR.
 */
int tool_camera_capture_execute(const char *input_json,
                                char *output, size_t output_size)
{
    static const char unsupported_msg[] =
        "{\"error\":\"Camera not supported (CONFIG_AI_AGENT_CAMERA disabled)\"}";

    (void)input_json;

    snprintf(output, output_size, "%s", unsupported_msg);

    return ERROR;
}
#endif