use anyhow::Result;
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::json;
use tokio::process::Command;
use super::{ApprovalRequirement, Tool, ToolContext, ToolDef, ToolResult};
/// Returns the smallest byte offset `>= index` that lies on a UTF-8 character
/// boundary of `s`, clamped to `s.len()`.
///
/// Lets callers slice `s` at an arbitrary byte offset without panicking in
/// the middle of a multi-byte character.
fn ceil_char_boundary(s: &str, index: usize) -> usize {
    // The range is empty when `index >= s.len()`, which yields `s.len()`.
    (index..s.len())
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(s.len())
}
/// Returns the largest byte offset `<= index` that lies on a UTF-8 character
/// boundary of `s`. Offsets at or beyond the end of `s` map to `s.len()`.
fn floor_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        return s.len();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=index)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0)
}
/// Stateless tool that runs a web search by POSTing the query to
/// DuckDuckGo's HTML endpoint through `curl` and scraping the result links.
pub struct WebSearchTool;
/// JSON arguments accepted by the `web_search` tool.
#[derive(Deserialize)]
struct WebSearchArgs {
/// Free-text search query (required).
query: String,
/// Upper bound on the number of results to return; falls back to
/// `default_max()` when the field is absent from the JSON.
#[serde(default = "default_max")]
max_results: usize,
}
/// Serde default for `WebSearchArgs::max_results` when the caller omits it.
fn default_max() -> usize {
    const DEFAULT_MAX_RESULTS: usize = 8;
    DEFAULT_MAX_RESULTS
}
#[async_trait]
impl Tool for WebSearchTool {
    /// Describes the tool (name, prompt-facing description, JSON schema of the
    /// arguments). Only `query` is required; `max_results` is optional.
    fn definition(&self) -> ToolDef {
        ToolDef {
            name: "web_search",
            description: "Search the web for information. Returns titles, URLs, and snippets.\n\
                Use when you need to find documentation, look up APIs, research libraries, \
                or find information not available locally.\n\
                Examples:\n\
                - {\"query\": \"openclaw github\"}\n\
                - {\"query\": \"tailwindcss v4 installation guide\"}\n\
                - {\"query\": \"rust reqwest POST example\"}"
                .to_string(),
            parameters: json!({
                "type": "object",
                "properties": {
                    "query": { "type": "string", "description": "Search query" },
                    "max_results": { "type": "integer", "description": "Max results (default 8)" }
                },
                "required": ["query"]
            }),
        }
    }

    /// Searching never needs user confirmation.
    fn approval(&self, _args: &str) -> ApprovalRequirement {
        ApprovalRequirement::AutoApprove
    }

    /// Runs the search and formats the hits as a numbered plain-text list.
    ///
    /// # Errors
    /// Returns `Err` only when `args` is not valid JSON for `WebSearchArgs`.
    /// Network problems, timeouts and empty result sets are reported as
    /// `Ok(ToolResult { success: false, .. })` with an explanatory message.
    async fn execute(&self, args: &str, _ctx: &ToolContext) -> Result<ToolResult> {
        /// Builds the unsuccessful result shape shared by every failure path.
        fn failure(output: String) -> ToolResult {
            ToolResult {
                call_id: String::new(),
                output,
                success: false,
            }
        }

        let parsed: WebSearchArgs = serde_json::from_str(args)?;
        // At least one result (0 would always report "No results found"),
        // at most 20.
        let max = parsed.max_results.clamp(1, 20);
        // The raw query is handed to curl's `--data-urlencode`, which
        // percent-encodes everything after the first `=`. Replacing only the
        // spaces would let `&`, `=`, `%`, `#` or a literal `+` corrupt the
        // form body.
        let form_field = format!("q={}", parsed.query);
        let curl_bin = if cfg!(target_os = "windows") {
            "curl.exe"
        } else {
            "curl"
        };
        let mut cmd = Command::new(curl_bin);
        cmd.args(&[
            // -s: no progress meter; -S: still print the error if curl fails.
            "-sS", "-X", "POST",
            "https://html.duckduckgo.com/html/",
            "--data-urlencode", form_field.as_str(),
            "-A", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)",
            "--max-time", "15",
            "-L",
        ]);
        // Make sure the child dies if this future is dropped (e.g. on timeout).
        cmd.kill_on_drop(true);
        // NOTE(review): project helper defined elsewhere; presumably hides the
        // console window on Windows.
        crate::process_utils::suppress_console_window(&mut cmd);
        crate::ctrace!("TOOL", "web_search before cmd.output().await query={:?}", parsed.query);
        // Outer guard slightly longer than curl's own `--max-time 15`.
        let spawned = match tokio::time::timeout(
            std::time::Duration::from_secs(20),
            cmd.output(),
        )
        .await
        {
            Ok(r) => {
                crate::ctrace!("TOOL", "web_search after cmd.output().await is_ok={}", r.is_ok());
                r
            }
            Err(_) => {
                crate::ctrace!("TOOL", "web_search tokio timeout (20s) fired");
                return Ok(failure(format!(
                    "Search timed out after 20s for '{}'. Network may be unreachable or DuckDuckGo is slow — try a different query or use web_fetch on a known URL.",
                    parsed.query
                )));
            }
        };
        let output = match spawned {
            Ok(o) => o,
            Err(e) => return Ok(failure(format!("Search failed: {}", e))),
        };
        let html = String::from_utf8_lossy(&output.stdout);
        if html.is_empty() {
            // Surface curl's own diagnostic (DNS failure, TLS error, ...) when
            // there is one instead of a bare "empty response".
            let curl_error = String::from_utf8_lossy(&output.stderr);
            let curl_error = curl_error.trim();
            let message = if curl_error.is_empty() {
                format!("Search returned empty response for '{}'", parsed.query)
            } else {
                format!(
                    "Search returned empty response for '{}': {}",
                    parsed.query, curl_error
                )
            };
            return Ok(failure(message));
        }
        let results = parse_ddg_results(&html, max);
        if results.is_empty() {
            return Ok(failure(format!(
                "No results found for '{}' ({} bytes received)",
                parsed.query,
                html.len()
            )));
        }
        let mut out = format!("Search results for \"{}\":\n\n", parsed.query);
        for (i, r) in results.iter().enumerate() {
            out.push_str(&format!(
                "{}. {}\n   {}\n   {}\n\n",
                i + 1,
                r.title,
                r.url,
                r.snippet
            ));
        }
        Ok(ToolResult {
            call_id: String::new(),
            output: out,
            success: true,
        })
    }
}
/// One hit scraped from the DuckDuckGo HTML results page.
struct SearchResult {
/// Link text with markup removed and surrounding whitespace trimmed.
title: String,
/// Target URL as produced by `extract_ddg_url`; always starts with "http".
url: String,
/// Text of the nearby `result__snippet` element, or empty if none was found.
snippet: String,
}
/// Scrapes up to `max` search hits out of a DuckDuckGo HTML results page.
///
/// The page is scanned for anchors carrying `class="result__a"`. For each one
/// the `href` is resolved through `extract_ddg_url`, the anchor text becomes
/// the title, and the following `class="result__snippet"` element (if any)
/// becomes the snippet. Hits without a title or without an `http` URL are
/// skipped. All slicing goes through the char-boundary helpers so that
/// arbitrary byte offsets never split a multi-byte character.
fn parse_ddg_results(html: &str, max: usize) -> Vec<SearchResult> {
    const LINK_MARKER: &str = "class=\"result__a\"";
    const SNIPPET_MARKER: &str = "class=\"result__snippet\"";
    const CLOSE_ANCHOR: &str = "</a>";
    /// How far past the title link we are willing to look for its snippet.
    const SNIPPET_WINDOW: usize = 2000;

    let mut results = Vec::new();
    let mut pos = 0;
    while results.len() < max {
        let safe_pos = ceil_char_boundary(html, pos);
        let marker_pos = match html[safe_pos..].find(LINK_MARKER) {
            Some(p) => safe_pos + p,
            None => break,
        };
        let after_marker = ceil_char_boundary(html, marker_pos + LINK_MARKER.len());
        // The marker sits inside the opening tag; back up to its `<`.
        let tag_start = html[..marker_pos].rfind('<').unwrap_or(marker_pos);
        let tag_end = html[after_marker..]
            .find(CLOSE_ANCHOR)
            .map(|p| after_marker + p)
            .unwrap_or(after_marker);
        // First offset after this anchor; also where the next scan resumes.
        let next_pos = ceil_char_boundary(html, tag_end + CLOSE_ANCHOR.len());
        let tag_region = &html[tag_start..next_pos];

        let url = if let Some(hp) = tag_region.find("href=\"") {
            let hs = hp + 6;
            let he = tag_region[hs..].find('"').map(|e| hs + e).unwrap_or(hs);
            extract_ddg_url(&tag_region[hs..he])
        } else {
            pos = next_pos;
            continue;
        };

        let content_start = html[after_marker..tag_end]
            .find('>')
            .map(|p| after_marker + p + 1)
            .unwrap_or(after_marker);
        let safe_content_start = ceil_char_boundary(html, content_start);
        let safe_tag_end = floor_char_boundary(html, tag_end);
        let title = if safe_content_start <= safe_tag_end {
            strip_html_tags(&html[safe_content_start..safe_tag_end])
        } else {
            String::new()
        };

        let window_start = ceil_char_boundary(html, tag_end);
        let window_limit =
            ceil_char_boundary(html, (tag_end + SNIPPET_WINDOW).min(html.len()));
        // Never look past the next result link: otherwise a result without a
        // snippet would be given the snippet that belongs to the next result.
        let window_end = html[window_start..window_limit]
            .find(LINK_MARKER)
            .map(|p| window_start + p)
            .unwrap_or(window_limit);
        let snippet = if let Some(sp) = html[window_start..window_end].find(SNIPPET_MARKER) {
            let snippet_pos = window_start + sp;
            let s_start = ceil_char_boundary(
                html,
                html[snippet_pos..]
                    .find('>')
                    .map(|p| snippet_pos + p + 1)
                    .unwrap_or(snippet_pos),
            );
            let s_end = floor_char_boundary(
                html,
                html[s_start..]
                    .find(CLOSE_ANCHOR)
                    .map(|p| s_start + p)
                    .unwrap_or(s_start),
            );
            if s_start <= s_end {
                strip_html_tags(&html[s_start..s_end])
            } else {
                String::new()
            }
        } else {
            String::new()
        };

        if !title.trim().is_empty() && !url.is_empty() && url.starts_with("http") {
            results.push(SearchResult {
                title: title.trim().to_string(),
                url,
                snippet: snippet.trim().to_string(),
            });
        }
        pos = next_pos;
    }
    results
}
/// Turns the raw `href` of a result link into the destination URL.
///
/// * If the href carries a `uddg=` parameter, its value (up to the next `&`)
///   is percent-decoded and returned.
/// * A protocol-relative href (`//host/...`) gets an `https:` prefix.
/// * Anything else is returned unchanged.
fn extract_ddg_url(raw: &str) -> String {
    const REDIRECT_PARAM: &str = "uddg=";
    match raw.find(REDIRECT_PARAM) {
        Some(at) => {
            let value = &raw[at + REDIRECT_PARAM.len()..];
            // `split` always yields a first piece, so the fallback is unused.
            let value = value.split('&').next().unwrap_or(value);
            url_decode(value)
        }
        None if raw.starts_with("//") => format!("https:{}", raw),
        None => raw.to_string(),
    }
}
/// Percent-decodes a URL query component.
///
/// `%XX` escapes are decoded to raw bytes and `+` becomes a space. The byte
/// sequence is then interpreted as UTF-8, so multi-byte characters such as
/// `%C3%A9` decode to a single `é` instead of one code point per byte. Bytes
/// that do not form valid UTF-8 are replaced with U+FFFD. A `%` that is not
/// followed by exactly two hex digits is kept literally.
fn url_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, if `b` is one.
    fn hex_val(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let bytes = s.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'%' => {
                let hi = bytes.get(i + 1).copied().and_then(hex_val);
                let lo = bytes.get(i + 2).copied().and_then(hex_val);
                match hi.zip(lo) {
                    Some((hi, lo)) => {
                        decoded.push((hi << 4) | lo);
                        i += 3;
                    }
                    None => {
                        // Malformed or truncated escape: keep the `%` and let
                        // the following bytes be handled normally.
                        decoded.push(b'%');
                        i += 1;
                    }
                }
            }
            b'+' => {
                decoded.push(b' ');
                i += 1;
            }
            other => {
                decoded.push(other);
                i += 1;
            }
        }
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
/// Removes HTML tags from `s` and decodes a small set of character entities.
///
/// Everything between `<` and `>` is dropped (a stray `>` outside a tag is
/// dropped as well). Afterwards the entities amp, lt, gt, quot, apos, nbsp
/// and the numeric apostrophe forms (#39, #x27) are replaced by the
/// characters they stand for; nbsp becomes a regular space.
///
/// Entities are decoded in a single left-to-right pass so that the output of
/// one replacement is never decoded again: an escaped ampersand followed by
/// `lt;` stays an (un-escaped) entity text instead of collapsing to `<`.
fn strip_html_tags(s: &str) -> String {
    // Entity names WITHOUT the leading '&', paired with their replacement.
    const ENTITIES: [(&str, &str); 8] = [
        ("amp;", "&"),
        ("lt;", "<"),
        ("gt;", ">"),
        ("quot;", "\""),
        ("#x27;", "'"),
        ("#39;", "'"),
        ("apos;", "'"),
        ("nbsp;", " "),
    ];

    // Pass 1: drop the markup.
    let mut text = String::with_capacity(s.len());
    let mut in_tag = false;
    for c in s.chars() {
        match c {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if !in_tag => text.push(c),
            _ => {}
        }
    }

    // Pass 2: decode entities, consuming each one exactly once.
    let mut decoded = String::with_capacity(text.len());
    let mut rest = text.as_str();
    while let Some(amp) = rest.find('&') {
        decoded.push_str(&rest[..amp]);
        let tail = &rest[amp + 1..];
        match ENTITIES.iter().find(|entry| tail.starts_with(entry.0)) {
            Some(&(name, replacement)) => {
                decoded.push_str(replacement);
                rest = &tail[name.len()..];
            }
            None => {
                // A bare ampersand (e.g. "AT&T") is kept as is.
                decoded.push('&');
                rest = tail;
            }
        }
    }
    decoded.push_str(rest);
    decoded
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two well-formed results are parsed with title, URL and snippet.
    #[test]
    fn test_parse_ddg_results() {
        let html = r#"
<h2 class="result__title">
<a rel="nofollow" class="result__a" href="https://github.com/openclaw">openclaw · GitHub</a>
</h2>
<a class="result__snippet" href="https://github.com/openclaw">Your personal AI assistant. openclaw has 23 repos.</a>
<h2 class="result__title">
<a rel="nofollow" class="result__a" href="https://openclaw.ai/">OpenClaw — Personal AI</a>
</h2>
<a class="result__snippet" href="https://openclaw.ai/">The AI that does things.</a>
"#;
        let results = parse_ddg_results(html, 10);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].title, "openclaw · GitHub");
        assert_eq!(results[0].url, "https://github.com/openclaw");
        assert!(results[0].snippet.contains("23 repos"));
        assert_eq!(results[1].title, "OpenClaw — Personal AI");
        assert_eq!(results[1].url, "https://openclaw.ai/");
    }

    /// A page without result links yields no hits.
    #[test]
    fn test_parse_ddg_empty() {
        let results = parse_ddg_results("<html><body>no results</body></html>", 10);
        assert!(results.is_empty());
    }

    /// Tags are removed and character entities are decoded.
    #[test]
    fn test_strip_html_tags() {
        assert_eq!(strip_html_tags("hello <b>world</b>"), "hello world");
        // The input must contain real entities; comparing a string with
        // itself would not exercise the decoder at all.
        assert_eq!(strip_html_tags("&amp; &lt;"), "& <");
    }

    /// Redirect links are unwrapped; protocol-relative links get a scheme.
    #[test]
    fn test_extract_ddg_url() {
        assert_eq!(
            extract_ddg_url("//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2F&rut=abc"),
            "https://example.com/"
        );
        assert_eq!(extract_ddg_url("//example.com/x"), "https://example.com/x");
        assert_eq!(extract_ddg_url("https://example.com/"), "https://example.com/");
    }
}