pub mod cache;
pub mod language;
use std::path::Path;
use tree_sitter::{Query, QueryCursor, StreamingIterator};
use cache::ASTCache;
use language::{Lang, LanguageRegistry};
/// A named region of a source file, as reported by the symbol-listing
/// functions in this module.
#[derive(Debug, Clone)]
pub struct Symbol {
/// Text used as the symbol's name: captured identifier text, or a synthesized
/// label such as `<tag>` for markup elements.
pub name: String,
/// Line on which the symbol starts. The producers in this file emit 1-based
/// numbers (zero-based row plus one, or row plus a section line offset).
pub start_line: usize,
/// Last line of the symbol, in the same numbering as `start_line`.
pub end_line: usize,
/// Byte offset of the symbol's start within the file's text.
pub start_byte: usize,
/// Byte offset just past the symbol; used as the exclusive bound when slicing.
pub end_byte: usize,
/// Either a tree-sitter node kind or a heuristic label such as `"css_rule"`,
/// `"heading"` or `"sfc_section"`.
pub kind: String,
}
impl Symbol {
pub fn is_chinese(&self) -> bool {
contains_chinese(&self.name)
}
pub fn is_pinyin(&self) -> bool {
is_pinyin_identifier(&self.name)
}
pub fn is_chinese_related(&self) -> bool {
self.is_chinese() || self.is_pinyin()
}
}
/// Reports whether `c` falls in one of the CJK ideograph ranges checked here:
/// U+4E00–U+9FFF, U+3400–U+4DBF, U+20000–U+2A6DF, U+F900–U+FAFF and
/// U+2F800–U+2FA1F.
fn is_chinese(c: char) -> bool {
    // Inclusive (first, last) code point pairs.
    const RANGES: [(u32, u32); 5] = [
        (0x4E00, 0x9FFF),
        (0x3400, 0x4DBF),
        (0x20000, 0x2A6DF),
        (0xF900, 0xFAFF),
        (0x2F800, 0x2FA1F),
    ];
    let code = u32::from(c);
    RANGES.iter().any(|&(first, last)| first <= code && code <= last)
}
fn contains_chinese(s: &str) -> bool {
s.chars().any(is_chinese)
}
/// Heuristically decides whether `s` is an identifier written in pinyin.
///
/// The identifier must start with an ASCII letter. Its lowercased form is then
/// consumed greedily from the left, longest syllable first, using a fixed
/// syllable table. The result is `true` when at least two syllables matched and
/// the matched prefix covers more than 80% of the lowercased bytes.
///
/// Identifiers containing multi-byte characters are handled without panicking:
/// a candidate window that would split a character simply does not match.
fn is_pinyin_identifier(s: &str) -> bool {
    // Must stay sorted ascending: lookups use `binary_search`.
    const PINYIN_SYLLABLES: &[&str] = &[
        "ba", "bai", "bei", "biao", "chang", "chu", "da", "dan", "di", "ding",
        "dong", "duan", "duo", "er", "fen", "gao", "guo", "hao", "hou", "hu",
        "huai", "ji", "jian", "jiu", "kuai", "kuan", "leng", "li", "lie", "lu",
        "man", "miao", "ming", "mu", "nan", "nei", "nian", "qi", "qian", "re",
        "ren", "ri", "san", "shang", "shao", "shen", "shi", "shu", "si", "tian",
        "wai", "wan", "wen", "wu", "xi", "xia", "xiao", "xin", "xing", "yi",
        "yong", "you", "yue", "zhai", "zhong", "zuo",
    ];
    // Longest entry in the table above, in bytes.
    const MAX_SYLLABLE_LEN: usize = 5;

    let starts_with_letter = s
        .chars()
        .next()
        .map_or(false, |c| c.is_ascii_alphabetic());
    if !starts_with_letter {
        return false;
    }

    let lower = s.to_lowercase();
    // `pos` only ever advances by the length of an ASCII syllable, so it is
    // always a char boundary and doubles as the number of consumed bytes.
    let mut pos = 0usize;
    let mut syllable_count = 0usize;
    while pos < lower.len() {
        let longest = MAX_SYLLABLE_LEN.min(lower.len() - pos);
        let matched = (1..=longest).rev().find(|&len| {
            // `get` yields `None` when `pos + len` would split a multi-byte
            // character; plain indexing would panic there.
            lower
                .get(pos..pos + len)
                .map_or(false, |candidate| {
                    PINYIN_SYLLABLES.binary_search(&candidate).is_ok()
                })
        });
        match matched {
            Some(len) => {
                pos += len;
                syllable_count += 1;
            }
            None => break,
        }
    }
    syllable_count >= 2 && pos as f64 / lower.len() as f64 > 0.8
}
/// Entry point for symbol listing, symbol extraction, skeleton generation,
/// syntax-error counting and call search over source files.
pub struct SemanticSearcher {
/// Parser front end used by the tree-sitter based operations
/// (`parse_source`, `invalidate`).
cache: ASTCache,
}
impl SemanticSearcher {
/// Creates a searcher backed by a freshly constructed `ASTCache`.
pub fn new() -> Self {
    let cache = ASTCache::new();
    Self { cache }
}
/// Lists the symbols found in the file at `path`.
///
/// Files whose language is detected go through the tree-sitter listing; Vue
/// files additionally get their template elements appended. Files with no
/// detected language fall back to the line/indent heuristics. Returns `None`
/// when the file cannot be read or the tree-sitter listing fails.
pub fn list_symbols(&mut self, path: &Path) -> Option<Vec<Symbol>> {
    let source = std::fs::read_to_string(path).ok()?;
    let lang = match LanguageRegistry::detect(path) {
        Some(lang) => lang,
        None => return Some(self.list_symbols_indent(&source, path)),
    };
    let mut symbols = self.list_symbols_treesitter(path, &source, lang)?;
    if lang.is_vue() {
        // A missing or empty template contributes nothing.
        let template_symbols = self.list_vue_template_symbols(&source);
        symbols.extend(template_symbols.into_iter().flatten());
    }
    Some(symbols)
}
/// Returns the text and location of the first symbol named `symbol_name` in
/// the file at `path`.
///
/// Yields `None` when the file cannot be read, its language is not detected,
/// the symbol listing fails, no symbol has exactly that name, or the recorded
/// byte range is not a valid slice of the file's text.
pub fn extract_symbol(&mut self, path: &Path, symbol_name: &str) -> Option<SymbolSlice> {
    let source = std::fs::read_to_string(path).ok()?;
    let lang = LanguageRegistry::detect(path)?;
    let symbols = self.list_symbols_treesitter(path, &source, lang)?;
    let sym = symbols.iter().find(|s| s.name == symbol_name)?;
    // Offsets derived from line lengths (e.g. Vue section symbols) add one byte
    // per line, so they can overshoot a file without a trailing newline or
    // fall inside a multi-byte character; clamp and slice with `get` instead
    // of panicking.
    let end_byte = sym.end_byte.min(source.len());
    let text = source.get(sym.start_byte..end_byte)?.to_string();
    Some(SymbolSlice {
        name: sym.name.clone(),
        kind: sym.kind.clone(),
        start_line: sym.start_line,
        end_line: sym.end_line,
        start_byte: sym.start_byte,
        end_byte,
        text,
    })
}
/// Produces a condensed outline of the file at `path`.
///
/// Uses the tree-sitter outline when the language is detected and the indent
/// heuristics otherwise. Returns `None` when the file cannot be read or the
/// tree-sitter outline fails.
pub fn skeleton(&mut self, path: &Path) -> Option<String> {
    let source = std::fs::read_to_string(path).ok()?;
    match LanguageRegistry::detect(path) {
        Some(lang) => self.skeleton_treesitter(path, &source, lang),
        None => Some(self.skeleton_indent(&source, path)),
    }
}
/// Forwards `path` to `ASTCache::invalidate`.
///
/// NOTE(review): presumably this discards any cached parse for that file —
/// confirm against the `cache` module.
pub fn invalidate(&mut self, path: &Path) {
self.cache.invalidate(path);
}
pub fn count_syntax_errors(&mut self, source: &str, path: &Path) -> (usize, Vec<usize>) {
let lang = match language::LanguageRegistry::detect(path) {
Some(l) => l,
None => return (0, vec![]),
};
let tree = match self.cache.parse_source(source, lang) {
Some(t) => t,
None => return (0, vec![]),
};
let mut errors = Vec::new();
Self::collect_errors(tree.root_node(), &mut errors);
let count = errors.len();
errors.truncate(5);
(count, errors)
}
/// Depth-first walk that records the 1-based start line of every node that is
/// an error node or a missing node, parents before children.
fn collect_errors(node: tree_sitter::Node, errors: &mut Vec<usize>) {
    if node.is_error() || node.is_missing() {
        errors.push(node.start_position().row + 1);
    }
    let mut cursor = node.walk();
    let mut has_child = cursor.goto_first_child();
    while has_child {
        Self::collect_errors(cursor.node(), errors);
        has_child = cursor.goto_next_sibling();
    }
}
/// Builds a textual report of call expressions in `path` whose source text
/// contains `pattern`, compared case-insensitively.
///
/// Returns `None` when the file cannot be read, its language is not detected,
/// parsing yields nothing, or no call matches.
pub fn find_similar_calls(&mut self, path: &Path, pattern: &str) -> Option<String> {
    let source = std::fs::read_to_string(path).ok()?;
    let lang = LanguageRegistry::detect(path)?;
    let tree = self.cache.parse_source(&source, lang)?;

    let needle = pattern.to_lowercase();
    let mut hits: Vec<(usize, String, String)> = Vec::new();
    Self::walk_matching_calls(tree.root_node(), &source, &needle, &mut hits, "");
    if hits.is_empty() {
        return None;
    }

    // Prefer the bare file name; fall back to the whole path.
    let short_name = match path.file_name() {
        Some(file_name) => file_name.to_string_lossy().to_string(),
        None => path.to_string_lossy().to_string(),
    };
    let mut report = format!(
        "{} calls matching '{}' in {}:\n",
        hits.len(),
        pattern,
        short_name
    );
    for (line, call_text, func) in &hits {
        let entry = if func.is_empty() {
            format!(" L{}: {}\n", line, call_text)
        } else {
            format!(" L{}: {} (in {})\n", line, call_text, func)
        };
        report.push_str(&entry);
    }
    Some(report)
}
fn walk_matching_calls(
node: tree_sitter::Node,
source: &str,
pattern: &str,
results: &mut Vec<(usize, String, String)>,
enclosing_fn: &str,
) {
let mut current_fn = enclosing_fn.to_string();
let kind = node.kind();
if kind.contains("function") || kind.contains("method") || kind == "constructor_declaration"
{
if let Some(name_node) = node.child_by_field_name("name") {
current_fn = source[name_node.start_byte()..name_node.end_byte()].to_string();
}
}
if kind == "method_invocation" || kind == "call_expression" {
let call_text = &source[node.start_byte()..node.end_byte()];
let short = if call_text.len() > 80 {
let mut end = 77;
while !call_text.is_char_boundary(end) {
end -= 1;
}
format!("{}...", &call_text[..end])
} else {
call_text.to_string()
};
let oneline = short.replace('\n', " ").replace(" ", " ");
if call_text.to_lowercase().contains(pattern) {
let line = node.start_position().row + 1;
results.push((line, oneline, current_fn.clone()));
}
}
let mut cursor = node.walk();
if cursor.goto_first_child() {
loop {
Self::walk_matching_calls(cursor.node(), source, pattern, results, ¤t_fn);
if !cursor.goto_next_sibling() {
break;
}
}
}
}
/// Lists notable elements inside the `<template>` section of a Vue file.
///
/// The text between the first `<template ...>` opening tag and the last
/// `</template>` is parsed with the HTML grammar and queried with the HTML
/// symbols query. Common structural tags (`div`, `span`, ...) are kept only
/// when their starting line carries one of a few Vue directives. At most one
/// symbol per starting line and at most 20 symbols are reported, each named
/// `<tag>` with kind `"element"` and positions translated back to the whole
/// file. Returns `None` when there is no usable template, the parser or query
/// cannot be set up, or nothing qualifies.
fn list_vue_template_symbols(&mut self, source: &str) -> Option<Vec<Symbol>> {
    // Tags considered noise unless they carry a Vue directive.
    const PLAIN_TAGS: &[&str] = &[
        "div", "span", "p", "a", "li", "ul", "ol", "br", "hr", "img", "i", "b", "strong",
        "em", "small", "label", "input", "option", "thead", "tbody", "tr", "td", "th",
    ];
    const VUE_MARKERS: &[&str] = &["v-if", "v-for", "v-show", "@click", "v-model"];

    let template_start = source.find("<template")?;
    let template_end = source.rfind("</template>")?;
    if template_start >= template_end {
        return None;
    }
    let template_content_start = source[template_start..].find('>')? + template_start + 1;
    // In malformed input the opening tag's `>` can lie beyond `</template>`;
    // checked slicing turns that into `None` instead of a panic.
    let template_content = source.get(template_content_start..template_end)?;
    let line_offset = source[..template_content_start].lines().count();

    let html_grammar = Lang::html_grammar();
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&html_grammar).ok()?;
    let tree = parser.parse(template_content, None)?;
    let query_str = Lang::Html.symbols_query();
    let query = tree_sitter::Query::new(&html_grammar, query_str).ok()?;
    let name_idx = query.capture_index_for_name("name")?;
    let def_idx = query.capture_index_for_name("definition")?;
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), template_content.as_bytes());

    // Split once so the per-match line lookup does not rescan the template.
    let template_lines: Vec<&str> = template_content.lines().collect();
    let mut symbols = Vec::new();
    let mut seen_lines = std::collections::HashSet::new();
    while let Some(m) = matches.next() {
        let name_cap = m.captures.iter().find(|c| c.index == name_idx);
        let def_cap = m.captures.iter().find(|c| c.index == def_idx);
        let (name_node, def_node) = match (name_cap, def_cap) {
            (Some(name_cap), Some(def_cap)) => (name_cap.node, def_cap.node),
            _ => continue,
        };
        let tag_name = &template_content[name_node.start_byte()..name_node.end_byte()];
        let start_row = def_node.start_position().row;
        let start_line = start_row + line_offset;

        if PLAIN_TAGS.contains(&tag_name) {
            let line = template_lines.get(start_row).copied().unwrap_or("");
            let has_vue_attr = VUE_MARKERS.iter().any(|&marker| line.contains(marker));
            if !has_vue_attr {
                continue;
            }
        }
        // Report at most one element per starting line.
        if !seen_lines.insert(start_line) {
            continue;
        }
        symbols.push(Symbol {
            name: format!("<{}>", tag_name),
            start_line,
            end_line: def_node.end_position().row + line_offset,
            start_byte: def_node.start_byte() + template_content_start,
            end_byte: def_node.end_byte() + template_content_start,
            kind: "element".to_string(),
        });
        if symbols.len() >= 20 {
            break;
        }
    }

    if symbols.is_empty() {
        None
    } else {
        Some(symbols)
    }
}
/// Lists symbols by running the language's symbols query over the parsed file.
///
/// Vue files are delegated to `list_symbols_vue`. For other languages every
/// query match that has both a `name` and a `definition` capture becomes one
/// symbol; matches whose definition byte range was already reported are
/// skipped. Lines are 1-based, byte offsets are those of the definition node,
/// and `kind` is the definition node's kind. Returns `None` when parsing or
/// query setup fails or the query lacks either capture name.
fn list_symbols_treesitter(
    &mut self,
    path: &Path,
    source: &str,
    lang: Lang,
) -> Option<Vec<Symbol>> {
    if lang == Lang::Vue {
        return self.list_symbols_vue(path, source);
    }
    let tree = self.cache.parse_source(source, lang)?;
    let query_text = lang.symbols_query();
    let grammar = lang.grammar();
    let query = Query::new(&grammar, query_text).ok()?;
    let def_idx = query.capture_index_for_name("definition")?;
    let name_idx = query.capture_index_for_name("name")?;

    let mut symbols = Vec::new();
    let mut seen_ranges: std::collections::HashSet<(usize, usize)> =
        std::collections::HashSet::new();
    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
    while let Some(m) = matches.next() {
        // When a capture name occurs more than once in a match, the last wins.
        let name_node = m.captures.iter().rev().find(|c| c.index == name_idx);
        let def_node = m.captures.iter().rev().find(|c| c.index == def_idx);
        let (name_node, def_node) = match (name_node, def_node) {
            (Some(name_cap), Some(def_cap)) => (name_cap.node, def_cap.node),
            _ => continue,
        };
        let range = (def_node.start_byte(), def_node.end_byte());
        if !seen_ranges.insert(range) {
            continue;
        }
        symbols.push(Symbol {
            name: source[name_node.start_byte()..name_node.end_byte()].to_string(),
            start_line: def_node.start_position().row + 1,
            end_line: def_node.end_position().row + 1,
            start_byte: range.0,
            end_byte: range.1,
            kind: def_node.kind().to_string(),
        });
    }
    Some(symbols)
}
/// Renders an outline: import-like lines first, then one line per symbol.
///
/// Import-like lines are those whose trimmed text starts with one of a few
/// keywords (`use `, `import `, ...); they are printed with their 1-based line
/// number and followed by a blank line when any were found. Each symbol is
/// printed as its first source line with a collapsed body marker, its line
/// range and its line count.
fn skeleton_treesitter(&mut self, path: &Path, source: &str, lang: Lang) -> Option<String> {
    const IMPORT_PREFIXES: &[&str] = &[
        "use ", "import ", "from ", "#include", "package ", "require",
    ];

    let symbols = self.list_symbols_treesitter(path, source, lang)?;
    let lines: Vec<&str> = source.lines().collect();
    let mut out = String::new();

    for (idx, line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        if IMPORT_PREFIXES.iter().any(|&prefix| trimmed.starts_with(prefix)) {
            out.push_str(&format!("{:4}| {}\n", idx + 1, line));
        }
    }
    if !out.is_empty() {
        out.push('\n');
    }

    for sym in &symbols {
        // Fall back to the bare name when the start line is past the end of the file.
        let sig_line: &str = if sym.start_line > lines.len() {
            &sym.name
        } else {
            lines[sym.start_line - 1]
        };
        let line_range = format!("L{}-{}", sym.start_line, sym.end_line);
        let body_lines = sym.end_line - sym.start_line + 1;
        out.push_str(&format!(
            "{:4}| {} {{ ... }} // {} ({} lines)\n",
            sym.start_line,
            sig_line.trim_end(),
            line_range,
            body_lines
        ));
    }
    Some(out)
}
/// Extracts the body of the first `<script ...>` element in `source`.
///
/// Returns `(body, line_offset, byte_offset)`: the text between the end of the
/// opening tag and the next `</script>`, the number of lines in `source` up to
/// and including the opening tag, and the byte offset at which the body starts.
/// Returns `None` when the opening tag, its closing `>`, or `</script>` is missing.
fn extract_script_section(source: &str) -> Option<(String, usize, usize)> {
    let open_at = source.find("<script")?;
    let body_start = open_at + source[open_at..].find('>')? + 1;
    let body_len = source[body_start..].find("</script>")?;
    let body = source[body_start..body_start + body_len].to_string();
    let lines_before = source[..body_start].lines().count();
    Some((body, lines_before, body_start))
}
/// Lists symbols of a Vue single-file component.
///
/// The body of the first `<script>` element is parsed and queried with the
/// `Lang::Vue` grammar and symbols query; resulting positions are shifted by
/// the script's line and byte offsets so they refer to the whole file. In
/// addition, every line whose trimmed text starts with `<template`, `<script`
/// or `<style` yields an `"sfc_section"` symbol that runs to the first
/// following line starting with the matching closing tag (or to the end of the
/// file). Section byte offsets are computed from line lengths plus one byte
/// per line. The combined list is sorted by start line.
fn list_symbols_vue(&mut self, _path: &Path, source: &str) -> Option<Vec<Symbol>> {
    // (opening prefix, closing tag, section name)
    const SECTIONS: [(&str, &str, &str); 3] = [
        ("<template", "</template>", "template"),
        ("<script", "</script>", "script"),
        ("<style", "</style>", "style"),
    ];

    let (script, line_offset, byte_offset) = Self::extract_script_section(source)?;
    let tree = self.cache.parse_source(&script, Lang::Vue)?;
    let query_text = Lang::Vue.symbols_query();
    let grammar = Lang::Vue.grammar();
    let query = Query::new(&grammar, query_text).ok()?;
    let def_idx = query.capture_index_for_name("definition")?;
    let name_idx = query.capture_index_for_name("name")?;

    let mut symbols = Vec::new();
    let mut seen_ranges: std::collections::HashSet<(usize, usize)> =
        std::collections::HashSet::new();
    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), script.as_bytes());
    while let Some(m) = matches.next() {
        // When a capture name occurs more than once in a match, the last wins.
        let name_node = m.captures.iter().rev().find(|c| c.index == name_idx);
        let def_node = m.captures.iter().rev().find(|c| c.index == def_idx);
        let (name_node, def_node) = match (name_node, def_node) {
            (Some(name_cap), Some(def_cap)) => (name_cap.node, def_cap.node),
            _ => continue,
        };
        let range = (def_node.start_byte(), def_node.end_byte());
        if !seen_ranges.insert(range) {
            continue;
        }
        symbols.push(Symbol {
            name: script[name_node.start_byte()..name_node.end_byte()].to_string(),
            start_line: def_node.start_position().row + line_offset,
            end_line: def_node.end_position().row + line_offset,
            start_byte: range.0 + byte_offset,
            end_byte: range.1 + byte_offset,
            kind: def_node.kind().to_string(),
        });
    }

    let lines: Vec<&str> = source.lines().collect();
    // line_starts[k] = sum of (len + 1) over the first k lines.
    let mut line_starts = Vec::with_capacity(lines.len() + 1);
    let mut running = 0usize;
    line_starts.push(running);
    for line in &lines {
        running += line.len() + 1;
        line_starts.push(running);
    }

    for (idx, line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        let section = SECTIONS
            .iter()
            .find(|(open, _, _)| trimmed.starts_with(*open));
        let (close_tag, tag) = match section {
            Some(&(_, close_tag, tag)) => (close_tag, tag),
            None => continue,
        };
        let end_line = lines[idx..]
            .iter()
            .position(|candidate| candidate.trim().starts_with(close_tag))
            .map_or(lines.len(), |offset| idx + offset + 1);
        symbols.push(Symbol {
            name: format!("<{}>", tag),
            start_line: idx + 1,
            end_line,
            start_byte: line_starts[idx],
            end_byte: line_starts[end_line],
            kind: "sfc_section".to_string(),
        });
    }

    symbols.sort_by_key(|s| s.start_line);
    Some(symbols)
}
/// Heuristic symbol listing for files without a detected language, dispatched
/// on the file extension. Unknown or missing extensions use the generic
/// code/indent scanner.
fn list_symbols_indent(&self, source: &str, path: &Path) -> Vec<Symbol> {
    let lines: Vec<&str> = source.lines().collect();
    match path.extension().and_then(std::ffi::OsStr::to_str) {
        Some("css") | Some("scss") | Some("less") | Some("sass") => self.list_symbols_css(&lines),
        Some("html") | Some("htm") => self.list_symbols_html(&lines),
        Some("json") => self.list_symbols_json(&lines),
        Some("yaml") | Some("yml") | Some("toml") => self.list_symbols_yaml(&lines),
        Some("md") | Some("mdx") => self.list_symbols_markdown(&lines),
        _ => self.list_symbols_code_indent(&lines),
    }
}
/// Lists stylesheet landmarks: `:root`, selected at-rules, banner comments,
/// and unindented class/id selectors that open a block on the same line.
///
/// Each symbol is named after the text before the first `{` and ends where
/// `find_block_end` reports.
fn list_symbols_css(&self, lines: &[&str]) -> Vec<Symbol> {
    // Prefixes that qualify a line regardless of its indentation.
    const MARKERS: &[&str] = &[
        ":root",
        "@keyframes",
        "@media",
        "@layer",
        "@import",
        "@font-face",
        "/* ===",
        "/* ---",
        "/* ***",
    ];

    let mut symbols = Vec::new();
    for (idx, raw) in lines.iter().enumerate() {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        let unindented = raw.len() == raw.trim_start().len();
        let is_marker = MARKERS.iter().any(|&marker| trimmed.starts_with(marker));
        let is_selector = unindented
            && trimmed.contains('{')
            && (trimmed.starts_with('.') || trimmed.starts_with('#'));
        if !(is_marker || is_selector) {
            continue;
        }
        let end = find_block_end(lines, idx);
        let head = match trimmed.find('{') {
            Some(brace) => &trimmed[..brace],
            None => trimmed,
        };
        symbols.push(make_symbol(head.trim().to_string(), "css_rule", idx, end, lines));
    }
    symbols
}
/// Lists lines that open one of a fixed set of structural HTML tags (or the
/// doctype). Each symbol covers just that line and is named after the text up
/// to the first `>` or space.
fn list_symbols_html(&self, lines: &[&str]) -> Vec<Symbol> {
    const LANDMARKS: [&str; 9] = [
        "<head",
        "<body",
        "<header",
        "<main",
        "<footer",
        "<nav",
        "<section",
        "<article",
        "<!DOCTYPE",
    ];

    lines
        .iter()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            if !LANDMARKS.iter().any(|&tag| trimmed.starts_with(tag)) {
                return None;
            }
            let cut = trimmed
                .find(|c: char| c == '>' || c == ' ')
                .unwrap_or(trimmed.len());
            Some(make_symbol(
                trimmed[..cut].to_string(),
                "html_tag",
                idx,
                idx + 1,
                lines,
            ))
        })
        .collect()
}
/// Lists shallow JSON keys: lines indented by at most two bytes that start
/// with a double quote and contain a colon. The name is the text before the
/// first colon with surrounding quotes and whitespace removed.
fn list_symbols_json(&self, lines: &[&str]) -> Vec<Symbol> {
    let mut symbols = Vec::new();
    for (idx, raw) in lines.iter().enumerate() {
        let trimmed = raw.trim();
        let indent = raw.len() - raw.trim_start().len();
        if indent > 2 || !trimmed.starts_with('"') {
            continue;
        }
        let colon = match trimmed.find(':') {
            Some(pos) => pos,
            None => continue,
        };
        let key = trimmed[..colon].trim_matches('"').trim();
        symbols.push(make_symbol(key.to_string(), "json_key", idx, idx + 1, lines));
    }
    symbols
}
/// Lists top-level entries of YAML/TOML-like text: unindented, non-empty lines
/// that are neither comments (`#`) nor document separators (`---`). The name
/// is the trimmed text before the first colon; lines with an empty name are
/// skipped.
fn list_symbols_yaml(&self, lines: &[&str]) -> Vec<Symbol> {
    lines
        .iter()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            let unindented = raw.len() == raw.trim_start().len();
            let skip = !unindented
                || trimmed.is_empty()
                || trimmed.starts_with('#')
                || trimmed.starts_with("---");
            if skip {
                return None;
            }
            let key = match trimmed.find(':') {
                Some(colon) => trimmed[..colon].trim(),
                None => trimmed,
            };
            if key.is_empty() {
                None
            } else {
                Some(make_symbol(key.to_string(), "yaml_key", idx, idx + 1, lines))
            }
        })
        .collect()
}
/// Lists Markdown headings: every line whose trimmed text starts with `#`.
/// A heading's range runs up to, but not including, the next such line, or to
/// the end of the file.
fn list_symbols_markdown(&self, lines: &[&str]) -> Vec<Symbol> {
    let is_heading = |line: &str| line.trim().starts_with('#');

    let mut symbols = Vec::new();
    for (idx, raw) in lines.iter().enumerate() {
        if !is_heading(raw) {
            continue;
        }
        let title = raw.trim().trim_start_matches('#').trim().to_string();
        let end = lines
            .iter()
            .enumerate()
            .skip(idx + 1)
            .find(|(_, later)| is_heading(later))
            .map_or(lines.len(), |(next_idx, _)| next_idx);
        symbols.push(make_symbol(title, "heading", idx, end, lines));
    }
    symbols
}
/// Generic fallback scanner for code-like text with no known grammar.
///
/// Two passes feed one list, in this order:
/// 1. Assignments whose left-hand side contains Chinese characters and no
///    space, on lines indented by at most eight bytes, reported as
///    `"chinese_variable"` symbols covering a single line.
/// 2. Unindented lines starting with a definition-like keyword (`fn `, `def `,
///    `class `, ...), reported as `"indent_block"` symbols. A block extends
///    over the following lines until the next unindented, non-empty line that
///    does not start with `}`.
fn list_symbols_code_indent(&self, lines: &[&str]) -> Vec<Symbol> {
    const DEF_PREFIXES: &[&str] = &[
        "fn ",
        "pub ",
        "def ",
        "class ",
        "function ",
        "func ",
        "type ",
        "struct ",
        "enum ",
        "interface ",
        "impl ",
        "trait ",
        "const ",
        "export ",
        "async ",
        "public ",
        "private ",
        "protected ",
    ];

    /// Number of leading whitespace bytes on `line`.
    fn leading_ws(line: &str) -> usize {
        line.len() - line.trim_start().len()
    }

    let mut symbols = Vec::new();

    // Pass 1: assignments to names containing Chinese characters.
    for (idx, &line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        if trimmed.is_empty() || leading_ws(line) > 8 || !contains_chinese(trimmed) {
            continue;
        }
        let var_name = match trimmed.find('=') {
            Some(eq_pos) => trimmed[..eq_pos].trim(),
            None => continue,
        };
        if contains_chinese(var_name) && !var_name.contains(' ') {
            symbols.push(make_symbol(
                var_name.to_string(),
                "chinese_variable",
                idx,
                idx + 1,
                lines,
            ));
        }
    }

    // Pass 2: top-level definition blocks delimited by indentation.
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim();
        let is_noise =
            trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('#');
        let starts_block = !is_noise
            && leading_ws(line) == 0
            && !trimmed.starts_with('}')
            && !trimmed.starts_with(')')
            && DEF_PREFIXES.iter().any(|&prefix| trimmed.starts_with(prefix));
        if !starts_block {
            i += 1;
            continue;
        }

        let mut end = i + 1;
        while end < lines.len() {
            let next = lines[end];
            let next_trimmed = next.trim();
            // Blank lines, indented lines and closing braces stay in the block.
            let belongs = next_trimmed.is_empty()
                || leading_ws(next) != 0
                || next_trimmed.starts_with('}');
            if !belongs {
                break;
            }
            end += 1;
        }
        // No extra step for a trailing "}" is needed: the scan above only stops
        // on a line that does not start with '}', so closing braces are
        // already included.
        let name = extract_indent_name(trimmed);
        symbols.push(make_symbol(name, "indent_block", i, end, lines));
        i = end;
    }
    symbols
}
/// Renders the heuristic symbol list as an outline: one line per symbol
/// showing its first source line, its line range and its line count. Symbols
/// whose start line lies past the end of the file are omitted.
fn skeleton_indent(&self, source: &str, path: &Path) -> String {
    let symbols = self.list_symbols_indent(source, path);
    let lines: Vec<&str> = source.lines().collect();
    symbols
        .iter()
        .filter(|sym| sym.start_line <= lines.len())
        .map(|sym| {
            format!(
                "{:4}| {} // L{}-{} ({} lines)\n",
                sym.start_line,
                lines[sym.start_line - 1].trim_end(),
                sym.start_line,
                sym.end_line,
                sym.end_line - sym.start_line + 1
            )
        })
        .collect()
}
}
/// A symbol together with the source text it covers, as returned by
/// `SemanticSearcher::extract_symbol`.
#[derive(Debug, Clone)]
pub struct SymbolSlice {
/// Name of the symbol that was looked up.
pub name: String,
/// Kind string copied from the matching `Symbol`.
pub kind: String,
/// Start line copied from the matching `Symbol`.
pub start_line: usize,
/// End line copied from the matching `Symbol`.
pub end_line: usize,
/// Byte offset in the file at which `text` starts.
pub start_byte: usize,
/// Byte offset in the file just past the end of `text`.
pub end_byte: usize,
/// The file's text between `start_byte` and `end_byte`.
pub text: String,
}
/// Builds a `Symbol` from zero-based line indices into `lines`.
///
/// `start` is the index of the first line and `end` the index one past the
/// last line, so the symbol's 1-based line range is `start + 1 ..= end`. Byte
/// offsets are derived by summing line lengths plus one byte per line.
/// Panics if `start` or `end` exceeds `lines.len()`.
fn make_symbol(name: String, kind: &str, start: usize, end: usize, lines: &[&str]) -> Symbol {
    // Bytes occupied by the first `count` lines, one terminator byte each.
    let bytes_before =
        |count: usize| -> usize { lines[..count].iter().map(|line| line.len() + 1).sum() };
    Symbol {
        name,
        start_line: start + 1,
        end_line: end,
        start_byte: bytes_before(start),
        end_byte: bytes_before(end),
        kind: kind.to_string(),
    }
}
/// Finds where the brace-delimited block starting at line index `start` ends.
///
/// Braces are counted line by line from `start`. The result is the index one
/// past the first line after which the nesting depth is back at zero or below,
/// provided that either a `{` has been seen or the scan has moved beyond the
/// start line. A block opened and closed on the start line itself
/// (`.a { color: red }`) therefore ends on that line instead of running into
/// the next rule. If no such line exists, `start + 1` clamped to `lines.len()`
/// is returned.
fn find_block_end(lines: &[&str], start: usize) -> usize {
    let mut depth = 0i32;
    let mut opened = false;
    for (idx, line) in lines.iter().enumerate().skip(start) {
        for ch in line.chars() {
            match ch {
                '{' => {
                    depth += 1;
                    opened = true;
                }
                '}' => depth -= 1,
                _ => {}
            }
        }
        // A start line without braces (selector on its own line, banner
        // comment) must not end the block immediately, hence `idx > start`.
        if depth <= 0 && (opened || idx > start) {
            return idx + 1;
        }
    }
    (start + 1).min(lines.len())
}
/// Picks a display name out of a definition line.
///
/// The first whitespace-separated word is skipped. Each later word has leading
/// `*` and trailing `(`, `{`, `:` and `<` characters stripped, and the first
/// one that then starts with a letter or underscore is returned. When none
/// qualifies, the first word is returned, or `"unknown"` for a blank line.
fn extract_indent_name(line: &str) -> String {
    let mut words = line.split_whitespace();
    let head = match words.next() {
        Some(word) => word,
        None => return "unknown".to_string(),
    };
    words
        .map(|word| {
            word.trim_start_matches('*')
                .trim_end_matches(|c: char| matches!(c, '(' | '{' | ':' | '<'))
        })
        .find(|candidate| {
            candidate
                .chars()
                .next()
                .map_or(false, |c| c.is_alphabetic() || c == '_')
        })
        .unwrap_or(head)
        .to_string()
}
// Unit tests for language detection, tree-sitter symbol listing, the indent
// fallback, and the Chinese/pinyin identifier helpers. File-based tests write
// their fixture to a temporary file whose suffix selects the code path.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
// File extensions map to the expected `Lang` variants; `.md` is not detected.
#[test]
fn test_language_detection() {
assert_eq!(
LanguageRegistry::detect(Path::new("foo.rs")),
Some(Lang::Rust)
);
assert_eq!(
LanguageRegistry::detect(Path::new("bar.py")),
Some(Lang::Python)
);
assert_eq!(
LanguageRegistry::detect(Path::new("baz.js")),
Some(Lang::JavaScript)
);
assert_eq!(
LanguageRegistry::detect(Path::new("qux.ts")),
Some(Lang::TypeScript)
);
assert_eq!(
LanguageRegistry::detect(Path::new("main.go")),
Some(Lang::Go)
);
assert_eq!(
LanguageRegistry::detect(Path::new("App.java")),
Some(Lang::Java)
);
assert_eq!(LanguageRegistry::detect(Path::new("main.c")), Some(Lang::C));
assert_eq!(
LanguageRegistry::detect(Path::new("main.cpp")),
Some(Lang::Cpp)
);
assert_eq!(
LanguageRegistry::detect(Path::new("Program.cs")),
Some(Lang::CSharp)
);
assert_eq!(
LanguageRegistry::detect(Path::new("index.php")),
Some(Lang::Php)
);
assert_eq!(LanguageRegistry::detect(Path::new("readme.md")), None);
}
// A Rust file yields its function and struct names.
#[test]
fn test_list_symbols_rust() {
let mut searcher = SemanticSearcher::new();
let source = r#"
pub fn hello() {
println!("hello");
}
pub struct Point {
x: f64,
y: f64,
}
impl Point {
pub fn new(x: f64, y: f64) -> Self {
Self { x, y }
}
}
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"hello"), "symbols: {:?}", names);
assert!(names.contains(&"Point"), "symbols: {:?}", names);
}
// Extracting `add` returns its body and nothing from the neighbouring `sub`.
#[test]
fn test_extract_symbol_rust() {
let mut searcher = SemanticSearcher::new();
let source = r#"pub fn add(a: i32, b: i32) -> i32 {
a + b
}
pub fn sub(a: i32, b: i32) -> i32 {
a - b
}
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let slice = searcher.extract_symbol(tmp.path(), "add").unwrap();
assert!(slice.text.contains("a + b"), "text: {}", slice.text);
assert!(!slice.text.contains("a - b"), "should not contain sub");
}
// The skeleton mentions both functions and the `use` line.
#[test]
fn test_skeleton_rust() {
let mut searcher = SemanticSearcher::new();
let source = r#"use std::io;
pub fn hello() {
println!("hello");
}
pub fn world() {
println!("world");
}
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let skel = searcher.skeleton(tmp.path()).unwrap();
assert!(skel.contains("hello"), "skeleton: {}", skel);
assert!(skel.contains("world"), "skeleton: {}", skel);
assert!(skel.contains("use std::io"), "skeleton: {}", skel);
}
// A Python file yields its function and class names.
#[test]
fn test_list_symbols_python() {
let mut searcher = SemanticSearcher::new();
let source = r#"
def greet(name):
print(f"hello {name}")
class Calculator:
def add(self, a, b):
return a + b
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".py").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"greet"), "symbols: {:?}", names);
assert!(names.contains(&"Calculator"), "symbols: {:?}", names);
}
// A C# file yields class, method and interface names.
#[test]
fn test_list_symbols_csharp() {
let mut searcher = SemanticSearcher::new();
let source = r#"
class Program {
Program() {}
public static void Main(string[] args) {
}
}
interface IGreeter {
void Greet();
}
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".cs").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"Program"), "symbols: {:?}", names);
assert!(names.contains(&"Main"), "symbols: {:?}", names);
assert!(names.contains(&"IGreeter"), "symbols: {:?}", names);
}
// A PHP file yields class, method, function and interface names.
#[test]
fn test_list_symbols_php() {
let mut searcher = SemanticSearcher::new();
let source = r#"
<?php
class Calculator {
public function add($a, $b) {
return $a + $b;
}
}
function greet($name) {
return "Hello, $name";
}
interface Printable {
public function print();
}
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".php").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"Calculator"), "php: {:?}", names);
assert!(names.contains(&"add"), "php: {:?}", names);
assert!(names.contains(&"greet"), "php: {:?}", names);
assert!(names.contains(&"Printable"), "php: {:?}", names);
}
// A `.txt` file has no detected language, so the indent heuristics run; the
// reported name keeps the trailing `()`.
#[test]
fn test_indent_fallback() {
let mut searcher = SemanticSearcher::new();
let source = r#"
def hello():
print("hello")
def world():
print("world")
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(
names.contains(&"hello()"),
"indent fallback symbols: {:?}",
names
);
}
// Single-character classification.
#[test]
fn test_chinese_character_detection() {
assert!(is_chinese('中'));
assert!(is_chinese('文'));
assert!(!is_chinese('a'));
assert!(!is_chinese('1'));
assert!(!is_chinese('_'));
}
// String-level detection, including mixed ASCII/Chinese text.
#[test]
fn test_contains_chinese() {
assert!(contains_chinese("用户名"));
assert!(contains_chinese("hello世界"));
assert!(!contains_chinese("hello"));
assert!(!contains_chinese("123"));
}
// Pinyin identifiers are accepted; English, empty and numeric ones are not.
#[test]
fn test_pinyin_identifier_detection() {
assert!(is_pinyin_identifier("yonghuMing"));
assert!(is_pinyin_identifier("dingdanList"));
assert!(is_pinyin_identifier("zhongguoRen"));
assert!(is_pinyin_identifier("wenjianMuLu"));
assert!(!is_pinyin_identifier("hello"));
assert!(!is_pinyin_identifier("getUser"));
assert!(!is_pinyin_identifier(""));
assert!(!is_pinyin_identifier("123"));
}
// `Symbol` helper methods for a Chinese, a pinyin and an English name.
#[test]
fn test_symbol_chinese_detection() {
let sym = Symbol {
name: "用户名".to_string(),
start_line: 1,
end_line: 1,
start_byte: 0,
end_byte: 9,
kind: "variable".to_string(),
};
assert!(sym.is_chinese());
assert!(!sym.is_pinyin());
assert!(sym.is_chinese_related());
let sym_pinyin = Symbol {
name: "yonghuMing".to_string(),
start_line: 1,
end_line: 1,
start_byte: 0,
end_byte: 10,
kind: "variable".to_string(),
};
assert!(!sym_pinyin.is_chinese());
assert!(sym_pinyin.is_pinyin());
assert!(sym_pinyin.is_chinese_related());
let sym_english = Symbol {
name: "getUser".to_string(),
start_line: 1,
end_line: 1,
start_byte: 0,
end_byte: 7,
kind: "function".to_string(),
};
assert!(!sym_english.is_chinese());
assert!(!sym_english.is_pinyin());
assert!(!sym_english.is_chinese_related());
}
// The fallback scanner reports assignments to Chinese-named variables.
#[test]
fn test_chinese_variable_extraction() {
let mut searcher = SemanticSearcher::new();
let source = r#"用户名 = "张三"
年龄 = 25
def get_user():
return 用户名
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"用户名"), "symbols: {:?}", names);
}
// Names mixing ASCII and Chinese count as Chinese-related; pure ASCII does not.
#[test]
fn test_mixed_chinese_english_detection() {
assert!(contains_chinese("getUser用户名"));
assert!(contains_chinese("query_订单列表"));
assert!(contains_chinese("test数据"));
assert!(contains_chinese("order详情"));
let sym_mixed1 = Symbol {
name: "getUser用户名".to_string(),
start_line: 1,
end_line: 1,
start_byte: 0,
end_byte: 0,
kind: "variable".to_string(),
};
assert!(sym_mixed1.is_chinese_related());
let sym_mixed2 = Symbol {
name: "query_订单列表".to_string(),
start_line: 1,
end_line: 1,
start_byte: 0,
end_byte: 0,
kind: "variable".to_string(),
};
assert!(sym_mixed2.is_chinese_related());
assert!(!contains_chinese("getUser"));
assert!(!contains_chinese("queryOrderList"));
}
// The fallback scanner reports mixed ASCII/Chinese variable names in full.
#[test]
fn test_mixed_content_extraction() {
let mut searcher = SemanticSearcher::new();
let source = r#"getUser用户名 = "张三"
query_订单列表 = []
test数据 = 42
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"getUser用户名"), "symbols: {:?}", names);
assert!(names.contains(&"query_订单列表"), "symbols: {:?}", names);
assert!(names.contains(&"test数据"), "symbols: {:?}", names);
}
// Chinese-named assignments inside a definition body are still reported.
#[test]
fn test_chinese_variable_nested_indent() {
let mut searcher = SemanticSearcher::new();
let source = r#"def process():
用户名 = "张三"
订单列表 = []
if True:
配置项 = "value"
"#;
let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
tmp.write_all(source.as_bytes()).unwrap();
let symbols = searcher.list_symbols(tmp.path()).unwrap();
let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"用户名"), "nested symbols: {:?}", names);
assert!(names.contains(&"订单列表"), "nested symbols: {:?}", names);
assert!(names.contains(&"配置项"), "nested symbols: {:?}", names);
}
}