use std::{
borrow::Cow,
fs::File,
io::{self, Read},
path::{Path, PathBuf},
sync::{Arc, Mutex},
};
use globset::GlobSet;
use grep_matcher::Matcher;
use grep_regex::RegexMatcherBuilder;
use grep_searcher::{
BinaryDetection, Searcher, SearcherBuilder, Sink, SinkContext, SinkContextKind, SinkMatch,
};
use ignore::{ParallelVisitor, ParallelVisitorBuilder, WalkState};
use napi::{
JsString,
bindgen_prelude::*,
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use napi_derive::napi;
use rayon::prelude::*;
use smallvec::SmallVec;
use crate::{fs_cache, glob_util, task};
const MAX_FILE_BYTES: u64 = 4 * 1024 * 1024;
const SMALL_FILE_READ_BYTES: u64 = 128 * 1024;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[napi(string_enum)]
pub enum GrepOutputMode {
#[napi(value = "content")]
Content,
#[napi(value = "count")]
Count,
#[napi(value = "filesWithMatches")]
FilesWithMatches,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OutputMode {
Content,
Count,
FilesWithMatches,
}
#[napi(object)]
pub struct SearchOptions {
pub pattern: String,
pub ignore_case: Option<bool>,
pub multiline: Option<bool>,
pub max_count: Option<u32>,
pub offset: Option<u32>,
pub context_before: Option<u32>,
pub context_after: Option<u32>,
pub context: Option<u32>,
pub max_columns: Option<u32>,
pub mode: Option<GrepOutputMode>,
}
#[napi(object)]
pub struct GrepOptions<'env> {
pub pattern: String,
pub path: String,
pub glob: Option<String>,
pub r#type: Option<String>,
pub ignore_case: Option<bool>,
pub multiline: Option<bool>,
pub hidden: Option<bool>,
pub gitignore: Option<bool>,
pub cache: Option<bool>,
pub max_count: Option<u32>,
pub offset: Option<u32>,
pub context_before: Option<u32>,
pub context_after: Option<u32>,
pub context: Option<u32>,
pub max_columns: Option<u32>,
pub mode: Option<GrepOutputMode>,
pub signal: Option<Unknown<'env>>,
pub timeout_ms: Option<u32>,
}
#[derive(Clone)]
#[napi(object)]
pub struct ContextLine {
pub line_number: u32,
pub line: String,
}
#[napi(object)]
pub struct Match {
pub line_number: u32,
pub line: String,
pub context_before: Option<Vec<ContextLine>>,
pub context_after: Option<Vec<ContextLine>>,
pub truncated: Option<bool>,
}
#[napi(object)]
pub struct SearchResult {
pub matches: Vec<Match>,
pub match_count: u32,
pub limit_reached: bool,
pub error: Option<String>,
}
#[derive(Clone)]
#[napi(object)]
pub struct GrepMatch {
pub path: String,
pub line_number: u32,
pub line: String,
pub context_before: Option<Vec<ContextLine>>,
pub context_after: Option<Vec<ContextLine>>,
pub truncated: Option<bool>,
pub match_count: Option<u32>,
}
#[napi(object)]
pub struct GrepResult {
pub matches: Vec<GrepMatch>,
pub total_matches: u32,
pub files_with_matches: u32,
pub files_searched: u32,
pub limit_reached: Option<bool>,
}
enum TypeFilter {
Known { exts: &'static [&'static str], names: &'static [&'static str] },
Custom(String),
}
impl TypeFilter {
fn match_ext(&self, ext: &str) -> bool {
match self {
Self::Known { exts, .. } => exts.iter().any(|e| ext.eq_ignore_ascii_case(e)),
Self::Custom(custom_ext) => ext.eq_ignore_ascii_case(custom_ext),
}
}
fn match_name(&self, name: &str) -> bool {
match self {
Self::Known { names, .. } => names.iter().any(|n| name.eq_ignore_ascii_case(n)),
Self::Custom(ext) => ext.eq_ignore_ascii_case(name),
}
}
}
struct MatchCollector {
matches: Vec<CollectedMatch>,
match_count: u64,
collected_count: u64,
max_count: Option<u64>,
offset: u64,
skipped: u64,
limit_reached: bool,
max_columns: Option<usize>,
collect_matches: bool,
context_before: SmallVec<[ContextLine; 8]>,
}
struct CollectedMatch {
line_number: u64,
line: String,
context_before: SmallVec<[ContextLine; 8]>,
context_after: SmallVec<[ContextLine; 8]>,
truncated: bool,
}
struct SearchResultInternal {
matches: Vec<CollectedMatch>,
match_count: u64,
collected: u64,
limit_reached: bool,
}
struct FileEntry {
path: PathBuf,
relative_path: String,
}
struct FileSearchResult {
relative_path: String,
matches: Vec<CollectedMatch>,
match_count: u64,
limit_reached: bool,
}
enum FileBytes {
Mapped(memmap2::Mmap),
Owned(Vec<u8>),
}
impl FileBytes {
fn as_slice(&self) -> &[u8] {
match self {
Self::Mapped(mapped) => mapped.as_ref(),
Self::Owned(bytes) => bytes.as_slice(),
}
}
}
impl MatchCollector {
fn new(
max_count: Option<u64>,
offset: u64,
max_columns: Option<usize>,
collect_matches: bool,
) -> Self {
Self {
matches: Vec::new(),
match_count: 0,
collected_count: 0,
max_count,
offset,
skipped: 0,
limit_reached: false,
max_columns,
collect_matches,
context_before: SmallVec::new(),
}
}
}
fn truncate_line(line: String, max_columns: Option<usize>) -> (String, bool) {
match max_columns {
Some(max) if line.len() > max => {
let cut = max.saturating_sub(3);
let boundary = line.floor_char_boundary(cut);
(format!("{}...", &line[..boundary]), true)
},
_ => (line, false),
}
}
fn bytes_to_trimmed_string(bytes: &[u8]) -> String {
match std::str::from_utf8(bytes) {
Ok(text) => text.trim_end().to_string(),
Err(_) => String::from_utf8_lossy(bytes).trim_end().to_string(),
}
}
impl Sink for MatchCollector {
type Error = io::Error;
fn matched(
&mut self,
_searcher: &Searcher,
mat: &SinkMatch<'_>,
) -> std::result::Result<bool, Self::Error> {
self.match_count += 1;
if self.limit_reached {
return Ok(false);
}
if self.skipped < self.offset {
self.skipped += 1;
self.context_before.clear();
return Ok(true);
}
if self.collect_matches {
let raw_line = bytes_to_trimmed_string(mat.bytes());
let (line, truncated) = truncate_line(raw_line, self.max_columns);
let line_number = mat.line_number().unwrap_or(0);
self.matches.push(CollectedMatch {
line_number,
line,
context_before: std::mem::take(&mut self.context_before),
context_after: SmallVec::new(),
truncated,
});
} else {
self.context_before.clear();
}
self.collected_count += 1;
if let Some(max) = self.max_count
&& self.collected_count >= max
{
self.limit_reached = true;
}
Ok(true)
}
fn context(
&mut self,
_searcher: &Searcher,
ctx: &SinkContext<'_>,
) -> std::result::Result<bool, Self::Error> {
if !self.collect_matches {
return Ok(true);
}
let raw_line = bytes_to_trimmed_string(ctx.bytes());
let (line, _) = truncate_line(raw_line, self.max_columns);
let line_number = ctx.line_number().unwrap_or(0);
match ctx.kind() {
SinkContextKind::Before => {
self
.context_before
.push(ContextLine { line_number: crate::utils::clamp_u32(line_number), line });
},
SinkContextKind::After => {
if let Some(last_match) = self.matches.last_mut() {
last_match
.context_after
.push(ContextLine { line_number: crate::utils::clamp_u32(line_number), line });
}
},
SinkContextKind::Other => {},
}
Ok(true)
}
}
const fn parse_output_mode(mode: Option<GrepOutputMode>) -> OutputMode {
match mode {
None | Some(GrepOutputMode::Content) => OutputMode::Content,
Some(GrepOutputMode::Count) => OutputMode::Count,
Some(GrepOutputMode::FilesWithMatches) => OutputMode::FilesWithMatches,
}
}
fn resolve_search_path(path: &str) -> Result<PathBuf> {
let candidate = PathBuf::from(path);
if candidate.is_absolute() {
return Ok(candidate);
}
let cwd = std::env::current_dir()
.map_err(|err| Error::from_reason(format!("Failed to resolve cwd: {err}")))?;
Ok(cwd.join(candidate))
}
fn resolve_type_filter(type_name: Option<&str>) -> Option<TypeFilter> {
let normalized = type_name
.map(str::trim)
.filter(|value| !value.is_empty())
.map(|value| value.trim_start_matches('.').to_lowercase())?;
let (exts, names): (&[&str], &[&str]) = match normalized.as_str() {
"js" | "javascript" => (&["js", "jsx", "mjs", "cjs"], &[]),
"ts" | "typescript" => (&["ts", "tsx", "mts", "cts"], &[]),
"json" => (&["json", "jsonc", "json5"], &[]),
"yaml" | "yml" => (&["yaml", "yml"], &[]),
"toml" => (&["toml"], &[]),
"md" | "markdown" => (&["md", "markdown", "mdx"], &[]),
"py" | "python" => (&["py", "pyi"], &[]),
"rs" | "rust" => (&["rs"], &[]),
"go" => (&["go"], &[]),
"java" => (&["java"], &[]),
"kt" | "kotlin" => (&["kt", "kts"], &[]),
"c" => (&["c", "h"], &[]),
"cpp" | "cxx" => (&["cpp", "cc", "cxx", "hpp", "hxx", "hh"], &[]),
"cs" | "csharp" => (&["cs", "csx"], &[]),
"php" => (&["php", "phtml"], &[]),
"rb" | "ruby" => (&["rb", "rake", "gemspec"], &[]),
"sh" | "bash" => (&["sh", "bash", "zsh"], &[]),
"zsh" => (&["zsh"], &[]),
"fish" => (&["fish"], &[]),
"html" => (&["html", "htm"], &[]),
"css" => (&["css"], &[]),
"scss" => (&["scss"], &[]),
"sass" => (&["sass"], &[]),
"less" => (&["less"], &[]),
"xml" => (&["xml"], &[]),
"docker" | "dockerfile" => (&[], &["dockerfile"]),
"make" | "makefile" => (&[], &["makefile"]),
_ => {
return Some(TypeFilter::Custom(normalized));
},
};
Some(TypeFilter::Known { exts, names })
}
fn matches_type_filter(path: &Path, filter: &TypeFilter) -> bool {
let base_name = path
.file_name()
.and_then(|name| name.to_str())
.unwrap_or("");
if filter.match_name(base_name) {
return true;
}
let ext = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
if ext.is_empty() {
return false;
}
filter.match_ext(ext)
}
fn resolve_context(
context: Option<u32>,
context_before: Option<u32>,
context_after: Option<u32>,
) -> (u32, u32) {
if context_before.is_some() || context_after.is_some() {
(context_before.unwrap_or(0), context_after.unwrap_or(0))
} else {
let value = context.unwrap_or(0);
(value, value)
}
}
#[derive(Clone, Copy)]
struct SearchParams {
context_before: u32,
context_after: u32,
max_columns: Option<u32>,
mode: OutputMode,
max_count: Option<u64>,
offset: u64,
}
fn run_search(
matcher: &grep_regex::RegexMatcher,
content: &[u8],
params: SearchParams,
) -> io::Result<SearchResultInternal> {
run_search_slice(&mut build_searcher_for_params(params), matcher, content, params)
}
fn run_search_slice(
searcher: &mut Searcher,
matcher: &grep_regex::RegexMatcher,
content: &[u8],
params: SearchParams,
) -> io::Result<SearchResultInternal> {
let mut collector = MatchCollector::new(
params.max_count,
params.offset,
params.max_columns.map(|v| v as usize),
params.mode == OutputMode::Content,
);
searcher.search_slice(matcher, content, &mut collector)?;
Ok(SearchResultInternal {
matches: collector.matches,
match_count: collector.match_count,
collected: collector.collected_count,
limit_reached: collector.limit_reached,
})
}
fn build_searcher_for_params(params: SearchParams) -> Searcher {
build_searcher(
if params.mode == OutputMode::Content {
params.context_before
} else {
0
},
if params.mode == OutputMode::Content {
params.context_after
} else {
0
},
)
}
fn build_searcher(context_before: u32, context_after: u32) -> Searcher {
SearcherBuilder::new()
.binary_detection(BinaryDetection::quit(b'\x00'))
.line_number(true)
.before_context(context_before as usize)
.after_context(context_after as usize)
.build()
}
fn read_file_bytes(path: &Path) -> io::Result<Option<FileBytes>> {
let file = match File::open(path) {
Ok(file) => file,
Err(err)
if matches!(err.kind(), io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied) =>
{
return Ok(None);
},
Err(err) => return Err(err),
};
let metadata = file.metadata()?;
if !metadata.is_file() {
return Ok(None);
}
let size = metadata.len();
if size > MAX_FILE_BYTES {
return Ok(None);
} else if size == 0 {
return Ok(Some(FileBytes::Owned(Vec::new())));
}
if size <= SMALL_FILE_READ_BYTES {
let mut buffer = Vec::with_capacity(size as usize);
let mut handle = file;
handle.read_to_end(&mut buffer)?;
return Ok(Some(FileBytes::Owned(buffer)));
}
let mapping = unsafe {
memmap2::Mmap::map(&file)
};
let bytes = if let Ok(mapped) = mapping {
FileBytes::Mapped(mapped)
} else {
let mut buffer = Vec::with_capacity(size as usize);
let mut handle = file;
handle.read_to_end(&mut buffer)?;
FileBytes::Owned(buffer)
};
Ok(Some(bytes))
}
fn to_public_match(matched: CollectedMatch) -> Match {
let context_before = if matched.context_before.is_empty() {
None
} else {
Some(matched.context_before.into_vec())
};
let context_after = if matched.context_after.is_empty() {
None
} else {
Some(matched.context_after.into_vec())
};
Match {
line_number: crate::utils::clamp_u32(matched.line_number),
line: matched.line,
context_before,
context_after,
truncated: if matched.truncated { Some(true) } else { None },
}
}
fn to_grep_match(path: String, matched: CollectedMatch) -> GrepMatch {
let context_before = if matched.context_before.is_empty() {
None
} else {
Some(matched.context_before.into_vec())
};
let context_after = if matched.context_after.is_empty() {
None
} else {
Some(matched.context_after.into_vec())
};
GrepMatch {
path,
line_number: crate::utils::clamp_u32(matched.line_number),
line: matched.line,
context_before,
context_after,
truncated: if matched.truncated { Some(true) } else { None },
match_count: None,
}
}
fn push_content_matches(
matches: &mut Vec<GrepMatch>,
path: String,
collected_matches: Vec<CollectedMatch>,
) {
let last_index = collected_matches.len().saturating_sub(1);
let mut path = Some(path);
for (index, matched) in collected_matches.into_iter().enumerate() {
let match_path = if index == last_index {
path.take().expect("path is available for final match")
} else {
path
.as_ref()
.expect("path is available for cloned matches")
.clone()
};
matches.push(to_grep_match(match_path, matched));
}
}
const fn empty_search_result(error: Option<String>) -> SearchResult {
SearchResult { matches: Vec::new(), match_count: 0, limit_reached: false, error }
}
struct GrepConfig {
pattern: String,
path: String,
glob: Option<String>,
type_filter: Option<String>,
ignore_case: Option<bool>,
multiline: Option<bool>,
hidden: Option<bool>,
gitignore: Option<bool>,
cache: Option<bool>,
max_count: Option<u32>,
offset: Option<u32>,
context_before: Option<u32>,
context_after: Option<u32>,
context: Option<u32>,
max_columns: Option<u32>,
mode: Option<GrepOutputMode>,
}
fn collect_files(
root: &Path,
scanned_entries: &[fs_cache::GlobMatch],
glob_set: Option<&GlobSet>,
type_filter: Option<&TypeFilter>,
) -> Vec<FileEntry> {
let mut entries = Vec::new();
for entry in scanned_entries {
if entry.file_type != fs_cache::FileType::File {
continue;
}
if let Some(glob_set) = glob_set
&& !glob_set.is_match(Path::new(&entry.path))
{
continue;
}
let path = root.join(&entry.path);
if let Some(filter) = type_filter
&& !matches_type_filter(&path, filter)
{
continue;
}
entries.push(FileEntry { path, relative_path: entry.path.clone() });
}
entries
}
fn find_valid_repetition(bytes: &[u8], start: usize) -> Option<usize> {
let len = bytes.len();
let mut i = start + 1;
if i >= len || !bytes[i].is_ascii_digit() {
return None;
}
while i < len && bytes[i].is_ascii_digit() {
i += 1;
}
if i >= len {
return None;
}
if bytes[i] == b'}' {
return Some(i);
}
if bytes[i] != b',' {
return None;
}
i += 1;
if i >= len {
return None;
}
while i < len && bytes[i].is_ascii_digit() {
i += 1;
}
if i < len && bytes[i] == b'}' {
return Some(i);
}
None
}
fn find_braced_escape_end(bytes: &[u8], start: usize) -> Option<usize> {
let mut i = start + 1;
while i < bytes.len() {
if bytes[i] == b'}' {
return Some(i);
}
i += 1;
}
None
}
fn sanitize_braces(pattern: &str) -> Cow<'_, str> {
let bytes = pattern.as_bytes();
if !bytes.contains(&b'{') && !bytes.contains(&b'}') {
return Cow::Borrowed(pattern);
}
let len = bytes.len();
let mut result = String::with_capacity(len + 8);
let mut modified = false;
let mut i = 0;
while i < len {
if bytes[i] == b'\\' && i + 1 < len {
result.push('\\');
i += 1;
// The next character is the escaped literal; push it regardless.
// Safety: index is in bounds (checked above).
let ch = pattern[i..]
.chars()
.next()
.expect("non-empty slice has a char");
result.push(ch);
i += ch.len_utf8();
if matches!(ch, 'p' | 'P' | 'x' | 'u') && i < len && bytes[i] == b'{' {
if let Some(end) = find_braced_escape_end(bytes, i) {
result.push_str(&pattern[i..=end]);
i = end + 1;
} else {
result.push_str(&pattern[i..]);
i = len;
}
}
continue;
}
if bytes[i] == b'{' {
if let Some(end) = find_valid_repetition(bytes, i) {
result.push_str(&pattern[i..=end]);
i = end + 1;
continue;
}
result.push_str("\\{");
i += 1;
modified = true;
continue;
}
if bytes[i] == b'}' {
result.push_str("\\}");
i += 1;
modified = true;
continue;
}
let ch = pattern[i..]
.chars()
.next()
.expect("non-empty slice has a char");
result.push(ch);
i += ch.len_utf8();
}
if modified {
Cow::Owned(result)
} else {
Cow::Borrowed(pattern)
}
}
/// Escape unescaped parentheses after a group-syntax regex error.
///
/// Search patterns like `fetchAnthropicProvider(` are common literal snippets,
/// but the regex engine parses the trailing `(` as the start of a capture
/// group. When the parser already reported invalid group syntax, escaping any
/// remaining literal parentheses preserves useful search behavior without
/// changing valid regexes.
fn escape_unescaped_parentheses(pattern: &str) -> Cow<'_, str> {
let bytes = pattern.as_bytes();
if !bytes.contains(&b'(') && !bytes.contains(&b')') {
return Cow::Borrowed(pattern);
}
let mut result = String::with_capacity(pattern.len() + 4);
let mut modified = false;
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'\\' && i + 1 < bytes.len() {
result.push('\\');
i += 1;
let ch = pattern[i..]
.chars()
.next()
.expect("non-empty slice has a char");
result.push(ch);
i += ch.len_utf8();
continue;
}
let ch = pattern[i..]
.chars()
.next()
.expect("non-empty slice has a char");
if matches!(ch, '(' | ')') {
result.push('\\');
modified = true;
}
result.push(ch);
i += ch.len_utf8();
}
if modified {
Cow::Owned(result)
} else {
Cow::Borrowed(pattern)
}
}
fn build_regex_matcher(
pattern: &str,
ignore_case: bool,
multiline: bool,
) -> std::result::Result<grep_regex::RegexMatcher, grep_regex::Error> {
RegexMatcherBuilder::new()
.case_insensitive(ignore_case)
.multi_line(multiline)
.build(pattern)
}
#[cfg(test)]
mod tests {
#[cfg(unix)]
use std::{ffi::CString, os::unix::ffi::OsStrExt};
use std::{
fs,
path::{Path, PathBuf},
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime, UNIX_EPOCH},
};
use super::{
GrepConfig, GrepOutputMode, escape_unescaped_parentheses, grep_sync, sanitize_braces,
};
use crate::task;
struct TempDirGuard(PathBuf);
impl TempDirGuard {
fn new() -> Self {
static COUNTER: AtomicU64 = AtomicU64::new(0);
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("system time is after UNIX_EPOCH")
.as_nanos();
let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
let pid = std::process::id();
let path = std::env::temp_dir().join(format!("pi-grep-test-{pid}-{nanos}-{seq}"));
fs::create_dir_all(&path).expect("create temp test directory");
Self(path)
}
fn path(&self) -> &Path {
&self.0
}
}
impl Drop for TempDirGuard {
fn drop(&mut self) {
let _ = fs::remove_dir_all(&self.0);
}
}
fn write_file(path: &Path, content: &str) {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).expect("create parent directories for test file");
}
fs::write(path, content).expect("write test file");
}
#[cfg(unix)]
fn make_fifo(path: &Path) {
let fifo_path =
CString::new(path.as_os_str().as_bytes()).expect("fifo path has no NUL bytes");
// SAFETY: `fifo_path` is a valid CString (NUL-terminated, no interior NULs),
// so `as_ptr()` yields a valid C string pointer. `0o600` is a valid mode.
// The CString is alive for the duration of the call.
let rc = unsafe { libc::mkfifo(fifo_path.as_ptr(), 0o600) };
assert_eq!(rc, 0, "create fifo: {}", std::io::Error::last_os_error());
}
#[cfg(unix)]
fn base_grep_config(path: &Path) -> GrepConfig {
GrepConfig {
pattern: "needle".to_string(),
path: path.to_string_lossy().into_owned(),
glob: None,
type_filter: None,
ignore_case: None,
multiline: None,
hidden: None,
gitignore: Some(false),
cache: Some(false),
max_count: None,
offset: None,
context_before: None,
context_after: None,
context: None,
max_columns: None,
mode: None,
}
}
#[test]
fn preserves_unicode_property_escapes() {
assert_eq!(sanitize_braces(r"\p{Greek}").as_ref(), r"\p{Greek}");
}
#[test]
fn preserves_hex_brace_escapes() {
assert_eq!(sanitize_braces(r"\x{41}").as_ref(), r"\x{41}");
}
#[test]
fn preserves_malformed_braced_escapes() {
assert_eq!(sanitize_braces(r"\p{Greek").as_ref(), r"\p{Greek");
}
#[test]
fn escapes_non_quantifier_braces() {
assert_eq!(sanitize_braces("${platform}").as_ref(), "$\\{platform\\}");
}
#[test]
fn preserves_valid_quantifiers() {
assert_eq!(sanitize_braces("a{2,4}").as_ref(), "a{2,4}");
}
#[test]
fn preserves_escaped_parentheses() {
assert_eq!(escape_unescaped_parentheses(r"foo\(bar\)").as_ref(), r"foo\(bar\)");
}
#[test]
fn escapes_literal_parentheses() {
assert_eq!(
escape_unescaped_parentheses("fetchAnthropicProvider(").as_ref(),
r"fetchAnthropicProvider\("
);
assert_eq!(
escape_unescaped_parentheses("fetchAnthropicProvider()").as_ref(),
r"fetchAnthropicProvider\(\)"
);
}
#[cfg(unix)]
#[test]
fn grep_directory_skips_fifo_entries() {
let root = TempDirGuard::new();
write_file(&root.path().join("regular.txt"), "needle\n");
make_fifo(&root.path().join("skip-me.fifo"));
let result = grep_sync(base_grep_config(root.path()), None, task::CancelToken::default())
.expect("directory grep should succeed");
assert_eq!(result.total_matches, 1);
assert_eq!(result.files_with_matches, 1);
assert_eq!(result.files_searched, 1);
assert_eq!(result.matches.len(), 1);
assert_eq!(result.matches[0].path, "regular.txt");
}
#[cfg(unix)]
#[test]
fn grep_directory_applies_offset_and_limit_in_walker_order() {
let root = TempDirGuard::new();
write_file(&root.path().join("a.txt"), "needle a1\nneedle a2\n");
write_file(&root.path().join("b.txt"), "needle b1\n");
write_file(&root.path().join("c.txt"), "haystack\n");
let mut config = base_grep_config(root.path());
config.max_count = Some(2);
config.offset = Some(1);
let result = grep_sync(config, None, task::CancelToken::default())
.expect("directory grep should succeed");
assert_eq!(result.total_matches, 3);
assert_eq!(result.files_with_matches, 2);
assert_eq!(result.limit_reached, Some(true));
assert_eq!(result.matches.len(), 2);
assert_eq!(result.matches[0].path, "a.txt");
assert_eq!(result.matches[0].line, "needle a2");
assert_eq!(result.matches[1].path, "b.txt");
assert_eq!(result.matches[1].line, "needle b1");
}
#[cfg(unix)]
#[test]
fn grep_count_mode_limit_applies_to_matches_not_files() {
let root = TempDirGuard::new();
write_file(&root.path().join("a.txt"), "needle a1\nneedle a2\n");
write_file(&root.path().join("b.txt"), "needle b1\n");
let mut config = base_grep_config(root.path());
config.mode = Some(GrepOutputMode::Count);
config.max_count = Some(2);
let result = grep_sync(config, None, task::CancelToken::default())
.expect("directory grep should succeed");
assert_eq!(result.total_matches, 3);
assert_eq!(result.files_with_matches, 2);
assert_eq!(result.limit_reached, Some(true));
assert_eq!(result.matches.len(), 1);
assert_eq!(result.matches[0].path, "a.txt");
assert_eq!(result.matches[0].match_count, Some(2));
}
#[cfg(unix)]
#[test]
fn grep_streaming_respects_pre_cancelled_token() {
let root = TempDirGuard::new();
write_file(&root.path().join("regular.txt"), "needle\n");
let ct = task::CancelToken::new(Some(0), None);
std::thread::sleep(Duration::from_millis(1));
let result = grep_sync(base_grep_config(root.path()), None, ct);
let Err(err) = result else {
panic!("pre-cancelled grep should fail before returning matches");
};
assert!(
err.to_string().contains("Timeout"),
"expected timeout cancellation error, got: {err}"
);
}
#[cfg(unix)]
#[test]
fn grep_special_root_path_returns_empty_result() {
let root = TempDirGuard::new();
let fifo = root.path().join("direct.fifo");
make_fifo(&fifo);
let result = grep_sync(base_grep_config(&fifo), None, task::CancelToken::default())
.expect("special-file grep should return an empty result");
assert!(result.matches.is_empty());
assert_eq!(result.total_matches, 0);
assert_eq!(result.files_with_matches, 0);
assert_eq!(result.files_searched, 0);
assert_eq!(result.limit_reached, None);
}
}
fn build_matcher(
pattern: &str,
ignore_case: bool,
multiline: bool,
) -> Result<grep_regex::RegexMatcher> {
let sanitized = sanitize_braces(pattern);
match build_regex_matcher(sanitized.as_ref(), ignore_case, multiline) {
Ok(matcher) => Ok(matcher),
Err(err) => {
let message = err.to_string();
if message.contains("unclosed group") || message.contains("unopened group") {
let escaped = escape_unescaped_parentheses(sanitized.as_ref());
if escaped.as_ref() != sanitized.as_ref() {
return build_regex_matcher(escaped.as_ref(), ignore_case, multiline)
.map_err(|retry_err| Error::from_reason(format!("Regex error: {retry_err}")));
}
}
Err(Error::from_reason(format!("Regex error: {message}")))
},
}
}
// ---------------------------------------------------------------------------
// File / directory search orchestration
// ---------------------------------------------------------------------------
fn per_file_params(params: SearchParams) -> SearchParams {
let file_limit = match params.mode {
OutputMode::Content => params
.max_count
.map(|max| max.saturating_add(params.offset)),
OutputMode::Count => None,
OutputMode::FilesWithMatches => Some(1),
};
SearchParams { max_count: file_limit, offset: 0, ..params }
}
fn run_parallel_search(
entries: &[FileEntry],
matcher: &grep_regex::RegexMatcher,
params: SearchParams,
) -> Vec<FileSearchResult> {
let file_params = per_file_params(params);
let raw: Vec<Option<FileSearchResult>> = entries
.par_iter()
.map_init(
|| build_searcher_for_params(file_params),
|searcher, entry| {
let bytes = read_file_bytes(&entry.path).ok()??;
let search = if file_params.mode == OutputMode::FilesWithMatches {
let matched = matcher.is_match(bytes.as_slice()).ok()?;
SearchResultInternal {
matches: Vec::new(),
match_count: u64::from(matched),
collected: u64::from(matched),
limit_reached: false,
}
} else {
run_search_slice(searcher, matcher, bytes.as_slice(), file_params).ok()?
};
Some(FileSearchResult {
relative_path: entry.relative_path.clone(),
matches: search.matches,
match_count: search.match_count,
limit_reached: search.limit_reached,
})
},
)
.collect();
raw.into_iter().flatten().collect()
}
struct StreamingGrepVisitor<'a> {
root: &'a Path,
matcher: &'a grep_regex::RegexMatcher,
glob_set: Option<&'a GlobSet>,
type_filter: Option<&'a TypeFilter>,
params: SearchParams,
searcher: Searcher,
results: Vec<FileSearchResult>,
shared_results: Arc<Mutex<Vec<Vec<FileSearchResult>>>>,
error: Arc<Mutex<Option<String>>>,
ct: &'a task::CancelToken,
visited: usize,
}
impl Drop for StreamingGrepVisitor<'_> {
fn drop(&mut self) {
if self.results.is_empty() {
return;
}
let results = std::mem::take(&mut self.results);
self
.shared_results
.lock()
.expect("grep result collection lock poisoned")
.push(results);
}
}
impl ParallelVisitor for StreamingGrepVisitor<'_> {
fn visit(&mut self, entry: std::result::Result<ignore::DirEntry, ignore::Error>) -> WalkState {
if self.visited == 0 || self.visited >= 128 {
self.visited = 0;
if let Err(err) = self.ct.heartbeat() {
*self.error.lock().expect("error lock poisoned") = Some(err.to_string());
return WalkState::Quit;
}
}
self.visited += 1;
let Ok(entry) = entry else {
return WalkState::Continue;
};
if !entry
.file_type()
.is_some_and(|file_type| file_type.is_file())
{
return WalkState::Continue;
}
let relative = fs_cache::normalize_relative_path(self.root, entry.path());
if relative.is_empty() {
return WalkState::Continue;
}
if let Some(glob_set) = self.glob_set
&& !glob_set.is_match(Path::new(relative.as_ref()))
{
return WalkState::Continue;
}
if let Some(filter) = self.type_filter
&& !matches_type_filter(entry.path(), filter)
{
return WalkState::Continue;
}
let Ok(Some(bytes)) = read_file_bytes(entry.path()) else {
return WalkState::Continue;
};
let search = if self.params.mode == OutputMode::FilesWithMatches {
let Ok(matched) = self.matcher.is_match(bytes.as_slice()) else {
return WalkState::Continue;
};
SearchResultInternal {
matches: Vec::new(),
match_count: u64::from(matched),
collected: u64::from(matched),
limit_reached: false,
}
} else {
let Ok(search) =
run_search_slice(&mut self.searcher, self.matcher, bytes.as_slice(), self.params)
else {
return WalkState::Continue;
};
search
};
self.results.push(FileSearchResult {
relative_path: relative.into_owned(),
matches: search.matches,
match_count: search.match_count,
limit_reached: search.limit_reached,
});
WalkState::Continue
}
}
struct StreamingGrepVisitorBuilder<'a> {
root: &'a Path,
matcher: &'a grep_regex::RegexMatcher,
glob_set: Option<&'a GlobSet>,
type_filter: Option<&'a TypeFilter>,
params: SearchParams,
shared_results: Arc<Mutex<Vec<Vec<FileSearchResult>>>>,
error: Arc<Mutex<Option<String>>>,
ct: &'a task::CancelToken,
}
impl<'a> ParallelVisitorBuilder<'a> for StreamingGrepVisitorBuilder<'a> {
fn build(&mut self) -> Box<dyn ParallelVisitor + 'a> {
Box::new(StreamingGrepVisitor {
root: self.root,
matcher: self.matcher,
glob_set: self.glob_set,
type_filter: self.type_filter,
params: self.params,
searcher: build_searcher_for_params(self.params),
results: Vec::new(),
shared_results: Arc::clone(&self.shared_results),
error: Arc::clone(&self.error),
ct: self.ct,
visited: 0,
})
}
}
fn run_streaming_grep(
search_path: &Path,
matcher: &grep_regex::RegexMatcher,
glob_set: Option<&GlobSet>,
type_filter: Option<&TypeFilter>,
params: SearchParams,
include_hidden: bool,
use_gitignore: bool,
skip_node_modules: bool,
ct: &task::CancelToken,
) -> Result<Vec<FileSearchResult>> {
let mut builder =
fs_cache::build_walker(search_path, include_hidden, use_gitignore, skip_node_modules, false);
let workers = fs_cache::grep_workers();
if workers > 0 {
builder.threads(workers);
}
let file_params = per_file_params(params);
let shared_results = Arc::new(Mutex::new(Vec::new()));
let error = Arc::new(Mutex::new(None));
let mut visitor_builder = StreamingGrepVisitorBuilder {
root: search_path,
matcher,
glob_set,
type_filter,
params: file_params,
shared_results: Arc::clone(&shared_results),
error: Arc::clone(&error),
ct,
};
ct.heartbeat()?;
builder.build_parallel().visit(&mut visitor_builder);
let walk_error = error.lock().expect("error lock poisoned").take();
if let Some(error) = walk_error {
return Err(Error::from_reason(error));
}
let mut results: Vec<FileSearchResult> = shared_results
.lock()
.expect("grep result collection lock poisoned")
.drain(..)
.flatten()
.collect();
results.sort_unstable_by(|a, b| a.relative_path.cmp(&b.relative_path));
Ok(results)
}
fn push_count_match(matches: &mut Vec<GrepMatch>, path: String, match_count: u64) {
matches.push(GrepMatch {
path,
line_number: 0,
line: String::new(),
context_before: None,
context_after: None,
truncated: None,
match_count: Some(crate::utils::clamp_u32(match_count)),
});
}
fn push_file_match(matches: &mut Vec<GrepMatch>, path: String) {
matches.push(GrepMatch {
path,
line_number: 0,
line: String::new(),
context_before: None,
context_after: None,
truncated: None,
match_count: None,
});
}
fn aggregate_parallel_results(
results: Vec<FileSearchResult>,
params: SearchParams,
) -> (Vec<GrepMatch>, u64, u32, u32, bool) {
let SearchParams { mode, max_count, offset, .. } = params;
let mut matches = Vec::new();
let mut total_matches = 0u64;
let mut files_with_matches = 0u32;
let files_searched = crate::utils::clamp_u32(results.len() as u64);
let mut skipped = 0u64;
let mut emitted = 0u64;
let mut limit_reached = false;
for result in results {
if result.match_count == 0 {
continue;
}
let file_match_start = total_matches;
let file_match_count = result.match_count;
files_with_matches = files_with_matches.saturating_add(1);
total_matches = total_matches.saturating_add(file_match_count);
match mode {
OutputMode::Content => {
let mut selected_matches = Vec::new();
for matched in result.matches {
if skipped < offset {
skipped += 1;
continue;
}
if let Some(max) = max_count
&& emitted >= max
{
limit_reached = true;
break;
}
selected_matches.push(matched);
emitted += 1;
}
if !selected_matches.is_empty() {
push_content_matches(&mut matches, result.relative_path, selected_matches);
}
if result.limit_reached && skipped >= offset {
limit_reached = true;
}
},
OutputMode::Count => {
let skipped_in_file = offset
.saturating_sub(file_match_start)
.min(file_match_count);
let available = file_match_count.saturating_sub(skipped_in_file);
if available == 0 {
continue;
}
if let Some(max) = max_count
&& emitted >= max
{
limit_reached = true;
continue;
}
let remaining = max_count.map_or(available, |max| max.saturating_sub(emitted));
if remaining == 0 {
limit_reached = true;
continue;
}
push_count_match(&mut matches, result.relative_path, result.match_count);
let selected = available.min(remaining);
emitted = emitted.saturating_add(selected);
if selected < available {
limit_reached = true;
}
},
OutputMode::FilesWithMatches => {
if skipped < offset {
skipped += 1;
continue;
}
if let Some(max) = max_count
&& emitted >= max
{
limit_reached = true;
continue;
}
push_file_match(&mut matches, result.relative_path);
emitted += 1;
},
}
}
if let Some(max) = max_count
&& emitted >= max
{
limit_reached = true;
}
if max_count == Some(0) {
limit_reached = files_with_matches > 0;
}
(matches, total_matches, files_with_matches, files_searched, limit_reached)
}
fn search_sync(content: &[u8], options: SearchOptions) -> SearchResult {
let ignore_case = options.ignore_case.unwrap_or(false);
let multiline = options.multiline.unwrap_or(false);
let mode = parse_output_mode(options.mode);
let matcher = match build_matcher(&options.pattern, ignore_case, multiline) {
Ok(matcher) => matcher,
Err(err) => return empty_search_result(Some(err.to_string())),
};
let (context_before, context_after) =
resolve_context(options.context, options.context_before, options.context_after);
let max_columns = options.max_columns;
let max_count = options.max_count.map(u64::from);
let offset = options.offset.unwrap_or(0) as u64;
let params =
SearchParams { context_before, context_after, max_columns, mode, max_count, offset };
let result = match run_search(&matcher, content, params) {
Ok(result) => result,
Err(err) => return empty_search_result(Some(err.to_string())),
};
SearchResult {
matches: result.matches.into_iter().map(to_public_match).collect(),
match_count: crate::utils::clamp_u32(result.match_count),
limit_reached: result.limit_reached,
error: None,
}
}
fn grep_sync(
options: GrepConfig,
on_match: Option<&ThreadsafeFunction<GrepMatch>>,
ct: task::CancelToken,
) -> Result<GrepResult> {
let search_path = resolve_search_path(&options.path)?;
let metadata = std::fs::metadata(&search_path)
.map_err(|err| Error::from_reason(format!("Path not found: {err}")))?;
let ignore_case = options.ignore_case.unwrap_or(false);
let multiline = options.multiline.unwrap_or(false);
let output_mode = parse_output_mode(options.mode);
let matcher = build_matcher(&options.pattern, ignore_case, multiline)?;
let (context_before, context_after) =
resolve_context(options.context, options.context_before, options.context_after);
let (context_before, context_after) = if output_mode == OutputMode::Content {
(context_before, context_after)
} else {
(0, 0)
};
let max_columns = options.max_columns;
let max_count = options.max_count.map(u64::from);
let offset = options.offset.unwrap_or(0) as u64;
let include_hidden = options.hidden.unwrap_or(true);
let use_gitignore = options.gitignore.unwrap_or(true);
let use_cache = options.cache.unwrap_or(false);
let glob_set = glob_util::try_compile_glob(options.glob.as_deref(), true)?;
let type_filter = resolve_type_filter(options.type_filter.as_deref());
let params = SearchParams {
context_before,
context_after,
max_columns,
mode: output_mode,
max_count,
offset,
};
if !metadata.is_file() && !metadata.is_dir() {
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 0,
limit_reached: None,
});
}
if metadata.is_file() {
if let Some(filter) = type_filter.as_ref()
&& !matches_type_filter(&search_path, filter)
{
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 0,
limit_reached: None,
});
}
let Ok(Some(bytes)) = read_file_bytes(&search_path) else {
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 0,
limit_reached: None,
});
};
if output_mode == OutputMode::FilesWithMatches && max_count.is_none() && offset == 0 {
let matched = matcher
.is_match(bytes.as_slice())
.map_err(|err| Error::from_reason(format!("Search failed: {err}")))?;
if !matched {
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 1,
limit_reached: None,
});
}
let path_string = search_path.to_string_lossy().into_owned();
return Ok(GrepResult {
matches: vec![GrepMatch {
path: path_string,
line_number: 0,
line: String::new(),
context_before: None,
context_after: None,
truncated: None,
match_count: None,
}],
total_matches: 1,
files_with_matches: 1,
files_searched: 1,
limit_reached: None,
});
}
let search = run_search(&matcher, bytes.as_slice(), params)
.map_err(|err| Error::from_reason(format!("Search failed: {err}")))?;
if search.match_count == 0 {
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 1,
limit_reached: None,
});
}
let path_string = search_path.to_string_lossy().into_owned();
let mut matches = Vec::new();
match output_mode {
OutputMode::Content => {
push_content_matches(&mut matches, path_string, search.matches);
},
OutputMode::Count => {
matches.push(GrepMatch {
path: path_string,
line_number: 0,
line: String::new(),
context_before: None,
context_after: None,
truncated: None,
match_count: Some(crate::utils::clamp_u32(search.match_count)),
});
},
OutputMode::FilesWithMatches => {
matches.push(GrepMatch {
path: path_string,
line_number: 0,
line: String::new(),
context_before: None,
context_after: None,
truncated: None,
match_count: None,
});
},
}
let limit_reached =
search.limit_reached || max_count.is_some_and(|max| search.collected >= max);
return Ok(GrepResult {
matches,
total_matches: crate::utils::clamp_u32(search.match_count),
files_with_matches: 1,
files_searched: 1,
limit_reached: if limit_reached { Some(true) } else { None },
});
}
let mentions_node_modules = options
.glob
.as_deref()
.is_some_and(|g| g.contains("node_modules"));
let scan_options = fs_cache::ScanOptions {
include_hidden,
use_gitignore,
skip_node_modules: !mentions_node_modules,
follow_links: false,
detail: fs_cache::ScanDetail::Minimal,
};
let entries = if use_cache {
let scan = fs_cache::get_or_scan(&search_path, scan_options, &ct)?;
let mut entries =
collect_files(&search_path, &scan.entries, glob_set.as_ref(), type_filter.as_ref());
if entries.is_empty() && scan.cache_age_ms >= fs_cache::empty_recheck_ms() {
let fresh = fs_cache::force_rescan(&search_path, scan_options, true, &ct)?;
entries = collect_files(&search_path, &fresh, glob_set.as_ref(), type_filter.as_ref());
}
Some(entries)
} else {
None
};
let results = if let Some(entries) = entries {
ct.heartbeat()?;
if entries.is_empty() {
return Ok(GrepResult {
matches: Vec::new(),
total_matches: 0,
files_with_matches: 0,
files_searched: 0,
limit_reached: None,
});
}
run_parallel_search(&entries, &matcher, params)
} else {
run_streaming_grep(
&search_path,
&matcher,
glob_set.as_ref(),
type_filter.as_ref(),
params,
include_hidden,
use_gitignore,
!mentions_node_modules,
&ct,
)?
};
let (matches, total_matches, files_with_matches, files_searched, limit_reached) =
aggregate_parallel_results(results, params);
if let Some(callback) = on_match {
for grep_match in &matches {
callback.call(Ok(grep_match.clone()), ThreadsafeFunctionCallMode::NonBlocking);
}
}
Ok(GrepResult {
matches,
total_matches: crate::utils::clamp_u32(total_matches),
files_with_matches,
files_searched,
limit_reached: if limit_reached { Some(true) } else { None },
})
}
#[napi]
pub fn search(content: Either<JsString, Uint8Array>, options: SearchOptions) -> SearchResult {
match &content {
Either::A(js_str) => {
let utf8 = match js_str.into_utf8() {
Ok(utf8) => utf8,
Err(err) => return empty_search_result(Some(err.to_string())),
};
search_sync(utf8.as_slice(), options)
},
Either::B(buf) => search_sync(buf.as_ref(), options),
}
}
#[napi]
pub fn has_match(
content: Either<JsString, Uint8Array>,
pattern: Either<JsString, Uint8Array>,
ignore_case: Option<bool>,
multiline: Option<bool>,
) -> Result<bool> {
let content_utf8;
let content_slice: &[u8] = match &content {
Either::A(js_str) => {
content_utf8 = js_str.into_utf8()?;
content_utf8.as_slice()
},
Either::B(buf) => buf.as_ref(),
};
let pattern_utf8;
let pattern_string;
let pattern_ref: &str = match &pattern {
Either::A(js_str) => {
pattern_utf8 = js_str.into_utf8()?;
pattern_utf8.as_str()?
},
Either::B(buf) => {
pattern_string = std::str::from_utf8(buf.as_ref())
.map_err(|err| Error::from_reason(format!("Invalid UTF-8 in pattern: {err}")))?
.to_owned();
&pattern_string
},
};
let matcher =
build_matcher(pattern_ref, ignore_case.unwrap_or(false), multiline.unwrap_or(false))?;
Ok(matcher.is_match(content_slice).unwrap_or(false))
}
#[napi]
pub fn grep(
options: GrepOptions<'_>,
#[napi(ts_arg_type = "((error: Error | null, match: GrepMatch) => void) | undefined | null")]
on_match: Option<ThreadsafeFunction<GrepMatch>>,
) -> task::Promise<GrepResult> {
let GrepOptions {
pattern,
path,
glob,
r#type,
ignore_case,
multiline,
hidden,
gitignore,
cache,
max_count,
offset,
context_before,
context_after,
context,
max_columns,
mode,
timeout_ms,
signal,
} = options;
let config = GrepConfig {
pattern,
path,
glob,
type_filter: r#type,
ignore_case,
multiline,
hidden,
gitignore,
cache,
max_count,
offset,
context_before,
context_after,
context,
max_columns,
mode,
};
let ct = task::CancelToken::new(timeout_ms, signal);
task::blocking("grep", ct, move |ct| grep_sync(config, on_match.as_ref(), ct))
}