use std::fmt::Write as FmtWrite;
use std::path::{Path, PathBuf};
use std::time::Instant;
use sha2::{Digest, Sha256};
use crate::config::DatalogConfig;
pub struct DatalogWriter {
base_dir: PathBuf,
configured_dir: Option<String>,
enabled: bool,
buf: String,
active: bool,
start: Option<Instant>,
llm_turn_start: Option<Instant>,
step: usize,
file_path: Option<PathBuf>,
filename_tag: Option<String>,
session_id: Option<String>,
}
impl DatalogWriter {
pub fn new(working_dir: &Path, config: &DatalogConfig) -> Self {
Self::new_inner(working_dir, config, None)
}
pub fn new_with_filename_tag(
working_dir: &Path,
config: &DatalogConfig,
filename_tag: &str,
) -> Self {
Self::new_inner(
working_dir,
config,
Some(sanitize_filename_tag(filename_tag)),
)
}
fn new_inner(working_dir: &Path, config: &DatalogConfig, filename_tag: Option<String>) -> Self {
Self {
base_dir: working_dir.to_path_buf(),
configured_dir: config.dir.clone(),
enabled: config.enabled,
buf: String::new(),
active: false,
start: None,
llm_turn_start: None,
step: 0,
file_path: None,
filename_tag: filename_tag.filter(|s| !s.is_empty()),
session_id: None,
}
}
pub fn set_session_id(&mut self, session_id: &str) {
self.session_id = if session_id.is_empty() {
None
} else {
Some(session_id.to_string())
};
}
pub fn set_working_dir(&mut self, dir: &Path) {
self.base_dir = dir.to_path_buf();
}
pub fn resolve_log_dir(base_dir: &Path, configured: Option<&str>) -> PathBuf {
let root = match configured {
None => Self::default_root(),
Some(s) if s.starts_with("~/") || s == "~" => {
let rest = s.strip_prefix("~/").unwrap_or("");
crate::tool::real_home_dir()
.unwrap_or_else(|| PathBuf::from("."))
.join(rest)
}
Some(s) => {
let p = PathBuf::from(s);
if p.is_absolute() {
p
} else {
base_dir.join(p)
}
}
};
root.join(project_slug(base_dir))
}
fn default_root() -> PathBuf {
crate::config::Config::config_dir().join("datalog")
}
pub fn clear(&mut self) {
if let Some(ref path) = self.file_path {
let _ = std::fs::remove_file(path);
}
self.buf.clear();
self.active = false;
self.start = None;
self.llm_turn_start = None;
self.step = 0;
self.file_path = None;
}
fn flush(&self) {
if let Some(ref path) = self.file_path {
let _ = std::fs::write(path, &self.buf);
}
}
pub fn begin_turn(&mut self, user_message: &str, model_name: &str, context_window: usize) {
if !self.enabled {
return;
}
self.buf.clear();
self.step = 0;
self.active = true;
self.start = Some(Instant::now());
let timestamp = format_timestamp();
let filename_stem = timestamp.replace(' ', "_").replace(':', "-");
let filename = match self.filename_tag.as_deref() {
Some(tag) => format!("{filename_stem}_{tag}.md"),
None => format!("{filename_stem}.md"),
};
let log_dir = Self::resolve_log_dir(&self.base_dir, self.configured_dir.as_deref());
let _ = std::fs::create_dir_all(&log_dir);
self.file_path = Some(log_dir.join(filename));
let build_id = option_env!("ATOMCODE_BUILD_ID").unwrap_or("dev");
let _ = writeln!(&mut self.buf, "# Turn {} [build:{}]", timestamp, build_id);
let _ = writeln!(
&mut self.buf,
"**env:** model={}, ctx_window={}, session={}, cwd={}",
model_name,
context_window,
self.session_id.as_deref().unwrap_or("-"),
self.base_dir.display()
);
let _ = writeln!(&mut self.buf);
let _ = writeln!(&mut self.buf, "## User");
let _ = writeln!(&mut self.buf, "```");
let _ = writeln!(&mut self.buf, "{}", user_message);
let _ = writeln!(&mut self.buf, "```");
let _ = writeln!(&mut self.buf);
let _ = writeln!(&mut self.buf, "## Agent");
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn log_llm_call(&mut self) {
if !self.active {
return;
}
if let Some(prev_start) = self.llm_turn_start {
let dur = prev_start.elapsed();
if dur.as_millis() >= 1000 {
let _ = writeln!(&mut self.buf, " _({:.1}s)_\n", dur.as_secs_f64());
}
}
self.step += 1;
let _ = writeln!(&mut self.buf, "### Turn {}", self.step);
self.llm_turn_start = Some(Instant::now());
self.flush();
}
pub fn log_context_stats(
&mut self,
system_tokens: usize,
sent_tokens: usize,
dropped_tokens: usize,
_working_set_tokens: usize,
total_messages: usize,
) {
if !self.active {
return;
}
let total = system_tokens + sent_tokens;
let _ = writeln!(
&mut self.buf,
" _[ctx: {}tok = sys:{}+sent:{}+dropped:{}, msgs:{}]_",
total, system_tokens, sent_tokens, dropped_tokens, total_messages
);
self.flush();
}
pub fn log_cache_hit(&mut self, prompt_tokens: usize, cached_tokens: usize) {
if !self.active {
return;
}
let pct = if prompt_tokens > 0 {
cached_tokens * 100 / prompt_tokens
} else {
0
};
let _ = writeln!(
&mut self.buf,
" _[cache: {}/{}tok = {}% hit]_",
cached_tokens, prompt_tokens, pct
);
self.flush();
}
pub fn log_token_usage(
&mut self,
prompt_tokens: usize,
completion_tokens: usize,
cached_tokens: usize,
) {
if !self.active {
return;
}
let cache_str = if cached_tokens > 0 {
format!(", cache={}tok", cached_tokens)
} else {
String::new()
};
let _ = writeln!(
&mut self.buf,
" _[tokens: prompt={}+completion={}{}]_",
prompt_tokens, completion_tokens, cache_str
);
self.flush();
}
pub fn log_llm_dump(
&mut self,
messages: &[crate::conversation::message::Message],
tool_count: usize,
model: &str,
context_window: usize,
) {
if !self.active {
return;
}
let jsonl_path = self.file_path.as_ref().map(|p| p.with_extension("jsonl"));
if let Some(ref path) = jsonl_path {
let msgs_json = serde_json::to_value(messages).unwrap_or(serde_json::json!([]));
let total_tokens: usize = messages.iter().map(|m| m.estimate_tokens()).sum();
let dump = serde_json::json!({
"step": self.step,
"session_id": self.session_id.as_deref().unwrap_or(""),
"model": model,
"context_window": context_window,
"message_count": messages.len(),
"estimated_tokens": total_tokens,
"tool_count": tool_count,
"messages": msgs_json,
});
if let Ok(json_line) = serde_json::to_string(&dump) {
use std::io::Write;
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(path)
{
let _ = writeln!(f, "{}", json_line);
}
}
let _ = writeln!(
&mut self.buf,
" _[request: {}msgs · {}tok · {}tools]_",
messages.len(),
total_tokens,
tool_count
);
self.flush();
}
}
pub fn log_tool_call(&mut self, name: &str, args: &str) {
if !self.active {
return;
}
let detail = format_tool_args(name, args);
let _ = writeln!(&mut self.buf, "- {} {}", capitalize(name), detail);
if serde_json::from_str::<serde_json::Value>(args).is_err() {
let _ = writeln!(
&mut self.buf,
" [RAW ARGS: {}]",
args.chars().take(200).collect::<String>()
);
}
self.flush();
}
pub fn log_tool_result(&mut self, output: &str, success: bool) {
if !self.active {
return;
}
let icon = if success { "+" } else { "x" };
let first_line = output.lines().next().unwrap_or("");
let summary = if first_line.len() > 100 {
format!("{}...", first_line.chars().take(97).collect::<String>())
} else {
first_line.to_string()
};
let total_lines = output.lines().count();
if total_lines > 1 {
let _ = writeln!(
&mut self.buf,
" {} {} ({} lines)",
icon, summary, total_lines
);
} else {
let _ = writeln!(&mut self.buf, " {} {}", icon, summary);
}
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn log_model_text(&mut self, text: &str) {
if !self.active {
return;
}
let trimmed = text.trim();
if trimmed.is_empty() {
return;
}
let display = if trimmed.chars().count() > 500 {
format!("{}...", trimmed.chars().take(497).collect::<String>())
} else {
trimmed.to_string()
};
let _ = writeln!(&mut self.buf, " > {}", display.replace('\n', "\n > "));
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn log_text(&mut self, text: &str) {
if !self.active {
return;
}
if text.trim().is_empty() {
return;
}
let _ = writeln!(&mut self.buf, "**Response:**");
let _ = writeln!(&mut self.buf, "{}", text.trim());
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn log_error(&mut self, error: &str) {
if !self.active {
return;
}
let _ = writeln!(&mut self.buf, "**Error:** {}", error);
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn log_warning(&mut self, warning: &str) {
if !self.active {
return;
}
let _ = writeln!(&mut self.buf, "**Warning:** {}", warning);
let _ = writeln!(&mut self.buf);
self.flush();
}
pub fn end_turn(&mut self, total_tokens: usize, tool_call_count: usize) {
if !self.active {
return;
}
self.active = false;
if let Some(prev_start) = self.llm_turn_start.take() {
let dur = prev_start.elapsed();
if dur.as_millis() >= 1000 {
let _ = writeln!(&mut self.buf, " _({:.1}s)_", dur.as_secs_f64());
}
}
let duration = self.start.map(|s| s.elapsed()).unwrap_or_default();
let _ = writeln!(&mut self.buf);
let _ = writeln!(&mut self.buf, "---");
let _ = writeln!(
&mut self.buf,
"**Stats:** {} turns, {} tool calls, {:.1}s, {} tokens",
self.step,
tool_call_count,
duration.as_secs_f64(),
total_tokens,
);
self.flush();
}
}
fn capitalize(name: &str) -> String {
name.split('_')
.map(|w| {
let mut c = w.chars();
match c.next() {
None => String::new(),
Some(ch) => ch.to_uppercase().to_string() + c.as_str(),
}
})
.collect::<Vec<_>>()
.join(" ")
}
fn format_tool_args(tool_name: &str, args_json: &str) -> String {
let args: serde_json::Value = match serde_json::from_str(args_json) {
Ok(v) => v,
Err(_) => return String::new(),
};
match tool_name {
"read_file" => {
let path = args.get("file_path").and_then(|v| v.as_str()).unwrap_or("");
let short = short_path(path);
let mut s = short;
if let Some(offset) = args.get("offset").and_then(|v| v.as_u64()) {
if let Some(limit) = args.get("limit").and_then(|v| v.as_u64()) {
s.push_str(&format!(" L{}-{}", offset, offset + limit));
}
}
s
}
"create_file" => {
let path = args.get("file_path").and_then(|v| v.as_str()).unwrap_or("");
let size = args
.get("content")
.and_then(|v| v.as_str())
.map(|s| s.len())
.unwrap_or(0);
format!("{} ({} bytes)", short_path(path), size)
}
"edit_file" => {
let path = args.get("file_path").and_then(|v| v.as_str()).unwrap_or("");
short_path(path)
}
"bash" => {
let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
if cmd.chars().count() > 80 {
format!("`{}...`", cmd.chars().take(77).collect::<String>())
} else {
format!("`{}`", cmd)
}
}
"list_directory" => {
let path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
short_path(path)
}
"grep" => {
let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
let path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
format!("\"{}\" in {}", pattern, short_path(path))
}
"glob" => {
let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
format!("\"{}\"", pattern)
}
_ => {
if let Some(obj) = args.as_object() {
obj.iter()
.map(|(k, v)| {
let val = match v {
serde_json::Value::String(s) if s.chars().count() > 30 => {
format!("{}...", s.chars().take(27).collect::<String>())
}
serde_json::Value::String(s) => s.clone(),
other => other.to_string(),
};
format!("{}={}", k, val)
})
.collect::<Vec<_>>()
.join(" ")
} else {
String::new()
}
}
}
}
fn short_path(path: &str) -> String {
let parts: Vec<&str> = path.rsplitn(3, '/').collect();
match parts.len() {
0 | 1 => path.to_string(),
2 => format!("{}/{}", parts[1], parts[0]),
_ => format!(".../{}/{}", parts[1], parts[0]),
}
}
fn format_timestamp() -> String {
chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string()
}
fn sanitize_filename_tag(tag: &str) -> String {
tag.chars()
.filter_map(|c| {
if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
Some(c)
} else if c.is_whitespace() {
Some('-')
} else {
None
}
})
.collect()
}
fn project_slug(working_dir: &Path) -> String {
let canonical = working_dir
.canonicalize()
.unwrap_or_else(|_| working_dir.to_path_buf());
let basename = canonical
.file_name()
.map(|s| s.to_string_lossy().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "root".to_string());
let safe: String = basename
.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
c
} else {
'_'
}
})
.collect();
let mut hasher = Sha256::new();
hasher.update(canonical.to_string_lossy().as_bytes());
let digest = hasher.finalize();
format!(
"{}-{:02x}{:02x}{:02x}{:02x}",
safe, digest[0], digest[1], digest[2], digest[3]
)
}
#[cfg(test)]
mod tests {
use super::*;
fn make_log(dir: &Path) -> DatalogWriter {
let cfg = DatalogConfig {
enabled: true,
dir: Some(dir.to_string_lossy().to_string()),
};
let mut log = DatalogWriter::new(dir, &cfg);
log.begin_turn("test", "test-model", 16000);
log.log_llm_call();
log
}
#[test]
fn resolve_log_dir_default_lands_under_home() {
let base = PathBuf::from("/tmp/work");
let p = DatalogWriter::resolve_log_dir(&base, None);
let expected_root = crate::config::Config::config_dir().join("datalog");
assert!(
p.starts_with(&expected_root),
"{:?} should start with {:?}",
p,
expected_root
);
let slug = p.file_name().unwrap().to_string_lossy().to_string();
assert!(slug.starts_with("work-"), "slug {:?}", slug);
let hex_tail = slug.rsplit('-').next().unwrap();
assert_eq!(hex_tail.len(), 8, "hash tail should be 8 hex chars");
assert!(hex_tail.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn resolve_log_dir_absolute_uses_configured_root_with_slug() {
let base = PathBuf::from("/tmp/work");
let p = DatalogWriter::resolve_log_dir(&base, Some("/var/logs/atomcode"));
assert!(p.starts_with("/var/logs/atomcode"));
assert!(p
.file_name()
.unwrap()
.to_string_lossy()
.starts_with("work-"));
}
#[test]
fn resolve_log_dir_relative_joins_working_dir_then_slug() {
let base = PathBuf::from("/tmp/work");
let p = DatalogWriter::resolve_log_dir(&base, Some("logs/ac"));
assert!(p.starts_with("/tmp/work/logs/ac"));
assert!(p
.file_name()
.unwrap()
.to_string_lossy()
.starts_with("work-"));
}
#[test]
fn resolve_log_dir_tilde_expands_home() {
let base = PathBuf::from("/tmp/work");
let p = DatalogWriter::resolve_log_dir(&base, Some("~/.atomcode/logs"));
let expected_root = crate::tool::real_home_dir().unwrap().join(".atomcode/logs");
assert!(p.starts_with(&expected_root));
assert!(p
.file_name()
.unwrap()
.to_string_lossy()
.starts_with("work-"));
}
#[test]
fn project_slug_is_stable_for_same_path() {
let p = PathBuf::from("/tmp/repeatable-path");
let s1 = project_slug(&p);
let s2 = project_slug(&p);
assert_eq!(s1, s2, "slug must be deterministic");
}
#[test]
fn project_slug_disambiguates_same_basename() {
let a = PathBuf::from("/tmp/dup-test-a/foo");
let b = PathBuf::from("/tmp/dup-test-b/foo");
let sa = project_slug(&a);
let sb = project_slug(&b);
assert!(sa.starts_with("foo-"));
assert!(sb.starts_with("foo-"));
assert_ne!(sa, sb, "different parents must yield different slugs");
}
#[test]
fn format_timestamp_produces_correct_format() {
let ts = format_timestamp();
let parts: Vec<&str> = ts.split(' ').collect();
assert_eq!(parts.len(), 2, "Timestamp should have date and time parts");
let date_parts: Vec<&str> = parts[0].split('-').collect();
assert_eq!(date_parts.len(), 3, "Date should have 3 parts");
assert_eq!(date_parts[0].len(), 4, "Year should be 4 digits");
assert_eq!(date_parts[1].len(), 2, "Month should be 2 digits");
assert_eq!(date_parts[2].len(), 2, "Day should be 2 digits");
let time_parts: Vec<&str> = parts[1].split(':').collect();
assert_eq!(time_parts.len(), 3, "Time should have 3 parts");
assert_eq!(time_parts[0].len(), 2, "Hour should be 2 digits");
assert_eq!(time_parts[1].len(), 2, "Minute should be 2 digits");
assert_eq!(time_parts[2].len(), 2, "Second should be 2 digits");
let hour: u32 = time_parts[0].parse().expect("Hour should be numeric");
let minute: u32 = time_parts[1].parse().expect("Minute should be numeric");
let second: u32 = time_parts[2].parse().expect("Second should be numeric");
assert!(hour < 24, "Hour should be 0-23");
assert!(minute < 60, "Minute should be 0-59");
assert!(second < 60, "Second should be 0-59");
}
#[test]
fn disabled_writer_never_creates_files() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_disabled");
let _ = std::fs::remove_dir_all(&dir);
let cfg = DatalogConfig {
enabled: false,
dir: Some(dir.to_string_lossy().to_string()),
};
let mut log = DatalogWriter::new(&dir, &cfg);
log.begin_turn("hello", "m", 1000);
log.log_llm_call();
log.log_text("response");
log.end_turn(0, 0);
assert!(log.file_path.is_none());
assert!(
!dir.exists(),
"disabled writer must not create the root dir"
);
}
#[test]
fn filename_tag_is_added_only_for_tagged_writer() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_filename_tag");
let _ = std::fs::remove_dir_all(&dir);
let cfg = DatalogConfig {
enabled: true,
dir: Some(dir.to_string_lossy().to_string()),
};
let mut default_log = DatalogWriter::new(&dir, &cfg);
default_log.begin_turn("hello", "m", 1000);
let default_name = default_log
.file_path
.as_ref()
.unwrap()
.file_name()
.unwrap()
.to_string_lossy()
.to_string();
assert!(!default_name.contains("runtime-2"));
let mut tagged_log = DatalogWriter::new_with_filename_tag(&dir, &cfg, "runtime-2");
tagged_log.begin_turn("hello", "m", 1000);
let tagged_name = tagged_log
.file_path
.as_ref()
.unwrap()
.file_name()
.unwrap()
.to_string_lossy()
.to_string();
assert!(tagged_name.ends_with("_runtime-2.md"));
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_log_model_text_chinese_truncation() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_cn");
let _ = std::fs::create_dir_all(&dir);
let mut log = make_log(&dir);
let long_chinese = "这是一段很长的中文文本用于测试截断逻辑".repeat(30);
assert!(long_chinese.chars().count() > 500);
log.log_model_text(&long_chinese);
let content = std::fs::read_to_string(log.file_path.as_ref().unwrap()).unwrap();
assert!(content.contains("..."));
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_log_model_text_short_no_truncation() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_short");
let _ = std::fs::create_dir_all(&dir);
let mut log = make_log(&dir);
log.log_model_text("短文本");
let content = std::fs::read_to_string(log.file_path.as_ref().unwrap()).unwrap();
assert!(content.contains("短文本"));
assert!(!content.contains("..."));
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_log_model_text_mixed_unicode() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_mixed");
let _ = std::fs::create_dir_all(&dir);
let mut log = make_log(&dir);
let mixed = format!("Hello 你好 {} end", "🎉测试".repeat(200));
assert!(mixed.chars().count() > 500);
log.log_model_text(&mixed);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_end_turn_stats_format() {
let dir = std::env::temp_dir().join("atomcode_test_datalog_stats");
let _ = std::fs::create_dir_all(&dir);
let mut log = make_log(&dir);
log.log_tool_call("bash", r#"{"command":"ls"}"#);
log.log_tool_result("file.txt", true);
log.end_turn(1000, 3);
let content = std::fs::read_to_string(log.file_path.as_ref().unwrap()).unwrap();
assert!(content.contains("1 turns"));
assert!(content.contains("3 tool calls"));
assert!(content.contains("1000 tokens"));
let _ = std::fs::remove_dir_all(&dir);
}
}