diff --git a/README.md b/README.md index 6faeb7d..34a605f 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ npx skills add jackwener/wx-cli -g - **零依赖安装** — 单一 Rust 二进制,一行命令装完 - **毫秒级响应** — 后台 daemon 持久缓存解密数据库,mtime 不变则复用 -- **AI 友好** — 默认 YAML 输出,更省 token & 易读;`--json` 可切换为 JSON(方便 `jq` 处理等) +- **AI 友好** — `history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 默认返回 `{..., meta}` wrapper,agent 能直接消费 freshness / source 信息 - **完全本地** — 数据不出本机,实时解密,无需全量预解密 --- @@ -168,6 +168,15 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 +`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`: + +- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed` +- `unknown_shards`: 磁盘上存在、但 daemon 当前没有 key 的 `message_N.db` 分片;非空时应先跑 `wx init --force` +- `chat_latest_timestamp` / `chat_latest_db`: 当前命中数据里最新一条消息的时间和分片来源 +- `session_last_timestamp`: `session.db` 里 WeChat 自己记录的最新时间;如果明显领先于 `chat_latest_timestamp`,说明结果可能漏了消息 + +默认情况下,人类用户会在 stderr 看到可执行的 warning;agent / 脚本可直接读 stdout 里的 `meta`。传 `--with-meta` 会额外返回 `per_shard_latest` / `cache_mode_per_shard`,传隐藏 flag `--debug-source` 还会带真实 `shard_paths`。 + 引用消息会在 `history` / `search` / `new-messages` 输出中显示当前回复和被引用原文: ```text @@ -278,12 +287,14 @@ wx export "AI群" --since 2026-01-01 --format json ### 输出格式 -默认输出 YAML,更省 token & 易读;`--json` 可切换为 JSON(方便 `jq` 处理等): +默认输出 YAML;`--json` 可切换为 JSON。对 agent 而言,`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 的 stdout 现在是 wrapper,而不是裸数组: ```bash wx sessions --json -wx search "关键词" --json | jq '.[0].content' +wx search "关键词" --json | jq '.results[0].content' wx new-messages --json +wx history "张三" --json | jq '.meta' +wx history "张三" --json --with-meta | jq '.meta.cache_mode_per_shard' ``` ### Daemon 管理 diff --git a/SKILL.md b/SKILL.md index 6b79e0d..be3dd63 100644 --- 
a/SKILL.md +++ b/SKILL.md @@ -159,6 +159,29 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 +`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper: + +```json +{ + "messages": [...], + "meta": { + "status": "ok", + "unknown_shards": [], + "chat_latest_timestamp": 1715750400, + "chat_latest_db": "message/message_2.db", + "session_last_timestamp": 1715760000 + } +} +``` + +其中: + +- `status = possibly_stale_unknown_shards`:磁盘上出现 daemon 不认识的新 `message_N.db`,先跑 `wx init --force` +- `status = possibly_stale`:`session.db` 记录的最新时间明显领先于本次查到的最新消息,结果可能漏消息 +- `status = windowed`:这次查询本来就是窗口化/过滤后的局部视图,不应把它当作"全量最新状态" +- `--with-meta`:额外返回 `per_shard_latest` / `cache_mode_per_shard` +- `--debug-source`:在 `--with-meta` 基础上再暴露真实 `shard_paths` + 引用消息(appmsg `type=57`)在 `history` / `search` / `new-messages` 输出里会展开为两行:第一行是当前回复,第二行以 `↳` 开头显示被引用原文,例如: ```text @@ -315,8 +338,10 @@ wx daemon logs --follow ```bash wx sessions --json wx new-messages --json -wx search "关键词" --json -wx history "张三" --json -n 50 +wx search "关键词" --json | jq '.results[0]' +wx history "张三" --json -n 50 | jq '.messages[0]' +wx history "张三" --json | jq '.meta' +wx history "张三" --json --with-meta | jq '.meta.cache_mode_per_shard' ``` CHAT 参数支持昵称、备注名、微信 ID,模糊匹配。不确定准确名称时,先用 `wx contacts --query` 搜索。 diff --git a/src/cli/attachments.rs b/src/cli/attachments.rs index 662c256..87e4434 100644 --- a/src/cli/attachments.rs +++ b/src/cli/attachments.rs @@ -1,9 +1,9 @@ use anyhow::Result; -use crate::ipc::Request; use super::history::{parse_time, parse_time_end}; -use super::output::{print_value, resolve}; +use super::output::{emit_warnings, print_response, OutputOpts}; use super::transport; +use crate::ipc::Request; /// `wx attachments` — 列出指定会话的附件消息(默认 image,可多选)。 /// @@ -16,10 +16,11 @@ pub fn cmd_attachments( offset: usize, since: Option, until: Option, - json: 
bool, + opts: OutputOpts, ) -> Result<()> { let since_ts = since.as_deref().map(parse_time).transpose()?; let until_ts = until.as_deref().map(parse_time_end).transpose()?; + let (with_meta, debug_source) = opts.request_flags(); // CLI 收上来的 Vec 为空时按默认(image)走,让 daemon 决定 fallback。 let kinds_param = if kinds.is_empty() { None } else { Some(kinds) }; @@ -31,12 +32,10 @@ pub fn cmd_attachments( offset, since: since_ts, until: until_ts, + with_meta, + debug_source, }; let resp = transport::send(req)?; - let data = resp - .data - .get("attachments") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&data, &resolve(json)) + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/cli/export.rs b/src/cli/export.rs index 85a6989..dd57385 100644 --- a/src/cli/export.rs +++ b/src/cli/export.rs @@ -1,7 +1,8 @@ -use anyhow::Result; -use crate::ipc::Request; -use super::transport; use super::history::{parse_time, parse_time_end}; +use super::output::{emit_warnings, warning_block_markdown, warning_block_text, OutputOpts}; +use super::transport; +use crate::ipc::Request; +use anyhow::Result; pub fn cmd_export( chat: String, @@ -10,9 +11,11 @@ pub fn cmd_export( limit: usize, format: String, output: Option, + opts: OutputOpts, ) -> Result<()> { let since_ts = since.as_deref().map(parse_time).transpose()?; let until_ts = until.as_deref().map(parse_time_end).transpose()?; + let (with_meta, debug_source) = opts.request_flags(); let req = Request::History { chat, @@ -21,24 +24,42 @@ pub fn cmd_export( since: since_ts, until: until_ts, msg_type: None, + with_meta, + debug_source, }; let resp = transport::send(req)?; - let messages = resp.data["messages"].as_array().cloned().unwrap_or_default(); + emit_warnings(&resp.data); + let messages = resp.data["messages"] + .as_array() + .cloned() + .unwrap_or_default(); let chat_name = resp.data["chat"].as_str().unwrap_or("").to_string(); let is_group = 
resp.data["is_group"].as_bool().unwrap_or(false); let count = messages.len(); let text = match format.as_str() { "json" => serde_json::to_string_pretty(&resp.data)?, + "yaml" => serde_yaml::to_string(&resp.data)?, "txt" => { let group_str = if is_group { "[群]" } else { "" }; - let mut lines = vec![format!("=== {}{} ({} 条) ===\n", chat_name, group_str, count)]; + let mut lines = vec![format!( + "=== {}{} ({} 条) ===\n", + chat_name, group_str, count + )]; + if let Some(warn) = warning_block_text(&resp.data) { + lines.push(warn); + lines.push(String::new()); + } for m in &messages { let time = m["time"].as_str().unwrap_or(""); let sender = m["sender"].as_str().unwrap_or(""); let content = m["content"].as_str().unwrap_or(""); - let sender_str = if !sender.is_empty() { format!("{}: ", sender) } else { String::new() }; + let sender_str = if !sender.is_empty() { + format!("{}: ", sender) + } else { + String::new() + }; lines.push(format!("[{}] {}{}", time, sender_str, content)); } lines.join("\n") @@ -50,11 +71,18 @@ pub fn cmd_export( format!("# {}{}", chat_name, group_str), format!("\n> 导出 {} 条消息\n", count), ]; + if let Some(warn) = warning_block_markdown(&resp.data) { + lines.push(warn); + } for m in &messages { let time = m["time"].as_str().unwrap_or(""); let sender = m["sender"].as_str().unwrap_or(""); let content = m["content"].as_str().unwrap_or("").replace('\n', "\n> "); - let sender_md = if !sender.is_empty() { format!("**{}**: ", sender) } else { String::new() }; + let sender_md = if !sender.is_empty() { + format!("**{}**: ", sender) + } else { + String::new() + }; lines.push(format!("### {}\n\n{}{}\n", time, sender_md, content)); } lines.join("\n") diff --git a/src/cli/history.rs b/src/cli/history.rs index b5fabb7..80c2df9 100644 --- a/src/cli/history.rs +++ b/src/cli/history.rs @@ -1,7 +1,7 @@ -use anyhow::Result; -use crate::ipc::Request; +use super::output::{emit_warnings, print_response, OutputOpts}; use super::transport; -use super::output::{resolve, 
print_value}; +use crate::ipc::Request; +use anyhow::Result; pub fn cmd_history( chat: String, @@ -10,37 +10,51 @@ pub fn cmd_history( since: Option, until: Option, msg_type: Option, - json: bool, + opts: OutputOpts, ) -> Result<()> { let since_ts = since.as_deref().map(parse_time).transpose()?; let until_ts = until.as_deref().map(parse_time_end).transpose()?; let type_val = msg_type.as_deref().and_then(parse_msg_type); + let (with_meta, debug_source) = opts.request_flags(); - let req = Request::History { chat, limit, offset, since: since_ts, until: until_ts, msg_type: type_val }; + let req = Request::History { + chat, + limit, + offset, + since: since_ts, + until: until_ts, + msg_type: type_val, + with_meta, + debug_source, + }; let resp = transport::send(req)?; - - let msgs = resp.data.get("messages") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&msgs, &resolve(json)) + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } pub fn parse_time(s: &str) -> Result { use chrono::{Local, TimeZone}; for fmt in &["%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"] { if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, fmt) { - return Local.from_local_datetime(&dt).single() + return Local + .from_local_datetime(&dt) + .single() .map(|d| d.timestamp()) .ok_or_else(|| anyhow::anyhow!("本地时间歧义: {}", s)); } } if let Ok(d) = chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d") { let dt = d.and_hms_opt(0, 0, 0).unwrap(); - return Local.from_local_datetime(&dt).single() + return Local + .from_local_datetime(&dt) + .single() .map(|d| d.timestamp()) .ok_or_else(|| anyhow::anyhow!("本地时间歧义: {}", s)); } - anyhow::bail!("无法解析时间 '{}',支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS", s) + anyhow::bail!( + "无法解析时间 '{}',支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS", + s + ) } pub fn parse_time_end(s: &str) -> Result { @@ -48,7 +62,9 @@ pub fn parse_time_end(s: &str) -> Result { if s.len() == 10 { if let Ok(d) = chrono::NaiveDate::parse_from_str(s, 
"%Y-%m-%d") { let dt = d.and_hms_opt(23, 59, 59).unwrap(); - return Local.from_local_datetime(&dt).single() + return Local + .from_local_datetime(&dt) + .single() .map(|d| d.timestamp()) .ok_or_else(|| anyhow::anyhow!("本地时间歧义: {}", s)); } @@ -59,15 +75,15 @@ pub fn parse_time_end(s: &str) -> Result { /// 将消息类型字符串转为 local_type 整数,未知类型返回 None pub fn parse_msg_type(s: &str) -> Option { match s { - "text" => Some(1), - "image" => Some(3), - "voice" => Some(34), - "video" => Some(43), - "sticker" => Some(47), + "text" => Some(1), + "image" => Some(3), + "voice" => Some(34), + "video" => Some(43), + "sticker" => Some(47), "location" => Some(48), "link" | "file" => Some(49), - "call" => Some(50), - "system" => Some(10000), - _ => None, + "call" => Some(50), + "system" => Some(10000), + _ => None, } } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 2ec2476..b4d6cf4 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,24 +1,25 @@ -mod init; pub mod attachments; pub mod biz_articles; -pub mod extract; -pub mod sessions; -pub mod history; -pub mod search; pub mod contacts; -pub mod export; pub mod daemon_cmd; -pub mod transport; -pub mod output; -pub mod unread; +pub mod export; +pub mod extract; +pub mod favorites; +pub mod history; +mod init; pub mod members; pub mod new_messages; -pub mod stats; -pub mod favorites; -pub mod sns_notifications; +pub mod output; +pub mod search; +pub mod sessions; pub mod sns_feed; +pub mod sns_notifications; pub mod sns_search; +pub mod stats; +pub mod transport; +pub mod unread; +use self::output::OutputOpts; use anyhow::Result; use clap::{Parser, Subcommand}; @@ -26,6 +27,12 @@ use clap::{Parser, Subcommand}; #[derive(Parser)] #[command(name = "wx", version = env!("CARGO_PKG_VERSION"), about = "wx — 微信本地数据 CLI")] pub struct Cli { + /// 返回更重的 freshness/source 元数据(如 per-shard latest、cache modes) + #[arg(long, global = true)] + with_meta: bool, + /// 在 meta 里暴露真实 shard 路径(调试用) + #[arg(long, global = true, hide = true)] + debug_source: 
bool, #[command(subcommand)] command: Commands, } @@ -335,46 +342,184 @@ pub fn run() { } fn dispatch(cli: Cli) -> Result<()> { + let base_with_meta = cli.with_meta; + let base_debug_source = cli.debug_source; match cli.command { Commands::Init { force } => init::cmd_init(force), - Commands::Sessions { limit, json } => sessions::cmd_sessions(limit, json), - Commands::History { chat, limit, offset, since, until, msg_type, json } => { - history::cmd_history(chat, limit, offset, since, until, msg_type, json) - } - Commands::Search { keyword, chats, limit, since, until, msg_type, json } => { - search::cmd_search(keyword, chats, limit, since, until, msg_type, json) - } + Commands::Sessions { limit, json } => sessions::cmd_sessions( + limit, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), + Commands::History { + chat, + limit, + offset, + since, + until, + msg_type, + json, + } => history::cmd_history( + chat, + limit, + offset, + since, + until, + msg_type, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), + Commands::Search { + keyword, + chats, + limit, + since, + until, + msg_type, + json, + } => search::cmd_search( + keyword, + chats, + limit, + since, + until, + msg_type, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), Commands::Contacts { query, limit, json } => contacts::cmd_contacts(query, limit, json), - Commands::Export { chat, since, until, limit, format, output } => { - export::cmd_export(chat, since, until, limit, format, output) + Commands::Export { + chat, + since, + until, + limit, + format, + output, + } => { + let export_json = format == "json"; + export::cmd_export( + chat, + since, + until, + limit, + format, + output, + OutputOpts { + json: export_json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ) } - Commands::Unread { limit, filter, json } => unread::cmd_unread(limit, filter, 
json), + Commands::Unread { + limit, + filter, + json, + } => unread::cmd_unread( + limit, + filter, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), Commands::Members { chat, json } => members::cmd_members(chat, json), - Commands::NewMessages { limit, json } => new_messages::cmd_new_messages(limit, json), - Commands::Stats { chat, since, until, json } => { - stats::cmd_stats(chat, since, until, json) - } - Commands::Favorites { limit, fav_type, query, json } => { - favorites::cmd_favorites(limit, fav_type, query, json) - } - Commands::SnsNotifications { limit, since, until, include_read, json } => { - sns_notifications::cmd_sns_notifications(limit, since, until, include_read, json) - } - Commands::SnsFeed { limit, since, until, user, json } => { - sns_feed::cmd_sns_feed(limit, since, until, user, json) - } - Commands::SnsSearch { keyword, limit, since, until, user, json } => { - sns_search::cmd_sns_search(keyword, limit, since, until, user, json) - } - Commands::BizArticles { limit, account, since, until, unread, json } => { - biz_articles::cmd_biz_articles(limit, account, since, until, unread, json) - } - Commands::Attachments { chat, kinds, limit, offset, since, until, json } => { - attachments::cmd_attachments(chat, kinds, limit, offset, since, until, json) - } - Commands::Extract { attachment_id, output, overwrite, json } => { - extract::cmd_extract(attachment_id, output, overwrite, json) - } + Commands::NewMessages { limit, json } => new_messages::cmd_new_messages( + limit, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), + Commands::Stats { + chat, + since, + until, + json, + } => stats::cmd_stats( + chat, + since, + until, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), + Commands::Favorites { + limit, + fav_type, + query, + json, + } => favorites::cmd_favorites(limit, fav_type, query, json), + 
Commands::SnsNotifications { + limit, + since, + until, + include_read, + json, + } => sns_notifications::cmd_sns_notifications(limit, since, until, include_read, json), + Commands::SnsFeed { + limit, + since, + until, + user, + json, + } => sns_feed::cmd_sns_feed(limit, since, until, user, json), + Commands::SnsSearch { + keyword, + limit, + since, + until, + user, + json, + } => sns_search::cmd_sns_search(keyword, limit, since, until, user, json), + Commands::BizArticles { + limit, + account, + since, + until, + unread, + json, + } => biz_articles::cmd_biz_articles(limit, account, since, until, unread, json), + Commands::Attachments { + chat, + kinds, + limit, + offset, + since, + until, + json, + } => attachments::cmd_attachments( + chat, + kinds, + limit, + offset, + since, + until, + OutputOpts { + json, + with_meta: base_with_meta, + debug_source: base_debug_source, + }, + ), + Commands::Extract { + attachment_id, + output, + overwrite, + json, + } => extract::cmd_extract(attachment_id, output, overwrite, json), Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/cli/new_messages.rs b/src/cli/new_messages.rs index b847210..5d73e4d 100644 --- a/src/cli/new_messages.rs +++ b/src/cli/new_messages.rs @@ -1,8 +1,8 @@ +use super::output::{emit_warnings, print_response, OutputOpts}; +use super::transport; +use crate::ipc::Request; use anyhow::Result; use std::collections::HashMap; -use crate::ipc::Request; -use super::transport; -use super::output::{resolve, print_value}; fn state_file() -> std::path::PathBuf { dirs::home_dir() @@ -18,7 +18,8 @@ fn load_state() -> Option> { let data = std::fs::read_to_string(state_file()).ok()?; let v: serde_json::Value = serde_json::from_str(&data).ok()?; // 旧格式(只有 timestamp 字段)没有 sessions key → 返回 None 触发首次运行逻辑 - let map: HashMap = v.get("sessions")? + let map: HashMap = v + .get("sessions")? .as_object()? 
.iter() .filter_map(|(k, v)| v.as_i64().map(|ts| (k.clone(), ts))) @@ -33,17 +34,27 @@ fn save_state(new_state: &HashMap) -> Result<()> { if let Some(parent) = path.parent() { std::fs::create_dir_all(parent)?; } - std::fs::write(&path, serde_json::to_string(&serde_json::json!({ "sessions": new_state }))?)?; + std::fs::write( + &path, + serde_json::to_string(&serde_json::json!({ "sessions": new_state }))?, + )?; Ok(()) } -pub fn cmd_new_messages(limit: usize, json: bool) -> Result<()> { +pub fn cmd_new_messages(limit: usize, opts: OutputOpts) -> Result<()> { let state = load_state(); - let resp = transport::send(Request::NewMessages { state, limit })?; + let (with_meta, debug_source) = opts.request_flags(); + let resp = transport::send(Request::NewMessages { + state, + limit, + with_meta, + debug_source, + })?; // 保存 daemon 返回的 new_state if let Some(obj) = resp.data.get("new_state").and_then(|v| v.as_object()) { - let map: HashMap = obj.iter() + let map: HashMap = obj + .iter() .filter_map(|(k, v)| v.as_i64().map(|ts| (k.clone(), ts))) .collect(); if !map.is_empty() { @@ -51,8 +62,6 @@ pub fn cmd_new_messages(limit: usize, json: bool) -> Result<()> { } } - let messages = resp.data.get("messages") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&messages, &resolve(json)) + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/cli/output.rs b/src/cli/output.rs index 33ef78a..c4fe5f3 100644 --- a/src/cli/output.rs +++ b/src/cli/output.rs @@ -1,12 +1,31 @@ +use chrono::{Local, TimeZone}; + /// 输出格式 pub enum Fmt { Yaml, Json, } +#[derive(Clone, Copy, Debug)] +pub struct OutputOpts { + pub json: bool, + pub with_meta: bool, + pub debug_source: bool, +} + +impl OutputOpts { + pub fn request_flags(self) -> (bool, bool) { + (self.with_meta || self.debug_source, self.debug_source) + } +} + /// 默认 YAML,--json 时输出 JSON pub fn resolve(json: bool) -> Fmt { - if json { Fmt::Json } else { Fmt::Yaml } + if json { + Fmt::Json + } 
else { + Fmt::Yaml + } } pub fn print_value(value: &serde_json::Value, fmt: &Fmt) -> anyhow::Result<()> { @@ -16,3 +35,95 @@ pub fn print_value(value: &serde_json::Value, fmt: &Fmt) -> anyhow::Result<()> { } Ok(()) } + +pub fn print_response(data: &serde_json::Value, opts: &OutputOpts) -> anyhow::Result<()> { + print_value(data, &resolve(opts.json)) +} + +pub fn emit_warnings(data: &serde_json::Value) { + for line in warning_lines(data) { + eprintln!("[wx] 警告:{}", line); + } +} + +pub fn warning_lines(data: &serde_json::Value) -> Vec { + let mut lines = Vec::new(); + let meta = match data.get("meta") { + Some(v) if v.is_object() => v, + _ => return lines, + }; + + let unknown_shards: Vec = meta + .get("unknown_shards") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default(); + + if !unknown_shards.is_empty() { + lines.push(format!( + "磁盘上发现 daemon 不认识的分片 {},结果可能不完整;运行 `wx init --force` 重新提取密钥。", + unknown_shards.join(", ") + )); + } + + let status = meta.get("status").and_then(|v| v.as_str()).unwrap_or(""); + if status == "possibly_stale" || status == "possibly_stale_unknown_shards" { + let session_ts = meta.get("session_last_timestamp").and_then(|v| v.as_i64()); + let chat_ts = meta.get("chat_latest_timestamp").and_then(|v| v.as_i64()); + if let (Some(session_ts), Some(chat_ts)) = (session_ts, chat_ts) { + let subject = data + .get("chat") + .and_then(|v| v.as_str()) + .or_else(|| data.get("username").and_then(|v| v.as_str())) + .unwrap_or("当前查询"); + lines.push(format!( + "session.db 显示 '{}' 最新到 {},但本次扫描只到 {},结果可能过期或不完整。", + subject, + fmt_meta_ts(session_ts), + fmt_meta_ts(chat_ts), + )); + } + } + + lines +} + +pub fn warning_block_text(data: &serde_json::Value) -> Option { + let lines = warning_lines(data); + if lines.is_empty() { + return None; + } + Some( + lines + .into_iter() + .map(|line| format!("[wx] 警告:{}", line)) + .collect::>() + .join("\n"), + ) +} + +pub 
fn warning_block_markdown(data: &serde_json::Value) -> Option { + let lines = warning_lines(data); + if lines.is_empty() { + return None; + } + let mut out = String::from("> [!WARNING]\n"); + for line in lines { + out.push_str("> "); + out.push_str(&line); + out.push('\n'); + } + Some(out) +} + +fn fmt_meta_ts(ts: i64) -> String { + Local + .timestamp_opt(ts, 0) + .single() + .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string()) + .unwrap_or_else(|| ts.to_string()) +} diff --git a/src/cli/search.rs b/src/cli/search.rs index e6f3d00..42d6210 100644 --- a/src/cli/search.rs +++ b/src/cli/search.rs @@ -1,8 +1,8 @@ -use anyhow::Result; -use crate::ipc::Request; +use super::history::{parse_msg_type, parse_time, parse_time_end}; +use super::output::{emit_warnings, print_response, OutputOpts}; use super::transport; -use super::history::{parse_time, parse_time_end, parse_msg_type}; -use super::output::{resolve, print_value}; +use crate::ipc::Request; +use anyhow::Result; pub fn cmd_search( keyword: String, @@ -11,12 +11,13 @@ pub fn cmd_search( since: Option, until: Option, msg_type: Option, - json: bool, + opts: OutputOpts, ) -> Result<()> { let since_ts = since.as_deref().map(parse_time).transpose()?; let until_ts = until.as_deref().map(parse_time_end).transpose()?; let type_val = msg_type.as_deref().and_then(parse_msg_type); let chats_opt = if chats.is_empty() { None } else { Some(chats) }; + let (with_meta, debug_source) = opts.request_flags(); let req = Request::Search { keyword, @@ -25,11 +26,11 @@ pub fn cmd_search( since: since_ts, until: until_ts, msg_type: type_val, + with_meta, + debug_source, }; let resp = transport::send(req)?; - let results = resp.data.get("results") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&results, &resolve(json)) + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/cli/sessions.rs b/src/cli/sessions.rs index 9ccadb8..3b70e63 100644 --- a/src/cli/sessions.rs +++ 
b/src/cli/sessions.rs @@ -1,12 +1,15 @@ -use anyhow::Result; -use crate::ipc::Request; +use super::output::{emit_warnings, print_response, OutputOpts}; use super::transport; -use super::output::{resolve, print_value}; +use crate::ipc::Request; +use anyhow::Result; -pub fn cmd_sessions(limit: usize, json: bool) -> Result<()> { - let resp = transport::send(Request::Sessions { limit })?; - let data = resp.data.get("sessions") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&data, &resolve(json)) +pub fn cmd_sessions(limit: usize, opts: OutputOpts) -> Result<()> { + let (with_meta, debug_source) = opts.request_flags(); + let resp = transport::send(Request::Sessions { + limit, + with_meta, + debug_source, + })?; + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/cli/stats.rs b/src/cli/stats.rs index 2e9a293..87dcf4c 100644 --- a/src/cli/stats.rs +++ b/src/cli/stats.rs @@ -1,18 +1,25 @@ -use anyhow::Result; -use crate::ipc::Request; -use super::transport; use super::history::{parse_time, parse_time_end}; -use super::output::{resolve, print_value}; +use super::output::{emit_warnings, print_response, OutputOpts}; +use super::transport; +use crate::ipc::Request; +use anyhow::Result; pub fn cmd_stats( chat: String, since: Option, until: Option, - json: bool, + opts: OutputOpts, ) -> Result<()> { let since_ts = since.as_deref().map(parse_time).transpose()?; let until_ts = until.as_deref().map(parse_time_end).transpose()?; - - let resp = transport::send(Request::Stats { chat, since: since_ts, until: until_ts })?; - print_value(&resp.data, &resolve(json)) + let (with_meta, debug_source) = opts.request_flags(); + let resp = transport::send(Request::Stats { + chat, + since: since_ts, + until: until_ts, + with_meta, + debug_source, + })?; + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/cli/unread.rs b/src/cli/unread.rs index 031700c..fcc4235 100644 --- a/src/cli/unread.rs +++ 
b/src/cli/unread.rs @@ -1,18 +1,22 @@ -use anyhow::Result; -use crate::ipc::Request; +use super::output::{emit_warnings, print_response, OutputOpts}; use super::transport; -use super::output::{resolve, print_value}; +use crate::ipc::Request; +use anyhow::Result; -pub fn cmd_unread(limit: usize, filter: Vec, json: bool) -> Result<()> { +pub fn cmd_unread(limit: usize, filter: Vec, opts: OutputOpts) -> Result<()> { // 空或含 "all" 视为不过滤;其他值已被 clap value_parser 验证过,直接透传给 daemon。 let filter_vec = if filter.is_empty() || filter.iter().any(|s| s == "all") { None } else { Some(filter) }; - let resp = transport::send(Request::Unread { limit, filter: filter_vec })?; - let data = resp.data.get("sessions") - .cloned() - .unwrap_or(serde_json::Value::Array(vec![])); - print_value(&data, &resolve(json)) + let (with_meta, debug_source) = opts.request_flags(); + let resp = transport::send(Request::Unread { + limit, + filter: filter_vec, + with_meta, + debug_source, + })?; + emit_warnings(&resp.data); + print_response(&resp.data, &opts) } diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index d6e5892..561df51 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -23,6 +23,40 @@ struct CacheEntry { decrypted_path: PathBuf, } +/// `DbCache::get_with_mode()` 本次解析 rel_key 时实际走了哪条路径。 +/// +/// latency tier: +/// - `CacheHit`:~0ms,只返回已有解密产物 +/// - `WalIncremental`:典型 <10s,只在 cached DB 上增量 apply WAL +/// - `FullDecrypt`:最慢路径,大库上可能到 ~120s +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CacheMode { + /// Path 1:主 `.db` 和 WAL 都没变,直接命中缓存。 + CacheHit, + /// Path 2:主 `.db` 没变、只有 WAL 变了,在 cached DB 上增量 apply。 + WalIncremental, + /// Path 3:主 `.db` 变了或缓存 miss,重新 full decrypt。 + FullDecrypt, +} + +impl CacheMode { + /// 手工固定为 snake_case 字符串,避免未来给 enum 直接 derive `Serialize` + /// 时静默改变 wire 形态。 + pub fn as_str(self) -> &'static str { + match self { + CacheMode::CacheHit => "cache_hit", + CacheMode::WalIncremental => "wal_incremental", + CacheMode::FullDecrypt => "full_decrypt", 
+ } + } +} + +#[derive(Debug, Clone)] +pub struct CacheResolve { + pub path: PathBuf, + pub mode: CacheMode, +} + /// 解密后数据库的 mtime-aware 缓存 /// /// 当数据库文件(.db)或 WAL 文件(.db-wal)的 mtime 发生变化时, @@ -36,10 +70,7 @@ pub struct DbCache { } impl DbCache { - pub async fn new( - db_dir: PathBuf, - all_keys: HashMap, - ) -> Result { + pub async fn new(db_dir: PathBuf, all_keys: HashMap) -> Result { Self::with_dirs(db_dir, config::cache_dir(), config::mtime_file(), all_keys).await } @@ -94,23 +125,34 @@ impl DbCache { if !dec_path.exists() { continue; } - let db_path = self.db_dir.join(rel_key.replace('\\', std::path::MAIN_SEPARATOR_STR).replace('/', std::path::MAIN_SEPARATOR_STR)); + let db_path = self.db_dir.join( + rel_key + .replace('\\', std::path::MAIN_SEPARATOR_STR) + .replace('/', std::path::MAIN_SEPARATOR_STR), + ); let wal_path = wal_path_for(&db_path); let db_mt = mtime_nanos(&db_path); - let _wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; + let _wal_mt = if wal_path.exists() { + mtime_nanos(&wal_path) + } else { + 0 + }; // 只要主 .db 没变,就把 cached 产物载回来。 // 如果 WAL mtime 变了,后续 `get()` 会自动走 Path 2:在已有 cached DB 上增量 apply_wal, // 而不是 daemon 重启后第一条请求又退回全量解密。 if db_mt == entry.db_mt { - inner.insert(rel_key.clone(), CacheEntry { - db_mtime: db_mt, - // 保留"cached 产物构建时看到的 wal_mtime",让 `get()` 去比较当前 WAL - // 是否发生了变化,从而决定 exact-hit 还是 WAL 增量。 - wal_mtime: entry.wal_mt, - decrypted_path: dec_path, - }); + inner.insert( + rel_key.clone(), + CacheEntry { + db_mtime: db_mt, + // 保留"cached 产物构建时看到的 wal_mtime",让 `get()` 去比较当前 WAL + // 是否发生了变化,从而决定 exact-hit 还是 WAL 增量。 + wal_mtime: entry.wal_mt, + decrypted_path: dec_path, + }, + ); reused += 1; } } @@ -123,13 +165,19 @@ impl DbCache { async fn save_persistent(&self) { let mtime_file = &self.mtime_file; let inner = self.inner.lock().await; - let data: HashMap = inner.iter().map(|(k, v)| { - (k.clone(), MtimeEntry { - db_mt: v.db_mtime, - wal_mt: v.wal_mtime, - path: 
v.decrypted_path.to_string_lossy().into_owned(), + let data: HashMap = inner + .iter() + .map(|(k, v)| { + ( + k.clone(), + MtimeEntry { + db_mt: v.db_mtime, + wal_mt: v.wal_mtime, + path: v.decrypted_path.to_string_lossy().into_owned(), + }, + ) }) - }).collect(); + .collect(); drop(inner); if let Ok(json) = serde_json::to_string_pretty(&data) { @@ -148,14 +196,19 @@ impl DbCache { /// WeChat 在写消息时只 append WAL(除非触发 checkpoint),因此 path 2 是常态; /// 这条路径把"每次请求都全量解密 ~1.8GB DB(~120s)"压到"只解 WAL 帧(典型 < 10s)"。 pub async fn get(&self, rel_key: &str) -> Result> { + Ok(self.get_with_mode(rel_key).await?.map(|r| r.path)) + } + + pub async fn get_with_mode(&self, rel_key: &str) -> Result> { let enc_key_hex = match self.all_keys.get(rel_key) { Some(k) => k.clone(), None => return Ok(None), }; let db_path = self.db_dir.join( - rel_key.replace('\\', std::path::MAIN_SEPARATOR_STR) - .replace('/', std::path::MAIN_SEPARATOR_STR) + rel_key + .replace('\\', std::path::MAIN_SEPARATOR_STR) + .replace('/', std::path::MAIN_SEPARATOR_STR), ); if !db_path.exists() { return Ok(None); @@ -163,21 +216,28 @@ impl DbCache { let wal_path = wal_path_for(&db_path); let db_mt = mtime_nanos(&db_path); - let wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; + let wal_mt = if wal_path.exists() { + mtime_nanos(&wal_path) + } else { + 0 + }; let cached = { let inner = self.inner.lock().await; inner.get(rel_key).cloned() }; - let enc_key_bytes = hex_to_32bytes(&enc_key_hex) - .with_context(|| format!("密钥格式错误: {}", rel_key))?; + let enc_key_bytes = + hex_to_32bytes(&enc_key_hex).with_context(|| format!("密钥格式错误: {}", rel_key))?; // Path 1 / Path 2:主 .db mtime 未变且 cached 产物仍在 if let Some(entry) = cached.as_ref() { if entry.db_mtime == db_mt && entry.decrypted_path.exists() { if entry.wal_mtime == wal_mt { - return Ok(Some(entry.decrypted_path.clone())); + return Ok(Some(CacheResolve { + path: entry.decrypted_path.clone(), + mode: CacheMode::CacheHit, + })); } // Path 2: WAL-only 变化 → 在 
cached 产物上重新 apply_wal @@ -190,20 +250,31 @@ impl DbCache { let key_copy = enc_key_bytes; tokio::task::spawn_blocking(move || { wal::apply_wal(&wal_path2, &out_path2, &key_copy) - }).await??; + }) + .await??; } - eprintln!("[cache] WAL 增量 {} ({}ms)", rel_key, t0.elapsed().as_millis()); + eprintln!( + "[cache] WAL 增量 {} ({}ms)", + rel_key, + t0.elapsed().as_millis() + ); { let mut inner = self.inner.lock().await; - inner.insert(rel_key.to_string(), CacheEntry { - db_mtime: db_mt, - wal_mtime: wal_mt, - decrypted_path: out_path.clone(), - }); + inner.insert( + rel_key.to_string(), + CacheEntry { + db_mtime: db_mt, + wal_mtime: wal_mt, + decrypted_path: out_path.clone(), + }, + ); } self.save_persistent().await; - return Ok(Some(out_path)); + return Ok(Some(CacheResolve { + path: out_path, + mode: CacheMode::WalIncremental, + })); } } @@ -213,39 +284,51 @@ impl DbCache { let db_path2 = db_path.clone(); let out_path2 = out_path.clone(); let key_copy = enc_key_bytes; - tokio::task::spawn_blocking(move || { - crypto::full_decrypt(&db_path2, &out_path2, &key_copy) - }).await??; + tokio::task::spawn_blocking(move || crypto::full_decrypt(&db_path2, &out_path2, &key_copy)) + .await??; if wal_path.exists() { let out_path3 = out_path.clone(); let wal_path3 = wal_path.clone(); let key_copy2 = enc_key_bytes; - tokio::task::spawn_blocking(move || { - wal::apply_wal(&wal_path3, &out_path3, &key_copy2) - }).await??; + tokio::task::spawn_blocking(move || wal::apply_wal(&wal_path3, &out_path3, &key_copy2)) + .await??; } - eprintln!("[cache] 全量解密 {} ({}ms)", rel_key, t0.elapsed().as_millis()); + eprintln!( + "[cache] 全量解密 {} ({}ms)", + rel_key, + t0.elapsed().as_millis() + ); { let mut inner = self.inner.lock().await; - inner.insert(rel_key.to_string(), CacheEntry { - db_mtime: db_mt, - wal_mtime: wal_mt, - decrypted_path: out_path.clone(), - }); + inner.insert( + rel_key.to_string(), + CacheEntry { + db_mtime: db_mt, + wal_mtime: wal_mt, + decrypted_path: out_path.clone(), + }, + ); 
} self.save_persistent().await; - Ok(Some(out_path)) + Ok(Some(CacheResolve { + path: out_path, + mode: CacheMode::FullDecrypt, + })) } } pub(super) fn mtime_nanos(path: &Path) -> u64 { std::fs::metadata(path) .and_then(|m| m.modified()) - .map(|t| t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_nanos() as u64) + .map(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64 + }) .unwrap_or(0) } @@ -273,8 +356,7 @@ mod tests { use super::*; /// 64 字符 hex(不需要是真 SQLCipher key — 仅用来证明"是否触发了 full_decrypt") - const FAKE_KEY_HEX: &str = - "0000000000000000000000000000000000000000000000000000000000000000"; + const FAKE_KEY_HEX: &str = "0000000000000000000000000000000000000000000000000000000000000000"; /// 路径区分约定: /// - 完全 hit / WAL 增量 → `decrypted_path` **内容不变** @@ -337,7 +419,11 @@ mod tests { let (cache, _db_path, decrypted_path, _mtime_file, rel_key) = setup_seeded_cache("exact").await; - let p = cache.get(&rel_key).await.unwrap().expect("cache should hit"); + let p = cache + .get(&rel_key) + .await + .unwrap() + .expect("cache should hit"); assert_eq!(p, decrypted_path); // 完全 hit → cached file 内容不应被改 @@ -387,7 +473,10 @@ mod tests { // 第一次:完全 hit let p1 = cache.get(&rel_key).await.unwrap().expect("first get hits"); assert_eq!(p1, decrypted_path); - assert_eq!(std::fs::read(&decrypted_path).unwrap(), ORIGINAL_CACHED_BYTES); + assert_eq!( + std::fs::read(&decrypted_path).unwrap(), + ORIGINAL_CACHED_BYTES + ); // bump WAL mtime(重写仍 31 bytes,apply_wal 仍 noop) std::thread::sleep(std::time::Duration::from_millis(20)); @@ -442,6 +531,72 @@ mod tests { ); } + #[tokio::test] + async fn get_with_mode_reports_each_path() { + let root = unique_tmpdir("getwithmode"); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + 
std::fs::write(&db_path, b"fake encrypted db").unwrap(); + let wal_path = wal_path_for(&db_path); + std::fs::write(&wal_path, [0u8; 31]).unwrap(); + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let wal_mt0 = mtime_nanos(&wal_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": wal_mt0, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file, all_keys) + .await + .unwrap(); + + let hit = cache + .get_with_mode(&rel_key) + .await + .unwrap() + .expect("cache should hit"); + assert_eq!(hit.path, decrypted_path); + assert_eq!(hit.mode, CacheMode::CacheHit); + + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&wal_path, [0xffu8; 31]).unwrap(); + let wal = cache + .get_with_mode(&rel_key) + .await + .unwrap() + .expect("WAL-only change should stay incremental"); + assert_eq!(wal.path, decrypted_path); + assert_eq!(wal.mode, CacheMode::WalIncremental); + + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&db_path, b"different bytes").unwrap(); + let full = cache + .get_with_mode(&rel_key) + .await + .unwrap() + .expect("db mtime change should trigger full decrypt"); + assert_eq!(full.path, decrypted_path); + assert_eq!(full.mode, CacheMode::FullDecrypt); + } + #[tokio::test] async fn restart_with_wal_change_still_reuses_cached_db_then_applies_wal() { let root = unique_tmpdir("restart-wal"); @@ -486,7 +641,11 @@ mod tests { .await .unwrap(); - let p = 
cache.get(&rel_key).await.unwrap().expect("cache should reuse persisted DB"); + let p = cache + .get(&rel_key) + .await + .unwrap() + .expect("cache should reuse persisted DB"); assert_eq!(p, decrypted_path); let body = std::fs::read(&decrypted_path).unwrap(); assert_eq!( diff --git a/src/daemon/meta.rs b/src/daemon/meta.rs new file mode 100644 index 0000000..1db37f5 --- /dev/null +++ b/src/daemon/meta.rs @@ -0,0 +1,269 @@ +//! Freshness metadata appended to every q_* response. +//! +//! 背景:`all_keys.json` 是 `wx init` 时的快照。WeChat 在 daemon 启动后随时可能创建 +//! 新的 `message_N.db` 分片;如果只信任 init 时收到的 `msg_db_keys` 列表,新分片里 +//! 的数据对 daemon 完全不可见 → 调用方拿到的是看似正常但缺数据的结果("stale")。 +//! +//! 本模块的职责: +//! 1. 提供 `Meta` 结构体,由各 `q_*` 函数填充后塞进 response(顶层 `meta` 字段)。 +//! 2. 提供 `discover_unknown_shards(db_dir, msg_db_keys)`:扫描磁盘上当前真实存在的 +//! `message/message_*.db` 文件,diff 出 daemon 未持有 enc_key 的"未知分片"列表。 +//! 3. 集中 `MetaStatus` 的判定规则,避免 8 个 q_* 各自判,规则漂移。 + +use serde::Serialize; +use std::collections::HashMap; +use std::path::Path; + +/// 每条 q_* 响应附带的"新鲜度元数据"。 +/// +/// 序列化为 JSON 时,所有 `Option` 字段在 `None` 时省略,让最常见的命令调用 +/// 输出尽量短;重负载字段(per_shard_*、shard_paths)默认不填,由 CLI 层 +/// 通过 `--debug-source` 等开关显式请求时才放进来。 +#[derive(Debug, Clone, Serialize, Default)] +pub struct Meta { + /// 命中数据中最新一条的 create_time(unix 秒)。 + /// `q_history` / `q_search` / `q_new_messages` 等基于 Msg_ 表的查询都应填。 + /// `q_sessions` / `q_unread` 这类基于 SessionTable 的查询填会话维度的最新 ts。 + #[serde(skip_serializing_if = "Option::is_none")] + pub chat_latest_timestamp: Option, + + /// 上面那条最新消息所在的分片 rel_key(`message/message_3.db`)。 + /// 让 agent 一眼看出"当前命中的最新数据来自哪个分片"。 + #[serde(skip_serializing_if = "Option::is_none")] + pub chat_latest_db: Option, + + /// 该 chat 在 `session.db.SessionTable.last_timestamp` 里的值(如果可读)。 + /// 这是 WeChat 自己写的"最近一条消息时间",与上面 `chat_latest_timestamp` 比较 + /// 即可发现"session 说有更新但 history 没读到" → 漏分片。 + #[serde(skip_serializing_if = "Option::is_none")] + pub session_last_timestamp: Option, + + /// 本次查询实际遍历的分片数(即 
`names.msg_db_keys.len()` 的子集;包括命中 0 行的)。 + pub shards_scanned: usize, + + /// 本次查询里至少返回了 1 行的分片数。 + pub shards_hit: usize, + + /// 磁盘上存在但 daemon 没有 enc_key 的分片 rel_key 列表。 + /// 非空 ⇒ `wx init` 之后 WeChat 又分裂了新分片 → 必须重跑 `wx init`。 + pub unknown_shards: Vec, + + /// 由上述字段派生出的总体状态,CLI / agent 主要看这一个。 + pub status: MetaStatus, + + // 重负载/调试字段:默认不填,CLI 层显式开启 + #[serde(skip_serializing_if = "Option::is_none")] + pub per_shard_latest: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub cache_mode_per_shard: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub shard_paths: Option>, +} + +#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum MetaStatus { + #[default] + Ok, + /// `session.db` 的最新时间明显领先于本次消息查询结果,说明数据可能过期或不完整。 + PossiblyStale, + /// 最强信号:磁盘上出现 daemon 不认识的新分片,通常必须重跑 `wx init --force`。 + PossiblyStaleUnknownShards, + /// 调用方主动传了 `since` / `until` / `offset` 等窗口条件,结果天然是局部视图。 + Windowed, +} + +/// session 领先 history 多少秒就报 `PossiblyStale`。 +/// +/// 24h 的取值是故意保守的:活跃群聊/私聊很少会整整一天没有新消息, +/// 超过这个窗口就值得显式提醒 agent 不要把结果当成“当前最新状态”。 +pub const STALE_THRESHOLD_SECS: i64 = 24 * 3600; + +/// 统一 freshness status 的优先级: +/// 1. `unknown_shards` 非空:daemon 整体视图已经过期,优先返回 `PossiblyStaleUnknownShards` +/// 2. `windowed=true`:调用方本来就在看局部窗口,不参与 stale 推导 +/// 3. `session_last - chat_latest > STALE_THRESHOLD_SECS`:返回 `PossiblyStale` +/// 4. 
其他情况:`Ok` +pub fn derive_status( + chat_latest: Option, + session_last: Option, + unknown_shards: &[String], + windowed: bool, +) -> MetaStatus { + if !unknown_shards.is_empty() { + return MetaStatus::PossiblyStaleUnknownShards; + } + if windowed { + return MetaStatus::Windowed; + } + match (chat_latest, session_last) { + (Some(c), Some(s)) if s - c > STALE_THRESHOLD_SECS => MetaStatus::PossiblyStale, + _ => MetaStatus::Ok, + } +} + +/// 扫描 `/message/` 下真实存在的 `message_*.db`,diff 出 daemon 当前没有 key +/// 的未知分片。 +/// +/// 契约: +/// - 返回值一律是 `/` 分隔的 rel_key(如 `message/message_3.db`),与 `all_keys.json` 对齐 +/// - 结果按字典序排序,方便测试和 CLI 稳定显示 +/// - 排除 `_fts*` / `_resource*`,因为它们是索引/附件库,不属于消息分片真相 +pub fn discover_unknown_shards(db_dir: &Path, known: &[String]) -> Vec { + let known_set: std::collections::HashSet = + known.iter().map(|k| k.replace('\\', "/")).collect(); + + let msg_dir = db_dir.join("message"); + let entries = match std::fs::read_dir(&msg_dir) { + Ok(it) => it, + Err(_) => return Vec::new(), + }; + + let mut unknown: Vec = Vec::new(); + for entry in entries.flatten() { + let name = entry.file_name(); + let Some(name_str) = name.to_str() else { + continue; + }; + if !is_message_shard(name_str) { + continue; + } + let rel = format!("message/{}", name_str); + if !known_set.contains(&rel) { + unknown.push(rel); + } + } + unknown.sort(); + unknown +} + +fn is_message_shard(file_name: &str) -> bool { + if !file_name.starts_with("message_") || !file_name.ends_with(".db") { + return false; + } + if file_name.contains("_fts") || file_name.contains("_resource") { + return false; + } + let stem = &file_name["message_".len()..file_name.len() - ".db".len()]; + !stem.is_empty() && stem.chars().all(|c| c.is_ascii_digit()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn is_message_shard_accepts_normal_shards() { + assert!(is_message_shard("message_0.db")); + assert!(is_message_shard("message_12.db")); + } + + #[test] + fn 
is_message_shard_rejects_fts_and_resource() { + assert!(!is_message_shard("message_0_fts.db")); + assert!(!is_message_shard("message_fts.db")); + assert!(!is_message_shard("message_0_resource.db")); + assert!(!is_message_shard("message_resource.db")); + } + + #[test] + fn is_message_shard_rejects_non_digits() { + assert!(!is_message_shard("message_a.db")); + assert!(!is_message_shard("message_.db")); + assert!(!is_message_shard("session.db")); + assert!(!is_message_shard("message_0.db.bak")); + } + + #[test] + fn discover_unknown_shards_finds_disk_only_shards() { + let dir = tempdir(); + let msg_dir = dir.join("message"); + std::fs::create_dir_all(&msg_dir).unwrap(); + for f in [ + "message_0.db", + "message_1.db", + "message_2.db", + "message_0_fts.db", + ] { + std::fs::write(msg_dir.join(f), b"").unwrap(); + } + let known = vec![ + "message/message_0.db".to_string(), + "message/message_1.db".to_string(), + ]; + let unknown = discover_unknown_shards(&dir, &known); + assert_eq!(unknown, vec!["message/message_2.db".to_string()]); + } + + #[test] + fn discover_unknown_shards_normalizes_backslash_in_known_keys() { + let dir = tempdir(); + let msg_dir = dir.join("message"); + std::fs::create_dir_all(&msg_dir).unwrap(); + std::fs::write(msg_dir.join("message_0.db"), b"").unwrap(); + + let known = vec!["message\\message_0.db".to_string()]; + assert!(discover_unknown_shards(&dir, &known).is_empty()); + } + + #[test] + fn discover_unknown_shards_returns_empty_when_message_dir_missing() { + let dir = tempdir(); + assert!(discover_unknown_shards(&dir, &[]).is_empty()); + } + + #[test] + fn derive_status_unknown_shards_overrides_windowed() { + let unknown = vec!["message/message_3.db".to_string()]; + assert_eq!( + derive_status(Some(100), Some(100), &unknown, true), + MetaStatus::PossiblyStaleUnknownShards + ); + } + + #[test] + fn derive_status_windowed_when_user_paginates() { + assert_eq!( + derive_status(Some(100), Some(999_999), &[], true), + MetaStatus::Windowed, + ); + 
} + + #[test] + fn derive_status_possibly_stale_when_session_far_ahead() { + let chat = Some(1_000_000); + let session = Some(1_000_000 + STALE_THRESHOLD_SECS + 1); + assert_eq!( + derive_status(chat, session, &[], false), + MetaStatus::PossiblyStale + ); + } + + #[test] + fn derive_status_ok_when_within_threshold() { + let chat = Some(1_000_000); + let session = Some(1_000_000 + STALE_THRESHOLD_SECS - 1); + assert_eq!(derive_status(chat, session, &[], false), MetaStatus::Ok); + } + + #[test] + fn derive_status_ok_when_either_side_unknown() { + assert_eq!( + derive_status(None, Some(999_999_999), &[], false), + MetaStatus::Ok + ); + assert_eq!(derive_status(Some(1), None, &[], false), MetaStatus::Ok); + assert_eq!(derive_status(None, None, &[], false), MetaStatus::Ok); + } + + fn tempdir() -> std::path::PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-meta-test-{}-{}", pid, nanos)); + std::fs::create_dir_all(&p).unwrap(); + p + } +} diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index b4a34c3..6503134 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -1,4 +1,5 @@ pub mod cache; +pub mod meta; pub mod query; pub mod server; diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 634ff2d..30f755f 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -7,7 +7,8 @@ use serde_json::{json, Value}; use std::collections::{HashMap, HashSet}; use std::sync::{Arc, OnceLock}; -use super::cache::DbCache; +use super::cache::{CacheMode, DbCache}; +use super::meta::{derive_status, discover_unknown_shards, Meta}; /// 静态编译的 Msg 表名正则,避免在热路径中重复编译 fn msg_table_re() -> &'static Regex { @@ -58,9 +59,21 @@ pub struct Names { pub verify_flags: HashMap, } +#[derive(Debug, Clone)] +struct MessageShard { + rel_key: String, + path: std::path::PathBuf, + table: String, + max_ts: i64, + cache_mode: CacheMode, +} + impl 
Names { pub fn display(&self, username: &str) -> String { - self.map.get(username).cloned().unwrap_or_else(|| username.to_string()) + self.map + .get(username) + .cloned() + .unwrap_or_else(|| username.to_string()) } /// 是否被微信官方标了认证/服务号 flag。未在 contact 表中的 username 返回 false。 @@ -69,6 +82,146 @@ impl Names { } } +fn current_unknown_shards(db: &DbCache, names: &Names) -> Vec { + discover_unknown_shards(db.db_dir(), &names.msg_db_keys) +} + +fn meta_for_shards( + scanned: usize, + shards: &[MessageShard], + shard_hits: usize, + unknown_shards: Vec, + session_last_timestamp: Option, + windowed: bool, + with_meta: bool, + debug_source: bool, +) -> Meta { + let latest = shards.first(); + let chat_latest_timestamp = latest.map(|s| s.max_ts); + Meta { + chat_latest_timestamp, + chat_latest_db: latest.map(|s| s.rel_key.clone()), + session_last_timestamp, + shards_scanned: scanned, + shards_hit: shard_hits, + unknown_shards: unknown_shards.clone(), + status: derive_status( + chat_latest_timestamp, + session_last_timestamp, + &unknown_shards, + windowed, + ), + per_shard_latest: if with_meta || debug_source { + Some( + shards + .iter() + .map(|s| (s.rel_key.clone(), s.max_ts)) + .collect(), + ) + } else { + None + }, + cache_mode_per_shard: if with_meta || debug_source { + Some( + shards + .iter() + .map(|s| (s.rel_key.clone(), s.cache_mode.as_str().to_string())) + .collect(), + ) + } else { + None + }, + shard_paths: if debug_source { + Some( + shards + .iter() + .map(|s| (s.rel_key.clone(), s.path.to_string_lossy().into_owned())) + .collect(), + ) + } else { + None + }, + } +} + +fn meta_for_global_query( + scanned: usize, + hit: usize, + unknown_shards: Vec, + windowed: bool, + with_meta: bool, + debug_source: bool, + cache_modes: Option>, + shard_paths: Option>, +) -> Meta { + Meta { + chat_latest_timestamp: None, + chat_latest_db: None, + session_last_timestamp: None, + shards_scanned: scanned, + shards_hit: hit, + unknown_shards: unknown_shards.clone(), + status: 
derive_status(None, None, &unknown_shards, windowed), + per_shard_latest: if with_meta || debug_source { + Some(HashMap::new()) + } else { + None + }, + cache_mode_per_shard: if with_meta || debug_source { + cache_modes + } else { + None + }, + shard_paths: if debug_source { shard_paths } else { None }, + } +} + +async fn session_last_timestamp(db: &DbCache, username: &str) -> Option { + let path = match db.get("session/session.db").await { + Ok(Some(path)) => path, + Ok(None) => return None, + Err(e) => { + eprintln!( + "[freshness] skip session_last_timestamp {}: {}", + username, e + ); + return None; + } + }; + + let username = username.to_string(); + let username_for_query = username.clone(); + match tokio::task::spawn_blocking(move || -> Result> { + let conn = Connection::open(&path)?; + let ts = conn + .query_row( + "SELECT last_timestamp FROM SessionTable WHERE username = ?", + [&username_for_query], + |row| row.get::<_, i64>(0), + ) + .ok(); + Ok(ts) + }) + .await + { + Ok(Ok(ts)) => ts, + Ok(Err(e)) => { + eprintln!( + "[freshness] skip session_last_timestamp {}: {}", + username, e + ); + None + } + Err(e) => { + eprintln!( + "[freshness] task error session_last_timestamp {}: {}", + username, e + ); + None + } + } +} + /// 加载联系人缓存(从 contact/contact.db) pub async fn load_names(db: &DbCache) -> Result { let path = db.get("contact/contact.db").await?; @@ -78,67 +231,89 @@ pub async fn load_names(db: &DbCache) -> Result { let p2 = p.clone(); let rows: Vec<(String, String, String, i64)> = tokio::task::spawn_blocking(move || { let conn = Connection::open(&p2).context("打开 contact.db 失败")?; - let mut stmt = conn.prepare( - "SELECT username, nick_name, remark, verify_flag FROM contact" - )?; - let rows = stmt.query_map([], |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1).unwrap_or_default(), - row.get::<_, String>(2).unwrap_or_default(), - row.get::<_, i64>(3).unwrap_or(0), - )) - })? 
- .collect::>>()?; + let mut stmt = + conn.prepare("SELECT username, nick_name, remark, verify_flag FROM contact")?; + let rows = stmt + .query_map([], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1).unwrap_or_default(), + row.get::<_, String>(2).unwrap_or_default(), + row.get::<_, i64>(3).unwrap_or(0), + )) + })? + .collect::>>()?; Ok::<_, anyhow::Error>(rows) - }).await??; + }) + .await??; for (uname, nick, remark, vf) in rows { - let display = if !remark.is_empty() { remark } - else if !nick.is_empty() { nick } - else { uname.clone() }; + let display = if !remark.is_empty() { + remark + } else if !nick.is_empty() { + nick + } else { + uname.clone() + }; verify_flags.insert(uname.clone(), vf); map.insert(uname, display); } } - let md5_to_uname: HashMap = map.keys() + let md5_to_uname: HashMap = map + .keys() .map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone())) .collect(); - Ok(Names { map, md5_to_uname, msg_db_keys: Vec::new(), verify_flags }) + Ok(Names { + map, + md5_to_uname, + msg_db_keys: Vec::new(), + verify_flags, + }) } /// 查询最近会话列表 -pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result { - let path = db.get("session/session.db").await? +pub async fn q_sessions( + db: &DbCache, + names: &Names, + limit: usize, + with_meta: bool, + debug_source: bool, +) -> Result { + let path = db + .get("session/session.db") + .await? 
.context("无法解密 session.db")?; let path2 = path.clone(); let limit_val = limit; - let rows: Vec<(String, i64, Vec, i64, i64, String, String)> = tokio::task::spawn_blocking(move || { - let conn = Connection::open(&path2)?; - let mut stmt = conn.prepare( - "SELECT username, unread_count, summary, last_timestamp, + let rows: Vec<(String, i64, Vec, i64, i64, String, String)> = + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path2)?; + let mut stmt = conn.prepare( + "SELECT username, unread_count, summary, last_timestamp, last_msg_type, last_msg_sender, last_sender_display_name FROM SessionTable WHERE last_timestamp > 0 - ORDER BY last_timestamp DESC LIMIT ?" - )?; - let rows = stmt.query_map([limit_val as i64], |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, i64>(1).unwrap_or(0), - get_content_bytes(row, 2), - row.get::<_, i64>(3).unwrap_or(0), - row.get::<_, i64>(4).unwrap_or(0), - row.get::<_, String>(5).unwrap_or_default(), - row.get::<_, String>(6).unwrap_or_default(), - )) - })? - .collect::>>()?; - Ok::<_, anyhow::Error>(rows) - }).await??; + ORDER BY last_timestamp DESC LIMIT ?", + )?; + let rows = stmt + .query_map([limit_val as i64], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, i64>(1).unwrap_or(0), + get_content_bytes(row, 2), + row.get::<_, i64>(3).unwrap_or(0), + row.get::<_, i64>(4).unwrap_or(0), + row.get::<_, String>(5).unwrap_or_default(), + row.get::<_, String>(6).unwrap_or_default(), + )) + })? 
+ .collect::>>()?; + Ok::<_, anyhow::Error>(rows) + }) + .await??; let mut results = Vec::new(); let mut group_nickname_cache: HashMap> = HashMap::new(); @@ -153,7 +328,9 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result Result, until: Option, msg_type: Option, + with_meta: bool, + debug_source: bool, ) -> Result { - let username = resolve_username(chat, names) - .with_context(|| format!("找不到联系人: {}", chat))?; + let username = + resolve_username(chat, names).with_context(|| format!("找不到联系人: {}", chat))?; let display = names.display(&username); let chat_type = chat_type_of(&username, names); let is_group = chat_type == "group"; - let tables = find_msg_tables(db, names, &username).await?; - if tables.is_empty() { + let (shards, scanned) = find_msg_shards(db, names, &username).await?; + if shards.is_empty() { anyhow::bail!("找不到 {} 的消息记录", display); } let mut all_msgs: Vec = Vec::new(); + let mut shard_hits = 0usize; let group_nicknames = if is_group { - load_group_nicknames(db, &username).await.unwrap_or_default() + load_group_nicknames(db, &username) + .await + .unwrap_or_default() } else { HashMap::new() }; - for (db_path, table_name) in &tables { - let path = db_path.clone(); - let tname = table_name.clone(); + for shard in &shards { + let path = shard.path.clone(); + let tname = shard.table.clone(); let uname = username.clone(); let is_group2 = is_group; let names_map = names.map.clone(); @@ -222,9 +425,25 @@ pub async fn q_history( let msgs: Vec = tokio::task::spawn_blocking(move || { // per-DB 软上限:offset + limit 已足够全局分页,避免大群全量加载 let per_db_cap = offset2 + limit2; - query_messages(&path, &tname, &uname, is_group2, &names_map, &group_nicknames2, since2, until2, msg_type, per_db_cap, 0) - }).await??; + query_messages( + &path, + &tname, + &uname, + is_group2, + &names_map, + &group_nicknames2, + since2, + until2, + msg_type, + per_db_cap, + 0, + ) + }) + .await??; + if !msgs.is_empty() { + shard_hits += 1; + } all_msgs.extend(msgs); } 
@@ -232,6 +451,19 @@ pub async fn q_history( let paged: Vec = all_msgs.into_iter().skip(offset).take(limit).collect(); let mut paged = paged; paged.sort_by_key(|m| m["timestamp"].as_i64().unwrap_or(0)); + let windowed = offset > 0 || since.is_some() || until.is_some() || msg_type.is_some(); + let unknown_shards = current_unknown_shards(db, names); + let session_ts = session_last_timestamp(db, &username).await; + let meta = meta_for_shards( + scanned, + &shards, + shard_hits, + unknown_shards, + session_ts, + windowed, + with_meta, + debug_source, + ); Ok(json!({ "chat": display, @@ -240,6 +472,7 @@ pub async fn q_history( "chat_type": chat_type, "count": paged.len(), "messages": paged, + "meta": meta, })) } @@ -253,64 +486,103 @@ pub async fn q_search( since: Option, until: Option, msg_type: Option, + with_meta: bool, + debug_source: bool, ) -> Result { - let mut targets: Vec<(String, String, String, String)> = Vec::new(); // (path, table, display, uname) + let mut targets: Vec<(String, String, String, String, String)> = Vec::new(); // (rel_key, path, table, display, uname) + let mut scanned_rel_keys: HashSet = HashSet::new(); + let mut cache_modes: HashMap = HashMap::new(); + let mut shard_paths: HashMap = HashMap::new(); if let Some(chat_names) = chats { for chat_name in &chat_names { if let Some(uname) = resolve_username(chat_name, names) { - let tables = find_msg_tables(db, names, &uname).await?; - for (p, t) in tables { - targets.push((p.to_string_lossy().into_owned(), t, names.display(&uname), uname.clone())); + let (shards, _) = find_msg_shards(db, names, &uname).await?; + for shard in shards { + scanned_rel_keys.insert(shard.rel_key.clone()); + cache_modes + .insert(shard.rel_key.clone(), shard.cache_mode.as_str().to_string()); + shard_paths.insert( + shard.rel_key.clone(), + shard.path.to_string_lossy().into_owned(), + ); + targets.push(( + shard.rel_key, + shard.path.to_string_lossy().into_owned(), + shard.table, + names.display(&uname), + uname.clone(), 
+ )); } } } } else { // 全局搜索:遍历所有消息 DB for rel_key in &names.msg_db_keys { - let path = match db.get(rel_key).await? { - Some(p) => p, + let resolved = match db.get_with_mode(rel_key).await? { + Some(r) => r, None => continue, }; - let path2 = path.clone(); + scanned_rel_keys.insert(rel_key.clone()); + cache_modes.insert(rel_key.clone(), resolved.mode.as_str().to_string()); + shard_paths.insert( + rel_key.clone(), + resolved.path.to_string_lossy().into_owned(), + ); + let path2 = resolved.path.clone(); let md5_lookup = names.md5_to_uname.clone(); let names_map = names.map.clone(); + let rel_key2 = rel_key.clone(); - let table_targets: Vec<(String, String, String, String)> = match tokio::task::spawn_blocking(move || { - let conn = Connection::open(&path2)?; - let mut stmt = conn.prepare( - "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'" - )?; - let table_names: Vec = stmt.query_map([], |row| row.get(0))? - .filter_map(|r| r.ok()) - .collect(); + let table_targets: Vec<(String, String, String, String, String)> = + match tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path2)?; + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'", + )?; + let table_names: Vec = stmt + .query_map([], |row| row.get(0))? 
+ .filter_map(|r| r.ok()) + .collect(); - let re = msg_table_re(); - let mut result = Vec::new(); - for tname in table_names { - if !re.is_match(&tname) { + let re = msg_table_re(); + let mut result = Vec::new(); + for tname in table_names { + if !re.is_match(&tname) { + continue; + } + let hash = &tname[4..]; + let uname = md5_lookup.get(hash).cloned().unwrap_or_default(); + let display = if uname.is_empty() { + String::new() + } else { + names_map + .get(&uname) + .cloned() + .unwrap_or_else(|| uname.clone()) + }; + result.push(( + rel_key2.clone(), + path2.to_string_lossy().into_owned(), + tname, + display, + uname, + )); + } + Ok::<_, anyhow::Error>(result) + }) + .await + { + Ok(Ok(v)) => v, + Ok(Err(e)) => { + eprintln!("[search] skip DB {}: {}", rel_key, e); continue; } - let hash = &tname[4..]; - let uname = md5_lookup.get(hash).cloned().unwrap_or_default(); - let display = if uname.is_empty() { - String::new() - } else { - names_map.get(&uname).cloned().unwrap_or_else(|| uname.clone()) - }; - result.push(( - path2.to_string_lossy().into_owned(), - tname, - display, - uname, - )); - } - Ok::<_, anyhow::Error>(result) - }).await { - Ok(Ok(v)) => v, - Ok(Err(e)) => { eprintln!("[search] skip DB {}: {}", rel_key, e); continue; } - Err(e) => { eprintln!("[search] task error {}: {}", rel_key, e); continue; } - }; + Err(e) => { + eprintln!("[search] task error {}: {}", rel_key, e); + continue; + } + }; targets.extend(table_targets); } @@ -318,7 +590,9 @@ pub async fn q_search( // 按 db_path 分组 let mut by_path: HashMap> = HashMap::new(); - for (p, t, d, u) in targets { + let mut path_to_rel_key: HashMap = HashMap::new(); + for (rel_key, p, t, d, u) in targets { + path_to_rel_key.insert(p.clone(), rel_key); by_path.entry(p).or_default().push((t, d, u)); } @@ -340,7 +614,8 @@ pub async fn q_search( // 串行 await,活跃账号上 N 个分片要轮 N 次磁盘 IO;现在 JoinSet 把它们一次 // 全部 dispatch 到 blocking pool,整体 latency 退化为单 DB 慢路径。 let kw = keyword.to_string(); - let mut join_set: 
tokio::task::JoinSet>> = tokio::task::JoinSet::new(); + let mut join_set: tokio::task::JoinSet)>> = + tokio::task::JoinSet::new(); for (db_path, table_list) in by_path { let kw2 = kw.clone(); let since2 = since; @@ -359,32 +634,63 @@ pub async fn q_search( let group_nicknames = group_nicknames_by_chat2 .get(uname) .unwrap_or(&empty_group_nicknames); - match search_in_table(&conn, tname, &uname, is_group, - &names_map2, group_nicknames, &kw2, since2, until2, msg_type, limit2) - { + match search_in_table( + &conn, + tname, + &uname, + is_group, + &names_map2, + group_nicknames, + &kw2, + since2, + until2, + msg_type, + limit2, + ) { Ok(rows) => { for mut row in rows { - if row.get("chat").map(|v| v.as_str().unwrap_or("")).unwrap_or("").is_empty() { + if row + .get("chat") + .map(|v| v.as_str().unwrap_or("")) + .unwrap_or("") + .is_empty() + { if let Some(obj) = row.as_object_mut() { - obj.insert("chat".into(), serde_json::Value::String( - if display.is_empty() { tname.clone() } else { display.clone() } - )); + obj.insert( + "chat".into(), + serde_json::Value::String(if display.is_empty() { + tname.clone() + } else { + display.clone() + }), + ); } } all.push(row); } } - Err(e) => eprintln!("[search] skip table {} (db={}): {}", tname, db_path_for_log, e), + Err(e) => eprintln!( + "[search] skip table {} (db={}): {}", + tname, db_path_for_log, e + ), } } - Ok(all) + Ok((db_path_for_log, all)) }); } let mut results: Vec = Vec::new(); + let mut hit_rel_keys: HashSet = HashSet::new(); while let Some(joined) = join_set.join_next().await { match joined { - Ok(Ok(rows)) => results.extend(rows), + Ok(Ok((db_path, rows))) => { + if !rows.is_empty() { + if let Some(rel_key) = path_to_rel_key.get(&db_path) { + hit_rel_keys.insert(rel_key.clone()); + } + } + results.extend(rows) + } Ok(Err(e)) => eprintln!("[search] skip DB: {}", e), Err(e) => eprintln!("[search] task error: {}", e), } @@ -392,7 +698,20 @@ pub async fn q_search( results.sort_by_key(|r| 
std::cmp::Reverse(r["timestamp"].as_i64().unwrap_or(0))); let paged: Vec = results.into_iter().take(limit).collect(); - Ok(json!({ "keyword": keyword, "count": paged.len(), "results": paged })) + let unknown_shards = current_unknown_shards(db, names); + // 全局搜索 / keyword 过滤天然是窗口化结果,没有稳定的 chat-level latest baseline, + // 不参与 stale 推导;这里只保留 unknown_shards 这类 daemon 全局健康信号。 + let meta = meta_for_global_query( + scanned_rel_keys.len(), + hit_rel_keys.len(), + unknown_shards, + true, + with_meta, + debug_source, + Some(cache_modes), + Some(shard_paths), + ); + Ok(json!({ "keyword": keyword, "count": paged.len(), "results": paged, "meta": meta })) } /// 查询联系人 @@ -402,7 +721,9 @@ pub async fn q_search( /// 折叠入口(`brandsessionholder` / `@placeholder_foldgroup`)以及微信内部 `@xxx` 系统账号。 /// 这些都不应该出现在 `wx contacts` 输出里,统一走 `chat_type_of` 这条同样的真相判定。 pub async fn q_contacts(names: &Names, query: Option<&str>, limit: usize) -> Result { - let mut contacts: Vec = names.map.iter() + let mut contacts: Vec = names + .map + .iter() .filter(|(u, _)| chat_type_of(u, names) == "private") .map(|(u, d)| json!({ "username": u, "display": d })) .collect(); @@ -410,13 +731,22 @@ pub async fn q_contacts(names: &Names, query: Option<&str>, limit: usize) -> Res if let Some(q) = query { let low = q.to_lowercase(); contacts.retain(|c| { - c["display"].as_str().map(|s| s.to_lowercase().contains(&low)).unwrap_or(false) - || c["username"].as_str().map(|s| s.to_lowercase().contains(&low)).unwrap_or(false) + c["display"] + .as_str() + .map(|s| s.to_lowercase().contains(&low)) + .unwrap_or(false) + || c["username"] + .as_str() + .map(|s| s.to_lowercase().contains(&low)) + .unwrap_or(false) }); } contacts.sort_by(|a, b| { - a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) + a["display"] + .as_str() + .unwrap_or("") + .cmp(b["display"].as_str().unwrap_or("")) }); let total = contacts.len(); @@ -435,7 +765,9 @@ fn resolve_username(chat_name: &str, names: &Names) -> Option { } let low 
= chat_name.to_lowercase(); // 精确匹配显示名:排序后取第一个,保证确定性 - let mut exact: Vec<&String> = names.map.iter() + let mut exact: Vec<&String> = names + .map + .iter() .filter(|(_, display)| display.to_lowercase() == low) .map(|(uname, _)| uname) .collect(); @@ -444,11 +776,16 @@ fn resolve_username(chat_name: &str, names: &Names) -> Option { return Some(u.clone()); } // 模糊匹配:取 display name 最短的(最精确),相同长度取字典序最小 - let mut candidates: Vec<(&String, &String)> = names.map.iter() + let mut candidates: Vec<(&String, &String)> = names + .map + .iter() .filter(|(_, display)| display.to_lowercase().contains(&low)) .collect(); candidates.sort_by_key(|(uname, display)| (display.len(), uname.as_str())); - candidates.into_iter().next().map(|(uname, _)| uname.clone()) + candidates + .into_iter() + .next() + .map(|(uname, _)| uname.clone()) } async fn find_msg_tables( @@ -456,45 +793,69 @@ async fn find_msg_tables( names: &Names, username: &str, ) -> Result> { + let (shards, _) = find_msg_shards(db, names, username).await?; + Ok(shards.into_iter().map(|s| (s.path, s.table)).collect()) +} + +async fn find_msg_shards( + db: &DbCache, + names: &Names, + username: &str, +) -> Result<(Vec, usize)> { let table_name = format!("Msg_{:x}", md5::compute(username.as_bytes())); if !msg_table_re().is_match(&table_name) { - return Ok(Vec::new()); + return Ok((Vec::new(), 0)); } - let mut results: Vec<(i64, std::path::PathBuf, String)> = Vec::new(); + let mut scanned = 0usize; + let mut results: Vec = Vec::new(); for rel_key in &names.msg_db_keys { - let path = match db.get(rel_key).await? { - Some(p) => p, + let resolved = match db.get_with_mode(rel_key).await? 
{ + Some(r) => r, None => continue, }; + scanned += 1; let tname = table_name.clone(); - let path2 = path.clone(); + let path2 = resolved.path.clone(); let max_ts: Option = tokio::task::spawn_blocking(move || { let conn = Connection::open(&path2)?; - let table_exists: Option = conn.query_row( - "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", - [&tname], - |row| row.get(0), - ).ok().flatten(); + let table_exists: Option = conn + .query_row( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", + [&tname], + |row| row.get(0), + ) + .ok() + .flatten(); if table_exists.is_none() { return Ok::<_, anyhow::Error>(None); } - let ts: Option = conn.query_row( - &format!("SELECT MAX(create_time) FROM [{}]", tname), - [], - |row| row.get(0), - ).ok().flatten(); + let ts: Option = conn + .query_row( + &format!("SELECT MAX(create_time) FROM [{}]", tname), + [], + |row| row.get(0), + ) + .ok() + .flatten(); Ok(ts) - }).await??; + }) + .await??; if let Some(ts) = max_ts { - results.push((ts, path.clone(), table_name.clone())); + results.push(MessageShard { + rel_key: rel_key.clone(), + path: resolved.path.clone(), + table: table_name.clone(), + max_ts: ts, + cache_mode: resolved.mode, + }); } } // 按最大时间戳降序排列(最新的优先) - results.sort_by_key(|(ts, _, _)| std::cmp::Reverse(*ts)); - Ok(results.into_iter().map(|(_, p, t)| (p, t)).collect()) + results.sort_by_key(|s| std::cmp::Reverse(s.max_ts)); + Ok((results, scanned)) } fn query_messages( @@ -544,23 +905,32 @@ fn query_messages( let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; - let rows = stmt.query_map(params_ref.as_slice(), |row| { - Ok(( - row.get::<_, i64>(0)?, - row.get::<_, i64>(1)?, - row.get::<_, i64>(2)?, - row.get::<_, i64>(3)?, - get_content_bytes(row, 4), - row.get::<_, i64>(5).unwrap_or(0), - )) - })? 
- .filter_map(|r| r.ok()) - .collect::>(); + let rows = stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, i64>(2)?, + row.get::<_, i64>(3)?, + get_content_bytes(row, 4), + row.get::<_, i64>(5).unwrap_or(0), + )) + })? + .filter_map(|r| r.ok()) + .collect::>(); let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); + let sender = sender_label( + real_sender_id, + &content, + is_group, + chat_username, + &id2u, + names_map, + group_nicknames, + ); let text = fmt_content(local_id, local_type, &content, is_group); let url = appmsg_url_for_message(local_type, &content); @@ -595,7 +965,10 @@ fn search_in_table( ) -> Result> { let id2u = load_id2u(conn); // 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符 - let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); + let escaped_kw = keyword + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_"); let search_decoded_content = msg_type == Some(49); let keyword_lower = keyword.to_lowercase(); let mut clauses: Vec = Vec::new(); @@ -620,7 +993,11 @@ fn search_in_table( } else { format!("WHERE {}", clauses.join(" AND ")) }; - let limit_clause = if search_decoded_content { "" } else { " LIMIT ?" }; + let limit_clause = if search_decoded_content { + "" + } else { + " LIMIT ?" 
+ }; let sql = format!( "SELECT local_id, local_type, create_time, real_sender_id, message_content, WCDB_CT_message_content @@ -633,25 +1010,35 @@ fn search_in_table( let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; - let rows = stmt.query_map(params_ref.as_slice(), |row| { - Ok(( - row.get::<_, i64>(0)?, - row.get::<_, i64>(1)?, - row.get::<_, i64>(2)?, - row.get::<_, i64>(3)?, - get_content_bytes(row, 4), - row.get::<_, i64>(5).unwrap_or(0), - )) - })? - .filter_map(|r| r.ok()) - .collect::>(); + let rows = stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, i64>(2)?, + row.get::<_, i64>(3)?, + get_content_bytes(row, 4), + row.get::<_, i64>(5).unwrap_or(0), + )) + })? + .filter_map(|r| r.ok()) + .collect::>(); let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); + let sender = sender_label( + real_sender_id, + &content, + is_group, + chat_username, + &id2u, + names_map, + group_nicknames, + ); let text = fmt_content(local_id, local_type, &content, is_group); - if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { + if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) + { continue; } let url = appmsg_url_for_message(local_type, &content); @@ -697,13 +1084,15 @@ fn contains_search_text(haystack: &str, keyword: &str, keyword_lower: &str) -> b fn load_id2u(conn: &Connection) -> HashMap { let mut map = HashMap::new(); if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") { - let _ = stmt.query_map([], |row| { - Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) - }).map(|rows| { - for r in 
rows.flatten() { - map.insert(r.0, r.1); - } - }); + let _ = stmt + .query_map([], |row| { + Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) + }) + .map(|rows| { + for r in rows.flatten() { + map.insert(r.0, r.1); + } + }); } map } @@ -722,7 +1111,8 @@ async fn load_group_nicknames( tokio::task::spawn_blocking(move || { let conn = Connection::open(&contact_p)?; Ok::<_, anyhow::Error>(load_group_nickname_map_from_conn(&conn, &chat, None)) - }).await? + }) + .await? } async fn load_group_nickname_maps( @@ -745,7 +1135,8 @@ async fn load_group_nickname_maps( } } Ok::<_, anyhow::Error>(out) - }).await? + }) + .await? } fn load_group_nickname_map_from_conn( @@ -770,15 +1161,14 @@ fn load_group_nickname_map_from_conn( .unwrap_or_default() } -fn load_group_ext_buffer( - conn: &Connection, - chat_username: &str, -) -> Option> { +fn load_group_ext_buffer(conn: &Connection, chat_username: &str) -> Option> { [ "SELECT ext_buffer FROM chat_room WHERE username = ? LIMIT 1", "SELECT ext_buffer FROM chat_room WHERE chat_room_name = ? LIMIT 1", "SELECT ext_buffer FROM chat_room WHERE name = ? LIMIT 1", - ].iter().find_map(|sql| { + ] + .iter() + .find_map(|sql| { conn.query_row(sql, [chat_username], |row| row.get::<_, Option>>(0)) .ok() .flatten() @@ -793,28 +1183,38 @@ fn load_group_member_username_set( "SELECT id FROM chat_room WHERE username = ?", "SELECT id FROM chat_room WHERE chat_room_name = ?", "SELECT id FROM chat_room WHERE name = ?", - ].iter().find_map(|sql| { - conn.query_row(sql, [chat_username], |row| row.get::<_, i64>(0)).ok() - }).unwrap_or(0); + ] + .iter() + .find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, i64>(0)) + .ok() + }) + .unwrap_or(0); if room_id == 0 { return None; } - let mut stmt = conn.prepare( - "SELECT c.username + let mut stmt = conn + .prepare( + "SELECT c.username FROM chatroom_member cm LEFT JOIN contact c ON c.id = cm.member_id - WHERE cm.room_id = ?" 
- ).ok()?; - let usernames: HashSet = stmt.query_map([room_id], |row| { - row.get::<_, String>(0) - }).ok()? - .filter_map(|r| r.ok()) - .filter(|uid| !uid.is_empty()) - .collect(); + WHERE cm.room_id = ?", + ) + .ok()?; + let usernames: HashSet = stmt + .query_map([room_id], |row| row.get::<_, String>(0)) + .ok()? + .filter_map(|r| r.ok()) + .filter(|uid| !uid.is_empty()) + .collect(); - if usernames.is_empty() { None } else { Some(usernames) } + if usernames.is_empty() { + None + } else { + Some(usernames) + } } fn decode_proto_varint(raw: &[u8], offset: usize) -> Option<(u64, usize)> { @@ -840,35 +1240,61 @@ fn proto_len_fields<'a>(raw: &'a [u8]) -> Vec<(u64, &'a [u8])> { let mut fields = Vec::new(); let mut idx = 0usize; while idx < raw.len() { - let Some((tag, next)) = decode_proto_varint(raw, idx) else { break; }; - if next <= idx { break; } + let Some((tag, next)) = decode_proto_varint(raw, idx) else { + break; + }; + if next <= idx { + break; + } idx = next; let field_no = tag >> 3; let wire_type = tag & 0x07; match wire_type { 0 => { - let Some((_, next)) = decode_proto_varint(raw, idx) else { break; }; - if next <= idx { break; } + let Some((_, next)) = decode_proto_varint(raw, idx) else { + break; + }; + if next <= idx { + break; + } idx = next; } 1 => { - let Some(next) = idx.checked_add(8) else { break; }; - if next > raw.len() { break; } + let Some(next) = idx.checked_add(8) else { + break; + }; + if next > raw.len() { + break; + } idx = next; } 2 => { - let Some((size, next)) = decode_proto_varint(raw, idx) else { break; }; - if next <= idx { break; } + let Some((size, next)) = decode_proto_varint(raw, idx) else { + break; + }; + if next <= idx { + break; + } idx = next; - let Ok(size) = usize::try_from(size) else { break; }; - let Some(end) = idx.checked_add(size) else { break; }; - if end > raw.len() { break; } + let Ok(size) = usize::try_from(size) else { + break; + }; + let Some(end) = idx.checked_add(size) else { + break; + }; + if end > 
raw.len() { + break; + } fields.push((field_no, &raw[idx..end])); idx = end; } 5 => { - let Some(next) = idx.checked_add(4) else { break; }; - if next > raw.len() { break; } + let Some(next) = idx.checked_add(4) else { + break; + }; + if next > raw.len() { + break; + } idx = next; } _ => break, @@ -912,9 +1338,10 @@ fn looks_like_username(value: &str) -> bool { return false; } let mut chars = value.chars(); - let Some(first) = chars.next() else { return false; }; - first.is_ascii_alphabetic() - && chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') + let Some(first) = chars.next() else { + return false; + }; + first.is_ascii_alphabetic() && chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') } fn pick_member_username( @@ -976,7 +1403,11 @@ fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option, limit: usize, ) -> Vec { - let mut top_senders: Vec = sender_counts.iter() - .map(|(username, count)| json!({ - "sender": sender_display(username, "", names, group_nicknames), - "count": count, - })) + let mut top_senders: Vec = sender_counts + .iter() + .map(|(username, count)| { + json!({ + "sender": sender_display(username, "", names, group_nicknames), + "count": count, + }) + }) .collect(); top_senders.sort_by(|a, b| { - b["count"].as_i64().unwrap_or(0) + b["count"] + .as_i64() + .unwrap_or(0) .cmp(&a["count"].as_i64().unwrap_or(0)) .then_with(|| { - a["sender"].as_str().unwrap_or("") + a["sender"] + .as_str() + .unwrap_or("") .cmp(b["sender"].as_str().unwrap_or("")) }) }); @@ -1192,10 +1633,10 @@ fn parse_revoke(xml: &str) -> Option { return Some("[撤回了一条消息]".into()); } // 尝试简化:如果是 XML 格式的撤回内容,直接显示摘要 - Some(format!("[撤回] {}", inner - .chars() - .take(30) - .collect::())) + Some(format!( + "[撤回] {}", + inner.chars().take(30).collect::() + )) } /// 解析系统消息 XML(群通知等) @@ -1209,7 +1650,10 @@ fn parse_sysmsg(xml: &str) -> Option { } // 纯文本系统消息(无 XML) if !xml.starts_with('<') { - return Some(format!("[系统] {}", 
xml.chars().take(50).collect::())); + return Some(format!( + "[系统] {}", + xml.chars().take(50).collect::() + )); } Some("[系统消息]".into()) } @@ -1237,20 +1681,38 @@ fn parse_appmsg_legacy(text: &str) -> Option { let title = extract_xml_text(text, "title")?; let atype = extract_xml_text(text, "type").unwrap_or_default(); match atype.as_str() { - "6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }), + "6" => Some(if !title.is_empty() { + format!("[文件] {}", title) + } else { + "[文件]".into() + }), "57" => { let ref_content = quote_refermsg_content(text) - .or_else(|| extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40))) + .or_else(|| { + extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40)) + }) .unwrap_or_default(); - let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() }; + let quote = if !title.is_empty() { + format!("[引用] {}", title) + } else { + "[引用]".into() + }; if !ref_content.is_empty() { Some(format!("{}\n \u{21b3} {}", quote, ref_content)) } else { Some(quote) } } - "33" | "36" | "44" => Some(if !title.is_empty() { format!("[小程序] {}", title) } else { "[小程序]".into() }), - _ => Some(if !title.is_empty() { format!("[链接] {}", title) } else { "[链接/文件]".into() }), + "33" | "36" | "44" => Some(if !title.is_empty() { + format!("[小程序] {}", title) + } else { + "[小程序]".into() + }), + _ => Some(if !title.is_empty() { + format!("[链接] {}", title) + } else { + "[链接/文件]".into() + }), } } @@ -1315,7 +1777,9 @@ fn record_item_lines<'a, 'input>(appmsg: Node<'a, 'input>) -> Vec { return lines; } - let Some(record_xml) = xml_text(xml_child(appmsg, "recorditem")).filter(|value| !value.is_empty()) else { + let Some(record_xml) = + xml_text(xml_child(appmsg, "recorditem")).filter(|value| !value.is_empty()) + else { return Vec::new(); }; let unescaped = unescape_html(&record_xml); @@ -1339,8 +1803,11 @@ fn record_item_lines_from_node<'a, 'input>(node: Node<'a, 'input>) -> 
Vec(item: Node<'a, 'input>) -> Option { let name = first_child_text(item, &["sourcename", "datasrcname", "sourceusername"]); - let desc = first_child_text(item, &["datadesc", "datatitle", "datafmt"]) - .or_else(|| item.attribute("datatype").and_then(record_datatype_label).map(str::to_string))?; + let desc = first_child_text(item, &["datadesc", "datatitle", "datafmt"]).or_else(|| { + item.attribute("datatype") + .and_then(record_datatype_label) + .map(str::to_string) + })?; let desc = collapse_text(&desc, 100); if let Some(name) = name.filter(|value| !value.is_empty()) { Some(format!("{}: {}", name, desc)) @@ -1411,7 +1878,10 @@ fn quote_refermsg_type_label(t: &str) -> Option<&'static str> { fn collapse_text(text: &str, max_chars: usize) -> String { let collapsed = text.split_whitespace().collect::>().join(" "); if collapsed.chars().count() > max_chars { - format!("{}...", collapsed.chars().take(max_chars).collect::()) + format!( + "{}...", + collapsed.chars().take(max_chars).collect::() + ) } else { collapsed } @@ -1458,8 +1928,7 @@ fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { } fn extract_favorite_url(content: &str) -> Option { - let url = extract_xml_text(content, "link") - .map(|s| unescape_html(strip_xml_cdata(&s)))?; + let url = extract_xml_text(content, "link").map(|s| unescape_html(strip_xml_cdata(&s)))?; if url.is_empty() || !(url.starts_with("http://") || url.starts_with("https://")) { return None; } @@ -1507,10 +1976,10 @@ fn extract_xml_attr(xml: &str, tag: &str, attr: &str) -> Option { fn unescape_html(s: &str) -> String { s.replace("<", "<") - .replace(">", ">") - .replace("&", "&") - .replace(""", "\"") - .replace("'", "'") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'") } #[cfg(test)] @@ -1533,7 +2002,9 @@ mod appmsg_tests { assert_eq!( parse_appmsg(xml).as_deref(), - Some("[合并聊天记录] 群聊的聊天记录 (2条)\n - 张三: 早上好 & coffee\n - 李四: [图片]") + Some( + "[合并聊天记录] 群聊的聊天记录 (2条)\n - 张三: 早上好 & coffee\n - 
李四: [图片]" + ) ); } @@ -1754,7 +2225,8 @@ mod appmsg_tests { } fn fmt_time(ts: i64, fmt: &str) -> String { - Local.timestamp_opt(ts, 0) + Local + .timestamp_opt(ts, 0) .single() .map(|dt| dt.format(fmt).to_string()) .unwrap_or_else(|| ts.to_string()) @@ -1772,8 +2244,12 @@ pub async fn q_unread( names: &Names, limit: usize, filter: Option>, + with_meta: bool, + debug_source: bool, ) -> Result { - let path = db.get("session/session.db").await? + let path = db + .get("session/session.db") + .await? .context("无法解密 session.db")?; // 归一化 filter:小写 + 去除别名。返回 None 代表"不过滤"。 @@ -1782,59 +2258,81 @@ pub async fn q_unread( for raw in v { match raw.trim().to_lowercase().as_str() { "" | "all" => return None, - "private" => { set.insert("private"); } - "group" => { set.insert("group"); } - "official" | "official_account" => { set.insert("official_account"); } - "folded" | "fold" => { set.insert("folded"); } + "private" => { + set.insert("private"); + } + "group" => { + set.insert("group"); + } + "official" | "official_account" => { + set.insert("official_account"); + } + "folded" | "fold" => { + set.insert("folded"); + } _ => {} // 未知值忽略,避免拼错导致什么都不返回 } } - if set.is_empty() { None } else { Some(set) } + if set.is_empty() { + None + } else { + Some(set) + } }); // 有 filter 时必须全表扫:SQL LIMIT 会把想要的公众号先筛掉。 // 无 filter 时保留 LIMIT,避免重度用户的大量未读会话拖慢默认路径。 let has_filter = filter_set.is_some(); let limit_val = limit; - let rows: Vec<(String, i64, Vec, i64, i64, String, String)> = tokio::task::spawn_blocking(move || { - let conn = Connection::open(&path)?; - let sql = if has_filter { - "SELECT username, unread_count, summary, last_timestamp, + let rows: Vec<(String, i64, Vec, i64, i64, String, String)> = + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path)?; + let sql = if has_filter { + "SELECT username, unread_count, summary, last_timestamp, last_msg_type, last_msg_sender, last_sender_display_name FROM SessionTable WHERE unread_count > 0 ORDER BY last_timestamp DESC" 
- } else { - "SELECT username, unread_count, summary, last_timestamp, + } else { + "SELECT username, unread_count, summary, last_timestamp, last_msg_type, last_msg_sender, last_sender_display_name FROM SessionTable WHERE unread_count > 0 ORDER BY last_timestamp DESC LIMIT ?" - }; - let mut stmt = conn.prepare(sql)?; - let map_row = |row: &rusqlite::Row<'_>| Ok(( - row.get::<_, String>(0)?, - row.get::<_, i64>(1).unwrap_or(0), - get_content_bytes(row, 2), - row.get::<_, i64>(3).unwrap_or(0), - row.get::<_, i64>(4).unwrap_or(0), - row.get::<_, String>(5).unwrap_or_default(), - row.get::<_, String>(6).unwrap_or_default(), - )); - let rows = if has_filter { - stmt.query_map([], map_row)?.collect::>>()? - } else { - stmt.query_map([limit_val as i64], map_row)?.collect::>>()? - }; - Ok::<_, anyhow::Error>(rows) - }).await??; + }; + let mut stmt = conn.prepare(sql)?; + let map_row = |row: &rusqlite::Row<'_>| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, i64>(1).unwrap_or(0), + get_content_bytes(row, 2), + row.get::<_, i64>(3).unwrap_or(0), + row.get::<_, i64>(4).unwrap_or(0), + row.get::<_, String>(5).unwrap_or_default(), + row.get::<_, String>(6).unwrap_or_default(), + )) + }; + let rows = if has_filter { + stmt.query_map([], map_row)? + .collect::>>()? + } else { + stmt.query_map([limit_val as i64], map_row)? + .collect::>>()? 
+ }; + Ok::<_, anyhow::Error>(rows) + }) + .await??; let mut results = Vec::new(); let mut group_nickname_cache: HashMap> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let chat_type = chat_type_of(&username, names); if let Some(ref set) = filter_set { - if !set.contains(chat_type) { continue; } + if !set.contains(chat_type) { + continue; + } + } + if results.len() >= limit { + break; } - if results.len() >= limit { break; } let display = names.display(&username); let is_group = chat_type == "group"; @@ -1842,7 +2340,9 @@ pub async fn q_unread( let summary = strip_group_prefix(&summary); let sender_display = if is_group && !sender.is_empty() { if !group_nickname_cache.contains_key(&username) { - let nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); + let nicknames = load_group_nicknames(db, &username) + .await + .unwrap_or_default(); group_nickname_cache.insert(username.clone(), nicknames); } let empty = HashMap::new(); @@ -1865,14 +2365,35 @@ pub async fn q_unread( })); } let total = results.len(); - Ok(json!({ "sessions": results, "total": total })) + let latest_ts = results + .first() + .and_then(|v| v.get("timestamp")) + .and_then(|v| v.as_i64()); + let unknown_shards = current_unknown_shards(db, names); + let meta = Meta { + chat_latest_timestamp: latest_ts, + chat_latest_db: latest_ts.map(|_| "session/session.db".to_string()), + session_last_timestamp: None, + shards_scanned: 0, + shards_hit: 0, + unknown_shards: unknown_shards.clone(), + status: derive_status(latest_ts, None, &unknown_shards, false), + per_shard_latest: if with_meta || debug_source { + Some(HashMap::new()) + } else { + None + }, + cache_mode_per_shard: None, + shard_paths: None, + }; + Ok(json!({ "sessions": results, "total": total, "meta": meta })) } /// 查询群成员:优先从 contact.db 的 chatroom_member/chat_room 表获取完整列表, /// 若表不存在则退化为从消息记录聚合有发言记录的成员 pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result 
{ - let username = resolve_username(chat, names) - .with_context(|| format!("找不到联系人: {}", chat))?; + let username = + resolve_username(chat, names).with_context(|| format!("找不到联系人: {}", chat))?; if !username.contains("@chatroom") { anyhow::bail!("'{}' 不是群聊,无法查看群成员", names.display(&username)); @@ -1889,11 +2410,13 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result let members_opt: Option> = tokio::task::spawn_blocking(move || { let conn = Connection::open(&contact_p)?; - let has_table: bool = conn.query_row( - "SELECT 1 FROM sqlite_master WHERE type='table' AND name='chatroom_member'", - [], - |_| Ok(true), - ).unwrap_or(false); + let has_table: bool = conn + .query_row( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='chatroom_member'", + [], + |_| Ok(true), + ) + .unwrap_or(false); if !has_table { return Ok::<_, anyhow::Error>(None); @@ -1905,11 +2428,18 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result "SELECT id, owner FROM chat_room WHERE username = ?", "SELECT id, owner FROM chat_room WHERE chat_room_name = ?", "SELECT id, owner FROM chat_room WHERE name = ?", - ].iter().find_map(|sql| { + ] + .iter() + .find_map(|sql| { conn.query_row(sql, [&uname2], |row| { - Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1).unwrap_or_default())) - }).ok() - }).unwrap_or((0, String::new())); + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1).unwrap_or_default(), + )) + }) + .ok() + }) + .unwrap_or((0, String::new())); if room_id == 0 { return Ok::<_, anyhow::Error>(None); @@ -1919,60 +2449,66 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result "SELECT c.username, c.nick_name, c.remark FROM chatroom_member cm LEFT JOIN contact c ON c.id = cm.member_id - WHERE cm.room_id = ?" 
+ WHERE cm.room_id = ?", )?; - let raw: Vec<(String, String, String)> = stmt.query_map([room_id], |row| { - Ok(( - row.get::<_, String>(0).unwrap_or_default(), - row.get::<_, String>(1).unwrap_or_default(), - row.get::<_, String>(2).unwrap_or_default(), - )) - })? - .filter_map(|r| r.ok()) - .filter(|(uid, _, _)| !uid.is_empty()) - .collect(); + let raw: Vec<(String, String, String)> = stmt + .query_map([room_id], |row| { + Ok(( + row.get::<_, String>(0).unwrap_or_default(), + row.get::<_, String>(1).unwrap_or_default(), + row.get::<_, String>(2).unwrap_or_default(), + )) + })? + .filter_map(|r| r.ok()) + .filter(|(uid, _, _)| !uid.is_empty()) + .collect(); if raw.is_empty() { return Ok(None); } - let target_usernames: HashSet = raw.iter() - .map(|(uid, _, _)| uid.clone()) - .collect(); - let group_nicknames = load_group_nickname_map_from_conn( - &conn, - &uname2, - Some(&target_usernames), - ); + let target_usernames: HashSet = + raw.iter().map(|(uid, _, _)| uid.clone()).collect(); + let group_nicknames = + load_group_nickname_map_from_conn(&conn, &uname2, Some(&target_usernames)); - let mut members: Vec = raw.iter().map(|(uid, nick, remark)| { - let contact_display = contact_display(uid, nick, remark, &names_map2); - let group_nickname = group_nicknames.get(uid).cloned().unwrap_or_default(); - let disp = if group_nickname.is_empty() { - contact_display.clone() - } else { - group_nickname.clone() - }; - let is_owner = uid == &owner && !owner.is_empty(); - json!({ - "username": uid, - "display": disp, - "contact_display": contact_display, - "group_nickname": group_nickname, - "is_owner": is_owner, + let mut members: Vec = raw + .iter() + .map(|(uid, nick, remark)| { + let contact_display = contact_display(uid, nick, remark, &names_map2); + let group_nickname = group_nicknames.get(uid).cloned().unwrap_or_default(); + let disp = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; + let is_owner = uid == &owner && 
!owner.is_empty(); + json!({ + "username": uid, + "display": disp, + "contact_display": contact_display, + "group_nickname": group_nickname, + "is_owner": is_owner, + }) }) - }).collect(); + .collect(); // 群主排首位,其余按 display 字典序 members.sort_by(|a, b| { let ao = a["is_owner"].as_bool().unwrap_or(false); let bo = b["is_owner"].as_bool().unwrap_or(false); - if ao != bo { return bo.cmp(&ao); } - a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) + if ao != bo { + return bo.cmp(&ao); + } + a["display"] + .as_str() + .unwrap_or("") + .cmp(b["display"].as_str().unwrap_or("")) }); Ok(Some(members)) - }).await??; + }) + .await??; if let Some(members) = members_opt { return Ok(json!({ @@ -2005,41 +2541,53 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result let conn = Connection::open(&path)?; let id2u = load_id2u(&conn); let mut stmt = conn.prepare(&format!( - "SELECT DISTINCT real_sender_id FROM [{}] WHERE real_sender_id > 0", tname + "SELECT DISTINCT real_sender_id FROM [{}] WHERE real_sender_id > 0", + tname ))?; - let ids: Vec = stmt.query_map([], |row| row.get(0))? + let ids: Vec = stmt + .query_map([], |row| row.get(0))? 
.filter_map(|r| r.ok()) .collect(); - let senders: Vec = ids.iter() + let senders: Vec = ids + .iter() .filter_map(|id| id2u.get(id)) .filter(|u| *u != &uname) .cloned() .collect(); Ok::<_, anyhow::Error>(senders) - }).await??; + }) + .await??; sender_set.extend(senders); } - let group_nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); - let mut members: Vec = sender_set.iter().map(|u| { - let contact_display = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); - let group_nickname = group_nicknames.get(u).cloned().unwrap_or_default(); - let display = if group_nickname.is_empty() { - contact_display.clone() - } else { - group_nickname.clone() - }; - json!({ - "username": u, - "display": display, - "contact_display": contact_display, - "group_nickname": group_nickname, - "is_owner": false, + let group_nicknames = load_group_nicknames(db, &username) + .await + .unwrap_or_default(); + let mut members: Vec = sender_set + .iter() + .map(|u| { + let contact_display = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); + let group_nickname = group_nicknames.get(u).cloned().unwrap_or_default(); + let display = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; + json!({ + "username": u, + "display": display, + "contact_display": contact_display, + "group_nickname": group_nickname, + "is_owner": false, + }) }) - }).collect(); + .collect(); members.sort_by(|a, b| { - a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) + a["display"] + .as_str() + .unwrap_or("") + .cmp(b["display"].as_str().unwrap_or("")) }); Ok(json!({ @@ -2057,37 +2605,46 @@ pub async fn q_new_messages( names: &Names, state: Option>, limit: usize, + with_meta: bool, + debug_source: bool, ) -> Result { // 首次运行(state=None)或未见过的会话,用 24h 前作为起点, // 避免第一次运行时把全量历史消息涌入 let fallback_ts = chrono::Utc::now().timestamp() - 86400; // 1. 
从 session.db 读取所有会话的当前 last_timestamp - let session_path = db.get("session/session.db").await? + let session_path = db + .get("session/session.db") + .await? .context("无法解密 session.db")?; let all_sessions: Vec<(String, i64)> = tokio::task::spawn_blocking(move || { let conn = Connection::open(&session_path)?; let mut stmt = conn.prepare( - "SELECT username, last_timestamp FROM SessionTable WHERE last_timestamp > 0" + "SELECT username, last_timestamp FROM SessionTable WHERE last_timestamp > 0", )?; - let rows = stmt.query_map([], |row| { - Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1).unwrap_or(0))) - })? - .collect::>>()?; + let rows = stmt + .query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1).unwrap_or(0))) + })? + .collect::>>()?; Ok::<_, anyhow::Error>(rows) - }).await??; + }) + .await??; // 2. 记录 session.db 的当前快照(用于构建 new_state 基础) - let session_ts_map: HashMap = all_sessions.iter() + let session_ts_map: HashMap = all_sessions + .iter() .map(|(u, ts)| (u.clone(), *ts)) .collect(); // 3. 
找出有新消息的会话 // 不在 state 中的会话(首次运行或新会话)以 fallback_ts 为基准 - let changed: Vec<(String, i64)> = all_sessions.into_iter() + let changed: Vec<(String, i64)> = all_sessions + .into_iter() .filter(|(uname, ts)| { - let last_known = state.as_ref() + let last_known = state + .as_ref() .and_then(|m| m.get(uname)) .copied() .unwrap_or(fallback_ts); @@ -2095,11 +2652,24 @@ pub async fn q_new_messages( }) .collect(); + let unknown_shards = current_unknown_shards(db, names); + if changed.is_empty() { + let meta = meta_for_global_query( + 0, + 0, + unknown_shards, + true, + with_meta, + debug_source, + Some(HashMap::new()), + Some(HashMap::new()), + ); return Ok(json!({ "count": 0, "messages": [], "new_state": session_ts_map, + "meta": meta, })); } @@ -2107,14 +2677,29 @@ pub async fn q_new_messages( // per_table_limit 取 limit*5 防止单表截断,最终由全局 truncate 收尾 let per_table_limit = limit.saturating_mul(5).max(200); let mut all_msgs: Vec = Vec::new(); + let mut scanned_rel_keys: HashSet = HashSet::new(); + let mut hit_rel_keys: HashSet = HashSet::new(); + let mut cache_modes: HashMap = HashMap::new(); + let mut shard_paths: HashMap = HashMap::new(); for (uname, _) in &changed { - let since_ts = state.as_ref() + let since_ts = state + .as_ref() .and_then(|m| m.get(uname)) .copied() .unwrap_or(fallback_ts); - let tables = find_msg_tables(db, names, uname).await?; - if tables.is_empty() { continue; } + let (shards, _) = find_msg_shards(db, names, uname).await?; + if shards.is_empty() { + continue; + } + for shard in &shards { + scanned_rel_keys.insert(shard.rel_key.clone()); + cache_modes.insert(shard.rel_key.clone(), shard.cache_mode.as_str().to_string()); + shard_paths.insert( + shard.rel_key.clone(), + shard.path.to_string_lossy().into_owned(), + ); + } let display = names.display(uname); let chat_type = chat_type_of(uname, names); @@ -2125,14 +2710,15 @@ pub async fn q_new_messages( HashMap::new() }; - for (db_path, table_name) in &tables { - let path = db_path.clone(); - let tname = 
table_name.clone(); + for shard in &shards { + let path = shard.path.clone(); + let tname = shard.table.clone(); let uname2 = uname.clone(); let display2 = display.clone(); let names_map = names.map.clone(); let group_nicknames2 = group_nicknames.clone(); let tname_for_log = tname.clone(); + let rel_key_for_hit = shard.rel_key.clone(); let msgs: Vec = match tokio::task::spawn_blocking(move || { let conn = Connection::open(&path)?; @@ -2144,26 +2730,35 @@ pub async fn q_new_messages( FROM [{}] WHERE create_time > ? ORDER BY create_time ASC LIMIT ?", tname ); - let rows: Vec<_> = conn.prepare(&sql) + let rows: Vec<_> = conn + .prepare(&sql) .and_then(|mut stmt| { - stmt.query_map( - rusqlite::params![since_ts, per_table_limit as i64], - |row| Ok(( + stmt.query_map(rusqlite::params![since_ts, per_table_limit as i64], |row| { + Ok(( row.get::<_, i64>(0)?, row.get::<_, i64>(1)?, row.get::<_, i64>(2)?, row.get::<_, i64>(3)?, get_content_bytes(row, 4), row.get::<_, i64>(5).unwrap_or(0), - )), - ).map(|it| it.filter_map(|r| r.ok()).collect()) + )) + }) + .map(|it| it.filter_map(|r| r.ok()).collect()) }) .unwrap_or_default(); let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); + let sender = sender_label( + real_sender_id, + &content, + is_group, + &uname2, + &id2u, + &names_map, + &group_nicknames2, + ); let text = fmt_content(local_id, local_type, &content, is_group); let url = appmsg_url_for_message(local_type, &content); let mut msg = json!({ @@ -2183,12 +2778,23 @@ pub async fn q_new_messages( result.push(msg); } Ok::<_, anyhow::Error>(result) - }).await { + }) + .await + { Ok(Ok(v)) => v, - Ok(Err(e)) => { eprintln!("[new-messages] skip {}: {}", tname_for_log, e); continue; } - Err(e) => { eprintln!("[new-messages] task error: {}", e); continue; } + 
Ok(Err(e)) => { + eprintln!("[new-messages] skip {}: {}", tname_for_log, e); + continue; + } + Err(e) => { + eprintln!("[new-messages] task error: {}", e); + continue; + } }; + if !msgs.is_empty() { + hit_rel_keys.insert(rel_key_for_hit); + } all_msgs.extend(msgs); } } @@ -2209,7 +2815,9 @@ pub async fn q_new_messages( for msg in &all_msgs { if let (Some(u), Some(ts)) = (msg["username"].as_str(), msg["timestamp"].as_i64()) { let e = m.entry(u.to_string()).or_insert(0); - if ts > *e { *e = ts; } + if ts > *e { + *e = ts; + } } } m @@ -2224,7 +2832,7 @@ pub async fn q_new_messages( // 这样下次查 `since > returned_max` 仍能拿到 returned_max..session_ts 的截断尾巴。 returned_max_ts[uname] } - (false, Some(prev)) => prev, // 后续 + 截断:保持旧 since + (false, Some(prev)) => prev, // 后续 + 截断:保持旧 since (false, None) => { // 首次 + 截断:advance 到 session_ts 兜底,避免 since_ts 锁死。 new_state.get(uname).copied().unwrap_or(fallback_ts) @@ -2233,10 +2841,22 @@ pub async fn q_new_messages( new_state.insert(uname.clone(), next_ts); } + let meta = meta_for_global_query( + scanned_rel_keys.len(), + hit_rel_keys.len(), + unknown_shards, + true, + with_meta, + debug_source, + Some(cache_modes), + Some(shard_paths), + ); + Ok(json!({ "count": all_msgs.len(), "messages": all_msgs, "new_state": new_state, + "meta": meta, })) } @@ -2247,7 +2867,9 @@ pub async fn q_favorites( fav_type: Option, query: Option, ) -> Result { - let path = db.get("favorite/favorite.db").await? + let path = db + .get("favorite/favorite.db") + .await? 
.context("找不到 favorite.db,请确认微信数据目录")?; let rows: Vec = tokio::task::spawn_blocking(move || { @@ -2261,7 +2883,10 @@ pub async fn q_favorites( params.push(Box::new(t)); } let like_str: Option = query.map(|q| { - let esc = q.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); + let esc = q + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_"); format!("%{}%", esc) }); if let Some(ref s) = like_str { @@ -2282,58 +2907,61 @@ pub async fn q_favorites( where_clause ); - let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; - let rows: Vec = stmt.query_map(params_ref.as_slice(), |row| { - Ok(( - row.get::<_, i64>(0).unwrap_or(0), - row.get::<_, i64>(1).unwrap_or(0), - row.get::<_, i64>(2).unwrap_or(0), - row.get::<_, String>(3).unwrap_or_default(), - row.get::<_, String>(4).unwrap_or_default(), - row.get::<_, String>(5).unwrap_or_default(), - )) - })? 
- .filter_map(|r| r.ok()) - .map(|(local_id, ftype, ts, content, fromusr, chatname)| { - let type_str = match ftype { - 1 => "文本", - 2 => "图片", - 5 => "文章", - 19 => "名片", - 20 => "视频", - _ => "其他", - }; - // 安全截断(按 Unicode 字符而非字节) - let preview: String = content.chars().take(100).collect(); - let preview = if content.chars().count() > 100 { - format!("{}...", preview) - } else { - preview - }; - // WeChat 部分版本的 update_time 为毫秒,10位以上判定为毫秒后转秒 - let ts_secs = if ts > 9_999_999_999 { ts / 1000 } else { ts }; - let mut item = json!({ - "id": local_id, - "type": type_str, - "type_num": ftype, - "time": fmt_time(ts_secs, "%Y-%m-%d %H:%M"), - "timestamp": ts_secs, - "preview": preview, - "from": fromusr, - "chat": chatname, - }); - if ftype == 5 { - if let Some(url) = extract_favorite_url(&content) { - item["url"] = Value::String(url); + let rows: Vec = stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + row.get::<_, i64>(0).unwrap_or(0), + row.get::<_, i64>(1).unwrap_or(0), + row.get::<_, i64>(2).unwrap_or(0), + row.get::<_, String>(3).unwrap_or_default(), + row.get::<_, String>(4).unwrap_or_default(), + row.get::<_, String>(5).unwrap_or_default(), + )) + })? 
+ .filter_map(|r| r.ok()) + .map(|(local_id, ftype, ts, content, fromusr, chatname)| { + let type_str = match ftype { + 1 => "文本", + 2 => "图片", + 5 => "文章", + 19 => "名片", + 20 => "视频", + _ => "其他", + }; + // 安全截断(按 Unicode 字符而非字节) + let preview: String = content.chars().take(100).collect(); + let preview = if content.chars().count() > 100 { + format!("{}...", preview) + } else { + preview + }; + // WeChat 部分版本的 update_time 为毫秒,10位以上判定为毫秒后转秒 + let ts_secs = if ts > 9_999_999_999 { ts / 1000 } else { ts }; + let mut item = json!({ + "id": local_id, + "type": type_str, + "type_num": ftype, + "time": fmt_time(ts_secs, "%Y-%m-%d %H:%M"), + "timestamp": ts_secs, + "preview": preview, + "from": fromusr, + "chat": chatname, + }); + if ftype == 5 { + if let Some(url) = extract_favorite_url(&content) { + item["url"] = Value::String(url); + } } - } - item - }) - .collect(); + item + }) + .collect(); Ok::<_, anyhow::Error>(rows) - }).await??; + }) + .await??; Ok(json!({ "count": rows.len(), @@ -2348,15 +2976,17 @@ pub async fn q_stats( chat: &str, since: Option, until: Option, + with_meta: bool, + debug_source: bool, ) -> Result { - let username = resolve_username(chat, names) - .with_context(|| format!("找不到联系人: {}", chat))?; + let username = + resolve_username(chat, names).with_context(|| format!("找不到联系人: {}", chat))?; let display = names.display(&username); let chat_type = chat_type_of(&username, names); let is_group = chat_type == "group"; - let tables = find_msg_tables(db, names, &username).await?; - if tables.is_empty() { + let (shards, scanned) = find_msg_shards(db, names, &username).await?; + if shards.is_empty() { anyhow::bail!("找不到 {} 的消息记录", display); } @@ -2366,14 +2996,17 @@ pub async fn q_stats( let mut sender_counts: HashMap = HashMap::new(); let mut hour_counts = [0i64; 24]; let group_nicknames = if is_group { - load_group_nicknames(db, &username).await.unwrap_or_default() + load_group_nicknames(db, &username) + .await + .unwrap_or_default() } else { 
HashMap::new() }; + let mut shard_hits = 0usize; - for (db_path, table_name) in &tables { - let path = db_path.clone(); - let tname = table_name.clone(); + for shard in &shards { + let path = shard.path.clone(); + let tname = shard.table.clone(); let uname = username.clone(); let is_group2 = is_group; @@ -2474,14 +3107,24 @@ pub async fn q_stats( }).await??; let (count, type_c, sender_c, hour_c) = result; + if count > 0 { + shard_hits += 1; + } total += count; - for (k, v) in type_c { *type_counts.entry(k).or_insert(0) += v; } - for (k, v) in sender_c { *sender_counts.entry(k).or_insert(0) += v; } - for i in 0..24 { hour_counts[i] += hour_c[i]; } + for (k, v) in type_c { + *type_counts.entry(k).or_insert(0) += v; + } + for (k, v) in sender_c { + *sender_counts.entry(k).or_insert(0) += v; + } + for i in 0..24 { + hour_counts[i] += hour_c[i]; + } } // 类型分布,按数量降序 - let mut by_type: Vec = type_counts.iter() + let mut by_type: Vec = type_counts + .iter() .map(|(t, c)| json!({ "type": t, "count": c })) .collect(); by_type.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); @@ -2490,9 +3133,24 @@ pub async fn q_stats( let top_senders = group_top_senders(&sender_counts, &names.map, &group_nicknames, 10); // 24小时分布 - let by_hour: Vec = hour_counts.iter().enumerate() + let by_hour: Vec = hour_counts + .iter() + .enumerate() .map(|(h, c)| json!({ "hour": h, "count": c })) .collect(); + let windowed = since.is_some() || until.is_some(); + let unknown_shards = current_unknown_shards(db, names); + let session_ts = session_last_timestamp(db, &username).await; + let meta = meta_for_shards( + scanned, + &shards, + shard_hits, + unknown_shards, + session_ts, + windowed, + with_meta, + debug_source, + ); Ok(json!({ "chat": display, @@ -2503,10 +3161,10 @@ pub async fn q_stats( "by_type": by_type, "top_senders": top_senders, "by_hour": by_hour, + "meta": meta, })) } - /// 查询朋友圈互动通知(点赞 + 评论),对应微信 app 右上角的红点入口。 /// 空 `content` 是点赞,非空是评论正文。 pub async fn 
q_sns_notifications( @@ -2517,8 +3175,7 @@ pub async fn q_sns_notifications( until: Option, include_read: bool, ) -> Result { - let path = db.get("sns/sns.db").await? - .context("无法解密 sns.db")?; + let path = db.get("sns/sns.db").await?.context("无法解密 sns.db")?; let path2 = path.clone(); type Row = (i64, i64, i64, i64, String, String, String); @@ -2548,20 +3205,25 @@ pub async fn q_sns_notifications( where_clause ); params.push(Box::new(limit as i64)); - let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; - let rows = stmt.query_map(params_ref.as_slice(), |row| Ok(( - row.get::<_, i64>(0)?, - row.get::<_, i64>(1)?, - row.get::<_, i64>(2).unwrap_or(0), - row.get::<_, i64>(3).unwrap_or(0), - row.get::<_, String>(4).unwrap_or_default(), - row.get::<_, String>(5).unwrap_or_default(), - row.get::<_, String>(6).unwrap_or_default(), - )))? - .collect::>>()?; + let rows = stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, i64>(2).unwrap_or(0), + row.get::<_, i64>(3).unwrap_or(0), + row.get::<_, String>(4).unwrap_or_default(), + row.get::<_, String>(5).unwrap_or_default(), + row.get::<_, String>(6).unwrap_or_default(), + )) + })? 
+ .collect::>>()?; Ok::<_, anyhow::Error>(rows) - }).await??; + }) + .await??; // 一次性取出涉及的 feed 原帖,避免 N+1 查询 let feed_ids: Vec = { @@ -2577,13 +3239,18 @@ pub async fn q_sns_notifications( return Ok::<_, anyhow::Error>(HashMap::new()); } let conn = Connection::open(&path3)?; - let placeholders = std::iter::repeat("?").take(feed_ids_clone.len()).collect::>().join(","); + let placeholders = std::iter::repeat("?") + .take(feed_ids_clone.len()) + .collect::>() + .join(","); let sql = format!( "SELECT tid, user_name, content FROM SnsTimeLine WHERE tid IN ({})", placeholders ); - let params: Vec<&dyn rusqlite::types::ToSql> = - feed_ids_clone.iter().map(|id| id as &dyn rusqlite::types::ToSql).collect(); + let params: Vec<&dyn rusqlite::types::ToSql> = feed_ids_clone + .iter() + .map(|id| id as &dyn rusqlite::types::ToSql) + .collect(); let mut stmt = conn.prepare(&sql)?; let mut map = HashMap::new(); let mut rows2 = stmt.query(params.as_slice())?; @@ -2603,19 +3270,22 @@ pub async fn q_sns_notifications( map.insert(tid, (author, preview)); } Ok(map) - }).await??; + }) + .await??; let mut out = Vec::with_capacity(rows.len()); for (_local_id, ct, _typ, fid, from_u, from_nick, content) in rows { - let kind = if content.trim().is_empty() { "like" } else { "comment" }; + let kind = if content.trim().is_empty() { + "like" + } else { + "comment" + }; let display = if !from_nick.is_empty() { from_nick.clone() } else { names.display(&from_u) }; - let (feed_author_u, feed_preview) = feeds.get(&fid) - .cloned() - .unwrap_or_default(); + let (feed_author_u, feed_preview) = feeds.get(&fid).cloned().unwrap_or_default(); let feed_author_display = if feed_author_u.is_empty() { String::new() } else { @@ -2687,8 +3357,8 @@ fn insert_media_i64(out: &mut serde_json::Map, key: &str, value: /// 字段名与 artifacts 仓库 `wechat_sns_dump.py::_parse_media` 对齐, /// 便于跨实现 diff。缺失字段直接省略(不输出 null),供下游代理图片 / 离线渲染。 fn parse_media_from_timeline(timeline: Node) -> Vec { - let Some(media_list) = 
xml_child(timeline, "ContentObject") - .and_then(|node| xml_child(node, "mediaList")) + let Some(media_list) = + xml_child(timeline, "ContentObject").and_then(|node| xml_child(node, "mediaList")) else { return Vec::new(); }; @@ -2748,7 +3418,9 @@ fn parse_media_from_timeline(timeline: Node) -> Vec { /// `parse_post_xml`,那边已经把整份 doc parse 一次直接复用 timeline 节点。 #[cfg(test)] fn parse_post_media(xml: &str) -> Vec { - let Ok(doc) = Document::parse(xml) else { return Vec::new(); }; + let Ok(doc) = Document::parse(xml) else { + return Vec::new(); + }; let Some(timeline) = doc.descendants().find(|n| n.has_tag_name("TimelineObject")) else { return Vec::new(); }; @@ -2826,7 +3498,14 @@ fn parse_post_xml(tid: i64, user_name_column: &str, content: &str) -> ParsedPost .map(str::to_string) .unwrap_or_default(); - ParsedPost { tid, create_time, author_username, content: text, media, location } + ParsedPost { + tid, + create_time, + author_username, + content: text, + media, + location, + } } fn post_to_value(p: ParsedPost, names: &Names) -> Value { @@ -2857,15 +3536,13 @@ pub async fn q_sns_feed( until: Option, user: Option<&str>, ) -> Result { - let path = db.get("sns/sns.db").await? - .context("无法解密 sns.db")?; + let path = db.get("sns/sns.db").await?.context("无法解密 sns.db")?; let limit = limit.min(SNS_MAX_LIMIT); let user_uname = match user { - Some(q) => Some( - resolve_username(q, names) - .with_context(|| format!("找不到联系人: {}", q))?, - ), + Some(q) => { + Some(resolve_username(q, names).with_context(|| format!("找不到联系人: {}", q))?) 
+ } None => None, }; @@ -2909,7 +3586,10 @@ pub async fn q_sns_feed( Ok::<_, anyhow::Error>(out) }).await??; - let posts: Vec = parsed.into_iter().map(|p| post_to_value(p, names)).collect(); + let posts: Vec = parsed + .into_iter() + .map(|p| post_to_value(p, names)) + .collect(); let total = posts.len(); Ok(json!({ "posts": posts, "total": total })) } @@ -2927,15 +3607,13 @@ pub async fn q_sns_search( if keyword.trim().is_empty() { anyhow::bail!("搜索关键词不能为空"); } - let path = db.get("sns/sns.db").await? - .context("无法解密 sns.db")?; + let path = db.get("sns/sns.db").await?.context("无法解密 sns.db")?; let limit = limit.min(SNS_MAX_LIMIT); let user_uname = match user { - Some(q) => Some( - resolve_username(q, names) - .with_context(|| format!("找不到联系人: {}", q))?, - ), + Some(q) => { + Some(resolve_username(q, names).with_context(|| format!("找不到联系人: {}", q))?) + } None => None, }; @@ -2984,7 +3662,10 @@ pub async fn q_sns_search( Ok::<_, anyhow::Error>(out) }).await??; - let posts: Vec = parsed.into_iter().map(|p| post_to_value(p, names)).collect(); + let posts: Vec = parsed + .into_iter() + .map(|p| post_to_value(p, names)) + .collect(); let total = posts.len(); Ok(json!({ "keyword": keyword, "posts": posts, "total": total })) } @@ -3015,9 +3696,13 @@ fn parse_biz_xml_items(recv_time: i64, account_username: &str, xml: &str) -> Vec let mut items = Vec::new(); let mut search_from = 0; loop { - let Some(item_start) = xml[search_from..].find("") else { break; }; + let Some(item_start) = xml[search_from..].find("") else { + break; + }; let abs_start = search_from + item_start; - let Some(item_end) = xml[abs_start..].find("") else { break; }; + let Some(item_end) = xml[abs_start..].find("") else { + break; + }; let abs_end = abs_start + item_end + 7; let item_xml = &xml[abs_start..abs_end]; @@ -3072,7 +3757,11 @@ fn extract_cdata(xml: &str, tag: &str) -> Option { body }; let content = content.trim(); - if content.is_empty() { None } else { Some(content.to_string()) } + if 
content.is_empty() { + None + } else { + Some(content.to_string()) + } } else if inner.is_empty() { None } else { @@ -3092,28 +3781,33 @@ pub async fn q_biz_articles( until: Option, unread: bool, ) -> Result { - let biz_path = db.get("message/biz_message_0.db").await? - .context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")? -; + let biz_path = db + .get("message/biz_message_0.db") + .await? + .context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")?; // 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集, // 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。 let unread_usernames: Option> = if unread { - let session_path = db.get("session/session.db").await? + let session_path = db + .get("session/session.db") + .await? .context("无法解密 session.db")?; let session_path2 = session_path.clone(); let unread_rows: Vec = tokio::task::spawn_blocking(move || { let conn = Connection::open(&session_path2)?; - let mut stmt = conn.prepare( - "SELECT username FROM SessionTable WHERE unread_count > 0" - )?; - let rows: Vec = stmt.query_map([], |row| row.get::<_, String>(0))? + let mut stmt = + conn.prepare("SELECT username FROM SessionTable WHERE unread_count > 0")?; + let rows: Vec = stmt + .query_map([], |row| row.get::<_, String>(0))? .filter_map(|r| r.ok()) .collect(); Ok::<_, anyhow::Error>(rows) - }).await??; + }) + .await??; // 仅保留公众号类型的未读会话 - let set: std::collections::HashSet = unread_rows.into_iter() + let set: std::collections::HashSet = unread_rows + .into_iter() .filter(|u| chat_type_of(u, names) == "official_account") .collect(); if set.is_empty() { @@ -3129,24 +3823,28 @@ pub async fn q_biz_articles( let biz_path2 = biz_path.clone(); let id2username: HashMap = tokio::task::spawn_blocking(move || { let conn = Connection::open(&biz_path2)?; - let mut stmt = conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")? - ; - let rows = stmt.query_map([], |row| { - Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) - })? 
- .collect::>>()?; + let mut stmt = + conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")?; + let rows = stmt + .query_map([], |row| { + Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) + })? + .collect::>>()?; Ok::<_, anyhow::Error>(rows.into_iter().collect()) - }).await??; + }) + .await??; // 构建 md5(username) -> username 映射 - let md5_to_uname: HashMap = id2username.values() + let md5_to_uname: HashMap = id2username + .values() .map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone())) .collect(); // 2. 如果 指定了 --account,找到匹配的 username 列表 let account_low = account.as_deref().map(|s| s.to_lowercase()); let mut target_usernames: Option> = account_low.as_ref().map(|low| { - id2username.values() + id2username + .values() .filter(|u| { let display = names.display(u); display.to_lowercase().contains(low.as_str()) @@ -3159,13 +3857,18 @@ pub async fn q_biz_articles( // --unread 与 --account 取交集(进一步缩小范围) if let Some(ref unread_set) = unread_usernames { target_usernames = Some(match target_usernames.take() { - Some(acc_list) => acc_list.into_iter() + Some(acc_list) => acc_list + .into_iter() .filter(|u| unread_set.contains(u)) .collect(), None => unread_set.iter().cloned().collect(), }); // 交集为空 → 提前返回 - if target_usernames.as_ref().map(|v| v.is_empty()).unwrap_or(false) { + if target_usernames + .as_ref() + .map(|v| v.is_empty()) + .unwrap_or(false) + { return Ok(json!({ "count": 0, "articles": [] })); } } @@ -3175,7 +3878,8 @@ pub async fn q_biz_articles( let since2 = since; let until2 = until; let target_hashes: Option> = target_usernames.as_ref().map(|unames| { - unames.iter() + unames + .iter() .map(|u| format!("{:x}", md5::compute(u.as_bytes()))) .collect() }); @@ -3184,10 +3888,10 @@ pub async fn q_biz_articles( let conn = Connection::open(&biz_path3)?; // 列出所有 Msg_ 表 - let mut stmt = conn.prepare( - "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'" - )?; - let table_names: Vec = stmt.query_map([], 
|row| row.get(0))? + let mut stmt = conn + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'")?; + let table_names: Vec = stmt + .query_map([], |row| row.get(0))? .filter_map(|r| r.ok()) .collect(); @@ -3195,12 +3899,16 @@ pub async fn q_biz_articles( let mut all_rows: Vec<(String, i64, i64, Vec, i64)> = Vec::new(); for tname in &table_names { - if !re.is_match(tname) { continue; } + if !re.is_match(tname) { + continue; + } let hash = &tname[4..]; // account 过滤 if let Some(ref hashes) = target_hashes { - if !hashes.iter().any(|h| h == hash) { continue; } + if !hashes.iter().any(|h| h == hash) { + continue; + } } let username = md5_to_uname.get(hash).cloned().unwrap_or_default(); @@ -3226,7 +3934,8 @@ pub async fn q_biz_articles( tname, where_clause ); - let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); if let Ok(mut inner_stmt) = conn.prepare(&sql) { let msg_rows: Vec<_> = inner_stmt .query_map(params_ref.as_slice(), |row| { @@ -3244,13 +3953,16 @@ pub async fn q_biz_articles( } } Ok::<_, anyhow::Error>(all_rows) - }).await??; + }) + .await??; // 4. 
解压并解析 XML let mut articles: Vec = Vec::new(); for (username, recv_time, ct, content_bytes, _) in rows { let content = decompress_message(&content_bytes, ct); - if content.is_empty() { continue; } + if content.is_empty() { + continue; + } let items = parse_biz_xml_items(recv_time, &username, &content); articles.extend(items); } @@ -3266,21 +3978,24 @@ pub async fn q_biz_articles( articles.truncate(limit); - let results: Vec = articles.into_iter().map(|a| { - let account_display = names.display(&a.account_username); - json!({ - "time": fmt_time(a.pub_time, "%Y-%m-%d %H:%M"), - "timestamp": a.pub_time, - "recv_time": a.recv_time, - "recv_time_str": fmt_time(a.recv_time, "%Y-%m-%d %H:%M"), - "account": account_display, - "account_username": a.account_username, - "title": a.title, - "url": a.url, - "digest": a.digest, - "cover_url": a.cover, + let results: Vec = articles + .into_iter() + .map(|a| { + let account_display = names.display(&a.account_username); + json!({ + "time": fmt_time(a.pub_time, "%Y-%m-%d %H:%M"), + "timestamp": a.pub_time, + "recv_time": a.recv_time, + "recv_time_str": fmt_time(a.recv_time, "%Y-%m-%d %H:%M"), + "account": account_display, + "account_username": a.account_username, + "title": a.title, + "url": a.url, + "digest": a.digest, + "cover_url": a.cover, + }) }) - }).collect(); + .collect(); Ok(json!({ "count": results.len(), "articles": results })) } @@ -3307,11 +4022,13 @@ pub async fn q_attachments( offset: usize, since: Option, until: Option, + with_meta: bool, + debug_source: bool, ) -> Result { use crate::attachment::{AttachmentId, AttachmentKind}; - let username = resolve_username(chat, names) - .with_context(|| format!("找不到联系人: {}", chat))?; + let username = + resolve_username(chat, names).with_context(|| format!("找不到联系人: {}", chat))?; let display = names.display(&username); let chat_type = chat_type_of(&username, names); let is_group = chat_type == "group"; @@ -3323,27 +4040,29 @@ pub async fn q_attachments( } let lo32_types: Vec = 
kind_filters.iter().map(|(_, t)| *t).collect(); // local_type → AttachmentKind 反查(mask 完后定 kind) - let type_to_kind: HashMap = kind_filters.iter() - .map(|(k, t)| (*t, *k)) - .collect(); + let type_to_kind: HashMap = + kind_filters.iter().map(|(k, t)| (*t, *k)).collect(); - let tables = find_msg_tables(db, names, &username).await?; - if tables.is_empty() { + let (shards, scanned) = find_msg_shards(db, names, &username).await?; + if shards.is_empty() { anyhow::bail!("找不到 {} 的消息记录", display); } // 群聊需要 sender 显示名 let group_nicknames = if is_group { - load_group_nicknames(db, &username).await.unwrap_or_default() + load_group_nicknames(db, &username) + .await + .unwrap_or_default() } else { HashMap::new() }; let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new(); + let mut shard_hits = 0usize; // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx) - for (db_idx, (db_path, table_name)) in tables.iter().enumerate() { - let path = db_path.clone(); - let tname = table_name.clone(); + for (db_idx, shard) in shards.iter().enumerate() { + let path = shard.path.clone(); + let tname = shard.table.clone(); let uname = username.clone(); let is_group2 = is_group; let names_map = names.map.clone(); @@ -3361,11 +4080,15 @@ pub async fn q_attachments( let id2u = load_id2u(&conn); // local_type 在 DB 里可能带高位 flag,过滤要 mask 低 32 bit - let placeholders = lo32_types2.iter().map(|_| "?").collect::>().join(","); - let mut clauses: Vec = vec![ - format!("(local_type & 4294967295) IN ({})", placeholders), - ]; - let mut params: Vec> = lo32_types2.iter() + let placeholders = lo32_types2 + .iter() + .map(|_| "?") + .collect::>() + .join(","); + let mut clauses: Vec = + vec![format!("(local_type & 4294967295) IN ({})", placeholders)]; + let mut params: Vec> = lo32_types2 + .iter() .map(|t| Box::new(*t) as Box) .collect(); if let Some(s) = since2 { @@ -3400,8 +4123,15 @@ pub async fn q_attachments( let ct: i64 = row.get::<_, 
i64>(5).unwrap_or(0); let content = decompress_message(&content_bytes, ct); let sender = if is_group2 { - sender_label(real_sender_id, &content, true, &uname, - &id2u, &names_map, &group_nicknames2) + sender_label( + real_sender_id, + &content, + true, + &uname, + &id2u, + &names_map, + &group_nicknames2, + ) } else { String::new() }; @@ -3412,6 +4142,9 @@ pub async fn q_attachments( Ok::<_, anyhow::Error>(rows) }) .await??; + if !rows.is_empty() { + shard_hits += 1; + } all_rows.extend(rows); } @@ -3422,7 +4155,9 @@ pub async fn q_attachments( // 翻成 JSON let mut results: Vec = Vec::with_capacity(paged.len()); for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged { - let kind = type_to_kind.get(&lo32).copied() + let kind = type_to_kind + .get(&lo32) + .copied() .unwrap_or(AttachmentKind::Image); // 理论不会 fallthrough let id = AttachmentId { v: 1, @@ -3447,6 +4182,18 @@ pub async fn q_attachments( } results.push(row); } + let unknown_shards = current_unknown_shards(db, names); + let session_ts = session_last_timestamp(db, &username).await; + let meta = meta_for_shards( + scanned, + &shards, + shard_hits, + unknown_shards, + session_ts, + true, + with_meta, + debug_source, + ); Ok(json!({ "chat": display, @@ -3455,6 +4202,7 @@ pub async fn q_attachments( "chat_type": chat_type, "count": results.len(), "attachments": results, + "meta": meta, })) } @@ -3469,8 +4217,7 @@ pub async fn q_extract( use crate::attachment::{ attachment_id::AttachmentId, decoder::{self, V2KeyMaterial}, - image_key, - resolver, + image_key, resolver, }; let id = AttachmentId::decode(attachment_id) @@ -3485,17 +4232,22 @@ pub async fn q_extract( } if let Some(parent) = output_path.parent() { if !parent.as_os_str().is_empty() { - tokio::fs::create_dir_all(parent).await + tokio::fs::create_dir_all(parent) + .await .with_context(|| format!("创建输出目录失败:{}", parent.display()))?; } } // 1) 拿 message_resource.db - let resource_path = db.get("message/message_resource.db").await? 
+ let resource_path = db + .get("message/message_resource.db") + .await? .context("无法解密 message_resource.db(请确认 all_keys.json 包含该 DB 的密钥)")?; // 2) 推 wxchat_base = db_dir.parent(),再拼 attach_root - let wxchat_base = db.db_dir().parent() + let wxchat_base = db + .db_dir() + .parent() .ok_or_else(|| anyhow::anyhow!("db_dir 没有 parent,无法推断 xwechat_files 根目录"))? .to_path_buf(); let attach_root = resolver::attach_root_for(&wxchat_base); @@ -3520,7 +4272,8 @@ pub async fn q_extract( let provider = image_key::default_provider(); let key_material = if let Some(p) = provider.as_ref() { // 从 wxchat_base 末段拿 wxid - let wxid = wxchat_base2.file_name() + let wxid = wxchat_base2 + .file_name() .and_then(|s| s.to_str()) .unwrap_or_default() .to_string(); @@ -3530,7 +4283,10 @@ pub async fn q_extract( match p.get_key(&wxid) { Ok(km) => Some(km), Err(e) => { - eprintln!("[extract] image key 提取失败 (wxid={}): {} — V2 文件将无法解码", wxid, e); + eprintln!( + "[extract] image key 提取失败 (wxid={}): {} — V2 文件将无法解码", + wxid, e + ); None } } @@ -3539,7 +4295,10 @@ pub async fn q_extract( None }; let v2_key = match key_material.as_ref() { - Some(km) => V2KeyMaterial { aes_key: Some(&km.aes_key), xor_key: km.xor_key }, + Some(km) => V2KeyMaterial { + aes_key: Some(&km.aes_key), + xor_key: km.xor_key, + }, None => V2KeyMaterial::default(), }; @@ -3563,7 +4322,8 @@ pub async fn q_extract( "format": decoded.format, "decoder": decoded.decoder, })) - }).await??; + }) + .await??; Ok(report) } @@ -3584,7 +4344,9 @@ fn parse_attachment_kinds( let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { "image" | "img" => (AttachmentKind::Image, 3), "voice" | "audio" | "video" | "file" => { - anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通") + anyhow::bail!( + "当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通" + ) } other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), }; @@ -3765,10 +4527,8 @@ mod group_nickname_tests { #[test] fn 
group_top_senders_keeps_duplicate_display_names_separate() { - let sender_counts = HashMap::from([ - ("wxid_alice".to_string(), 7), - ("wxid_bob".to_string(), 3), - ]); + let sender_counts = + HashMap::from([("wxid_alice".to_string(), 7), ("wxid_bob".to_string(), 3)]); let names = HashMap::from([ ("wxid_alice".to_string(), "Alice Contact".to_string()), ("wxid_bob".to_string(), "Bob Contact".to_string()), @@ -3792,11 +4552,22 @@ mod group_nickname_tests { mod sns_tests { use super::*; - fn make_post_xml(create_time: &str, desc: &str, username_tag: Option<&str>, media: usize, location: Option<&str>) -> String { - let username = username_tag.map(|u| format!("{}", u)).unwrap_or_default(); + fn make_post_xml( + create_time: &str, + desc: &str, + username_tag: Option<&str>, + media: usize, + location: Option<&str>, + ) -> String { + let username = username_tag + .map(|u| format!("{}", u)) + .unwrap_or_default(); let media_tags = "2".repeat(media); let content_object = if media > 0 { - format!("{}", media_tags) + format!( + "{}", + media_tags + ) } else { String::new() }; diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 9f54076..242edc1 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -2,15 +2,12 @@ use anyhow::Result; use std::sync::Arc; use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; -use crate::ipc::{Request, Response}; use super::cache::DbCache; use super::query::Names; +use crate::ipc::{Request, Response}; /// 启动 IPC server(Unix socket / Windows named pipe) -pub async fn serve( - db: Arc, - names: Arc>>, -) -> Result<()> { +pub async fn serve(db: Arc, names: Arc>>) -> Result<()> { #[cfg(unix)] serve_unix(db, names).await?; #[cfg(windows)] @@ -19,10 +16,7 @@ pub async fn serve( } #[cfg(unix)] -async fn serve_unix( - db: Arc, - names: Arc>>, -) -> Result<()> { +async fn serve_unix(db: Arc, names: Arc>>) -> Result<()> { use tokio::net::UnixListener; let sock_path = crate::config::sock_path(); @@ -88,9 +82,7 @@ async fn 
serve_windows( db: Arc, names: Arc>>, ) -> Result<()> { - use interprocess::local_socket::{ - tokio::prelude::*, GenericNamespaced, ListenerOptions, - }; + use interprocess::local_socket::{tokio::prelude::*, GenericNamespaced, ListenerOptions}; // interprocess 的 GenericNamespaced 在 Windows 上会自动拼接 `\\.\pipe\` 前缀, // 这里必须传相对名;client 端用 `\\.\pipe\wx-cli-daemon` 直接打开可以对上 @@ -141,13 +133,9 @@ async fn handle_connection_windows( Ok(()) } -async fn dispatch( - req: Request, - db: &DbCache, - names: &tokio::sync::RwLock>, -) -> Response { - use crate::ipc::Request::*; +async fn dispatch(req: Request, db: &DbCache, names: &tokio::sync::RwLock>) -> Response { use super::query; + use crate::ipc::Request::*; // 取 guard → O(1) clone Arc → 立即 drop 锁。后续 await 期间不持有锁, // 多个并发 IPC 请求可以真正并行。Names 本身不可变(由 daemon 启动时 @@ -159,20 +147,66 @@ async fn dispatch( match req { Ping => Response::ok(serde_json::json!({ "pong": true })), - Sessions { limit } => { - match query::q_sessions(db, &names_arc, limit).await { + Sessions { + limit, + with_meta, + debug_source, + } => match query::q_sessions(db, &names_arc, limit, with_meta, debug_source).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, + History { + chat, + limit, + offset, + since, + until, + msg_type, + with_meta, + debug_source, + } => { + match query::q_history( + db, + &names_arc, + &chat, + limit, + offset, + since, + until, + msg_type, + with_meta, + debug_source, + ) + .await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - History { chat, limit, offset, since, until, msg_type } => { - match query::q_history(db, &names_arc, &chat, limit, offset, since, until, msg_type).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - Search { keyword, chats, limit, since, until, msg_type } => { - match query::q_search(db, &names_arc, &keyword, chats, limit, since, until, msg_type).await { + Search { + keyword, + chats, + limit, + since, + until, + 
msg_type, + with_meta, + debug_source, + } => { + match query::q_search( + db, + &names_arc, + &keyword, + chats, + limit, + since, + until, + msg_type, + with_meta, + debug_source, + ) + .await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } @@ -183,74 +217,145 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } - Unread { limit, filter } => { - match query::q_unread(db, &names_arc, limit, filter).await { + Unread { + limit, + filter, + with_meta, + debug_source, + } => match query::q_unread(db, &names_arc, limit, filter, with_meta, debug_source).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, + Members { chat } => match query::q_members(db, &names_arc, &chat).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, + NewMessages { + state, + limit, + with_meta, + debug_source, + } => { + match query::q_new_messages(db, &names_arc, state, limit, with_meta, debug_source).await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - Members { chat } => { - match query::q_members(db, &names_arc, &chat).await { + Favorites { + limit, + fav_type, + query, + } => match query::q_favorites(db, limit, fav_type, query).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, + Stats { + chat, + since, + until, + with_meta, + debug_source, + } => { + match query::q_stats(db, &names_arc, &chat, since, until, with_meta, debug_source).await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - NewMessages { state, limit } => { - match query::q_new_messages(db, &names_arc, state, limit).await { + SnsNotifications { + limit, + since, + until, + include_read, + } => { + match query::q_sns_notifications(db, &names_arc, limit, since, until, include_read) + .await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - Favorites { limit, fav_type, query } => { - match query::q_favorites(db, limit, 
fav_type, query).await { + SnsFeed { + limit, + since, + until, + user, + } => match query::q_sns_feed(db, &names_arc, limit, since, until, user.as_deref()).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, + SnsSearch { + keyword, + limit, + since, + until, + user, + } => { + match query::q_sns_search( + db, + &names_arc, + &keyword, + limit, + since, + until, + user.as_deref(), + ) + .await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - Stats { chat, since, until } => { - match query::q_stats(db, &names_arc, &chat, since, until).await { + ReloadConfig => Response::ok(serde_json::json!({ "reloading": true })), + BizArticles { + limit, + account, + since, + until, + unread, + } => { + match query::q_biz_articles(db, &names_arc, limit, account, since, until, unread).await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } - SnsNotifications { limit, since, until, include_read } => { - match query::q_sns_notifications(db, &names_arc, limit, since, until, include_read).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - SnsFeed { limit, since, until, user } => { - match query::q_sns_feed(db, &names_arc, limit, since, until, user.as_deref()).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - SnsSearch { keyword, limit, since, until, user } => { - match query::q_sns_search(db, &names_arc, &keyword, limit, since, until, user.as_deref()).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - ReloadConfig => { - Response::ok(serde_json::json!({ "reloading": true })) - } - BizArticles { limit, account, since, until, unread } => { - match query::q_biz_articles(db, &names_arc, limit, account, since, until, unread).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - Attachments { chat, kinds, limit, offset, since, until } => { - match 
query::q_attachments(db, &names_arc, &chat, kinds, limit, offset, since, until).await { - Ok(v) => Response::ok(v), - Err(e) => Response::err(e.to_string()), - } - } - Extract { attachment_id, output, overwrite } => { - match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await { + Attachments { + chat, + kinds, + limit, + offset, + since, + until, + with_meta, + debug_source, + } => { + match query::q_attachments( + db, + &names_arc, + &chat, + kinds, + limit, + offset, + since, + until, + with_meta, + debug_source, + ) + .await + { Ok(v) => Response::ok(v), Err(e) => Response::err(e.to_string()), } } + Extract { + attachment_id, + output, + overwrite, + } => match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + }, } } diff --git a/src/ipc.rs b/src/ipc.rs index a4615eb..fd6d6bf 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -1,6 +1,6 @@ -use std::collections::HashMap; use serde::{Deserialize, Serialize}; use serde_json::Value; +use std::collections::HashMap; /// CLI 向 daemon 发送的请求(换行符分隔 JSON,与 Python 版兼容) #[derive(Debug, Clone, Serialize, Deserialize)] @@ -10,6 +10,10 @@ pub enum Request { Sessions { #[serde(default = "default_limit_20")] limit: usize, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, History { chat: String, @@ -23,6 +27,10 @@ pub enum Request { until: Option, #[serde(skip_serializing_if = "Option::is_none")] msg_type: Option, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, Search { keyword: String, @@ -36,6 +44,10 @@ pub enum Request { until: Option, #[serde(skip_serializing_if = "Option::is_none")] msg_type: Option, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if 
= "is_false")] + debug_source: bool, }, Contacts { #[serde(skip_serializing_if = "Option::is_none")] @@ -49,6 +61,10 @@ pub enum Request { /// 按会话类型过滤:private / group / official / folded / all,支持多选 #[serde(default, skip_serializing_if = "Option::is_none")] filter: Option>, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, Members { chat: String, @@ -60,6 +76,10 @@ pub enum Request { state: Option>, #[serde(default = "default_limit_200")] limit: usize, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, Stats { chat: String, @@ -67,6 +87,10 @@ pub enum Request { since: Option, #[serde(skip_serializing_if = "Option::is_none")] until: Option, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, Favorites { #[serde(default = "default_limit_50")] @@ -146,6 +170,10 @@ pub enum Request { since: Option, #[serde(skip_serializing_if = "Option::is_none")] until: Option, + #[serde(default, skip_serializing_if = "is_false")] + with_meta: bool, + #[serde(default, skip_serializing_if = "is_false")] + debug_source: bool, }, /// 提取(解密)单个附件的本体到指定路径 Extract { @@ -159,7 +187,6 @@ pub enum Request { }, } - /// daemon 的响应 #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Response { @@ -172,11 +199,19 @@ pub struct Response { impl Response { pub fn ok(data: Value) -> Self { - Self { ok: true, error: None, data } + Self { + ok: true, + error: None, + data, + } } pub fn err(msg: impl Into) -> Self { - Self { ok: false, error: Some(msg.into()), data: Value::Null } + Self { + ok: false, + error: Some(msg.into()), + data: Value::Null, + } } pub fn to_json_line(&self) -> anyhow::Result { @@ -185,6 +220,15 @@ impl Response { } } -fn default_limit_20() -> usize { 20 } -fn default_limit_50() 
-> usize { 50 } -fn default_limit_200() -> usize { 200 } +fn default_limit_20() -> usize { + 20 +} +fn default_limit_50() -> usize { + 50 +} +fn default_limit_200() -> usize { + 200 +} +fn is_false(v: &bool) -> bool { + !*v +}