From d750ef6e9fb014e642abbf07e0f5de3bdcf36f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E4=BC=A0=E4=BD=B3?= Date: Thu, 14 May 2026 13:50:04 +0800 Subject: [PATCH 01/20] =?UTF-8?q?fix(cli,config):=20=E4=BF=AE=E5=A4=8D=20s?= =?UTF-8?q?udo=20=E4=B8=8B=E5=88=9D=E5=A7=8B=E5=8C=96=E5=A4=B1=E8=B4=A5=20?= =?UTF-8?q?+=20daemon=20=E4=B8=8D=E9=87=8D=E8=BD=BD=E9=97=AE=E9=A2=98=20(#?= =?UTF-8?q?37)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(cli,config): 修复 sudo 下初始化失败 + daemon 不重载问题 - cli/transport: 新增 stop_daemon(),init 后自动停止旧 daemon - config: cli_dir() 优先读 SUDO_USER 环境变量,避免写到 /root/.wx-cli - config: auto_detect_db_dir() 按 .db 文件最新 mtime 排序,正确选最新目录 - daemon/server: dispatch 新增 ReloadConfig 命令(预留) - ipc: Request 新增 ReloadConfig 变体 - scanner/linux: 移除调试日志,清理 unused bail import * fix(config): resolve sudo home via passwd lookup --------- Co-authored-by: cjliu Co-authored-by: jackwener --- src/cli/init.rs | 4 ++ src/cli/transport.rs | 25 ++++++++++ src/config.rs | 109 ++++++++++++++++++++++++++++++++++--------- src/daemon/server.rs | 3 ++ src/ipc.rs | 2 + src/scanner/linux.rs | 2 +- 6 files changed, 123 insertions(+), 22 deletions(-) diff --git a/src/cli/init.rs b/src/cli/init.rs index ece6af0..d7553b7 100644 --- a/src/cli/init.rs +++ b/src/cli/init.rs @@ -91,6 +91,10 @@ pub fn cmd_init(force: bool) -> Result<()> { std::fs::write(&config_path, serde_json::to_string_pretty(&cfg)?) 
.context("写入 config.json 失败")?; println!("配置已保存: {}", config_path.display()); + + // init 之后必须停掉旧 daemon(它用的是旧 config),下次调用会自动重启 + let _ = crate::cli::transport::stop_daemon(); + println!("初始化完成,可以使用 wx sessions / wx history 等命令了"); Ok(()) diff --git a/src/cli/transport.rs b/src/cli/transport.rs index ab62da5..73c2f88 100644 --- a/src/cli/transport.rs +++ b/src/cli/transport.rs @@ -62,6 +62,31 @@ pub fn ensure_daemon() -> Result<()> { Ok(()) } +/// 停止 daemon(如果正在运行) +pub fn stop_daemon() -> Result<()> { + let pid_path = config::pid_path(); + if let Ok(pid_str) = std::fs::read_to_string(&pid_path) { + if let Ok(pid) = pid_str.trim().parse::() { + #[cfg(unix)] + { + let _ = std::process::Command::new("kill") + .arg("-TERM") + .arg(pid.to_string()) + .spawn(); + } + #[cfg(windows)] + { + let _ = std::process::Command::new("taskkill") + .args(["/F", "/PID", &pid.to_string()]) + .spawn(); + } + } + } + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(&pid_path); + Ok(()) +} + /// 启动 daemon 前检查 `~/.wx-cli/` 可写,给出比"超时"更明确的错误。 /// /// 典型坑:旧版本 `sudo wx init` 把目录留成 root 属主,非 root 的 daemon diff --git a/src/config.rs b/src/config.rs index 55a03ca..a488ca0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -71,7 +71,8 @@ fn find_config_file() -> Result { return Ok(cwd); } // 3. 
~/.wx-cli/config.json - if let Some(home) = dirs::home_dir() { + let home = cli_home_dir(); + if home != PathBuf::from("/tmp") { let p = home.join(".wx-cli").join("config.json"); if p.exists() { return Ok(p); @@ -87,9 +88,44 @@ fn find_config_file() -> Result { } pub fn cli_dir() -> PathBuf { - dirs::home_dir() - .unwrap_or_else(|| PathBuf::from("/tmp")) - .join(".wx-cli") + cli_home_dir().join(".wx-cli") +} + +fn cli_home_dir() -> PathBuf { + resolve_cli_home( + dirs::home_dir().unwrap_or_else(|| PathBuf::from("/tmp")), + sudo_user_home_dir(), + ) +} + +fn resolve_cli_home(default_home: PathBuf, sudo_home: Option) -> PathBuf { + sudo_home.unwrap_or(default_home) +} + +#[cfg(unix)] +fn sudo_user_home_dir() -> Option { + use std::ffi::{CStr, CString}; + + let sudo_user = std::env::var("SUDO_USER").ok()?; + let sudo_user = sudo_user.trim(); + if sudo_user.is_empty() { + return None; + } + + let c_user = CString::new(sudo_user).ok()?; + unsafe { + let pwd = libc::getpwnam(c_user.as_ptr()); + if pwd.is_null() || (*pwd).pw_dir.is_null() { + return None; + } + let dir = CStr::from_ptr((*pwd).pw_dir).to_str().ok()?; + Some(PathBuf::from(dir)) + } +} + +#[cfg(not(unix))] +fn sudo_user_home_dir() -> Option { + None } pub fn sock_path() -> PathBuf { @@ -154,17 +190,7 @@ pub fn auto_detect_db_dir() -> Option { #[cfg(target_os = "macos")] fn detect_db_dir_impl() -> Option { - let home = dirs::home_dir()?; - // 支持 sudo 环境 - let home = if let Ok(sudo_user) = std::env::var("SUDO_USER") { - if !sudo_user.is_empty() { - PathBuf::from("/Users").join(&sudo_user) - } else { - home - } - } else { - home - }; + let home = sudo_user_home_dir().or_else(dirs::home_dir)?; let base = home.join("Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files"); if !base.exists() { @@ -190,9 +216,7 @@ fn detect_db_dir_impl() -> Option { #[cfg(target_os = "linux")] fn detect_db_dir_impl() -> Option { let home = dirs::home_dir()?; - let sudo_home = std::env::var("SUDO_USER").ok() - 
.filter(|s| !s.is_empty()) - .map(|u| PathBuf::from("/home").join(u)); + let sudo_home = sudo_user_home_dir(); let mut candidates: Vec = Vec::new(); for base_home in [Some(home.clone()), sudo_home].into_iter().flatten() { @@ -213,13 +237,32 @@ fn detect_db_dir_impl() -> Option { } } candidates.sort_by_key(|p| { - std::fs::metadata(p) - .and_then(|m| m.modified()) - .unwrap_or(std::time::SystemTime::UNIX_EPOCH) + // 排序:取 db_storage 目录下所有 .db 文件的最新 mtime,而非目录自身的 mtime + // 这样当收到新消息时(只有 .db 文件被更新),能正确识别最新目录 + latest_db_mtime(p).unwrap_or(std::time::SystemTime::UNIX_EPOCH) }); candidates.into_iter().next_back() } +/// 递归查找 db_storage 目录下所有 .db 文件的最新 mtime +fn latest_db_mtime(dir: &Path) -> Option { + let mut latest = None; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + let mtime = if path.is_dir() { + latest_db_mtime(&path).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else if path.extension().and_then(|s| s.to_str()) == Some("db") { + entry.metadata().and_then(|m| m.modified()).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else { + continue; + }; + latest = Some(latest.map_or(mtime, |cur| if mtime > cur { mtime } else { cur })); + } + } + latest +} + #[cfg(target_os = "windows")] fn detect_db_dir_impl() -> Option { let appdata = std::env::var("APPDATA").ok()?; @@ -257,3 +300,27 @@ fn detect_db_dir_impl() -> Option { fn detect_db_dir_impl() -> Option { None } + +#[cfg(test)] +mod tests { + use super::resolve_cli_home; + use std::path::PathBuf; + + #[test] + fn resolve_cli_home_prefers_sudo_home_when_present() { + let home = resolve_cli_home( + PathBuf::from("/root"), + Some(PathBuf::from("/Users/alice")), + ); + assert_eq!(home, PathBuf::from("/Users/alice")); + } + + #[test] + fn resolve_cli_home_falls_back_to_default_home() { + let home = resolve_cli_home( + PathBuf::from("/root"), + None, + ); + assert_eq!(home, PathBuf::from("/root")); + } +} diff --git a/src/daemon/server.rs 
b/src/daemon/server.rs index 896a08e..4d7fd54 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -231,5 +231,8 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } + ReloadConfig => { + Response::ok(serde_json::json!({ "reloading": true })) + } } } diff --git a/src/ipc.rs b/src/ipc.rs index 873e2d4..32e0a8f 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -114,6 +114,8 @@ pub enum Request { #[serde(skip_serializing_if = "Option::is_none")] user: Option, }, + /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) + ReloadConfig, } diff --git a/src/scanner/linux.rs b/src/scanner/linux.rs index ba6f97b..d6f4ee9 100644 --- a/src/scanner/linux.rs +++ b/src/scanner/linux.rs @@ -3,7 +3,7 @@ /// 通过 /proc//maps 枚举内存区域, /// 通过 /proc//mem 读取内存内容, /// 搜索 x'<64hex><32hex>' 格式的 SQLCipher 密钥 -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use std::io::{Read, Seek, SeekFrom}; use std::path::Path; From 35a8f0e94b5fc350c692042dffefb25af91bcdec Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 14:22:55 +0800 Subject: [PATCH 02/20] =?UTF-8?q?feat(group):=20=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E7=BE=A4=E6=98=B5=E7=A7=B0/=E7=BE=A4=E5=90=8D=E7=89=87?= =?UTF-8?q?=E5=B1=95=E7=A4=BA=20(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support group nicknames * fix(group): keep duplicate nickname senders separate in stats --------- Co-authored-by: jackwener --- README.md | 12 +- SKILL.md | 13 + src/daemon/query.rs | 589 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 583 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e0f06da..d084301 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Platform](https://img.shields.io/badge/platform-macOS%20%7C%20Linux%20%7C%20Windows-lightgrey.svg)](#安装) [![Rust](https://img.shields.io/badge/built%20with-Rust-orange.svg)](https://www.rust-lang.org) -会话 · 聊天记录 · 搜索 · 联系人 · 
群成员 · 收藏 · 统计 · 导出 +会话 · 聊天记录 · 搜索 · 联系人 · 群成员 · 群昵称 · 收藏 · 统计 · 导出 @@ -156,6 +156,8 @@ wx search "会议" --in "工作群" --since 2026-01-01 会话/消息输出里都带 `chat_type` 字段,取值为 `private` / `group` / `official_account` / `folded`。`official_account` 涵盖公众号、订阅号、服务号及 `mphelper` / `qqsafe` 等系统通知;`folded` 对应微信里的"订阅号折叠"和"折叠群聊"两个聚合入口。 +群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 + ### 朋友圈(SNS) 三个独立命令,区分"通知"和"帖子": @@ -185,6 +187,14 @@ wx contacts --query "李" # 按名字搜索 wx members "AI交流群" # 群成员列表 ``` +`wx members --json` 返回的成员字段包括: + +- `username`:微信内部 username +- `display`:用于展示的名称,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;本地没有记录时为空字符串 +- `is_owner`:是否群主 + ### 收藏 & 统计 ```bash diff --git a/SKILL.md b/SKILL.md index 4ce28c3..386816f 100644 --- a/SKILL.md +++ b/SKILL.md @@ -11,6 +11,7 @@ description: "wx-cli — 从本地微信数据库查询聊天记录、联系人 - 微信消息历史 - 微信联系人 - 微信群成员 +- 微信群昵称 / 群名片 - 微信收藏 - wechat history / messages / contacts - wx-cli @@ -137,6 +138,8 @@ wx search "会议" --in "工作群" --since 2026-01-01 `wx unread --filter` 支持 `private` / `group` / `official` / `folded` / `all`,逗号分隔多选。默认 `all`。 +群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 + ### 联系人与群组 ```bash @@ -148,6 +151,16 @@ wx contacts --query "李" wx members "AI交流群" ``` +`wx members --json` 每个成员包含: + +- `username`:微信内部 username +- `display`:推荐展示名,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;没有记录时为空字符串 +- `is_owner`:是否群主 + +Agent 展示群成员时优先用 `display`。需要区分群昵称和联系人名时,再读取 `group_nickname` 与 `contact_display`。 + ### 朋友圈(SNS) 三个命令,作用各不同: diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 18cf28e..041ff0b 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -4,8 +4,8 @@ use regex::Regex; use roxmltree::{Document, Node}; use rusqlite::Connection; use serde_json::{json, Value}; -use std::collections::HashMap; -use std::sync::OnceLock; +use std::collections::{HashMap, HashSet}; +use 
std::sync::{Arc, OnceLock}; use super::cache::DbCache; @@ -141,6 +141,7 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let display = names.display(&username); let chat_type = chat_type_of(&username, names); @@ -151,9 +152,13 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result = Vec::new(); + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let since2 = since; let until2 = until; let limit2 = limit; @@ -211,7 +222,7 @@ pub async fn q_history( let msgs: Vec = tokio::task::spawn_blocking(move || { // per-DB 软上限:offset + limit 已足够全局分页,避免大群全量加载 let per_db_cap = offset2 + limit2; - query_messages(&path, &tname, &uname, is_group2, &names_map, since2, until2, msg_type, per_db_cap, 0) + query_messages(&path, &tname, &uname, is_group2, &names_map, &group_nicknames2, since2, until2, msg_type, per_db_cap, 0) }).await??; all_msgs.extend(msgs); @@ -311,6 +322,19 @@ pub async fn q_search( by_path.entry(p).or_default().push((t, d, u)); } + let mut group_usernames = HashSet::new(); + for table_list in by_path.values() { + for (_, _, uname) in table_list { + if uname.contains("@chatroom") { + group_usernames.insert(uname.clone()); + } + } + } + let group_nicknames_by_chat = load_group_nickname_maps(db, group_usernames) + .await + .unwrap_or_default(); + let group_nicknames_by_chat = Arc::new(group_nicknames_by_chat); + let mut results: Vec = Vec::new(); let kw = keyword.to_string(); for (db_path, table_list) in by_path { @@ -320,13 +344,18 @@ pub async fn q_search( let limit2 = limit * 3; let names_map2 = 
names.map.clone(); + let group_nicknames_by_chat2 = Arc::clone(&group_nicknames_by_chat); let found: Vec = match tokio::task::spawn_blocking(move || { let conn = Connection::open(&db_path)?; let mut all = Vec::new(); + let empty_group_nicknames = HashMap::new(); for (tname, display, uname) in &table_list { let is_group = uname.contains("@chatroom"); + let group_nicknames = group_nicknames_by_chat2 + .get(uname) + .unwrap_or(&empty_group_nicknames); match search_in_table(&conn, tname, &uname, is_group, - &names_map2, &kw2, since2, until2, msg_type, limit2) + &names_map2, group_nicknames, &kw2, since2, until2, msg_type, limit2) { Ok(rows) => { for mut row in rows { @@ -461,6 +490,7 @@ fn query_messages( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, since: Option, until: Option, msg_type: Option, @@ -518,7 +548,7 @@ fn query_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); result.push(json!({ @@ -539,6 +569,7 @@ fn search_in_table( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, keyword: &str, since: Option, until: Option, @@ -589,7 +620,7 @@ fn search_in_table( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); 
result.push(json!({ @@ -618,6 +649,368 @@ fn load_id2u(conn: &Connection) -> HashMap { map } +async fn load_group_nicknames( + db: &DbCache, + chat_username: &str, +) -> Result> { + if !chat_username.contains("@chatroom") { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? else { + return Ok(HashMap::new()); + }; + let chat = chat_username.to_string(); + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + Ok::<_, anyhow::Error>(load_group_nickname_map_from_conn(&conn, &chat, None)) + }).await? +} + +async fn load_group_nickname_maps( + db: &DbCache, + chat_usernames: HashSet, +) -> Result>> { + if chat_usernames.is_empty() { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? else { + return Ok(HashMap::new()); + }; + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + let mut out = HashMap::new(); + for chat in chat_usernames { + let nicknames = load_group_nickname_map_from_conn(&conn, &chat, None); + if !nicknames.is_empty() { + out.insert(chat, nicknames); + } + } + Ok::<_, anyhow::Error>(out) + }).await? +} + +fn load_group_nickname_map_from_conn( + conn: &Connection, + chat_username: &str, + targets: Option<&HashSet>, +) -> HashMap { + if !chat_username.contains("@chatroom") { + return HashMap::new(); + } + let ext = load_group_ext_buffer(conn, chat_username); + + let owned_targets = if targets.is_none() { + load_group_member_username_set(conn, chat_username) + } else { + None + }; + let targets = targets.or(owned_targets.as_ref()); + + ext.as_deref() + .map(|buf| parse_group_nickname_map(buf, targets)) + .unwrap_or_default() +} + +fn load_group_ext_buffer( + conn: &Connection, + chat_username: &str, +) -> Option> { + [ + "SELECT ext_buffer FROM chat_room WHERE username = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE chat_room_name = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE name = ? 
LIMIT 1", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, Option>>(0)) + .ok() + .flatten() + }) +} + +fn load_group_member_username_set( + conn: &Connection, + chat_username: &str, +) -> Option> { + let room_id: i64 = [ + "SELECT id FROM chat_room WHERE username = ?", + "SELECT id FROM chat_room WHERE chat_room_name = ?", + "SELECT id FROM chat_room WHERE name = ?", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, i64>(0)).ok() + }).unwrap_or(0); + + if room_id == 0 { + return None; + } + + let mut stmt = conn.prepare( + "SELECT c.username + FROM chatroom_member cm + LEFT JOIN contact c ON c.id = cm.member_id + WHERE cm.room_id = ?" + ).ok()?; + let usernames: HashSet = stmt.query_map([room_id], |row| { + row.get::<_, String>(0) + }).ok()? + .filter_map(|r| r.ok()) + .filter(|uid| !uid.is_empty()) + .collect(); + + if usernames.is_empty() { None } else { Some(usernames) } +} + +fn decode_proto_varint(raw: &[u8], offset: usize) -> Option<(u64, usize)> { + let mut value = 0u64; + let mut shift = 0u32; + let mut pos = offset; + while pos < raw.len() { + let byte = raw[pos]; + pos += 1; + value |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some((value, pos)); + } + shift += 7; + if shift > 63 { + return None; + } + } + None +} + +fn proto_len_fields<'a>(raw: &'a [u8]) -> Vec<(u64, &'a [u8])> { + let mut fields = Vec::new(); + let mut idx = 0usize; + while idx < raw.len() { + let Some((tag, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + let field_no = tag >> 3; + let wire_type = tag & 0x07; + match wire_type { + 0 => { + let Some((_, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + } + 1 => { + let Some(next) = idx.checked_add(8) else { break; }; + if next > raw.len() { break; } + idx = next; + } + 2 => { + let Some((size, next)) = decode_proto_varint(raw, idx) else { 
break; }; + if next <= idx { break; } + idx = next; + let Ok(size) = usize::try_from(size) else { break; }; + let Some(end) = idx.checked_add(size) else { break; }; + if end > raw.len() { break; } + fields.push((field_no, &raw[idx..end])); + idx = end; + } + 5 => { + let Some(next) = idx.checked_add(4) else { break; }; + if next > raw.len() { break; } + idx = next; + } + _ => break, + } + } + fields +} + +fn proto_string_fields(raw: &[u8]) -> Vec<(u64, String)> { + proto_len_fields(raw) + .into_iter() + .filter_map(|(field_no, value)| { + if value.is_empty() || value.len() > 256 { + return None; + } + let text = std::str::from_utf8(value).ok()?.trim().to_string(); + if text.is_empty() || text.chars().any(char::is_control) { + return None; + } + Some((field_no, text)) + }) + .collect() +} + +fn is_strong_username_hint(value: &str) -> bool { + value.starts_with("wxid_") + || value.ends_with("@chatroom") + || value.starts_with("gh_") + || value.contains('@') +} + +fn looks_like_username(value: &str) -> bool { + let value = value.trim(); + if value.is_empty() { + return false; + } + if is_strong_username_hint(value) { + return true; + } + if value.len() < 6 || value.len() > 32 || value.chars().any(char::is_whitespace) { + return false; + } + let mut chars = value.chars(); + let Some(first) = chars.next() else { return false; }; + first.is_ascii_alphabetic() + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') +} + +fn pick_member_username( + strings: &[(u64, String)], + targets: Option<&HashSet>, +) -> Option { + if let Some(targets) = targets { + return strings + .iter() + .find(|(_, value)| targets.contains(value)) + .map(|(_, value)| value.clone()); + } + + for field_no in [1u64, 4u64] { + if let Some((_, value)) = strings + .iter() + .find(|(f, value)| *f == field_no && looks_like_username(value)) + { + return Some(value.clone()); + } + } + + strings + .iter() + .find(|(_, value)| is_strong_username_hint(value)) + .or_else(|| 
strings.iter().find(|(_, value)| looks_like_username(value))) + .map(|(_, value)| value.clone()) +} + +fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option { + let mut best_score = i64::MIN; + let mut best = String::new(); + + for (idx, (field_no, value)) in strings.iter().enumerate() { + let value = value.trim(); + if value.is_empty() + || value == username + || is_strong_username_hint(value) + || value.contains('\n') + || value.contains('\r') + || value.len() > 64 + { + continue; + } + + let mut score = 0i64; + if *field_no == 2 { + score += 100; + } + if !looks_like_username(value) { + score += 20; + } + score += (32usize.saturating_sub(value.len())) as i64; + score = score * 1000 - idx as i64; + + if score > best_score { + best_score = score; + best = value.to_string(); + } + } + + if best.is_empty() { None } else { Some(best) } +} + +fn parse_group_nickname_map( + ext_buffer: &[u8], + targets: Option<&HashSet>, +) -> HashMap { + let mut out = HashMap::new(); + if ext_buffer.is_empty() { + return out; + } + + for (_, chunk) in proto_len_fields(ext_buffer) { + let strings = proto_string_fields(chunk); + if strings.is_empty() { + continue; + } + let Some(username) = pick_member_username(&strings, targets) else { + continue; + }; + if out.contains_key(&username) { + continue; + } + if let Some(nickname) = pick_group_nickname(&strings, &username) { + out.insert(username, nickname); + } + } + + out +} + +fn contact_display( + uid: &str, + nick: &str, + remark: &str, + names_map: &HashMap, +) -> String { + if !remark.is_empty() { + remark.to_string() + } else if !nick.is_empty() { + nick.to_string() + } else { + names_map.get(uid).cloned().unwrap_or_else(|| uid.to_string()) + } +} + +fn sender_display( + username: &str, + fallback_sender_name: &str, + names: &HashMap, + group_nicknames: &HashMap, +) -> String { + if username.is_empty() { + return String::new(); + } + group_nicknames + .get(username) + .filter(|s| !s.is_empty()) + .cloned() + 
.or_else(|| names.get(username).cloned()) + .or_else(|| { + if fallback_sender_name.is_empty() { + None + } else { + Some(fallback_sender_name.to_string()) + } + }) + .unwrap_or_else(|| username.to_string()) +} + +fn group_top_senders( + sender_counts: &HashMap, + names: &HashMap, + group_nicknames: &HashMap, + limit: usize, +) -> Vec { + let mut top_senders: Vec = sender_counts.iter() + .map(|(username, count)| json!({ + "sender": sender_display(username, "", names, group_nicknames), + "count": count, + })) + .collect(); + top_senders.sort_by(|a, b| { + b["count"].as_i64().unwrap_or(0) + .cmp(&a["count"].as_i64().unwrap_or(0)) + .then_with(|| { + a["sender"].as_str().unwrap_or("") + .cmp(b["sender"].as_str().unwrap_or("")) + }) + }); + top_senders.truncate(limit); + top_senders +} + fn sender_label( real_sender_id: i64, content: &str, @@ -625,15 +1018,16 @@ fn sender_label( chat_username: &str, id2u: &HashMap, names: &HashMap, + group_nicknames: &HashMap, ) -> String { let sender_uname = id2u.get(&real_sender_id).cloned().unwrap_or_default(); if is_group { if !sender_uname.is_empty() && sender_uname != chat_username { - return names.get(&sender_uname).cloned().unwrap_or(sender_uname); + return sender_display(&sender_uname, "", names, group_nicknames); } if content.contains(":\n") { let raw = content.splitn(2, ":\n").next().unwrap_or(""); - return names.get(raw).cloned().unwrap_or_else(|| raw.to_string()); + return sender_display(raw, "", names, group_nicknames); } return String::new(); } @@ -904,6 +1298,7 @@ pub async fn q_unread( }).await??; let mut results = Vec::new(); + let mut group_nickname_cache: HashMap> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let chat_type = chat_type_of(&username, names); if let Some(ref set) = filter_set { @@ -916,9 +1311,13 @@ pub async fn q_unread( let summary = decompress_or_str(&summary_bytes); let summary = strip_group_prefix(&summary); let sender_display = if is_group && 
!sender.is_empty() { - names.map.get(&sender).cloned().unwrap_or_else(|| { - if !sender_name.is_empty() { sender_name.clone() } else { sender.clone() } - }) + if !group_nickname_cache.contains_key(&username) { + let nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); + group_nickname_cache.insert(username.clone(), nicknames); + } + let empty = HashMap::new(); + let group_nicknames = group_nickname_cache.get(&username).unwrap_or(&empty); + sender_display(&sender, &sender_name, &names.map, group_nicknames) } else { String::new() }; @@ -955,7 +1354,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result // 优先路径:contact.db → chatroom_member + chat_room(完整成员列表) if let Some(contact_p) = db.get("contact/contact.db").await? { let uname2 = username.clone(); - let display2 = display.clone(); let names_map2 = names_map.clone(); let members_opt: Option> = tokio::task::spawn_blocking(move || { @@ -1008,12 +1406,31 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result return Ok(None); } + let target_usernames: HashSet = raw.iter() + .map(|(uid, _, _)| uid.clone()) + .collect(); + let group_nicknames = load_group_nickname_map_from_conn( + &conn, + &uname2, + Some(&target_usernames), + ); + let mut members: Vec = raw.iter().map(|(uid, nick, remark)| { - let disp = if !remark.is_empty() { remark.clone() } - else if !nick.is_empty() { nick.clone() } - else { names_map2.get(uid).cloned().unwrap_or_else(|| uid.clone()) }; + let contact_display = contact_display(uid, nick, remark, &names_map2); + let group_nickname = group_nicknames.get(uid).cloned().unwrap_or_default(); + let disp = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; let is_owner = uid == &owner && !owner.is_empty(); - json!({ "username": uid, "display": disp, "is_owner": is_owner }) + json!({ + "username": uid, + "display": disp, + "contact_display": contact_display, + "group_nickname": group_nickname, + 
"is_owner": is_owner, + }) }).collect(); // 群主排首位,其余按 display 字典序 @@ -1024,7 +1441,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) }); - let _ = display2; // 不在此 closure 内使用 Ok(Some(members)) }).await??; @@ -1075,10 +1491,20 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result sender_set.extend(senders); } + let group_nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); let mut members: Vec = sender_set.iter().map(|u| { + let contact_display = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); + let group_nickname = group_nicknames.get(u).cloned().unwrap_or_default(); + let display = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; json!({ "username": u, - "display": names_map.get(u).cloned().unwrap_or_else(|| u.clone()), + "display": display, + "contact_display": contact_display, + "group_nickname": group_nickname, "is_owner": false, }) }).collect(); @@ -1163,6 +1589,11 @@ pub async fn q_new_messages( let display = names.display(uname); let chat_type = chat_type_of(uname, names); let is_group = chat_type == "group"; + let group_nicknames = if is_group { + load_group_nicknames(db, uname).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); @@ -1170,6 +1601,7 @@ pub async fn q_new_messages( let uname2 = uname.clone(); let display2 = display.clone(); let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let tname_for_log = tname.clone(); let msgs: Vec = match tokio::task::spawn_blocking(move || { @@ -1201,7 +1633,7 @@ pub async fn q_new_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, 
is_group, &uname2, &id2u, &names_map); + let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = fmt_content(local_id, local_type, &content, is_group); result.push(json!({ "chat": display2, @@ -1376,13 +1808,17 @@ pub async fn q_stats( let mut type_counts: HashMap = HashMap::new(); let mut sender_counts: HashMap = HashMap::new(); let mut hour_counts = [0i64; 24]; + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; - let names_map = names.map.clone(); // 用 SQL GROUP BY 在数据库侧聚合,避免把全量消息内容加载进内存 let result: (i64, HashMap, HashMap, [i64; 24]) = @@ -1469,8 +1905,7 @@ pub async fn q_stats( for (id, cnt) in rows.flatten() { if let Some(u) = id2u.get(&id) { if u != &uname { - let name = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); - *sender_c.entry(name).or_insert(0) += cnt; + *sender_c.entry(u.clone()).or_insert(0) += cnt; } } } @@ -1495,11 +1930,7 @@ pub async fn q_stats( by_type.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); // 发言排行,Top 10 - let mut top_senders: Vec = sender_counts.iter() - .map(|(s, c)| json!({ "sender": s, "count": c })) - .collect(); - top_senders.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); - top_senders.truncate(10); + let top_senders = group_top_senders(&sender_counts, &names.map, &group_nicknames, 10); // 24小时分布 let by_hour: Vec = hour_counts.iter().enumerate() @@ -2001,6 +2432,104 @@ pub async fn q_sns_search( Ok(json!({ "keyword": keyword, "posts": posts, "total": total })) } +#[cfg(test)] +mod group_nickname_tests { + use super::*; + + fn varint(mut value: u64) -> Vec { + let mut out = Vec::new(); + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 
0x80; + } + out.push(byte); + if value == 0 { + return out; + } + } + } + + fn len_field(field_no: u64, bytes: &[u8]) -> Vec { + let mut out = varint((field_no << 3) | 2); + out.extend(varint(bytes.len() as u64)); + out.extend(bytes); + out + } + + fn string_field(field_no: u64, value: &str) -> Vec { + len_field(field_no, value.as_bytes()) + } + + fn member_chunk(username: &str, group_nickname: &str) -> Vec { + let mut member = Vec::new(); + member.extend(string_field(1, username)); + member.extend(string_field(2, group_nickname)); + len_field(1, &member) + } + + #[test] + fn parses_group_nickname_member_chunks() { + let mut ext_buffer = Vec::new(); + ext_buffer.extend(member_chunk("wxid_alice", "Alice In Group")); + ext_buffer.extend(member_chunk("bob_123456", "Bob Card")); + + let nicknames = parse_group_nickname_map(&ext_buffer, None); + + assert_eq!( + nicknames.get("wxid_alice").map(String::as_str), + Some("Alice In Group") + ); + assert_eq!( + nicknames.get("bob_123456").map(String::as_str), + Some("Bob Card") + ); + } + + #[test] + fn target_filter_anchors_member_username_choice() { + let mut member = Vec::new(); + member.extend(string_field(3, "candidate_name")); + member.extend(string_field(4, "wxid_target")); + member.extend(string_field(2, "Target Card")); + let ext_buffer = len_field(1, &member); + let targets = HashSet::from(["wxid_target".to_string()]); + + let nicknames = parse_group_nickname_map(&ext_buffer, Some(&targets)); + + assert_eq!( + nicknames.get("wxid_target").map(String::as_str), + Some("Target Card") + ); + assert!(!nicknames.contains_key("candidate_name")); + } + + #[test] + fn group_top_senders_keeps_duplicate_display_names_separate() { + let sender_counts = HashMap::from([ + ("wxid_alice".to_string(), 7), + ("wxid_bob".to_string(), 3), + ]); + let names = HashMap::from([ + ("wxid_alice".to_string(), "Alice Contact".to_string()), + ("wxid_bob".to_string(), "Bob Contact".to_string()), + ]); + let group_nicknames = HashMap::from([ + 
("wxid_alice".to_string(), "同名".to_string()), + ("wxid_bob".to_string(), "同名".to_string()), + ]); + + let top = group_top_senders(&sender_counts, &names, &group_nicknames, 10); + + assert_eq!(top.len(), 2); + assert_eq!(top[0]["sender"].as_str(), Some("同名")); + assert_eq!(top[0]["count"].as_i64(), Some(7)); + assert_eq!(top[1]["sender"].as_str(), Some("同名")); + assert_eq!(top[1]["count"].as_i64(), Some(3)); + } +} + #[cfg(test)] mod sns_tests { use super::*; From b0431352ce009f8646bd18256e22884b718d5ed3 Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 14:42:03 +0800 Subject: [PATCH 03/20] =?UTF-8?q?feat(appmsg):=20=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=BC=95=E7=94=A8=E6=B6=88=E6=81=AF=E5=8E=9F=E6=96=87=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=20(#28)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(appmsg): parse quoted message content * docs(appmsg): document quote message output --- README.md | 9 ++ SKILL.md | 9 ++ src/daemon/query.rs | 327 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 317 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index d084301..bec8f65 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,15 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 +引用消息会在 `history` / `search` / `new-messages` 输出中显示当前回复和被引用原文: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会包含微信 appmsg 里的链接、文件、合并聊天记录和引用消息等变体;搜索时也会匹配解压后可见的引用原文。 + ### 朋友圈(SNS) 三个独立命令,区分"通知"和"帖子": diff --git a/SKILL.md b/SKILL.md index 386816f..ec02ce8 100644 --- a/SKILL.md +++ b/SKILL.md @@ -140,6 +140,15 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 +引用消息(appmsg `type=57`)在 `history` / `search` / `new-messages` 
输出里会展开为两行:第一行是当前回复,第二行以 `↳` 开头显示被引用原文,例如: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会覆盖微信 appmsg 的链接、文件、合并聊天记录和引用消息等变体;`search --type link` 也会匹配解压并格式化后的引用原文。 + ### 联系人与群组 ```bash diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 041ff0b..0c4b106 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -500,19 +500,18 @@ fn query_messages( let conn = Connection::open(db_path)?; let id2u = load_id2u(&conn); - let mut clauses = Vec::new(); + let mut clauses: Vec = Vec::new(); let mut params: Vec> = Vec::new(); if let Some(s) = since { - clauses.push("create_time >= ?"); + clauses.push("create_time >= ?".into()); params.push(Box::new(s)); } if let Some(u) = until { - clauses.push("create_time <= ?"); + clauses.push("create_time <= ?".into()); params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?"); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } let where_clause = if clauses.is_empty() { String::new() @@ -579,8 +578,14 @@ fn search_in_table( let id2u = load_id2u(conn); // 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符 let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); - let mut clauses = vec!["message_content LIKE ? ESCAPE '\\'".to_string()]; - let mut params: Vec> = vec![Box::new(format!("%{}%", escaped_kw))]; + let search_decoded_content = msg_type == Some(49); + let keyword_lower = keyword.to_lowercase(); + let mut clauses: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + if !search_decoded_content { + clauses.push("message_content LIKE ? 
ESCAPE '\\'".to_string()); + params.push(Box::new(format!("%{}%", escaped_kw))); + } if let Some(s) = since { clauses.push("create_time >= ?".into()); params.push(Box::new(s)); @@ -590,17 +595,23 @@ fn search_in_table( params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?".into()); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } - let where_clause = format!("WHERE {}", clauses.join(" AND ")); + let where_clause = if clauses.is_empty() { + String::new() + } else { + format!("WHERE {}", clauses.join(" AND ")) + }; + let limit_clause = if search_decoded_content { "" } else { " LIMIT ?" }; let sql = format!( "SELECT local_id, local_type, create_time, real_sender_id, message_content, WCDB_CT_message_content - FROM [{}] {} ORDER BY create_time DESC LIMIT ?", - table, where_clause + FROM [{}] {} ORDER BY create_time DESC{}", + table, where_clause, limit_clause ); - params.push(Box::new(limit as i64)); + if !search_decoded_content { + params.push(Box::new(limit as i64)); + } let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; @@ -622,6 +633,9 @@ fn search_in_table( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { + continue; + } result.push(json!({ "timestamp": ts, @@ -631,10 +645,32 @@ fn search_in_table( "content": text, "type": fmt_type(local_type), })); + if search_decoded_content && result.len() >= limit { + break; + } } Ok(result) } +fn push_msg_type_filter( + clauses: &mut Vec, + params: &mut Vec>, + msg_type: i64, +) { + clauses.push("(local_type & 4294967295) = ?".into()); + params.push(Box::new(msg_type)); +} + +fn matches_search_text(raw: 
&str, formatted: &str, keyword: &str, keyword_lower: &str) -> bool { + contains_search_text(raw, keyword, keyword_lower) + || contains_search_text(formatted, keyword, keyword_lower) +} + +fn contains_search_text(haystack: &str, keyword: &str, keyword_lower: &str) -> bool { + haystack.contains(keyword) + || (!keyword_lower.is_empty() && haystack.to_lowercase().contains(keyword_lower)) +} + fn load_id2u(conn: &Connection) -> HashMap { let mut map = HashMap::new(); if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") { @@ -1163,21 +1199,8 @@ fn parse_appmsg(text: &str) -> Option { match atype.as_str() { "6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }), "57" => { - let ref_content = extract_xml_text(text, "content") - .map(|s| { - // content 可能是 HTML 转义的 XML(被引用的消息是 appmsg 时) - let unescaped = unescape_html(&s); - // 如果解转义后是 XML,尝试递归解析 - if unescaped.contains(">().join(" "); - if s.chars().count() > 40 { - format!("{}...", s.chars().take(40).collect::()) - } else { s } - }) + let ref_content = quote_refermsg_content(text) + .or_else(|| extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40))) .unwrap_or_default(); let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() }; if !ref_content.is_empty() { @@ -1191,6 +1214,56 @@ fn parse_appmsg(text: &str) -> Option { } } +fn quote_refermsg_content(text: &str) -> Option { + let refer = extract_xml_text(text, "refermsg")?; + let content = extract_xml_text(&refer, "content") + .and_then(|s| quote_content_text(&s, 80)) + .or_else(|| { + extract_xml_text(&refer, "type") + .and_then(|t| quote_refermsg_type_label(&t).map(str::to_string)) + })?; + match extract_xml_text(&refer, "displayname") { + Some(name) if !name.is_empty() => Some(format!("{}: {}", name, content)), + _ => Some(content), + } +} + +fn quote_content_text(raw: &str, max_chars: usize) -> Option { + let unescaped = unescape_html(raw); + if unescaped.contains(" 
Option<&'static str> { + match t { + "1" => None, + "3" => Some("[图片]"), + "34" => Some("[语音]"), + "43" => Some("[视频]"), + "47" => Some("[表情]"), + "49" => Some("[链接/文件]"), + _ => None, + } +} + +fn collapse_text(text: &str, max_chars: usize) -> String { + let collapsed = text.split_whitespace().collect::>().join(" "); + if collapsed.chars().count() > max_chars { + format!("{}...", collapsed.chars().take(max_chars).collect::()) + } else { + collapsed + } +} + fn extract_xml_text(xml: &str, tag: &str) -> Option { let open = format!("<{}>", tag); let close = format!("", tag); @@ -1223,6 +1296,204 @@ fn unescape_html(s: &str) -> String { .replace("'", "'") } +#[cfg(test)] +mod appmsg_tests { + use super::*; + + #[test] + fn parse_quote_appmsg_reads_refermsg_content() { + let xml = r#" + + + 我也没有用ai啊 + 57 + + + 1 + 不再熬夜 + 昨天用 claude 爬小红书数据来着 + + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn query_messages_filters_appmsg_by_base_type() { + let path = temp_db_path("query_messages_filters_appmsg_by_base_type"); + { + let conn = Connection::open(&path).expect("open temp db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + } + + let rows = query_messages( + &path, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + None, + None, + Some(49), + 10, + 0, + ) + .expect("query messages"); + + let _ = std::fs::remove_file(&path); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + 
Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_filters_appmsg_by_base_type() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, + ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_matches_decompressed_formatted_appmsg_content() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content BLOB, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + let xml = r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#; + let compressed = zstd::encode_all(xml.as_bytes(), 0).expect("compress appmsg xml"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + compressed, + 4_i64 + ], + ) + .expect("insert compressed quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, 
+ ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + fn temp_db_path(name: &str) -> std::path::PathBuf { + let unique = format!( + "wx-cli-{}-{}-{}.db", + name, + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock before unix epoch") + .as_nanos() + ); + std::env::temp_dir().join(unique) + } +} + fn fmt_time(ts: i64, fmt: &str) -> String { Local.timestamp_opt(ts, 0) .single() From 1b00d04598017392994fc09ea1009a7de4b5c990 Mon Sep 17 00:00:00 2001 From: Tsing Date: Thu, 14 May 2026 14:46:34 +0800 Subject: [PATCH 04/20] feat: expose url field for link/appmsg messages (#18) * feat: expose url field for link/appmsg messages Extract from appmsg XML in type-49 messages and append it as a 'url' field in history/search output. The field is omitted when the message has no valid URL (non-link types, empty, non-http). 
* fix: normalize appmsg urls across query outputs --------- Co-authored-by: tsinghu Co-authored-by: jackwener --- src/daemon/query.rs | 123 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 0c4b106..8aa14c1 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -549,15 +549,20 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -636,15 +641,20 @@ fn search_in_table( if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { continue; } + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); if search_decoded_content && result.len() >= limit { break; } @@ -1273,6 +1283,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + +/// 从 appmsg XML 中提取链接 URL(优先取 
,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + let xml = strip_group_prefix(text); + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; @@ -1906,7 +1947,8 @@ pub async fn q_new_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = fmt_content(local_id, local_type, &content, is_group); - result.push(json!({ + let url = appmsg_url_for_message(local_type, &content); + let mut msg = json!({ "chat": display2, "username": uname2, "is_group": is_group, @@ -1916,7 +1958,11 @@ pub async fn q_new_messages( "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok::<_, anyhow::Error>(result) }).await { @@ -2926,6 +2972,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn 
extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") } From 9d5a78ac04b74d7210819f26793ec9569a4dadef Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 15:13:50 +0800 Subject: [PATCH 05/20] docs(macOS): document TCC csreq invalidation after re-signing WeChat (#48) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS TCC binds permissions to (bundle id, csreq) where csreq encodes the app's code signature. `codesign --force --deep --sign -` on WeChat changes the csreq, silently invalidating every existing TCC grant for com.tencent.xinWeChat — yet System Settings still paints each toggle as ON because the UI only checks bundle id, hiding the drift. WeChat then reprompts for screen recording / camera / microphone / file access despite "looking allowed". Three doc-only updates, no code changes: - README.md quick start: add the `tccutil reset` loop right after the codesign step, plus a one-line callout pointing at the deep-dive section. - SKILL.md macOS init flow: same loop in the agent-readable order, so agents executing the steps don't skip it. - docs/macos-permission-guide.md: new section 五 with first-principles root cause, the reset loop, the macOS 26 "录屏与系统录音 / 仅系统 录音" UI split footgun, and ad-hoc signature verification. 
Builds on the BobbyCat PR #29 — keeps the symptom description and the macOS 26 UI split note, expands scope from ScreenCapture-only to all TCC services that re-signing actually breaks (Camera / Microphone / AppleEvents / AddressBook / Documents / Downloads / Desktop), drops the misleading TCC.db sqlite query (path varies by macOS version, can need FDA, and is no more useful than just trying WeChat's screenshot again), and explicitly leaves the reset as a manual step rather than auto-running it from `wx init` because it would wipe currently-working grants. Co-authored-by: BobbyCat <114374951+BobbyCats@users.noreply.github.com> --- README.md | 12 +++++- SKILL.md | 23 +++++++++- docs/macos-permission-guide.md | 76 ++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bec8f65..b9783ed 100644 --- a/README.md +++ b/README.md @@ -100,10 +100,16 @@ cargo build --release # 1. 签名(只需做一次,WeChat 更新后重做) codesign --force --deep --sign - /Applications/WeChat.app -# 2. 重启微信,等待完全登录 +# 2. 清理旧 TCC 授权记录(重签名后必做,否则微信截图/通话权限可能 silent 失效) +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done + +# 3. 重启微信,等待完全登录 killall WeChat && open /Applications/WeChat.app -# 3. 初始化 +# 4. 
初始化 sudo wx init ``` @@ -112,6 +118,8 @@ sudo wx init > codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_plugins/librtp_mpeg4_plugin.dylib" > codesign --force --deep --sign - /Applications/WeChat.app > ``` +> +> 重签名后 macOS 的 TCC 隐私授权按新 code signature 重新校验,旧记录会失效。如果跳过 `tccutil reset`,微信截图/视频通话/麦克风等权限可能"看起来已开启但实际拒绝"。详见 [macOS 权限与签名指南](docs/macos-permission-guide.md#五重签名后微信权限-silent-失效)。 **Linux** diff --git a/SKILL.md b/SKILL.md index ec02ce8..7d587af 100644 --- a/SKILL.md +++ b/SKILL.md @@ -66,14 +66,33 @@ codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_pl codesign --force --deep --sign - /Applications/WeChat.app ``` -**第二步:重启 WeChat** +**第二步:清理 WeChat 在 macOS TCC 隐私数据库里的旧授权记录**(重签名后必做) + +macOS TCC 按 `bundle id + csreq` 联合校验权限;csreq 编码自代码签名。重签名后旧 csreq 和新签名不再匹配,旧授权记录会 silent 失效(System Settings 仍把开关画成"已允许",运行时实际拒绝)。把 WeChat 在 TCC 里的旧记录抹掉,让 macOS 在下次微信请求权限时按新签名重新生成 csreq: + +```bash +tccutil reset ScreenCapture com.tencent.xinWeChat # 截图 / 屏幕共享 +tccutil reset Camera com.tencent.xinWeChat # 视频通话 / 扫码 +tccutil reset Microphone com.tencent.xinWeChat # 语音消息 / 通话 +tccutil reset AppleEvents com.tencent.xinWeChat # 自动化 / 输入法 +tccutil reset AddressBook com.tencent.xinWeChat # 通讯录 +tccutil reset SystemPolicyDocumentsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDownloadsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDesktopFolder com.tencent.xinWeChat +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",是 no-op,不影响其他 service 的 reset。 + +**第三步:重启 WeChat** ```bash killall WeChat && open /Applications/WeChat.app # 等待微信完全登录后再继续 ``` -**第三步:初始化** +之后微信触发权限请求时按 GUI 提示重新允许即可。在 macOS 26 上,把 WeChat 加进 **隐私与安全 → 录屏与系统录音** 的上半区,**不要**只勾下半区的"仅系统录音"——后者不能授予截图权限。 + +**第四步:初始化** ```bash sudo wx init diff --git a/docs/macos-permission-guide.md b/docs/macos-permission-guide.md index e5ee463..322cb90 100644 --- a/docs/macos-permission-guide.md +++ b/docs/macos-permission-guide.md @@ -196,3 +196,79 @@ open 
/Applications/WeChat.app | "SIP 阻止了调试微信" | ❌ SIP 只保护系统进程,微信不受 SIP 保护 | | "加了 sshd 到 FDA 就行" | ❌ 还需要加 `sshd-keygen-wrapper`,且要重连 SSH | | "微信开着也能重签名" | ❌ 运行中的 binary/dylib 被占用,codesign 会失败 | + +--- + +## 五、重签名后微信权限 silent 失效 + +### 现象 + +完成 ad-hoc 重签名后,微信任意以下功能都可能"看起来已授权但实际被拒绝": + +- 截图 / 屏幕共享(`ScreenCapture`) +- 视频通话 / 扫码(`Camera`) +- 语音消息 / 通话(`Microphone`) +- 自动化、第三方输入法(`AppleEvents`) +- 同步通讯录(`AddressBook`) +- 文件发送 / 接收(`SystemPolicyDocumentsFolder` / `Downloads` / `Desktop`) + +System Settings 里通常仍看到"微信.app"开关是 ON,但运行时权限校验失败。微信会反复弹"需要开启 X 权限"。 + +### 根因(第一性原理) + +macOS TCC(Transparency, Consent, and Control)按 **bundle id + csreq** 联合校验权限。`csreq`(code requirement)是从 app 的 code signature 推导出的二进制 blob,存在 `/Library/Application Support/com.apple.TCC/TCC.db` 的 `access` 表里,每条 ~160 字节。 + +`codesign --force --deep --sign -` 把 WeChat 从官方签名换成 ad-hoc 签名(甚至 ad-hoc → ad-hoc 重签也会变),新进程的 csreq 跟旧记录里那条对不上 —— tccd 拒绝。 + +System Settings UI 只按 client 显示开关、不重算 csreq,所以视觉上是"已授权",运行时实际拒绝。这是 silent drift。 + +### 修复步骤 + +把 WeChat 在 TCC 里的旧记录全部抹掉,让 macOS 在下次微信请求权限时按新签名重新生成 csreq: + +```bash +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",这是 no-op,不影响其他 service 的 reset。 + +之后退出并重新打开微信,按 GUI 提示重新允许: + +```bash +killall WeChat +open /Applications/WeChat.app +``` + +> 这一步**应当由用户/agent 手动执行**,不在 `wx init` 里自动跑——TCC 重置会让用户的现有授权失效,需要由人决定时机。 + +#### macOS 26 的 UI 拆分 + +在 macOS 26 上,**隐私与安全 → 录屏与系统录音** 显示为两块,容易踩坑: + +| 区域 | 作用 | +|------|------| +| **录屏与系统录音**(上半区) | 录制屏幕内容 + 系统音频;微信截图、屏幕共享需要这一项 | +| **仅系统录音**(下半区) | 只录系统音频;只打开这一项**不能**修复微信截图 | + +把 WeChat 加进上半区;只勾下半区的"仅系统录音"无效。 + +### 验证 + +确认 WeChat 当前是 ad-hoc 签名(这是修复前提): + +```bash +codesign -dv --verbose=4 /Applications/WeChat.app 2>&1 | grep -E "Signature|flags|TeamIdentifier" +``` + +期望看到: + +```text +flags=0x2(adhoc) 
+Signature=adhoc +TeamIdentifier=not set +``` + +最直接的功能验证:在微信里使用截图、视频通话、麦克风等功能,按 GUI 弹窗的"允许"重新授权一次,之后正常工作。 From c284b4ade6690c6c4e5d6559db1d5f3f8d380450 Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 15:29:01 +0800 Subject: [PATCH 06/20] fix: parse appmsg subtypes from type 49 messages (#24) --- src/daemon/query.rs | 200 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 2 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 8aa14c1..2d33e97 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -1202,8 +1202,25 @@ fn parse_sysmsg(xml: &str) -> Option { } fn parse_appmsg(text: &str) -> Option { - // 简单 XML 解析,避免引入重量级 XML 库(或直接用 minidom) - // 这里用基本字符串搜索实现 + if let Some(parsed) = parse_appmsg_dom(text) { + return Some(parsed); + } + parse_appmsg_legacy(text) +} + +fn parse_appmsg_dom(text: &str) -> Option { + let doc = Document::parse(text).ok()?; + let appmsg = doc.descendants().find(|node| node.has_tag_name("appmsg"))?; + let title = xml_text(xml_child(appmsg, "title")).unwrap_or_default(); + let atype = xml_text(xml_child(appmsg, "type")).unwrap_or_default(); + match atype.as_str() { + "6" => Some(format_file_appmsg(appmsg, &title)), + "19" => Some(format_record_appmsg(appmsg, &title)), + _ => None, + } +} + +fn parse_appmsg_legacy(text: &str) -> Option { let title = extract_xml_text(text, "title")?; let atype = extract_xml_text(text, "type").unwrap_or_default(); match atype.as_str() { @@ -1224,6 +1241,119 @@ fn parse_appmsg(text: &str) -> Option { } } +fn format_file_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let mut meta = Vec::new(); + if let Some(size) = xml_child(appmsg, "appattach") + .and_then(|attach| xml_text(xml_child(attach, "totallen"))) + .and_then(|value| value.parse::().ok()) + .filter(|size| *size > 0) + { + meta.push(format_byte_size(size)); + } + if let Some(ext) = xml_child(appmsg, "appattach") + 
.and_then(|attach| xml_text(xml_child(attach, "fileext"))) + .filter(|ext| !ext.is_empty()) + { + meta.push(ext); + } + + let base = if !title.is_empty() { + format!("[文件] {}", title) + } else { + "[文件]".into() + }; + if meta.is_empty() { + base + } else { + format!("{} ({})", base, meta.join(", ")) + } +} + +fn format_record_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let items = record_item_lines(appmsg); + let mut header = if !title.is_empty() { + format!("[合并聊天记录] {}", title) + } else { + "[合并聊天记录]".into() + }; + if !items.is_empty() { + header.push_str(&format!(" ({}条)", items.len())); + } + + let mut lines = vec![header]; + if items.is_empty() { + if let Some(desc) = xml_text(xml_child(appmsg, "des")).filter(|desc| !desc.is_empty()) { + lines.push(format!(" {}", collapse_text(&desc, 120))); + } + } else { + for item in items.iter().take(10) { + lines.push(format!(" - {}", item)); + } + if items.len() > 10 { + lines.push(format!(" - ... 还有{}条", items.len() - 10)); + } + } + lines.join("\n") +} + +fn record_item_lines<'a, 'input>(appmsg: Node<'a, 'input>) -> Vec { + let mut lines = record_item_lines_from_node(appmsg); + if !lines.is_empty() { + return lines; + } + + let Some(record_xml) = xml_text(xml_child(appmsg, "recorditem")).filter(|value| !value.is_empty()) else { + return Vec::new(); + }; + let unescaped = unescape_html(&record_xml); + for candidate in [&record_xml, &unescaped] { + if let Ok(doc) = Document::parse(candidate) { + lines = record_item_lines_from_node(doc.root_element()); + if !lines.is_empty() { + break; + } + } + } + lines +} + +fn record_item_lines_from_node<'a, 'input>(node: Node<'a, 'input>) -> Vec { + node.descendants() + .filter(|child| child.has_tag_name("dataitem")) + .filter_map(format_record_item) + .collect() +} + +fn format_record_item<'a, 'input>(item: Node<'a, 'input>) -> Option { + let name = first_child_text(item, &["sourcename", "datasrcname", "sourceusername"]); + let desc = 
first_child_text(item, &["datadesc", "datatitle", "datafmt"]) + .or_else(|| item.attribute("datatype").and_then(record_datatype_label).map(str::to_string))?; + let desc = collapse_text(&desc, 100); + if let Some(name) = name.filter(|value| !value.is_empty()) { + Some(format!("{}: {}", name, desc)) + } else { + Some(desc) + } +} + +fn first_child_text<'a, 'input>(node: Node<'a, 'input>, tags: &[&str]) -> Option { + tags.iter() + .find_map(|tag| xml_text(xml_child(node, tag))) + .filter(|value| !value.is_empty()) +} + +fn record_datatype_label(datatype: &str) -> Option<&'static str> { + match datatype { + "1" => Some("[文本]"), + "2" => Some("[图片]"), + "3" => Some("[语音]"), + "4" => Some("[视频]"), + "6" => Some("[文件]"), + "17" => Some("[链接]"), + _ => None, + } +} + fn quote_refermsg_content(text: &str) -> Option { let refer = extract_xml_text(text, "refermsg")?; let content = extract_xml_text(&refer, "content") @@ -1274,6 +1404,30 @@ fn collapse_text(text: &str, max_chars: usize) -> String { } } +fn format_byte_size(bytes: u64) -> String { + const KB: f64 = 1024.0; + const MB: f64 = KB * 1024.0; + const GB: f64 = MB * 1024.0; + let bytes_f = bytes as f64; + if bytes_f >= GB { + format_decimal_unit(bytes_f / GB, "GB") + } else if bytes_f >= MB { + format_decimal_unit(bytes_f / MB, "MB") + } else if bytes_f >= KB { + format_decimal_unit(bytes_f / KB, "KB") + } else { + format!("{} B", bytes) + } +} + +fn format_decimal_unit(value: f64, unit: &str) -> String { + let mut s = format!("{:.1}", value); + if s.ends_with(".0") { + s.truncate(s.len() - 2); + } + format!("{} {}", s, unit) +} + fn extract_xml_text(xml: &str, tag: &str) -> Option { let open = format!("<{}>", tag); let close = format!("", tag); @@ -1341,6 +1495,48 @@ fn unescape_html(s: &str) -> String { mod appmsg_tests { use super::*; + #[test] + fn parse_forwarded_chat_record_expands_record_items() { + let xml = r#" + + + 群聊的聊天记录 + 张三: 早上好 +李四: 收到 + 19 + <recordinfo><datalist count="2"><dataitem 
datatype="1"><sourcename>张三</sourcename><sourcetime>1710000000</sourcetime><datadesc>早上好 &amp; coffee</datadesc></dataitem><dataitem datatype="2"><sourcename>李四</sourcename><sourcetime>1710000060</sourcetime><datafmt>图片</datafmt><datadesc>[图片]</datadesc></dataitem></datalist></recordinfo> + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[合并聊天记录] 群聊的聊天记录 (2条)\n - 张三: 早上好 & coffee\n - 李四: [图片]") + ); + } + + #[test] + fn parse_file_appmsg_includes_attachment_metadata() { + let xml = r#" + + + report.pdf + 6 + + 1536 + pdf + + abcdef123456 + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[文件] report.pdf (1.5 KB, pdf)") + ); + } + #[test] fn parse_quote_appmsg_reads_refermsg_content() { let xml = r#" From dab3217d3f58691d4596102abc64aebf3796bd32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=BA=90=E6=B3=89?= <84364275+ChenyqThu@users.noreply.github.com> Date: Thu, 14 May 2026 01:07:39 -0700 Subject: [PATCH 07/20] feat(biz): add wx biz-articles command to query public account messages (#33) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(biz): add biz-articles command to query public account messages 加载 biz_message_0.db 提取公众号推送(标题/url/作者/时间)。 - daemon 端通过 DbCache 按需解密 biz_message_0.db(密钥已在 all_keys.json 中) - 新增 IPC 变体 BizArticles(limit/account/since/until 参数) - 新增 query 处理器 q_biz_articles: - 通过 Name2Id 反查 gh_* username → md5 → Msg_ 表映射 - 过滤 local_type & 0xFFFFFFFF = 49(appmsg 公众号文章) - zstd 解压 + extract_cdata 解析 / XML - 支持多文章推送(一条消息含多篇文章) - 输出字段:time/timestamp/recv_time/account/account_username/title/url/digest/cover_url - 新增 CLI 子命令 wx biz-articles,参数:-n / --account / --since / --until / --json - 新增工具函数 extract_cdata(CDATA 块解析)和 parse_biz_xml_items - 新增 8 个单测(biz_tests 模块)覆盖 CDATA 解析和多文章场景 支持工作流: wx biz-articles --since today --json | jq ".[].url" | xargs opencli weixin download Verified: 返朴 ADHD 文章、Datawhale Claude Code 文章、土猛员外知识引擎文章均已正确提取。 * feat(biz-articles): add --unread 
filter (one latest article per account) 只列「有未读的公众号」的最近 1 篇文章 — 与 'wx unread --filter official' 行为一致,便于扫描"哪些公众号还有未读,标题是啥"。 - ipc.rs: BizArticles 加 unread: bool 字段(serde default = false 向后兼容) - cli/mod.rs: --unread flag - cli/biz_articles.rs: 透传 unread - daemon/server.rs: dispatch 加 unread 参数 - daemon/query.rs: q_biz_articles - 开启 --unread 时先查 session.db 拿 unread_count>0 且 chat_type==official_account 的 username 集合 - 与 --account 取交集(两者都给时进一步缩小范围) - 空交集提前 return,避免无意义全表扫 - 解析后按 pub_time DESC 排,每个 account_username 只保留首条 - 最后再 truncate(limit) * docs: PR draft - update --unread + --until usage * chore(biz-articles): drop PR draft, document command, fix typo - 删除 PR_DRAFT.md(误入 repo 的 PR 描述草稿,不该进 main) - README.md / SKILL.md 补 biz-articles 用法 - query.rs: 密鑰 → 密钥 Co-authored-by: wx-cli-coder --------- Co-authored-by: jackwener Co-authored-by: wx-cli-coder --- README.md | 15 ++ SKILL.md | 27 +++ src/cli/biz_articles.rs | 30 +++ src/cli/mod.rs | 25 +++ src/daemon/query.rs | 391 ++++++++++++++++++++++++++++++++++++++++ src/daemon/server.rs | 6 + src/ipc.rs | 15 ++ 7 files changed, 509 insertions(+) create mode 100644 src/cli/biz_articles.rs diff --git a/README.md b/README.md index b9783ed..de816d1 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,21 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 朋友圈数据只覆盖你本地刷到过的帖子(微信 app 按需下载)。 +### 公众号文章 + +公众号文章推送存在独立的 `biz_message_0.db`,用 `biz-articles` 单独查: + +```bash +wx biz-articles # 最近 50 篇 +wx biz-articles -n 200 # 更多 +wx biz-articles --account "返朴" # 限定公众号(名称模糊匹配) +wx biz-articles --since 2026-05-01 --until 2026-05-10 +wx biz-articles --unread # 仅有未读的公众号,每号取最新 1 篇 +wx biz-articles --json | jq '.[].url' # 下游消费 URL +``` + +每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 + ### 联系人 & 群组 ```bash diff --git a/SKILL.md b/SKILL.md index 7d587af..fe7418c 100644 --- a/SKILL.md +++ b/SKILL.md @@ -215,6 +215,33 @@ wx sns-search "婚礼" --user "李四" --since 
2023-01-01 -n 50 > 只保存你本地刷到过的朋友圈(微信 app 按需下载)。没刷到过的帖子不在本地,任何命令都拿不到。 +### 公众号文章 + +公众号的文章推送存在独立的 `biz_message_0.db`,与普通 `message_0.db` 分开: + +```bash +# 最近 50 篇(默认) +wx biz-articles + +# 更多 +wx biz-articles -n 200 + +# 限定公众号(名称模糊匹配 display name / username) +wx biz-articles --account "返朴" + +# 时间范围(YYYY-MM-DD,发布时间,非接收时间) +wx biz-articles --since 2026-05-01 --until 2026-05-10 + +# 仅有未读消息的公众号,每号取最新 1 篇(适合"今天有什么新推送"扫描) +wx biz-articles --unread +wx biz-articles --unread --account "Datawhale" # 与 --account 取交集 + +# 下游消费:拿 URL 做内容抓取 +wx biz-articles --since 2026-05-10 --json | jq '.[].url' +``` + +每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 + ### 收藏与统计 ```bash diff --git a/src/cli/biz_articles.rs b/src/cli/biz_articles.rs new file mode 100644 index 0000000..0c74874 --- /dev/null +++ b/src/cli/biz_articles.rs @@ -0,0 +1,30 @@ +use anyhow::Result; +use crate::ipc::Request; +use super::history::{parse_time, parse_time_end}; +use super::transport; +use super::output::{resolve, print_value}; + +pub fn cmd_biz_articles( + limit: usize, + account: Option, + since: Option, + until: Option, + unread: bool, + json: bool, +) -> Result<()> { + let since_ts = since.as_deref().map(parse_time).transpose()?; + let until_ts = until.as_deref().map(parse_time_end).transpose()?; + + let req = Request::BizArticles { + limit, + account, + since: since_ts, + until: until_ts, + unread, + }; + let resp = transport::send(req)?; + let data = resp.data.get("articles") + .cloned() + .unwrap_or(serde_json::Value::Array(vec![])); + print_value(&data, &resolve(json)) +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 3a28060..b9e71fd 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,4 +1,5 @@ mod init; +pub mod biz_articles; pub mod sessions; pub mod history; pub mod search; @@ -220,6 +221,27 @@ enum Commands { #[arg(long)] json: bool, }, + /// 
查询公众号文章推送(本地缓存) + BizArticles { + /// 显示数量 + #[arg(short = 'n', long, default_value = "50")] + limit: usize, + /// 限定公众号(名称模糊匹配) + #[arg(long)] + account: Option, + /// 起始时间 YYYY-MM-DD + #[arg(long)] + since: Option, + /// 结束时间 YYYY-MM-DD + #[arg(long)] + until: Option, + /// 只看有未读的公众号,每个公众号取最新 1 篇 + #[arg(long)] + unread: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 朋友圈全文搜索:匹配正文关键词 SnsSearch { /// 关键词 @@ -304,6 +326,9 @@ fn dispatch(cli: Cli) -> Result<()> { Commands::SnsSearch { keyword, limit, since, until, user, json } => { sns_search::cmd_sns_search(keyword, limit, since, until, user, json) } + Commands::BizArticles { limit, account, since, until, unread, json } => { + biz_articles::cmd_biz_articles(limit, account, since, until, unread, json) + } Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 2d33e97..98574ab 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -2945,6 +2945,397 @@ pub async fn q_sns_search( Ok(json!({ "keyword": keyword, "posts": posts, "total": total })) } +// ─── 公众号文章查询 ─────────────────────────────────────────────────────────── + +/// 一条公众号文章的解析产物 +#[derive(Debug)] +struct BizArticle { + /// 接收该推送的时间戳(即消息的 create_time) + recv_time: i64, + /// 公众号 username + account_username: String, + /// 文章标题 + title: String, + /// 文章链接 + url: String, + /// 摘要 + digest: String, + /// 封面图 + cover: String, + /// 文章发布时间(pub_time,单位秒) + pub_time: i64, +} + +/// 从 biz_message 表的单条 XML 解析出全部 article items +fn parse_biz_xml_items(recv_time: i64, account_username: &str, xml: &str) -> Vec { + let mut items = Vec::new(); + let mut search_from = 0; + loop { + let Some(item_start) = xml[search_from..].find("") else { break; }; + let abs_start = search_from + item_start; + let Some(item_end) = xml[abs_start..].find("") else { break; }; + let abs_end = abs_start + item_end + 7; + let item_xml = &xml[abs_start..abs_end]; + + let title = extract_cdata(item_xml, 
"title").unwrap_or_default(); + let url = extract_cdata(item_xml, "url").unwrap_or_default(); + // Skip items with no URL or empty title (e.g. payment entries) + if url.is_empty() || title.is_empty() { + search_from = abs_end; + continue; + } + let digest = extract_cdata(item_xml, "digest").unwrap_or_default(); + let cover = extract_cdata(item_xml, "cover").unwrap_or_default(); + let pub_time = extract_xml_text(item_xml, "pub_time") + .and_then(|s| s.parse::().ok()) + .unwrap_or(recv_time); + + items.push(BizArticle { + recv_time, + account_username: account_username.to_string(), + title, + url, + digest, + cover, + pub_time, + }); + search_from = abs_end; + } + items +} + +/// 提取 CDATA 或普通文本内容: `` 或 `...` +/// +/// 注意: 内容匹配到 `` 之前的内容。CDATA 块中的 "]]"已在 "]]\x3e" 之前, +/// 所以 inner 为 `` 或 `" 被 close tag 吸掉) +fn extract_cdata(xml: &str, tag: &str) -> Option { + let open = format!("<{}>", tag); + let close = format!("", tag); + let start = xml.find(&open)? + open.len(); + let end = xml[start..].find(&close)?; + let inner = xml[start..start + end].trim(); + if inner.starts_with("` → strip 9-char `` suffix + let body = &inner[9..]; + // Strip `]]>` (normal) or `]]` (edge case) + let cdata_end = b"]]>"; + let cdata_end2 = b"]]"; + let content: &str = if body.as_bytes().ends_with(cdata_end) { + &body[..body.len() - 3] + } else if body.as_bytes().ends_with(cdata_end2) { + &body[..body.len() - 2] + } else { + body + }; + let content = content.trim(); + if content.is_empty() { None } else { Some(content.to_string()) } + } else if inner.is_empty() { + None + } else { + Some(unescape_html(inner)) + } +} + +/// 查询公众号文章推送(biz_message_0.db) +/// +/// 每条消息可能包含多篇文章(多图文推送)。返回所有文章展开就的平底列表。 +pub async fn q_biz_articles( + db: &DbCache, + names: &Names, + limit: usize, + account: Option, + since: Option, + until: Option, + unread: bool, +) -> Result { + let biz_path = db.get("message/biz_message_0.db").await? + .context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")? 
+; + + // 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集, + // 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。 + let unread_usernames: Option> = if unread { + let session_path = db.get("session/session.db").await? + .context("无法解密 session.db")?; + let session_path2 = session_path.clone(); + let unread_rows: Vec = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&session_path2)?; + let mut stmt = conn.prepare( + "SELECT username FROM SessionTable WHERE unread_count > 0" + )?; + let rows: Vec = stmt.query_map([], |row| row.get::<_, String>(0))? + .filter_map(|r| r.ok()) + .collect(); + Ok::<_, anyhow::Error>(rows) + }).await??; + // 仅保留公众号类型的未读会话 + let set: std::collections::HashSet = unread_rows.into_iter() + .filter(|u| chat_type_of(u, names) == "official_account") + .collect(); + if set.is_empty() { + // 没有未读公众号 → 直接空返回,避免打 biz 表扫描 + return Ok(json!({ "count": 0, "articles": [] })); + } + Some(set) + } else { + None + }; + + // 1. 从 Name2Id 表获取 rowid -> username 映射,再推导 md5 -> username + let biz_path2 = biz_path.clone(); + let id2username: HashMap = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&biz_path2)?; + let mut stmt = conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")? + ; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) + })? + .collect::>>()?; + Ok::<_, anyhow::Error>(rows.into_iter().collect()) + }).await??; + + // 构建 md5(username) -> username 映射 + let md5_to_uname: HashMap = id2username.values() + .map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone())) + .collect(); + + // 2. 
如果 指定了 --account,找到匹配的 username 列表 + let account_low = account.as_deref().map(|s| s.to_lowercase()); + let mut target_usernames: Option> = account_low.as_ref().map(|low| { + id2username.values() + .filter(|u| { + let display = names.display(u); + display.to_lowercase().contains(low.as_str()) + || u.to_lowercase().contains(low.as_str()) + }) + .cloned() + .collect() + }); + + // --unread 与 --account 取交集(进一步缩小范围) + if let Some(ref unread_set) = unread_usernames { + target_usernames = Some(match target_usernames.take() { + Some(acc_list) => acc_list.into_iter() + .filter(|u| unread_set.contains(u)) + .collect(), + None => unread_set.iter().cloned().collect(), + }); + // 交集为空 → 提前返回 + if target_usernames.as_ref().map(|v| v.is_empty()).unwrap_or(false) { + return Ok(json!({ "count": 0, "articles": [] })); + } + } + + // 3. 进行数据库查询 + let biz_path3 = biz_path.clone(); + let since2 = since; + let until2 = until; + let target_hashes: Option> = target_usernames.as_ref().map(|unames| { + unames.iter() + .map(|u| format!("{:x}", md5::compute(u.as_bytes()))) + .collect() + }); + + let rows: Vec<(String, i64, i64, Vec, i64)> = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&biz_path3)?; + + // 列出所有 Msg_ 表 + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'" + )?; + let table_names: Vec = stmt.query_map([], |row| row.get(0))? 
+ .filter_map(|r| r.ok()) + .collect(); + + let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap(); + let mut all_rows: Vec<(String, i64, i64, Vec, i64)> = Vec::new(); + + for tname in &table_names { + if !re.is_match(tname) { continue; } + let hash = &tname[4..]; + + // account 过滤 + if let Some(ref hashes) = target_hashes { + if !hashes.iter().any(|h| h == hash) { continue; } + } + + let username = md5_to_uname.get(hash).cloned().unwrap_or_default(); + + // 构建过滤条件 + let mut clauses: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + // local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章) + clauses.push("(local_type & 4294967295) = 49".to_string()); + if let Some(s) = since2 { + clauses.push("create_time >= ?".to_string()); + params.push(Box::new(s)); + } + if let Some(u) = until2 { + clauses.push("create_time <= ?".to_string()); + params.push(Box::new(u)); + } + let where_clause = format!("WHERE {}", clauses.join(" AND ")); + + let sql = format!( + "SELECT create_time, WCDB_CT_message_content, message_content \ + FROM [{}] {} ORDER BY create_time DESC", + tname, where_clause + ); + + let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + if let Ok(mut inner_stmt) = conn.prepare(&sql) { + let msg_rows: Vec<_> = inner_stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + username.clone(), + row.get::<_, i64>(0)?, + row.get::<_, i64>(1).unwrap_or(0), + get_content_bytes(row, 2), + 0i64, + )) + }) + .map(|it| it.filter_map(|r| r.ok()).collect()) + .unwrap_or_default(); + all_rows.extend(msg_rows); + } + } + Ok::<_, anyhow::Error>(all_rows) + }).await??; + + // 4. 解压并解析 XML + let mut articles: Vec = Vec::new(); + for (username, recv_time, ct, content_bytes, _) in rows { + let content = decompress_message(&content_bytes, ct); + if content.is_empty() { continue; } + let items = parse_biz_xml_items(recv_time, &username, &content); + articles.extend(items); + } + + // 5. 
按 pub_time DESC 排序 + articles.sort_by_key(|a| std::cmp::Reverse(a.pub_time)); + + // --unread 语义 A:每个公众号只保留最新 1 篇(已按 pub_time 排序,取首条即可) + if unread { + let mut seen = std::collections::HashSet::::new(); + articles.retain(|a| seen.insert(a.account_username.clone())); + } + + articles.truncate(limit); + + let results: Vec = articles.into_iter().map(|a| { + let account_display = names.display(&a.account_username); + json!({ + "time": fmt_time(a.pub_time, "%Y-%m-%d %H:%M"), + "timestamp": a.pub_time, + "recv_time": a.recv_time, + "recv_time_str": fmt_time(a.recv_time, "%Y-%m-%d %H:%M"), + "account": account_display, + "account_username": a.account_username, + "title": a.title, + "url": a.url, + "digest": a.digest, + "cover_url": a.cover, + }) + }).collect(); + + Ok(json!({ "count": results.len(), "articles": results })) +} + +#[cfg(test)] +mod biz_tests { + use super::*; + + #[test] + fn extract_cdata_normal() { + let xml = "<![CDATA[TencentResearch]]>"; + assert_eq!(extract_cdata(xml, "title"), Some("TencentResearch".into())); + } + + #[test] + fn extract_cdata_empty() { + let xml = ""; + assert_eq!(extract_cdata(xml, "cover"), None); + } + + #[test] + fn extract_cdata_url() { + let xml = ""; + let result = extract_cdata(xml, "url"); + assert!(result.is_some()); + let url = result.unwrap(); + assert!(url.starts_with("http://mp.weixin.qq.com")); + assert!(!url.contains("CDATA")); + } + + #[test] + fn extract_cdata_no_cdata_wrapper() { + let xml = "1700000000"; + assert_eq!(extract_cdata(xml, "pub_time"), Some("1700000000".into())); + } + + #[test] + fn parse_biz_xml_items_single_article() { + let xml = r#" + <![CDATA[Test Article Title]]> + + + + 1700000000 + "#; + + let items = parse_biz_xml_items(1699999999, "gh_test123", xml); + assert_eq!(items.len(), 1); + assert_eq!(items[0].title, "Test Article Title"); + assert_eq!(items[0].url, "http://mp.weixin.qq.com/s?test=1"); + assert_eq!(items[0].digest, "Test Digest"); + assert_eq!(items[0].pub_time, 1700000000); + 
assert_eq!(items[0].account_username, "gh_test123"); + } + + #[test] + fn parse_biz_xml_items_skips_no_url() { + let xml = r#" + <![CDATA[Has Title No URL]]> + + 1700000001 + "#; + let items = parse_biz_xml_items(1700000001, "gh_test", xml); + assert_eq!(items.len(), 0); + } + + #[test] + fn parse_biz_xml_items_multi_article() { + let xml = r#" + + <![CDATA[Article 1]]> + + 1700000010 + + + <![CDATA[Article 2]]> + + 1700000020 + + "#; + let items = parse_biz_xml_items(1700000000, "gh_multi", xml); + assert_eq!(items.len(), 2); + assert_eq!(items[0].title, "Article 1"); + assert_eq!(items[1].title, "Article 2"); + } + + #[test] + fn parse_biz_xml_items_pub_time_fallback() { + // When pub_time is missing, should fall back to recv_time + let xml = r#" + <![CDATA[No PubTime]]> + + "#; + let items = parse_biz_xml_items(1700000099, "gh_fallback", xml); + assert_eq!(items.len(), 1); + assert_eq!(items[0].pub_time, 1700000099); // falls back to recv_time + } +} + #[cfg(test)] mod group_nickname_tests { use super::*; diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 4d7fd54..3b06727 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -234,5 +234,11 @@ async fn dispatch( ReloadConfig => { Response::ok(serde_json::json!({ "reloading": true })) } + BizArticles { limit, account, since, until, unread } => { + match query::q_biz_articles(db, &names_arc, limit, account, since, until, unread).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } } } diff --git a/src/ipc.rs b/src/ipc.rs index 32e0a8f..c478ee4 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -102,6 +102,21 @@ pub enum Request { #[serde(skip_serializing_if = "Option::is_none")] user: Option, }, + /// 查询公众号文章推送(biz_message_0.db) + BizArticles { + #[serde(default = "default_limit_50")] + limit: usize, + /// 公众号名称过滤(模糊匹配 display name,None = 全部) + #[serde(skip_serializing_if = "Option::is_none")] + account: Option, + #[serde(skip_serializing_if = "Option::is_none")] + 
since: Option, + #[serde(skip_serializing_if = "Option::is_none")] + until: Option, + /// 只看有未读消息的公众号,每个公众号取最新 1 篇 + #[serde(default)] + unread: bool, + }, /// 朋友圈全文搜索(匹配 contentDesc) SnsSearch { keyword: String, From f0f3d3cf22c7723de146884fc90ac2dc09ceaf25 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 16:08:48 +0800 Subject: [PATCH 08/20] feat(favorites): expose article url field (#50) Co-authored-by: Kyrie --- src/daemon/query.rs | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 98574ab..03b764b 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -1444,6 +1444,15 @@ fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { extract_appmsg_url(content) } +fn extract_favorite_url(content: &str) -> Option { + let url = extract_xml_text(content, "link") + .map(|s| unescape_html(strip_xml_cdata(&s)))?; + if url.is_empty() || !(url.starts_with("http://") || url.starts_with("https://")) { + return None; + } + Some(url) +} + fn strip_xml_cdata(s: &str) -> &str { s.strip_prefix("")) @@ -2275,7 +2284,7 @@ pub async fn q_favorites( }; // WeChat 部分版本的 update_time 为毫秒,10位以上判定为毫秒后转秒 let ts_secs = if ts > 9_999_999_999 { ts / 1000 } else { ts }; - json!({ + let mut item = json!({ "id": local_id, "type": type_str, "type_num": ftype, @@ -2284,7 +2293,13 @@ pub async fn q_favorites( "preview": preview, "from": fromusr, "chat": chatname, - }) + }); + if ftype == 5 { + if let Some(url) = extract_favorite_url(&content) { + item["url"] = Value::String(url); + } + } + item }) .collect(); @@ -3624,6 +3639,31 @@ mod sns_tests { assert_eq!(extract_appmsg_url(xml), None); } + #[test] + fn extract_favorite_url_reads_link_tag() { + let xml = concat!( + "", + "5", + "", + "" + ); + assert_eq!( + extract_favorite_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=foo&mid=1") + ); + } + + #[test] + fn 
extract_favorite_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://favorites/item/1", + "" + ); + assert_eq!(extract_favorite_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") } From d4587b1c68d837d2e7d6be97b7af2514cb059411 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 17:11:27 +0800 Subject: [PATCH 09/20] fix(query): three correctness/latency fixes from deep review (#51) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - q_contacts: replaced ad-hoc `gh_*`/`biz_*` prefix filter with `chat_type_of == "private"`. The old filter leaked groups (`@chatroom`), folded entries (`brandsessionholder` / `@placeholder_foldgroup`), verified service accounts (`verify_flag != 0`), and internal `@xxx` system accounts into `wx contacts` output. - q_search: parallelized the per-message-DB blocking phase via `JoinSet::spawn_blocking`. Previously the `for (db_path, ...) in by_path { ... .await }` loop ran one DB at a time; users with N message_*.db shards paid N× latency. Each DB now runs concurrently on the blocking pool; total latency collapses to a single slow DB. - q_new_messages: fixed `new_state` reset path so first-run + truncated sessions don't lock `since_ts` at `fallback_ts` forever. Old code always wrote `state[uname] = old_since_ts || fallback_ts` for changed sessions, then advanced only those that appeared in `all_msgs`. On first run (state=None) truncated sessions ended up with `state[uname] = now-86400` and stayed there across calls — every subsequent call re-scanned a window that grew with elapsed time. New logic separates three cases: * in_results → advance to returned_max (incremental fetch) * truncated + state → keep prev since_ts (retry next call) * truncated + none → advance to session_ts (avoid lock-in; old messages remain reachable via `wx history`). 
--- src/daemon/query.rs | 87 ++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 03b764b..167d88a 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -335,17 +335,22 @@ pub async fn q_search( .unwrap_or_default(); let group_nicknames_by_chat = Arc::new(group_nicknames_by_chat); - let mut results: Vec = Vec::new(); + // 多个 message_*.db 之间没有数据依赖,并发解密 + 查询。每个 DB 内部仍按 + // table 串行(共享同一 sqlite Connection 不能跨线程移动)。原版本是 N 个 DB + // 串行 await,活跃账号上 N 个分片要轮 N 次磁盘 IO;现在 JoinSet 把它们一次 + // 全部 dispatch 到 blocking pool,整体 latency 退化为单 DB 慢路径。 let kw = keyword.to_string(); + let mut join_set: tokio::task::JoinSet>> = tokio::task::JoinSet::new(); for (db_path, table_list) in by_path { let kw2 = kw.clone(); let since2 = since; let until2 = until; let limit2 = limit * 3; - let names_map2 = names.map.clone(); let group_nicknames_by_chat2 = Arc::clone(&group_nicknames_by_chat); - let found: Vec = match tokio::task::spawn_blocking(move || { + let db_path_for_log = db_path.clone(); + + join_set.spawn_blocking(move || { let conn = Connection::open(&db_path)?; let mut all = Vec::new(); let empty_group_nicknames = HashMap::new(); @@ -369,17 +374,20 @@ pub async fn q_search( all.push(row); } } - Err(e) => eprintln!("[search] skip table {}: {}", tname, e), + Err(e) => eprintln!("[search] skip table {} (db={}): {}", tname, db_path_for_log, e), } } - Ok::<_, anyhow::Error>(all) - }).await { - Ok(Ok(v)) => v, - Ok(Err(e)) => { eprintln!("[search] skip DB: {}", e); continue; } - Err(e) => { eprintln!("[search] task error: {}", e); continue; } - }; + Ok(all) + }); + } - results.extend(found); + let mut results: Vec = Vec::new(); + while let Some(joined) = join_set.join_next().await { + match joined { + Ok(Ok(rows)) => results.extend(rows), + Ok(Err(e)) => eprintln!("[search] skip DB: {}", e), + Err(e) => eprintln!("[search] task error: {}", e), + } } results.sort_by_key(|r| 
std::cmp::Reverse(r["timestamp"].as_i64().unwrap_or(0))); @@ -388,9 +396,14 @@ pub async fn q_search( } /// 查询联系人 +/// +/// 只返回真实联系人(`chat_type_of == "private"`)。`names.map` 是从 `contact` 表 +/// 全量加载的,里面同时包含群(`@chatroom`)、公众号(`gh_*` / `biz_*` / verify_flag != 0)、 +/// 折叠入口(`brandsessionholder` / `@placeholder_foldgroup`)以及微信内部 `@xxx` 系统账号。 +/// 这些都不应该出现在 `wx contacts` 输出里,统一走 `chat_type_of` 这条同样的真相判定。 pub async fn q_contacts(names: &Names, query: Option<&str>, limit: usize) -> Result { let mut contacts: Vec = names.map.iter() - .filter(|(u, _)| !u.starts_with("gh_") && !u.starts_with("biz_")) + .filter(|(u, _)| chat_type_of(u, names) == "private") .map(|(u, d)| json!({ "username": u, "display": d })) .collect(); @@ -2184,24 +2197,40 @@ pub async fn q_new_messages( all_msgs.truncate(limit); // 5. 重建 new_state,防止全局 limit 截断导致消息永久丢失: - // - 未变化的会话:沿用 session.db 的 last_timestamp - // - 变化但全被截断(无消息在最终结果中):保留旧 since_ts,下次重试 - // - 变化且有消息返回:推进到该会话在结果中的最大 timestamp - let mut new_state = session_ts_map; - // 先把 changed 会话重置回旧 since_ts - for (uname, _) in &changed { - let old_ts = state.as_ref() - .and_then(|m| m.get(uname)) - .copied() - .unwrap_or(fallback_ts); - new_state.insert(uname.clone(), old_ts); - } - // 再根据实际返回的消息向前推进 - for m in &all_msgs { - if let (Some(uname), Some(ts)) = (m["username"].as_str(), m["timestamp"].as_i64()) { - let e = new_state.entry(uname.to_string()).or_insert(0); - if ts > *e { *e = ts; } + // - 未变化的会话:沿用 session.db 的 last_timestamp(即 session_ts_map) + // - 变化但全被截断(无消息在最终结果中): + // * 后续调用 (state=Some):保留旧 since_ts,下次重试拿这部分消息 + // * 首次调用 (state=None):advance 到 session_ts,避免 since_ts 锁死在 + // fallback_ts 导致后续每次都回扫 24h。窗口会随调用次数 + 时间累积扩大, + // 性能持续衰退。代价:首次 + 被截断会话的老消息看不到,需走 `wx history`。 + // - 变化且有消息返回:advance 到该会话在结果中的最大 timestamp(增量 fetch 标准语义) + let returned_max_ts: HashMap = { + let mut m: HashMap = HashMap::new(); + for msg in &all_msgs { + if let (Some(u), Some(ts)) = (msg["username"].as_str(), msg["timestamp"].as_i64()) { + let e = 
m.entry(u.to_string()).or_insert(0); + if ts > *e { *e = ts; } + } } + m + }; + let mut new_state = session_ts_map; + for (uname, _) in &changed { + let in_results = returned_max_ts.contains_key(uname); + let prev = state.as_ref().and_then(|m| m.get(uname)).copied(); + let next_ts = match (in_results, prev) { + (true, _) => { + // 有消息返回:advance 到 returned_max;返回的最大 ts 通常 ≤ session_ts, + // 这样下次查 `since > returned_max` 仍能拿到 returned_max..session_ts 的截断尾巴。 + returned_max_ts[uname] + } + (false, Some(prev)) => prev, // 后续 + 截断:保持旧 since + (false, None) => { + // 首次 + 截断:advance 到 session_ts 兜底,避免 since_ts 锁死。 + new_state.get(uname).copied().unwrap_or(fallback_ts) + } + }; + new_state.insert(uname.clone(), next_ts); } Ok(json!({ From 70aa3a44e332f38d8381e68cbb2d1592c18efa66 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 17:11:42 +0800 Subject: [PATCH 10/20] fix(daemon,scanner,crypto): harden lifecycle, widen Windows page scan, fix SQLCipher short read (#54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - daemon: write pid file only after IPC bound; clean sock+pid on normal return - transport: PidFile JSON metadata + identity verification (ps/QueryFullProcessImageNameW); SIGTERM with poll-timeout; backward-compat read for plain-text pid - daemon_cmd: status/stop work with both new JSON and legacy plain-text pid file - config: cwd → exe_dir → ~/.wx-cli config precedence matches `wx init` write order; Windows DB auto-detect picks newest by latest mtime - crypto: full_decrypt uses read_exact for intermediate pages, zero-pads only the final partial page; tests cover short-chunk reads and early EOF - scanner/windows: page protect check covers PAGE_READWRITE / PAGE_WRITECOPY / PAGE_EXECUTE_*WRITE* with modifier-bit stripping Cross-reviewed by @wx-cli-coder. Windows verified via `cargo check --target x86_64-pc-windows-gnu` (no Windows runtime test). 
--- src/cli/daemon_cmd.rs | 61 +++---- src/cli/transport.rs | 349 ++++++++++++++++++++++++++++++++++------- src/config.rs | 198 ++++++++++++++++------- src/crypto/mod.rs | 109 +++++++++++-- src/daemon/mod.rs | 31 ++-- src/scanner/windows.rs | 59 +++---- 6 files changed, 601 insertions(+), 206 deletions(-) diff --git a/src/cli/daemon_cmd.rs b/src/cli/daemon_cmd.rs index 31b0792..ded6827 100644 --- a/src/cli/daemon_cmd.rs +++ b/src/cli/daemon_cmd.rs @@ -1,7 +1,7 @@ -use anyhow::Result; -use crate::config; -use crate::cli::DaemonCommands; use crate::cli::transport; +use crate::cli::DaemonCommands; +use crate::config; +use anyhow::Result; pub fn cmd_daemon(cmd: DaemonCommands) -> Result<()> { match cmd { @@ -15,7 +15,13 @@ fn cmd_status() -> Result<()> { if transport::is_alive() { let pid_path = config::pid_path(); let pid = std::fs::read_to_string(&pid_path) - .map(|s| s.trim().to_string()) + .map(|s| { + serde_json::from_str::(&s) + .ok() + .and_then(|v| v.get("pid").and_then(|p| p.as_u64())) + .map(|pid| pid.to_string()) + .unwrap_or_else(|| s.trim().to_string()) + }) .unwrap_or_else(|_| "?".into()); println!("wx-daemon 运行中 (PID {})", pid); } else { @@ -25,42 +31,13 @@ fn cmd_status() -> Result<()> { } fn cmd_stop() -> Result<()> { - let pid_path = config::pid_path(); - if !pid_path.exists() { + if !transport::is_alive() { println!("daemon 未运行"); return Ok(()); } - let pid_str = std::fs::read_to_string(&pid_path)?; - let pid: u32 = pid_str.trim().parse() - .map_err(|_| anyhow::anyhow!("PID 文件格式错误"))?; - - #[cfg(unix)] - { - let ret = unsafe { libc::kill(pid as libc::pid_t, libc::SIGTERM) }; - if ret != 0 { - let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0); - if errno == libc::ESRCH { - println!("wx-daemon (PID {}) 已不在运行,清理残留文件", pid); - } else { - anyhow::bail!("发送 SIGTERM 失败 (errno {})", errno); - } - } else { - println!("已停止 wx-daemon (PID {})", pid); - } - } - - #[cfg(windows)] - { - std::process::Command::new("taskkill") - .args(["/PID", 
&pid.to_string(), "/F"]) - .output()?; - println!("已停止 wx-daemon (PID {})", pid); - } - - let _ = std::fs::remove_file(config::sock_path()); - let _ = std::fs::remove_file(&pid_path); - + transport::stop_daemon()?; + println!("已停止 wx-daemon"); Ok(()) } @@ -89,19 +66,25 @@ fn cmd_logs(follow: bool, lines: usize) -> Result<()> { file.read_to_string(&mut content)?; let all_lines: Vec<&str> = content.lines().collect(); let show = &all_lines[all_lines.len().saturating_sub(lines)..]; - for line in show { println!("{}", line); } + for line in show { + println!("{}", line); + } loop { std::thread::sleep(std::time::Duration::from_millis(500)); let mut buf = String::new(); file.read_to_string(&mut buf)?; - if !buf.is_empty() { print!("{}", buf); } + if !buf.is_empty() { + print!("{}", buf); + } } } } else { let content = std::fs::read_to_string(&log_path)?; let all_lines: Vec<&str> = content.lines().collect(); let show = &all_lines[all_lines.len().saturating_sub(lines)..]; - for line in show { println!("{}", line); } + for line in show { + println!("{}", line); + } } Ok(()) diff --git a/src/cli/transport.rs b/src/cli/transport.rs index 73c2f88..23c3e18 100644 --- a/src/cli/transport.rs +++ b/src/cli/transport.rs @@ -1,50 +1,32 @@ use anyhow::{bail, Context, Result}; +use serde::{Deserialize, Serialize}; use std::io::{BufRead, BufReader, Write}; +use std::path::{Path, PathBuf}; use std::time::Duration; use crate::config; use crate::ipc::{Request, Response}; const STARTUP_TIMEOUT_SECS: u64 = 15; +#[cfg(unix)] +const STOP_TIMEOUT_MS: u64 = 2_000; + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct PidFile { + pid: u32, + #[serde(default)] + exe: Option, +} /// 检查 daemon 是否存活 pub fn is_alive() -> bool { #[cfg(unix)] { - use std::os::unix::net::UnixStream; - let sock_path = config::sock_path(); - if !sock_path.exists() { - return false; - } - let mut stream = match UnixStream::connect(&sock_path) { - Ok(s) => s, - Err(_) => return false, - }; - 
stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); - stream.set_write_timeout(Some(Duration::from_secs(2))).ok(); - - let req = serde_json::json!({"cmd": "ping"}); - if write!(stream, "{}\n", req).is_err() { - return false; - } - let mut line = String::new(); - let mut reader = BufReader::new(&stream); - if reader.read_line(&mut line).is_err() { - return false; - } - serde_json::from_str::(&line) - .ok() - .and_then(|v| v.get("pong").and_then(|p| p.as_bool())) - .unwrap_or(false) + ping_unix().unwrap_or(false) } #[cfg(windows)] { - use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; - // 必须用 interprocess 自己的连接 API,和 server 保持一致 - match "wx-cli-daemon".to_ns_name::() { - Ok(name) => Stream::connect(name).is_ok(), - Err(_) => false, - } + ping_windows().unwrap_or(false) } #[cfg(not(any(unix, windows)))] { @@ -65,25 +47,33 @@ pub fn ensure_daemon() -> Result<()> { /// 停止 daemon(如果正在运行) pub fn stop_daemon() -> Result<()> { let pid_path = config::pid_path(); - if let Ok(pid_str) = std::fs::read_to_string(&pid_path) { - if let Ok(pid) = pid_str.trim().parse::() { - #[cfg(unix)] - { - let _ = std::process::Command::new("kill") - .arg("-TERM") - .arg(pid.to_string()) - .spawn(); + let pid_file = read_pid_file(&pid_path)?; + let daemon_alive = is_alive(); + + match pid_file { + Some(pid_file) => { + let belongs = pid_belongs_to_daemon(&pid_file)?; + if daemon_alive && !belongs { + bail!( + "daemon 正在运行,但 {} 指向的 PID {} 无法确认属于当前 wx-daemon", + pid_path.display(), + pid_file.pid + ); } - #[cfg(windows)] - { - let _ = std::process::Command::new("taskkill") - .args(["/F", "/PID", &pid.to_string()]) - .spawn(); + if belongs { + terminate_pid(pid_file.pid)?; } } + None if daemon_alive => { + bail!( + "daemon 正在运行,但 {} 缺失或损坏,无法安全停止", + pid_path.display() + ); + } + None => {} } - let _ = std::fs::remove_file(config::sock_path()); - let _ = std::fs::remove_file(&pid_path); + + cleanup_ipc_files(); Ok(()) } @@ -123,6 +113,7 @@ fn 
preflight_cli_dir_writable() -> Result<()> { /// 启动 daemon 进程(自身二进制,设置 WX_DAEMON_MODE=1) fn start_daemon() -> Result<()> { let exe = std::env::current_exe().context("无法获取当前可执行文件路径")?; + let child_pid: u32; // 预检:当前用户是否能写 ~/.wx-cli/。如果不能,给出可操作的错误信息, // 而不是 spawn 一个注定失败的 daemon 然后超时 15s。 @@ -138,7 +129,8 @@ fn start_daemon() -> Result<()> { let _ = std::fs::create_dir_all(parent); } let (stdout_stdio, stderr_stdio) = std::fs::OpenOptions::new() - .create(true).append(true) + .create(true) + .append(true) .open(&log_path) .and_then(|f| f.try_clone().map(|g| (f, g))) .map(|(f, g)| (std::process::Stdio::from(f), std::process::Stdio::from(g))) @@ -149,8 +141,14 @@ fn start_daemon() -> Result<()> { .stdout(stdout_stdio) .stderr(stderr_stdio); // SAFETY: setsid() 在 fork 后的子进程中调用,使 daemon 脱离控制终端 - unsafe { cmd.pre_exec(|| { libc::setsid(); Ok(()) }); } - let _ = cmd.spawn().context("无法启动 daemon 进程")?; + unsafe { + cmd.pre_exec(|| { + libc::setsid(); + Ok(()) + }); + } + let child = cmd.spawn().context("无法启动 daemon 进程")?; + child_pid = child.id(); } #[cfg(windows)] @@ -161,12 +159,13 @@ fn start_daemon() -> Result<()> { let _ = std::fs::create_dir_all(parent); } let (stdout_stdio, stderr_stdio) = std::fs::OpenOptions::new() - .create(true).append(true) + .create(true) + .append(true) .open(&log_path) .and_then(|f| f.try_clone().map(|g| (f, g))) .map(|(f, g)| (std::process::Stdio::from(f), std::process::Stdio::from(g))) .unwrap_or_else(|_| (std::process::Stdio::null(), std::process::Stdio::null())); - let _ = std::process::Command::new(&exe) + let child = std::process::Command::new(&exe) .env("WX_DAEMON_MODE", "1") .stdin(std::process::Stdio::null()) .stdout(stdout_stdio) @@ -174,6 +173,7 @@ fn start_daemon() -> Result<()> { .creation_flags(0x00000008) // DETACHED_PROCESS .spawn() .context("无法启动 daemon 进程")?; + child_pid = child.id(); } // 等待 daemon 就绪(最多 STARTUP_TIMEOUT_SECS 秒) @@ -181,6 +181,7 @@ fn start_daemon() -> Result<()> { while std::time::Instant::now() < deadline { 
std::thread::sleep(Duration::from_millis(300)); if is_alive() { + write_pid_file(child_pid, &exe)?; return Ok(()); } } @@ -192,6 +193,233 @@ fn start_daemon() -> Result<()> { ) } +fn write_pid_file(pid: u32, exe: &Path) -> Result<()> { + if let Some(parent) = config::pid_path().parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("创建 {} 失败", parent.display()))?; + } + let pid_file = PidFile { + pid, + exe: Some(exe.to_path_buf()), + }; + let content = serde_json::to_string(&pid_file)?; + std::fs::write(config::pid_path(), content) + .with_context(|| format!("写入 {} 失败", config::pid_path().display()))?; + Ok(()) +} + +fn read_pid_file(path: &Path) -> Result> { + let content = match std::fs::read_to_string(path) { + Ok(content) => content, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err).with_context(|| format!("读取 {} 失败", path.display())), + }; + if let Ok(pid_file) = serde_json::from_str::(&content) { + return Ok(Some(pid_file)); + } + if let Ok(pid) = content.trim().parse::() { + return Ok(Some(PidFile { + pid, + exe: std::env::current_exe().ok(), + })); + } + bail!("{} 不是合法的 PID 文件", path.display()) +} + +fn cleanup_ipc_files() { + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(config::pid_path()); +} + +#[cfg(unix)] +fn ping_unix() -> Result { + use std::os::unix::net::UnixStream; + let sock_path = config::sock_path(); + if !sock_path.exists() { + return Ok(false); + } + let mut stream = UnixStream::connect(&sock_path)?; + stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); + stream.set_write_timeout(Some(Duration::from_secs(2))).ok(); + + let req = serde_json::to_string(&Request::Ping)? 
+ "\n"; + stream.write_all(req.as_bytes())?; + + let mut line = String::new(); + let mut reader = BufReader::new(&stream); + reader.read_line(&mut line)?; + + let resp: Response = serde_json::from_str(&line)?; + Ok(resp.ok && resp.data.get("pong").and_then(|p| p.as_bool()) == Some(true)) +} + +#[cfg(windows)] +fn ping_windows() -> Result { + use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; + + let name = "wx-cli-daemon".to_ns_name::()?; + let stream = Stream::connect(name)?; + let mut reader = BufReader::new(stream); + + let req = serde_json::to_string(&Request::Ping)? + "\n"; + reader.get_mut().write_all(req.as_bytes())?; + + let mut line = String::new(); + reader.read_line(&mut line)?; + + let resp: Response = serde_json::from_str(&line)?; + Ok(resp.ok && resp.data.get("pong").and_then(|p| p.as_bool()) == Some(true)) +} + +fn pid_belongs_to_daemon(pid_file: &PidFile) -> Result { + let expected_exe = pid_file + .exe + .clone() + .or_else(|| std::env::current_exe().ok()); + #[cfg(unix)] + { + unix_pid_matches_daemon(pid_file.pid, expected_exe.as_deref()) + } + #[cfg(windows)] + { + windows_pid_matches_daemon(pid_file.pid, expected_exe.as_deref()) + } + #[cfg(not(any(unix, windows)))] + { + let _ = expected_exe; + Ok(true) + } +} + +#[cfg(unix)] +fn unix_pid_matches_daemon(pid: u32, expected_exe: Option<&Path>) -> Result { + let Some(expected_exe) = expected_exe else { + return Ok(false); + }; + let output = std::process::Command::new("ps") + .args(["-o", "command=", "-p", &pid.to_string()]) + .output() + .with_context(|| format!("读取 PID {} 的 command 失败", pid))?; + if !output.status.success() { + return Ok(false); + } + let command = String::from_utf8_lossy(&output.stdout); + let expected = expected_exe.to_string_lossy(); + if command.contains(expected.as_ref()) { + return Ok(true); + } + let Some(exe_name) = expected_exe.file_name().and_then(|name| name.to_str()) else { + return Ok(false); + }; + Ok(command + .split_whitespace() + 
.any(|part| part == exe_name || part.ends_with(&format!("/{}", exe_name)))) +} + +#[cfg(windows)] +fn windows_pid_matches_daemon(pid: u32, expected_exe: Option<&Path>) -> Result { + use windows::core::PWSTR; + use windows::Win32::Foundation::CloseHandle; + use windows::Win32::System::Threading::{ + OpenProcess, QueryFullProcessImageNameW, PROCESS_NAME_FORMAT, + PROCESS_QUERY_LIMITED_INFORMATION, + }; + + let Some(expected_exe) = expected_exe else { + return Ok(false); + }; + let handle = match unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, false, pid) } { + Ok(handle) => handle, + Err(_) => return Ok(false), + }; + + let mut buf = vec![0u16; 260]; + let mut len = buf.len() as u32; + let actual = unsafe { + let result = QueryFullProcessImageNameW( + handle, + PROCESS_NAME_FORMAT(0), + PWSTR(buf.as_mut_ptr()), + &mut len, + ); + let _ = CloseHandle(handle); + result + }; + if actual.is_err() { + return Ok(false); + } + + let actual_path = PathBuf::from(String::from_utf16_lossy(&buf[..len as usize])); + Ok(normalize_exe_path(&actual_path) == normalize_exe_path(expected_exe)) +} + +#[cfg(windows)] +fn normalize_exe_path(path: &Path) -> String { + path.to_string_lossy() + .replace('\\', "/") + .to_ascii_lowercase() +} + +fn terminate_pid(pid: u32) -> Result<()> { + #[cfg(unix)] + { + terminate_pid_unix(pid) + } + #[cfg(windows)] + { + terminate_pid_windows(pid) + } + #[cfg(not(any(unix, windows)))] + { + let _ = pid; + Ok(()) + } +} + +#[cfg(unix)] +fn terminate_pid_unix(pid: u32) -> Result<()> { + let rc = unsafe { libc::kill(pid as i32, libc::SIGTERM) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::ESRCH) { + return Ok(()); + } + bail!("停止 PID {} 失败: {}", pid, err); + } + + let deadline = std::time::Instant::now() + Duration::from_millis(STOP_TIMEOUT_MS); + while std::time::Instant::now() < deadline { + if !unix_process_exists(pid) { + return Ok(()); + } + std::thread::sleep(Duration::from_millis(50)); + } 
+ + bail!("等待 PID {} 退出超时", pid) +} + +#[cfg(unix)] +fn unix_process_exists(pid: u32) -> bool { + let rc = unsafe { libc::kill(pid as i32, 0) }; + if rc == 0 { + return true; + } + let err = std::io::Error::last_os_error(); + err.raw_os_error() == Some(libc::EPERM) +} + +#[cfg(windows)] +fn terminate_pid_windows(pid: u32) -> Result<()> { + let status = std::process::Command::new("taskkill") + .args(["/F", "/PID", &pid.to_string()]) + .status() + .with_context(|| format!("执行 taskkill /PID {} 失败", pid))?; + if !status.success() { + bail!("停止 PID {} 失败: taskkill exit {:?}", pid, status.code()); + } + Ok(()) +} + /// 向 daemon 发送请求并返回响应 pub fn send(req: Request) -> Result { ensure_daemon()?; @@ -214,10 +442,11 @@ pub fn send(req: Request) -> Result { fn send_unix(req: Request) -> Result { use std::os::unix::net::UnixStream; let sock_path = config::sock_path(); - let mut stream = UnixStream::connect(&sock_path) - .context("连接 daemon socket 失败")?; + let mut stream = UnixStream::connect(&sock_path).context("连接 daemon socket 失败")?; stream.set_read_timeout(Some(Duration::from_secs(120))).ok(); - stream.set_write_timeout(Some(Duration::from_secs(120))).ok(); + stream + .set_write_timeout(Some(Duration::from_secs(120))) + .ok(); let req_str = serde_json::to_string(&req)? 
+ "\n"; stream.write_all(req_str.as_bytes())?; @@ -226,8 +455,7 @@ fn send_unix(req: Request) -> Result { let mut reader = BufReader::new(&stream); reader.read_line(&mut line)?; - let resp: Response = serde_json::from_str(&line) - .context("解析 daemon 响应失败")?; + let resp: Response = serde_json::from_str(&line).context("解析 daemon 响应失败")?; if !resp.ok { bail!("{}", resp.error.as_deref().unwrap_or("未知错误")); @@ -240,10 +468,10 @@ fn send_unix(req: Request) -> Result { fn send_windows(req: Request) -> Result { use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; - let name = "wx-cli-daemon".to_ns_name::() + let name = "wx-cli-daemon" + .to_ns_name::() .context("构造 pipe name 失败")?; - let stream = Stream::connect(name) - .context("连接 daemon named pipe 失败")?; + let stream = Stream::connect(name).context("连接 daemon named pipe 失败")?; // interprocess::Stream 同时实现 Read + Write,但需要拆分读写端 let mut reader = BufReader::new(stream); @@ -254,8 +482,7 @@ fn send_windows(req: Request) -> Result { let mut line = String::new(); reader.read_line(&mut line)?; - let resp: Response = serde_json::from_str(&line) - .context("解析 daemon 响应失败")?; + let resp: Response = serde_json::from_str(&line).context("解析 daemon 响应失败")?; if !resp.ok { bail!("{}", resp.error.as_deref().unwrap_or("未知错误")); diff --git a/src/config.rs b/src/config.rs index a488ca0..f74fda3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -11,38 +11,50 @@ pub struct Config { pub wechat_process: String, } -/// 从 /config.json 或 $HOME/.wx-cli/config.json 加载配置 +/// 从当前工作目录 / / $HOME/.wx-cli 加载配置 pub fn load_config() -> Result { let config_path = find_config_file()?; let content = std::fs::read_to_string(&config_path) .with_context(|| format!("读取 config.json 失败: {}", config_path.display()))?; - let raw: serde_json::Value = serde_json::from_str(&content) - .with_context(|| "config.json 格式错误")?; + let raw: serde_json::Value = + serde_json::from_str(&content).with_context(|| "config.json 格式错误")?; - let db_dir = 
raw.get("db_dir") + let db_dir = raw + .get("db_dir") .and_then(|v| v.as_str()) .map(PathBuf::from) .unwrap_or_else(default_db_dir); let base_dir = config_path.parent().unwrap_or(Path::new(".")); - let keys_file = raw.get("keys_file") + let keys_file = raw + .get("keys_file") .and_then(|v| v.as_str()) .map(|s| { let p = PathBuf::from(s); - if p.is_absolute() { p } else { base_dir.join(p) } + if p.is_absolute() { + p + } else { + base_dir.join(p) + } }) .unwrap_or_else(|| base_dir.join("all_keys.json")); - let decrypted_dir = raw.get("decrypted_dir") + let decrypted_dir = raw + .get("decrypted_dir") .and_then(|v| v.as_str()) .map(|s| { let p = PathBuf::from(s); - if p.is_absolute() { p } else { base_dir.join(p) } + if p.is_absolute() { + p + } else { + base_dir.join(p) + } }) .unwrap_or_else(|| base_dir.join("decrypted")); - let wechat_process = raw.get("wechat_process") + let wechat_process = raw + .get("wechat_process") .and_then(|v| v.as_str()) .unwrap_or(default_wechat_process()) .to_string(); @@ -56,35 +68,56 @@ pub fn load_config() -> Result { } fn find_config_file() -> Result { - // 1. 优先查找可执行文件同目录 - if let Ok(exe) = std::env::current_exe() { - if let Some(dir) = exe.parent() { - let p = dir.join("config.json"); - if p.exists() { - return Ok(p); - } - } + let cwd_dir = std::env::current_dir().ok(); + let exe_dir = std::env::current_exe() + .ok() + .and_then(|exe| exe.parent().map(PathBuf::from)); + let cli_home = cli_home_dir(); + let home_dir = (cli_home != PathBuf::from("/tmp")).then_some(cli_home.as_path()); + + if let Some(path) = find_existing_config_path(cwd_dir.as_deref(), exe_dir.as_deref(), home_dir) + { + return Ok(path); } - // 2. 当前工作目录 - let cwd = std::env::current_dir().unwrap_or_default().join("config.json"); - if cwd.exists() { - return Ok(cwd); - } - // 3. 
~/.wx-cli/config.json - let home = cli_home_dir(); - if home != PathBuf::from("/tmp") { - let p = home.join(".wx-cli").join("config.json"); - if p.exists() { - return Ok(p); - } - } - // 返回默认路径(可能不存在,调用方负责处理) - if let Ok(exe) = std::env::current_exe() { - if let Some(dir) = exe.parent() { - return Ok(dir.join("config.json")); - } - } - Ok(PathBuf::from("config.json")) + + Ok(default_config_path( + cwd_dir.as_deref(), + exe_dir.as_deref(), + home_dir, + )) +} + +fn find_existing_config_path( + cwd_dir: Option<&Path>, + exe_dir: Option<&Path>, + home_dir: Option<&Path>, +) -> Option { + let candidates = [ + cwd_dir.map(config_path_in_dir), + exe_dir.map(config_path_in_dir), + home_dir.map(home_config_path), + ]; + candidates.into_iter().flatten().find(|path| path.exists()) +} + +fn default_config_path( + cwd_dir: Option<&Path>, + exe_dir: Option<&Path>, + home_dir: Option<&Path>, +) -> PathBuf { + cwd_dir + .map(config_path_in_dir) + .or_else(|| exe_dir.map(config_path_in_dir)) + .or_else(|| home_dir.map(home_config_path)) + .unwrap_or_else(|| PathBuf::from("config.json")) +} + +fn config_path_in_dir(dir: &Path) -> PathBuf { + dir.join("config.json") +} + +fn home_config_path(home_dir: &Path) -> PathBuf { + home_dir.join(".wx-cli").join("config.json") } pub fn cli_dir() -> PathBuf { @@ -163,8 +196,7 @@ fn default_db_dir() -> PathBuf { } #[cfg(target_os = "windows")] { - PathBuf::from(std::env::var("APPDATA").unwrap_or_default()) - .join("Tencent/xwechat") + PathBuf::from(std::env::var("APPDATA").unwrap_or_default()).join("Tencent/xwechat") } #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] { @@ -174,13 +206,21 @@ fn default_db_dir() -> PathBuf { fn default_wechat_process() -> &'static str { #[cfg(target_os = "macos")] - { "WeChat" } + { + "WeChat" + } #[cfg(target_os = "linux")] - { "wechat" } + { + "wechat" + } #[cfg(target_os = "windows")] - { "Weixin.exe" } + { + "Weixin.exe" + } #[cfg(not(any(target_os = "macos", target_os = 
"linux", target_os = "windows")))] - { "WeChat" } + { + "WeChat" + } } /// 自动检测微信 db_storage 目录 @@ -244,6 +284,7 @@ fn detect_db_dir_impl() -> Option { candidates.into_iter().next_back() } +#[cfg(any(target_os = "linux", target_os = "windows"))] /// 递归查找 db_storage 目录下所有 .db 文件的最新 mtime fn latest_db_mtime(dir: &Path) -> Option { let mut latest = None; @@ -253,7 +294,10 @@ fn latest_db_mtime(dir: &Path) -> Option { let mtime = if path.is_dir() { latest_db_mtime(&path).unwrap_or(std::time::SystemTime::UNIX_EPOCH) } else if path.extension().and_then(|s| s.to_str()) == Some("db") { - entry.metadata().and_then(|m| m.modified()).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + entry + .metadata() + .and_then(|m| m.modified()) + .unwrap_or(std::time::SystemTime::UNIX_EPOCH) } else { continue; }; @@ -278,8 +322,7 @@ fn detect_db_dir_impl() -> Option { if let Ok(content) = std::fs::read_to_string(&path) { let data_root = content.trim().to_string(); if PathBuf::from(&data_root).is_dir() { - let pattern = PathBuf::from(&data_root) - .join("xwechat_files"); + let pattern = PathBuf::from(&data_root).join("xwechat_files"); if let Ok(entries2) = std::fs::read_dir(&pattern) { for entry2 in entries2.flatten() { let storage = entry2.path().join("db_storage"); @@ -293,7 +336,8 @@ fn detect_db_dir_impl() -> Option { } } } - candidates.into_iter().next() + candidates.sort_by_key(|p| latest_db_mtime(p).unwrap_or(std::time::SystemTime::UNIX_EPOCH)); + candidates.into_iter().next_back() } #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] @@ -303,24 +347,66 @@ fn detect_db_dir_impl() -> Option { #[cfg(test)] mod tests { - use super::resolve_cli_home; + use super::{ + config_path_in_dir, default_config_path, find_existing_config_path, home_config_path, + resolve_cli_home, + }; + use std::fs; use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = format!( + "wx-cli-config-test-{}-{}-{}", + name, 
+ std::process::id(), + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + ); + let dir = std::env::temp_dir().join(unique); + fs::create_dir_all(&dir).unwrap(); + dir + } #[test] fn resolve_cli_home_prefers_sudo_home_when_present() { - let home = resolve_cli_home( - PathBuf::from("/root"), - Some(PathBuf::from("/Users/alice")), - ); + let home = resolve_cli_home(PathBuf::from("/root"), Some(PathBuf::from("/Users/alice"))); assert_eq!(home, PathBuf::from("/Users/alice")); } #[test] fn resolve_cli_home_falls_back_to_default_home() { - let home = resolve_cli_home( - PathBuf::from("/root"), - None, - ); + let home = resolve_cli_home(PathBuf::from("/root"), None); assert_eq!(home, PathBuf::from("/root")); } + + #[test] + fn config_path_prefers_cwd_over_exe_and_home() { + let cwd = temp_dir("cwd"); + let exe = temp_dir("exe"); + let home = temp_dir("home"); + fs::write(config_path_in_dir(&cwd), "{}").unwrap(); + fs::write(config_path_in_dir(&exe), "{}").unwrap(); + fs::create_dir_all(home.join(".wx-cli")).unwrap(); + fs::write(home_config_path(&home), "{}").unwrap(); + + let path = find_existing_config_path(Some(&cwd), Some(&exe), Some(&home)).unwrap(); + assert_eq!(path, config_path_in_dir(&cwd)); + + fs::remove_dir_all(cwd).unwrap(); + fs::remove_dir_all(exe).unwrap(); + fs::remove_dir_all(home).unwrap(); + } + + #[test] + fn default_config_path_matches_init_write_order() { + let cwd = PathBuf::from("/tmp/cwd"); + let exe = PathBuf::from("/tmp/exe"); + let home = PathBuf::from("/tmp/home"); + + let path = default_config_path(Some(&cwd), Some(&exe), Some(&home)); + assert_eq!(path, cwd.join("config.json")); + } } diff --git a/src/crypto/mod.rs b/src/crypto/mod.rs index e5407b5..da074e7 100644 --- a/src/crypto/mod.rs +++ b/src/crypto/mod.rs @@ -1,9 +1,9 @@ pub mod wal; -use anyhow::{bail, Result}; use aes::Aes256; -use cbc::Decryptor; +use anyhow::{bail, Result}; use cbc::cipher::{BlockDecryptMut, KeyIvInit}; +use cbc::Decryptor; use 
std::io::{Read, Write}; use std::path::Path; @@ -65,11 +65,8 @@ fn aes_cbc_decrypt(key: &[u8; 32], iv: &[u8; 16], data: &[u8]) -> Result bail!("密文长度不是 AES 块大小的倍数: {}", data.len()); } // 将 &[u8] 复制为 Block 数组,避免 unsafe from_raw_parts_mut - let mut blocks: Vec = data.chunks_exact(16) - .map(Block::clone_from_slice) - .collect(); - Aes256CbcDec::new(key.into(), iv.into()) - .decrypt_blocks_mut(&mut blocks); + let mut blocks: Vec = data.chunks_exact(16).map(Block::clone_from_slice).collect(); + Aes256CbcDec::new(key.into(), iv.into()).decrypt_blocks_mut(&mut blocks); Ok(blocks.iter().flat_map(|b| b.iter().copied()).collect()) } @@ -92,15 +89,101 @@ pub fn full_decrypt(db_path: &Path, out_path: &Path, enc_key: &[u8; 32]) -> Resu let mut page_buf = vec![0u8; PAGE_SZ]; for pgno in 1..=total_pages { - let n = input.read(&mut page_buf)?; - if n == 0 { break; } - // 不足一页则补零 - if n < PAGE_SZ { - page_buf[n..].fill(0); - } + let page_start = (pgno - 1) * PAGE_SZ; + let bytes_remaining = file_size.saturating_sub(page_start); + read_page(&mut input, &mut page_buf, bytes_remaining)?; let dec = decrypt_page(enc_key, &page_buf, pgno as u32)?; output.write_all(&dec)?; } Ok(()) } + +fn read_page( + input: &mut impl Read, + page_buf: &mut [u8], + bytes_remaining: usize, +) -> std::io::Result { + let expected = bytes_remaining.min(PAGE_SZ); + input.read_exact(&mut page_buf[..expected])?; + if expected < PAGE_SZ { + page_buf[expected..].fill(0); + } + Ok(expected) +} + +#[cfg(test)] +mod tests { + use super::{read_page, PAGE_SZ}; + use std::io::{self, Read}; + + struct ChunkedReader { + chunks: Vec>, + chunk_idx: usize, + offset: usize, + } + + impl ChunkedReader { + fn new(chunks: Vec>) -> Self { + Self { + chunks, + chunk_idx: 0, + offset: 0, + } + } + } + + impl Read for ChunkedReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.chunk_idx >= self.chunks.len() { + return Ok(0); + } + let chunk = &self.chunks[self.chunk_idx]; + let remaining = &chunk[self.offset..]; 
+ let n = remaining.len().min(buf.len()); + buf[..n].copy_from_slice(&remaining[..n]); + self.offset += n; + if self.offset == chunk.len() { + self.chunk_idx += 1; + self.offset = 0; + } + Ok(n) + } + } + + #[test] + fn read_page_reads_across_short_chunks() { + let mut reader = ChunkedReader::new(vec![vec![1; 32], vec![2; PAGE_SZ - 32]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let n = read_page(&mut reader, &mut page_buf, PAGE_SZ).unwrap(); + + assert_eq!(n, PAGE_SZ); + assert_eq!(page_buf[0], 1); + assert_eq!(page_buf[31], 1); + assert_eq!(page_buf[32], 2); + assert_eq!(page_buf[PAGE_SZ - 1], 2); + } + + #[test] + fn read_page_zero_pads_last_partial_page() { + let mut reader = ChunkedReader::new(vec![vec![7; 8], vec![9; 4]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let n = read_page(&mut reader, &mut page_buf, 12).unwrap(); + + assert_eq!(n, 12); + assert_eq!(&page_buf[..8], &[7; 8]); + assert_eq!(&page_buf[8..12], &[9; 4]); + assert!(page_buf[12..].iter().all(|&b| b == 0)); + } + + #[test] + fn read_page_errors_on_early_eof() { + let mut reader = ChunkedReader::new(vec![vec![1; 8]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let err = read_page(&mut reader, &mut page_buf, 16).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof); + } +} diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index 02dc99f..b4a34c3 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -25,9 +25,7 @@ async fn async_run() -> Result<()> { tokio::fs::create_dir_all(&cli_dir).await?; tokio::fs::create_dir_all(config::cache_dir()).await?; - // 写 PID 文件 let pid = std::process::id(); - tokio::fs::write(config::pid_path(), pid.to_string()).await?; // 注册 SIGTERM / SIGINT 处理 setup_signal_handler().await; @@ -39,7 +37,8 @@ async fn async_run() -> Result<()> { eprintln!("[daemon] DB_DIR: {}", cfg.db_dir.display()); // 加载密钥 - let keys_content = tokio::fs::read_to_string(&cfg.keys_file).await + let keys_content = tokio::fs::read_to_string(&cfg.keys_file) + .await 
.map_err(|e| anyhow::anyhow!("读取密钥文件 {:?} 失败: {}", cfg.keys_file, e))?; let keys_raw: serde_json::Value = serde_json::from_str(&keys_content)?; let all_keys = extract_keys(&keys_raw); @@ -49,11 +48,14 @@ async fn async_run() -> Result<()> { let db = Arc::new(cache::DbCache::new(cfg.db_dir.clone(), all_keys.clone()).await?); // 收集消息 DB 列表 - let msg_db_keys: Vec = all_keys.keys() + let msg_db_keys: Vec = all_keys + .keys() .filter(|k| { let k = k.replace('\\', "/"); - k.contains("message/message_") && k.ends_with(".db") - && !k.contains("_fts") && !k.contains("_resource") + k.contains("message/message_") + && k.ends_with(".db") + && !k.contains("_fts") + && !k.contains("_resource") }) .cloned() .collect(); @@ -82,7 +84,9 @@ async fn async_run() -> Result<()> { let names_arc = Arc::new(tokio::sync::RwLock::new(Arc::new(names))); // 启动 IPC server(阻塞) - server::serve(Arc::clone(&db), Arc::clone(&names_arc)).await?; + let serve_result = server::serve(Arc::clone(&db), Arc::clone(&names_arc)).await; + cleanup_ipc_files(); + serve_result?; Ok(()) } @@ -96,7 +100,9 @@ fn extract_keys(json: &serde_json::Value) -> HashMap { let mut result = HashMap::new(); if let Some(obj) = json.as_object() { for (k, v) in obj { - if k.starts_with('_') { continue; } + if k.starts_with('_') { + continue; + } let enc_key = if let Some(s) = v.as_str() { s.to_string() } else if let Some(obj2) = v.as_object() { @@ -132,8 +138,13 @@ async fn setup_signal_handler() { }); } +#[cfg(unix)] fn cleanup_and_exit() { - let _ = std::fs::remove_file(config::sock_path()); - let _ = std::fs::remove_file(config::pid_path()); + cleanup_ipc_files(); std::process::exit(0); } + +fn cleanup_ipc_files() { + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(config::pid_path()); +} diff --git a/src/scanner/windows.rs b/src/scanner/windows.rs index a6660cb..391ba33 100644 --- a/src/scanner/windows.rs +++ b/src/scanner/windows.rs @@ -5,19 +5,19 @@ /// - OpenProcess: 获取进程句柄(需要 
PROCESS_VM_READ | PROCESS_QUERY_INFORMATION) /// - VirtualQueryEx: 枚举内存区域 /// - ReadProcessMemory: 读取内存内容 -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use std::path::Path; use windows::Win32::Foundation::{CloseHandle, HANDLE}; +use windows::Win32::System::Diagnostics::Debug::ReadProcessMemory; use windows::Win32::System::Diagnostics::ToolHelp::{ CreateToolhelp32Snapshot, Process32First, Process32Next, PROCESSENTRY32, TH32CS_SNAPPROCESS, }; use windows::Win32::System::Memory::{ - VirtualQueryEx, MEMORY_BASIC_INFORMATION, MEM_COMMIT, PAGE_READWRITE, + VirtualQueryEx, MEMORY_BASIC_INFORMATION, MEM_COMMIT, PAGE_EXECUTE_READWRITE, + PAGE_EXECUTE_WRITECOPY, PAGE_GUARD, PAGE_NOCACHE, PAGE_READWRITE, PAGE_WRITECOMBINE, + PAGE_WRITECOPY, }; -use windows::Win32::System::Threading::{ - OpenProcess, PROCESS_QUERY_INFORMATION, PROCESS_VM_READ, -}; -use windows::Win32::System::Diagnostics::Debug::ReadProcessMemory; +use windows::Win32::System::Threading::{OpenProcess, PROCESS_QUERY_INFORMATION, PROCESS_VM_READ}; use super::{collect_db_salts, KeyEntry}; @@ -27,9 +27,7 @@ const CHUNK_SIZE: usize = 2 * 1024 * 1024; /// 查找 Weixin.exe 进程 PID fn find_wechat_pid() -> Option { // SAFETY: CreateToolhelp32Snapshot 标准 Windows API - let snap = unsafe { - CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? - }; + let snap = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? 
}; let mut entry = PROCESSENTRY32 { dwSize: std::mem::size_of::() as u32, @@ -43,8 +41,8 @@ fn find_wechat_pid() -> Option { return None; } loop { - let name = std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8) - .to_string_lossy(); + let name = + std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8).to_string_lossy(); if name.eq_ignore_ascii_case("Weixin.exe") { let pid = entry.th32ProcessID; let _ = CloseHandle(snap); @@ -60,8 +58,7 @@ fn find_wechat_pid() -> Option { } pub fn scan_keys(db_dir: &Path) -> Result> { - let pid = find_wechat_pid() - .context("找不到 Weixin.exe 进程,请确认微信正在运行")?; + let pid = find_wechat_pid().context("找不到 Weixin.exe 进程,请确认微信正在运行")?; eprintln!("WeChat PID: {}", pid); // SAFETY: OpenProcess 请求读取权限 @@ -78,7 +75,9 @@ pub fn scan_keys(db_dir: &Path) -> Result> { eprintln!("找到 {} 个候选密钥", raw_keys.len()); // SAFETY: 关闭进程句柄 - unsafe { let _ = CloseHandle(process); } + unsafe { + let _ = CloseHandle(process); + } let mut entries = Vec::new(); for (key_hex, salt_hex) in &raw_keys { @@ -119,8 +118,9 @@ fn scan_memory(process: HANDLE) -> Result> { let region_size = mbi.RegionSize; let base = mbi.BaseAddress as usize; - // 只扫描已提交的可读写页面 - if mbi.State == MEM_COMMIT && mbi.Protect == PAGE_READWRITE { + // 只扫描已提交的可读可写页面。Windows 的保护位可能带 modifier bits, + // 也可能是 WRITECOPY / EXECUTE_READWRITE 这种同样可读可写的保护类型。 + if mbi.State == MEM_COMMIT && is_writable_readable_page(mbi.Protect.0) { scan_region(process, base, region_size, &mut results); } @@ -133,12 +133,18 @@ fn scan_memory(process: HANDLE) -> Result> { Ok(results) } -fn scan_region( - process: HANDLE, - base: usize, - size: usize, - results: &mut Vec<(String, String)>, -) { +fn is_writable_readable_page(protect: u32) -> bool { + let base = protect & !(PAGE_GUARD.0 | PAGE_NOCACHE.0 | PAGE_WRITECOMBINE.0); + matches!( + base, + x if x == PAGE_READWRITE.0 + || x == PAGE_WRITECOPY.0 + || x == PAGE_EXECUTE_READWRITE.0 + || x == PAGE_EXECUTE_WRITECOPY.0 + ) +} + +fn scan_region(process: 
HANDLE, base: usize, size: usize, results: &mut Vec<(String, String)>) { let overlap = HEX_PATTERN_LEN + 3; let mut offset = 0usize; @@ -159,7 +165,8 @@ fn scan_region( buf.as_mut_ptr() as *mut _, chunk_size, Some(&mut bytes_read), - ).is_ok() + ) + .is_ok() }; if ok && bytes_read > 0 { @@ -203,10 +210,8 @@ fn search_pattern(buf: &[u8], results: &mut Vec<(String, String)>) { i += 1; continue; } - let key_hex = String::from_utf8_lossy(&buf[hex_start..hex_start + 64]) - .to_lowercase(); - let salt_hex = String::from_utf8_lossy(&buf[hex_start + 64..hex_start + 96]) - .to_lowercase(); + let key_hex = String::from_utf8_lossy(&buf[hex_start..hex_start + 64]).to_lowercase(); + let salt_hex = String::from_utf8_lossy(&buf[hex_start + 64..hex_start + 96]).to_lowercase(); let is_dup = results.iter().any(|(k, s)| k == &key_hex && s == &salt_hex); if !is_dup { results.push((key_hex, salt_hex)); From c4c3b7279634f24761a66ced22534df1fa8918d2 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 17:20:07 +0800 Subject: [PATCH 11/20] =?UTF-8?q?docs(readme):=20mention=20Windows=20Virtu?= =?UTF-8?q?alQueryEx=20+=20ReadProcessMemory=20in=20=E5=8E=9F=E7=90=86=20s?= =?UTF-8?q?ection=20(#55)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 原理 section previously listed only macOS Mach VM API and Linux /proc//mem, omitting the Windows scanner path that has existed in src/scanner/windows.rs since the Rust rewrite. Add the Windows API pair and the required process access rights so the section accurately reflects all three platforms supported in CI/builds. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de816d1..8a8e23b 100644 --- a/README.md +++ b/README.md @@ -294,7 +294,7 @@ daemon 首次解密后将数据库和 mtime 持久化到 `~/.wx-cli/cache/`。 微信 4.x 使用 SQLCipher 4 加密本地数据库(AES-256-CBC + HMAC-SHA512,PBKDF2 256,000 次迭代)。WCDB 在进程内存中缓存派生后的 raw key,格式为 `x'<64hex_key><32hex_salt>'`。 -wx-cli 通过 macOS Mach VM API(`mach_vm_region` + `mach_vm_read`)或 Linux `/proc//mem` 扫描微信进程内存,匹配该模式提取密钥,daemon 按需解密并缓存。 +wx-cli 通过 macOS Mach VM API(`mach_vm_region` + `mach_vm_read`)、Linux `/proc//mem` 或 Windows `VirtualQueryEx` + `ReadProcessMemory`(需要 `PROCESS_VM_READ | PROCESS_QUERY_INFORMATION` 权限)扫描微信进程内存,匹配该模式提取密钥,daemon 按需解密并缓存。 --- From 5c001b18beb8051f15a910032faa04572b3bd122 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 17:26:20 +0800 Subject: [PATCH 12/20] chore(release): bump version to 0.1.11 --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/platforms/darwin-arm64/package.json | 2 +- npm/platforms/darwin-x64/package.json | 2 +- npm/platforms/linux-arm64/package.json | 2 +- npm/platforms/linux-x64/package.json | 2 +- npm/platforms/win32-x64/package.json | 2 +- npm/wx-cli/package.json | 12 ++++++------ 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36b1c72..912068a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1307,7 +1307,7 @@ checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "wx-cli" -version = "0.1.10" +version = "0.1.11" dependencies = [ "aes", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 0c5ef05..58c3224 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wx-cli" -version = "0.1.10" +version = "0.1.11" edition = "2021" description = "WeChat 4.x (macOS/Linux) local data CLI — decrypt SQLCipher DBs, query chat history, watch new messages" license = "Apache-2.0" diff --git a/npm/platforms/darwin-arm64/package.json b/npm/platforms/darwin-arm64/package.json 
index 05b851e..d0661cf 100644 --- a/npm/platforms/darwin-arm64/package.json +++ b/npm/platforms/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-arm64", - "version": "0.1.10", + "version": "0.1.11", "description": "wx-cli binary for macOS arm64", "os": ["darwin"], "cpu": ["arm64"], diff --git a/npm/platforms/darwin-x64/package.json b/npm/platforms/darwin-x64/package.json index 6fce3b4..badd091 100644 --- a/npm/platforms/darwin-x64/package.json +++ b/npm/platforms/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-x64", - "version": "0.1.10", + "version": "0.1.11", "description": "wx-cli binary for macOS x64", "os": ["darwin"], "cpu": ["x64"], diff --git a/npm/platforms/linux-arm64/package.json b/npm/platforms/linux-arm64/package.json index d44e7b3..26f73c4 100644 --- a/npm/platforms/linux-arm64/package.json +++ b/npm/platforms/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-arm64", - "version": "0.1.10", + "version": "0.1.11", "description": "wx-cli binary for Linux arm64", "os": ["linux"], "cpu": ["arm64"], diff --git a/npm/platforms/linux-x64/package.json b/npm/platforms/linux-x64/package.json index 0be0893..67d1c05 100644 --- a/npm/platforms/linux-x64/package.json +++ b/npm/platforms/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-x64", - "version": "0.1.10", + "version": "0.1.11", "description": "wx-cli binary for Linux x64", "os": ["linux"], "cpu": ["x64"], diff --git a/npm/platforms/win32-x64/package.json b/npm/platforms/win32-x64/package.json index 32d2eb0..d9edf8f 100644 --- a/npm/platforms/win32-x64/package.json +++ b/npm/platforms/win32-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-win32-x64", - "version": "0.1.10", + "version": "0.1.11", "description": "wx-cli binary for Windows x64", "os": ["win32"], "cpu": ["x64"], diff --git a/npm/wx-cli/package.json b/npm/wx-cli/package.json index 121770f..5befb5a 100644 --- 
a/npm/wx-cli/package.json +++ b/npm/wx-cli/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli", - "version": "0.1.10", + "version": "0.1.11", "description": "Query your local WeChat data from the command line. Designed for LLM agent tool calls.", "bin": { "wx": "bin/wx.js" @@ -13,11 +13,11 @@ "install.js" ], "optionalDependencies": { - "@jackwener/wx-cli-darwin-arm64": "0.1.10", - "@jackwener/wx-cli-darwin-x64": "0.1.10", - "@jackwener/wx-cli-linux-x64": "0.1.10", - "@jackwener/wx-cli-linux-arm64": "0.1.10", - "@jackwener/wx-cli-win32-x64": "0.1.10" + "@jackwener/wx-cli-darwin-arm64": "0.1.11", + "@jackwener/wx-cli-darwin-x64": "0.1.11", + "@jackwener/wx-cli-linux-x64": "0.1.11", + "@jackwener/wx-cli-linux-arm64": "0.1.11", + "@jackwener/wx-cli-win32-x64": "0.1.11" }, "engines": { "node": ">=14" }, "keywords": ["wechat", "cli", "wx", "llm", "ai", "sqlite", "sqlcipher"], From 14fdfde1d36debd90781bce24b0568f72897476c Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:25:32 +0800 Subject: [PATCH 13/20] feat(attachment): scaffold module + V1 decoders + resource resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lays down the skeleton for聊天附件 (chat attachment) extraction. This commit introduces the `attachment` module with: - `attachment_id`: opaque base64url(json) round-trip handle for CLI/IPC. Carries `(chat, local_id, create_time, kind)` — `local_id` alone is not unique (实测同 chat 内最多 7 条同 local_id 的记录), so create_time is required for disambiguation. - `decoder/`: dispatch by 6B header magic. Three branches: - `V2_MAGIC` → AES-128-ECB + raw + XOR (need image AES key) - `V1_MAGIC` → AES-128-ECB with fixed key `cfcd208495d565ef` (= md5("0")[:16]) - else → legacy single-byte XOR with magic auto-detect Manual ECB + PKCS7 unpad to avoid pulling in another crate. 
- `resolver`: `message_resource.db` lookup chain `username → ChatName2Id.rowid → MessageResourceInfo.packed_info → md5` + on-disk `.dat` selection (full > _h > _t) under `/msg/attach///Img/[_t|_h].dat`. Honors `message_local_type % 2^32` to strip the high flag bits, and orders by `message_create_time DESC` to handle local_id reuse. - `image_key/`: stub trait + macOS / Windows placeholders. To be filled by codex with the V2 image key extraction (kvcomm + brute-force on macOS, memory scan on Windows). V1 decoder ships with 6 unit tests covering every supported magic + the BMP extra validation; resolver ships with packed_info parser + dat-file selection tests; v2 decoder ships with header validation tests. 21 tests pass. `cargo check` and `cargo check --target x86_64-pc-windows-gnu` both clean. --- Cargo.lock | 7 + Cargo.toml | 3 + src/attachment/attachment_id.rs | 153 ++++++++++++ src/attachment/decoder/mod.rs | 122 ++++++++++ src/attachment/decoder/v1_xor.rs | 166 +++++++++++++ src/attachment/decoder/v2.rs | 130 ++++++++++ src/attachment/image_key/macos.rs | 10 + src/attachment/image_key/mod.rs | 34 +++ src/attachment/image_key/windows.rs | 10 + src/attachment/mod.rs | 28 +++ src/attachment/resolver.rs | 353 ++++++++++++++++++++++++++++ src/main.rs | 1 + 12 files changed, 1017 insertions(+) create mode 100644 src/attachment/attachment_id.rs create mode 100644 src/attachment/decoder/mod.rs create mode 100644 src/attachment/decoder/v1_xor.rs create mode 100644 src/attachment/decoder/v2.rs create mode 100644 src/attachment/image_key/macos.rs create mode 100644 src/attachment/image_key/mod.rs create mode 100644 src/attachment/image_key/windows.rs create mode 100644 src/attachment/mod.rs create mode 100644 src/attachment/resolver.rs diff --git a/Cargo.lock b/Cargo.lock index 912068a..a5cc78b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.1" @@ -1311,6 +1317,7 @@ version = "0.1.11" dependencies = [ "aes", "anyhow", + "base64", "cbc", "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 58c3224..a32b845 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,9 @@ dirs = "5" # MD5 (联系人表名 Msg_) md5 = "0.7" +# 附件 ID 编码(base64url) +base64 = "0.22" + # 正则表达式 regex = "1" roxmltree = "0.20" diff --git a/src/attachment/attachment_id.rs b/src/attachment/attachment_id.rs new file mode 100644 index 0000000..8af569e --- /dev/null +++ b/src/attachment/attachment_id.rs @@ -0,0 +1,153 @@ +//! 不透明附件 ID — 跨 CLI / IPC 的圆 trip 句柄。 +//! +//! 编码:`base64url_no_pad(serde_json(payload))`。 +//! 选择 base64url(json) 而不是紧凑 bit-pack: +//! - phase 1 求稳,不发明二进制协议 +//! - 后面加字段(`resource_md5` / `decoder_hint` 之类)老 CLI 不 break +//! - debug 直接 base64 -d | jq 看字段 +//! +//! ⚠️ `local_id` 在同一 chat 内会被 WeChat 复用(实测同 chat 最多 7 条同 local_id), +//! 
所以 `(chat, local_id, create_time)` 三元组才是定位资源行的最小集。 + +use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AttachmentKind { + Image, + Video, + File, + Voice, +} + +impl AttachmentKind { + /// 从 message.local_type 推 attachment kind(只覆盖 phase 1 关心的几种)。 + /// 高 32 bit 是版本/会话 flag,要先 mask 到低 32 bit。 + pub fn from_local_type(local_type: i64) -> Option { + let lo = (local_type as u64) & 0xFFFF_FFFF; + match lo { + 3 => Some(AttachmentKind::Image), + 34 => Some(AttachmentKind::Voice), + 43 => Some(AttachmentKind::Video), + // type=49 是 appmsg,里面 subtype=6 才是文件;这里偏宽松返回 File, + // 由 resolver 进一步根据 appmsg subtype 决定是否真的能 extract + 49 => Some(AttachmentKind::File), + _ => None, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + AttachmentKind::Image => "image", + AttachmentKind::Video => "video", + AttachmentKind::File => "file", + AttachmentKind::Voice => "voice", + } + } +} + +/// 附件 ID payload(序列化后 base64url 编码)。 +/// +/// `v` 是版本字段,将来 schema 变了可以走分支兼容。当前 v=1。 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AttachmentId { + /// payload schema version + pub v: u32, + /// 会话 username(同时用于 ChatName2Id 查 chat_id 和拼 attach 路径) + pub chat: String, + /// 消息行的 local_id + pub local_id: i64, + /// 消息行的 create_time(unix 秒)— 用于 disambiguate 同 chat 内 local_id 复用 + pub create_time: i64, + /// 附件类别 + pub kind: AttachmentKind, + /// 可选 hint:消息所在 message_N.db 的 N。给定时 resolver 可跳过 shard 扫描; + /// 缺省时 resolver 会按 `find_msg_tables` 逻辑全量扫 + #[serde(default, skip_serializing_if = "Option::is_none")] + pub db: Option, +} + +impl AttachmentId { + pub fn encode(&self) -> Result { + let json = serde_json::to_vec(self).context("序列化 AttachmentId")?; + Ok(URL_SAFE_NO_PAD.encode(json)) + } + + pub fn decode(s: &str) -> Result { + let bytes = URL_SAFE_NO_PAD + 
.decode(s.trim()) + .map_err(|e| anyhow!("attachment_id 不是合法 base64url: {}", e))?; + let id: AttachmentId = + serde_json::from_slice(&bytes).context("attachment_id payload 非合法 JSON")?; + if id.v != 1 { + return Err(anyhow!("不支持的 attachment_id 版本 v={}", id.v)); + } + Ok(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_minimal() { + let id = AttachmentId { + v: 1, + chat: "wxid_abc".to_string(), + local_id: 12345, + create_time: 1_715_678_901, + kind: AttachmentKind::Image, + db: None, + }; + let s = id.encode().unwrap(); + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.chat, id.chat); + assert_eq!(back.local_id, id.local_id); + assert_eq!(back.create_time, id.create_time); + assert_eq!(back.kind, id.kind); + assert_eq!(back.db, id.db); + } + + #[test] + fn round_trip_with_db_hint() { + let id = AttachmentId { + v: 1, + chat: "1234@chatroom".to_string(), + local_id: 42, + create_time: 1, + kind: AttachmentKind::Image, + db: Some(2), + }; + let s = id.encode().unwrap(); + assert!(!s.contains('=')); // base64url no-pad + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.db, Some(2)); + } + + #[test] + fn local_type_mask_high_bits() { + // monitor_web.py 里 image push 路径:高位带 flag,低 32 bit 是 3 + let high_flag = (0xDEAD_BEEFu64 << 32) as i64 | 3; + assert_eq!( + AttachmentKind::from_local_type(high_flag), + Some(AttachmentKind::Image) + ); + } + + #[test] + fn rejects_unknown_version() { + let id = AttachmentId { + v: 99, + chat: "x".to_string(), + local_id: 0, + create_time: 0, + kind: AttachmentKind::Image, + db: None, + }; + let s = id.encode().unwrap(); + assert!(AttachmentId::decode(&s).is_err()); + } +} diff --git a/src/attachment/decoder/mod.rs b/src/attachment/decoder/mod.rs new file mode 100644 index 0000000..a5723c5 --- /dev/null +++ b/src/attachment/decoder/mod.rs @@ -0,0 +1,122 @@ +//! `.dat` 文件解码:根据 6B header magic 分发到具体 decoder。 +//! +//! 三档: +//! | header[0..6] | decoder | 备注 | +//! 
|-------------------------|-------------------|-----------------------------------------| +//! | `07 08 V2 08 07` | `v2` | AES-128-ECB + XOR 混合,需要 image AES key | +//! | `07 08 V1 08 07` | `v1_aes` | 固定 AES key `cfcd208495d565ef` | +//! | (其他, 通常无 magic) | `v1_xor` | legacy single-byte XOR,magic 自动探测 | +//! +//! 决策点放在 `dispatch`,让上层(`resolver` / CLI extract 命令)只跟一个入口打交道。 + +use anyhow::{anyhow, Result}; + +pub mod v1_xor; +pub mod v2; + +/// 完整 V2 magic:`\x07\x08V2\x08\x07` +pub const V2_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'2', 0x08, 0x07]; +/// 完整 V1 magic:`\x07\x08V1\x08\x07` +pub const V1_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'1', 0x08, 0x07]; + +/// 解码后的产物 + 探测出的图片格式 +#[derive(Debug)] +pub struct DecodedImage { + pub data: Vec, + /// 推断出的图片扩展名(不带点),由 magic 决定。例如 "jpg" / "png" / "gif" / "webp" / + /// "tif" / "bmp" / "hevc"(wxgf 容器)/ "bin"(未识别) + pub format: &'static str, + /// 解码器名称("legacy_xor" / "v1_aes" / "v2"),用于 CLI 调试输出 + pub decoder: &'static str, +} + +/// 由 caller 提供的 V2 image AES key(codex 的 `image_key` 模块负责拿到)。 +/// 缺省时遇到 V2 文件会返回 `Err`,caller 可以拿到具体错误信息再处理。 +#[derive(Debug, Clone, Copy, Default)] +pub struct V2KeyMaterial<'a> { + pub aes_key: Option<&'a [u8; 16]>, + /// XOR key — WeChat 4.x 默认 0x88,可 override + pub xor_key: u8, +} + +impl<'a> V2KeyMaterial<'a> { + pub fn with_aes(key: &'a [u8; 16]) -> Self { + Self { aes_key: Some(key), xor_key: 0x88 } + } +} + +/// 根据 `dat_bytes` 头部 magic 自动分发到对应 decoder。 +/// +/// `v2_key` 仅在文件是 V2 magic 时被消费。 +pub fn dispatch(dat_bytes: &[u8], v2_key: V2KeyMaterial<'_>) -> Result { + if dat_bytes.len() >= 6 { + let head: &[u8; 6] = dat_bytes[..6].try_into().unwrap(); + if head == &V2_MAGIC { + return v2::decode(dat_bytes, v2_key); + } + if head == &V1_MAGIC { + // V1 fixed-AES: 固定 key = md5("0")[:16] = "cfcd208495d565ef" + let fixed_key: [u8; 16] = *b"cfcd208495d565ef"; + return v2::decode( + dat_bytes, + V2KeyMaterial { aes_key: Some(&fixed_key), xor_key: v2_key.xor_key }, + ) + .map(|mut d| { + d.decoder = 
"v1_aes"; + d + }); + } + } + if dat_bytes.is_empty() { + return Err(anyhow!("空 .dat 文件")); + } + v1_xor::decode(dat_bytes) +} + +/// 从解密后的字节流头部探测图片格式扩展名。 +/// +/// 与上游 `decode_image.py::detect_image_format` 一致;新增 wxgf (HEVC 裸流) 的探测, +/// 因为 V2 解码后产物可能直接是 wxgf 容器。 +pub fn detect_image_format(bytes: &[u8]) -> &'static str { + if bytes.len() >= 4 && &bytes[..4] == b"wxgf" { + return "hevc"; + } + if bytes.len() >= 3 && bytes[..3] == [0xFF, 0xD8, 0xFF] { + return "jpg"; + } + if bytes.len() >= 4 && bytes[..4] == [0x89, 0x50, 0x4E, 0x47] { + return "png"; + } + if bytes.len() >= 3 && &bytes[..3] == b"GIF" { + return "gif"; + } + if bytes.len() >= 12 && &bytes[..4] == b"RIFF" && &bytes[8..12] == b"WEBP" { + return "webp"; + } + if bytes.len() >= 4 && bytes[..4] == [0x49, 0x49, 0x2A, 0x00] { + return "tif"; + } + if bytes.len() >= 2 && &bytes[..2] == b"BM" { + return "bmp"; + } + "bin" +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detect_basic_formats() { + assert_eq!(detect_image_format(&[0xFF, 0xD8, 0xFF, 0xE0]), "jpg"); + assert_eq!(detect_image_format(&[0x89, 0x50, 0x4E, 0x47]), "png"); + assert_eq!(detect_image_format(b"GIF89a"), "gif"); + assert_eq!(detect_image_format(b"BM\0\0\0\0\0\0\0\0\0\0\0\0"), "bmp"); + let mut webp = b"RIFF\0\0\0\0WEBP".to_vec(); + webp.extend_from_slice(&[0; 4]); + assert_eq!(detect_image_format(&webp), "webp"); + assert_eq!(detect_image_format(&[0x49, 0x49, 0x2A, 0x00]), "tif"); + assert_eq!(detect_image_format(b"wxgfXXXX"), "hevc"); + assert_eq!(detect_image_format(&[0, 0, 0, 0]), "bin"); + } +} diff --git a/src/attachment/decoder/v1_xor.rs b/src/attachment/decoder/v1_xor.rs new file mode 100644 index 0000000..788383e --- /dev/null +++ b/src/attachment/decoder/v1_xor.rs @@ -0,0 +1,166 @@ +//! Legacy single-byte XOR decoder(无 magic 头的旧 .dat) +//! +//! 算法:用已知图片 magic 反推 XOR key —— `key = file[0] ^ magic[0]`。 +//! 然后用同一个 key 校验 `file[i] ^ key == magic[i]`,全部命中才接受这个 key。 +//! +//! 优先级(按 magic 长度降序,避免短 magic 假阳性): +//! 
PNG (4) > GIF (4) > TIF (4) > WEBP (4, RIFF) > JPG (3) > BMP (2, 需额外校验) +//! +//! BMP 只有 2 字节 magic,假阳性高;额外用 BMP file header 里的 +//! `bf_size`(offset 2, u32 LE)和 `bf_offset`(offset 10, u32 LE)做合理性校验: +//! - `|bf_size - file_size| < 1024`(允许微小 padding 差) +//! - `14 <= bf_offset <= 1078`(最大调色板 256*4 + header 14 = 1038,留点余量) + +use anyhow::{anyhow, Result}; + +use super::{detect_image_format, DecodedImage}; + +const PNG: &[u8] = &[0x89, 0x50, 0x4E, 0x47]; +const GIF: &[u8] = &[0x47, 0x49, 0x46, 0x38]; +const TIF: &[u8] = &[0x49, 0x49, 0x2A, 0x00]; +const WEBP_RIFF: &[u8] = &[0x52, 0x49, 0x46, 0x46]; +const JPG: &[u8] = &[0xFF, 0xD8, 0xFF]; +const BMP: &[u8] = &[0x42, 0x4D]; + +/// 在 `header` 上尝试一个固定 magic:返回 `Some(key)` 当且仅当所有字节都对得上。 +fn try_magic(header: &[u8], magic: &[u8]) -> Option { + if header.len() < magic.len() { + return None; + } + let key = header[0] ^ magic[0]; + for i in 1..magic.len() { + if header[i] ^ key != magic[i] { + return None; + } + } + Some(key) +} + +/// 探测 XOR key。失败返回 `None`(caller 决定是不是错)。 +pub fn detect_key(file_bytes: &[u8]) -> Option { + if file_bytes.len() < 4 { + return None; + } + let header = &file_bytes[..file_bytes.len().min(16)]; + + // 先试 3+ 字节 magic + for magic in [PNG, GIF, TIF, WEBP_RIFF, JPG] { + if let Some(k) = try_magic(header, magic) { + return Some(k); + } + } + + // 最后试 BMP(只有 2B magic,需额外校验) + if let Some(k) = try_magic(header, BMP) { + if header.len() >= 14 { + // 解 BMP file header 14 字节 + let mut dec = [0u8; 14]; + for i in 0..14 { + dec[i] = header[i] ^ k; + } + let bmp_size = u32::from_le_bytes([dec[2], dec[3], dec[4], dec[5]]); + let bmp_offset = u32::from_le_bytes([dec[10], dec[11], dec[12], dec[13]]); + let file_size = file_bytes.len() as u32; + // 允许 1024 字节 padding 差;offset 在合理范围 + if file_size.abs_diff(bmp_size) < 1024 && (14..=1078).contains(&bmp_offset) { + return Some(k); + } + } + } + + None +} + +/// XOR 解码整个 `.dat` 内容。 +pub fn decode(file_bytes: &[u8]) -> Result { + let key = + 
detect_key(file_bytes).ok_or_else(|| anyhow!("legacy XOR: 无法识别图片 magic(key 探测失败)"))?; + let data: Vec = file_bytes.iter().map(|b| b ^ key).collect(); + let format = detect_image_format(&data); + if format == "bin" { + return Err(anyhow!("legacy XOR: 解出 key=0x{:02x} 但产物 magic 不识别", key)); + } + Ok(DecodedImage { data, format, decoder: "legacy_xor" }) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// 把一段 plaintext 用单字节 key XOR 加密,模拟 .dat 文件 + fn xor_encrypt(plain: &[u8], key: u8) -> Vec { + plain.iter().map(|b| b ^ key).collect() + } + + #[test] + fn detect_jpg_key() { + let plain = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46]; + let enc = xor_encrypt(&plain, 0x3C); + assert_eq!(detect_key(&enc), Some(0x3C)); + } + + #[test] + fn detect_png_key() { + let mut plain = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xA5); + assert_eq!(detect_key(&enc), Some(0xA5)); + } + + #[test] + fn detect_gif_key() { + let mut plain = b"GIF89a".to_vec(); + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0x77); + assert_eq!(detect_key(&enc), Some(0x77)); + } + + #[test] + fn detect_webp_riff_key() { + let mut plain = b"RIFF\x00\x00\x00\x00WEBP".to_vec(); + plain.extend_from_slice(&[0; 8]); + let enc = xor_encrypt(&plain, 0x12); + assert_eq!(detect_key(&enc), Some(0x12)); + } + + #[test] + fn detect_tif_key() { + let mut plain = vec![0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xC3); + assert_eq!(detect_key(&enc), Some(0xC3)); + } + + #[test] + fn detect_bmp_with_valid_header() { + // BMP 14B header: 'BM' + size(u32 LE) + reserved(2*u16) + offset(u32 LE) + let mut plain = Vec::new(); + plain.extend_from_slice(b"BM"); + plain.extend_from_slice(&100u32.to_le_bytes()); // file_size = 100 + plain.extend_from_slice(&[0; 4]); // reserved + plain.extend_from_slice(&54u32.to_le_bytes()); // pixel data 
offset = 54 + plain.resize(100, 0); // 整个文件 100 字节,匹配 file_size + let enc = xor_encrypt(&plain, 0x55); + assert_eq!(detect_key(&enc), Some(0x55)); + } + + #[test] + fn reject_random_bytes() { + // 全 0 文件:BMP 检测会算出 key = 0x42 ^ 0 = 0x42, + // 但解密出的 BMP file_size = 0 vs file_size = 100,差距 > 1024 → + // 应该 reject + let bytes = vec![0u8; 100]; + assert_eq!(detect_key(&bytes), None); + } + + #[test] + fn decode_round_trip_jpg() { + let mut plain = vec![0xFF, 0xD8, 0xFF, 0xE0]; + plain.extend_from_slice(b"JFIF padding here"); + let enc = xor_encrypt(&plain, 0xAB); + let out = decode(&enc).unwrap(); + assert_eq!(out.format, "jpg"); + assert_eq!(out.decoder, "legacy_xor"); + assert_eq!(out.data, plain); + } +} diff --git a/src/attachment/decoder/v2.rs b/src/attachment/decoder/v2.rs new file mode 100644 index 0000000..1c90f29 --- /dev/null +++ b/src/attachment/decoder/v2.rs @@ -0,0 +1,130 @@ +//! V2 .dat 解码:`AES-128-ECB(PKCS7) + raw + XOR` 三段拼接。 +//! +//! 文件结构(来自上游 `decode_image.py::v2_decrypt_file`): +//! `[6B magic V2/V1] [4B aes_size LE] [4B xor_size LE] [1B padding]` +//! `[aligned_aes_size bytes AES-ECB ciphertext]` +//! `[len - aligned_aes_size - xor_size bytes raw_data (不加密)]` +//! `[xor_size bytes XOR (单字节 key)]` +//! +//! `aligned_aes_size`:把 `aes_size` 向上对齐到 16 的倍数;当 `aes_size` 本身是 +//! 16 的倍数时,PKCS7 还会再加一整块 padding,所以再 +16。等价于 +//! `aes_size + (16 - aes_size % 16)`。 +//! +//! ⚠️ 此模块由 codex 落地完整 V2 实现 + image key 模块。当前只提供一个 +//! `decode` 入口骨架,方便 v1_aes 路径(固定 key)和 dispatch 一起编译过。 +//! 
`aes_key=None` 时返回带具体诊断信息的错误。 + +use anyhow::{anyhow, bail, Result}; + +use super::{detect_image_format, DecodedImage, V2KeyMaterial, V1_MAGIC, V2_MAGIC}; + +const HEADER_SIZE: usize = 15; + +pub fn decode(file_bytes: &[u8], key: V2KeyMaterial<'_>) -> Result { + if file_bytes.len() < HEADER_SIZE { + bail!("V2 .dat: 文件过短({} < {} 字节)", file_bytes.len(), HEADER_SIZE); + } + let magic: &[u8; 6] = file_bytes[..6].try_into().unwrap(); + if magic != &V2_MAGIC && magic != &V1_MAGIC { + bail!("V2 .dat: header magic 不匹配 V1/V2"); + } + + let aes_key = key.aes_key.ok_or_else(|| { + anyhow!("V2 .dat: 需要 image AES key(codex 的 image_key 模块尚未填充)") + })?; + + let aes_size = u32::from_le_bytes(file_bytes[6..10].try_into().unwrap()) as usize; + let xor_size = u32::from_le_bytes(file_bytes[10..14].try_into().unwrap()) as usize; + + // PKCS7 对齐:aes_size 不是 16 的倍数 → 向上对齐;是 16 的倍数 → 再加一整块 + let aligned_aes_size = aes_size + (16 - (aes_size % 16)); + + let aes_end = HEADER_SIZE.checked_add(aligned_aes_size).ok_or_else(|| anyhow!("aes 段长度溢出"))?; + if aes_end > file_bytes.len() { + bail!( + "V2 .dat: 头部宣称 aes_size={} (aligned={}) 超过文件长度 {}", + aes_size, + aligned_aes_size, + file_bytes.len() + ); + } + let raw_end = file_bytes.len().checked_sub(xor_size).ok_or_else(|| { + anyhow!("V2 .dat: 头部宣称 xor_size={} 超过文件长度 {}", xor_size, file_bytes.len()) + })?; + if aes_end > raw_end { + bail!( + "V2 .dat: aes_end={} > raw_end={}(aes/xor 段重叠)", + aes_end, + raw_end + ); + } + + // === AES-128-ECB 解密 + PKCS7 unpad === + let aes_data = &file_bytes[HEADER_SIZE..aes_end]; + let dec_aes = aes_ecb_decrypt_pkcs7(aes_key, aes_data)?; + + // === Raw 段(未加密) === + let raw_data = &file_bytes[aes_end..raw_end]; + + // === XOR 段 === + let xor_data: Vec = file_bytes[raw_end..].iter().map(|b| b ^ key.xor_key).collect(); + + let mut out = Vec::with_capacity(dec_aes.len() + raw_data.len() + xor_data.len()); + out.extend_from_slice(&dec_aes); + out.extend_from_slice(raw_data); + out.extend_from_slice(&xor_data); + + 
let format = detect_image_format(&out); + if format == "bin" { + bail!("V2 .dat: AES 解密成功但产物 magic 不识别(key 可能错)"); + } + Ok(DecodedImage { data: out, format, decoder: "v2" }) +} + +/// AES-128-ECB 解密 + PKCS7 unpad。失败时返回 `Err`,不返回半结果。 +/// +/// 不引第三方 ECB 包;ECB 本身就是 block-by-block,手工跑就行。 +/// PKCS7 padding 由本函数最后一段做 strict 校验:长度 1..=16,且尾部全是同值字节。 +fn aes_ecb_decrypt_pkcs7(key: &[u8; 16], cipher: &[u8]) -> Result> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + if cipher.is_empty() || cipher.len() % 16 != 0 { + bail!("AES 输入长度 {} 不是 16 的倍数", cipher.len()); + } + let aes = aes::Aes128::new(key.into()); + let mut out = Vec::with_capacity(cipher.len()); + for chunk in cipher.chunks_exact(16) { + let mut block = GenericArray::clone_from_slice(chunk); + aes.decrypt_block(&mut block); + out.extend_from_slice(&block); + } + let pad = *out.last().ok_or_else(|| anyhow!("AES PKCS7: 空输出"))? as usize; + if pad == 0 || pad > 16 || pad > out.len() { + bail!("AES PKCS7: 非法 padding 长度 {}", pad); + } + let tail = &out[out.len() - pad..]; + if !tail.iter().all(|&b| b as usize == pad) { + bail!("AES PKCS7: padding 字节不一致"); + } + out.truncate(out.len() - pad); + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rejects_short_file() { + let r = decode(&[0u8; 4], V2KeyMaterial::default()); + assert!(r.is_err()); + } + + #[test] + fn rejects_v2_without_key() { + let mut buf = V2_MAGIC.to_vec(); + buf.extend_from_slice(&[0u8; HEADER_SIZE - 6]); + let r = decode(&buf, V2KeyMaterial::default()); + let err = r.unwrap_err().to_string(); + assert!(err.contains("AES key"), "{}", err); + } +} diff --git a/src/attachment/image_key/macos.rs b/src/attachment/image_key/macos.rs new file mode 100644 index 0000000..234d4e5 --- /dev/null +++ b/src/attachment/image_key/macos.rs @@ -0,0 +1,10 @@ +//! macOS V2 image AES key 提取。 +//! +//! 主路径:从 `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/key__*.statistic` +//! 
文件名拿 uin,然后 `md5(str(uin) + sanitize(wxid)).hex()[:16]` 派生 AES key。 +//! +//! Fallback:枚举 uin 候选 2^24 个(`uint32`,但 wxid 4-byte 前缀只看后 24 bit), +//! 通过 `md5(str(uin))[:4] == wxid 后 4 字节` 匹配。 +//! 上游 `find_image_key_macos.py` 实测 1-2s 完成。 +//! +//! ⚠️ codex 落实现。 diff --git a/src/attachment/image_key/mod.rs b/src/attachment/image_key/mod.rs new file mode 100644 index 0000000..ec4f8ad --- /dev/null +++ b/src/attachment/image_key/mod.rs @@ -0,0 +1,34 @@ +//! V2 image AES key 提取 — 平台相关。 +//! +//! ⚠️ 此模块由 codex 落地。本文件只放公共 trait + 平台 dispatch 占位。 +//! +//! 路径: +//! - macOS:磁盘派生(`key__*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`) +//! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24) +//! - Windows:扫 `Weixin.exe` 内存,匹配 `[a-zA-Z0-9]{32}` 候选,按已知 AES ciphertext-block +//! 反验(`find_image_key.py` / `find_image_key.c` 已写实) +//! - Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误 + +#[allow(dead_code)] +pub mod macos; +#[allow(dead_code)] +pub mod windows; + +use anyhow::Result; + +/// 单个 wxid 的 V2 image key 提取接口。 +/// +/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时是稳定的)。 +pub trait ImageKeyProvider { + /// 返回当前 wxid 的 16 字节 AES key。失败要带可执行的诊断(例如「macOS 没找到 + /// kvcomm cache,请确认微信已登录」/「Windows 进程不在跑」)。 + fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]>; +} + +/// 平台默认实现(codex 后续填)。 +/// +/// 调用方目前可以直接传 `None`,让 resolver 在遇到 V2 .dat 时报「image key 未提取」错。 +pub fn default_provider() -> Option> { + // TODO(codex): 按 cfg(target_os) 返回 macOS / Windows / 不支持 + None +} diff --git a/src/attachment/image_key/windows.rs b/src/attachment/image_key/windows.rs new file mode 100644 index 0000000..1a0080a --- /dev/null +++ b/src/attachment/image_key/windows.rs @@ -0,0 +1,10 @@ +//! Windows V2 image AES key 提取。 +//! +//! 扫 `Weixin.exe` 进程内存,匹配模式 `(?) → message_resource.db (ChatName2Id + MessageResourceInfo) +//! → packed_info protobuf md5 提取 → xwechat_files//msg/attach/.../Img/[_t|_h].dat +//! → magic 分发 (legacy XOR / V1 fixed-AES / V2 AES+XOR) → 写出实际图片 +//! +//! 模块切分: +//! 
- `attachment_id`:跨 IPC / CLI 的不透明 ID(base64url(json)) +//! - `resolver`:从 `attachment_id` 反查 message_resource.db,定位本地 .dat +//! - `decoder`:根据文件 magic 分发到具体解码器(V1 / V2 等) +//! - `image_key`:V2 image AES key 提取(macOS / Windows) +//! +//! V2 / image_key 模块由 codex 落地,先放空 stub 以便 V1 / resolver / CLI 不被 block。 + +// 此模块由分多个 PR/commit 增量启用: +// 1) 先落 attachment_id / decoder / resolver / image_key 骨架(本 commit) +// 2) IPC + CLI + daemon route 把它们串起来(后续 commit) +// 3) image_key 平台实现(codex 后续 commit) +// 在 step 1 完成、step 2 未到时,大量公开 API 仍未被引用,#[allow(dead_code)] 抑制噪音 +#![allow(dead_code)] + +pub mod attachment_id; +pub mod decoder; +pub mod resolver; +pub mod image_key; + +pub use attachment_id::{AttachmentId, AttachmentKind}; diff --git a/src/attachment/resolver.rs b/src/attachment/resolver.rs new file mode 100644 index 0000000..c32d84e --- /dev/null +++ b/src/attachment/resolver.rs @@ -0,0 +1,353 @@ +//! 把 `AttachmentId` 翻译成本地 `.dat` 路径。 +//! +//! 流程: +//! 1. `chat` username → `ChatName2Id.rowid`(资源库) +//! 2. `(chat_id, local_id)` + `ORDER BY message_create_time DESC LIMIT 1` → +//! `MessageResourceInfo.packed_info` +//! 3. 从 `packed_info` (protobuf) 提取 32 字节 ASCII hex MD5 +//! 4. 在 `/msg/attach///Img/[_t|_h].dat` +//! 下找对应文件,按 full > _h > _t 优先级选一个 +//! +//! `` 由 daemon 已知(同 `db_dir` 的父目录),路径 layout 平台差异: +//! - Linux: `~/Documents/xwechat_files/` +//! - macOS: `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/` +//! ⚠️ msg/attach/... 子树 layout 待我用真实账号验证;上游 docstring 只写了 Windows +//! 
- Windows: `\xwechat_files\`(root 从 `%APPDATA%\Tencent\xwechat\config\*.ini` 读) + +use anyhow::{anyhow, Context, Result}; +use chrono::TimeZone; +use rusqlite::Connection; +use std::path::{Path, PathBuf}; + +use super::AttachmentId; + +/// 单条 attachment 在资源库 + 本地 attach 树下的解析结果。 +#[derive(Debug, Clone)] +pub struct ResolvedAttachment { + pub id: AttachmentId, + /// 从 `packed_info` 提取出的资源 MD5(小写 hex) + pub md5: String, + /// 命中的本地 .dat 路径(按 full > _h > _t 优先级选一个) + pub dat_path: PathBuf, + /// 文件 size(字节) + pub size: u64, +} + +/// 仅 schema lookup(不去找本地 .dat)。 +/// 用于 `wx attachments` 列表时填 `md5` 字段——文件可能根本不在本地。 +#[derive(Debug, Clone)] +pub struct AttachmentMetadata { + pub md5: String, +} + +/// 用 `(chat, local_id)` 查 message_resource.db 拿 file md5。 +/// +/// 调用方传已经解密好的 `message_resource.db` 路径(由 daemon 的 `DBCache` 准备)。 +/// 同步函数 — caller 在 `spawn_blocking` 里跑。 +pub fn lookup_md5_blocking( + resource_db_path: &Path, + chat: &str, + local_id: i64, + msg_local_type_lo32: i64, +) -> Result> { + let conn = Connection::open_with_flags( + resource_db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI, + ) + .with_context(|| format!("打开 message_resource.db {:?}", resource_db_path))?; + + // 1) ChatName2Id: user_name -> rowid + let chat_id: Option = conn + .query_row( + "SELECT rowid FROM ChatName2Id WHERE user_name = ?1", + [chat], + |row| row.get(0), + ) + .ok(); + let Some(chat_id) = chat_id else { + return Ok(None); + }; + + // 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新 + // message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型 + let packed: Option> = conn + .query_row( + "SELECT packed_info FROM MessageResourceInfo + WHERE chat_id = ?1 + AND message_local_id = ?2 + AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3) + ORDER BY message_create_time DESC + LIMIT 1", + rusqlite::params![chat_id, local_id, msg_local_type_lo32], + |row| row.get(0), + ) + .ok(); + + let Some(blob) = packed 
else { + return Ok(None); + }; + Ok(extract_md5_from_packed_info(&blob).map(|md5| AttachmentMetadata { md5 })) +} + +/// 从 `MessageResourceInfo.packed_info` (protobuf) 提取 32 字节 ASCII hex md5。 +/// +/// 主路径:搜 4 字节 marker `12 22 0a 20`(field=2 LEN, length=34, sub field=1 LEN, length=32), +/// 紧跟 32 字节 ASCII hex。 +/// Fallback:扫整个 blob 找连续 32 字节合法 hex 字符。 +pub fn extract_md5_from_packed_info(blob: &[u8]) -> Option { + const MARKER: &[u8; 4] = &[0x12, 0x22, 0x0A, 0x20]; + + // 主路径 + if let Some(pos) = find_subslice(blob, MARKER) { + let start = pos + MARKER.len(); + if start + 32 <= blob.len() { + if let Ok(s) = std::str::from_utf8(&blob[start..start + 32]) { + if s.chars().all(|c| c.is_ascii_hexdigit()) { + return Some(s.to_ascii_lowercase()); + } + } + } + } + + // Fallback:连续 32 字节合法 hex + if blob.len() >= 32 { + for start in 0..=blob.len() - 32 { + let chunk = &blob[start..start + 32]; + if let Ok(s) = std::str::from_utf8(chunk) { + if s.chars().all(|c| c.is_ascii_hexdigit()) { + return Some(s.to_ascii_lowercase()); + } + } + } + } + None +} + +/// 简单的子串扫描(避免拉 memchr/memmem 依赖;blob 通常 < 1KB) +fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() || needle.len() > haystack.len() { + return None; + } + haystack + .windows(needle.len()) + .position(|w| w == needle) +} + +/// 在 `///Img/[_t|_h].dat` 下找文件。 +/// +/// 优先级:full > `_h`(HD thumbnail)> `_t`(thumbnail)。返回最优的一个; +/// 找不到返回 None。 +/// +/// `attach_root` = `/msg/attach`。 +/// `create_time` 用于先定位 `` 子目录;找不到时再 fallback 全月份扫描, +/// 因为 WeChat 的 `YYYY-MM` 目录有时跟消息时间差 1 个月(按收到时间归档)。 +pub fn find_dat_file( + attach_root: &Path, + chat: &str, + file_md5: &str, + create_time: i64, +) -> Option { + let chat_hash = format!("{:x}", md5::compute(chat.as_bytes())); + let chat_dir = attach_root.join(&chat_hash); + if !chat_dir.is_dir() { + return None; + } + + // 第一步:试 create_time 当月 + 前后各一个月(共 3 个候选目录) + let candidates_ym: Vec = three_month_candidates(create_time); + for ym in &candidates_ym { + let 
img_dir = chat_dir.join(ym).join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + + // 第二步 fallback:扫整个 chat_dir 的所有月份子目录 + let entries = std::fs::read_dir(&chat_dir).ok()?; + let mut all_months: Vec = entries + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.is_dir()) + .collect(); + // 已经试过的 3 个候选可以跳过,但成本极小;保留全量扫 + all_months.sort(); + for month_dir in all_months { + let img_dir = month_dir.join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + None +} + +fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option { + if !img_dir.is_dir() { + return None; + } + let full = img_dir.join(format!("{}.dat", file_md5)); + if full.is_file() { + return Some(full); + } + let hd = img_dir.join(format!("{}_h.dat", file_md5)); + if hd.is_file() { + return Some(hd); + } + let thumb = img_dir.join(format!("{}_t.dat", file_md5)); + if thumb.is_file() { + return Some(thumb); + } + None +} + +fn three_month_candidates(unix_ts: i64) -> Vec { + use chrono::{Datelike, Duration}; + let dt = match chrono::Local.timestamp_opt(unix_ts, 0).single() { + Some(d) => d, + None => return Vec::new(), + }; + let prev = dt - Duration::days(31); + let next = dt + Duration::days(31); + [prev, dt, next] + .iter() + .map(|d| format!("{:04}-{:02}", d.year(), d.month())) + .collect() +} + +/// 把 `` (即 `db_storage` 父目录)拼成 `/msg/attach`。 +pub fn attach_root_for(wxchat_base: &Path) -> PathBuf { + wxchat_base.join("msg").join("attach") +} + +/// 完整流程:用 `attachment_id` 拿 md5 + 找 .dat。失败返回带具体诊断信息的 `Err`。 +/// +/// `resource_db_path` 由 daemon 提供(DBCache 已经解密好); +/// `attach_root` 由 caller 拼好(`attach_root_for(wxchat_base)`)。 +/// 同步函数 — caller 在 `spawn_blocking` 里跑。 +pub fn resolve_blocking( + id: &AttachmentId, + resource_db_path: &Path, + attach_root: &Path, +) -> Result { + let lo32_type: i64 = match id.kind { + super::AttachmentKind::Image => 3, + super::AttachmentKind::Voice => 34, + 
super::AttachmentKind::Video => 43, + super::AttachmentKind::File => 49, + }; + + let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)? + .ok_or_else(|| { + anyhow!( + "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", + id.chat, + id.local_id, + lo32_type + ) + })?; + + let dat_path = find_dat_file(attach_root, &id.chat, &meta.md5, id.create_time).ok_or_else( + || { + anyhow!( + "找不到本地 .dat(md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理", + meta.md5, + id.chat, + id.create_time + ) + }, + )?; + let size = std::fs::metadata(&dat_path).map(|m| m.len()).unwrap_or(0); + + Ok(ResolvedAttachment { id: id.clone(), md5: meta.md5, dat_path, size }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_md5_main_path() { + // 构造一段含 12 22 0a 20 marker 的 blob + let mut blob = vec![0xAA, 0xBB, 0xCC]; + blob.extend_from_slice(&[0x12, 0x22, 0x0A, 0x20]); + blob.extend_from_slice(b"deadbeefcafebabe1234567890abcdef"); + blob.extend_from_slice(&[0xFF, 0xFF]); + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_fallback_no_marker() { + // 没有 marker,但 blob 里有合法 32 字节 hex + let mut blob = vec![0xFF, 0x00]; + blob.extend_from_slice(b"00112233445566778899aabbccddeeff"); + blob.extend_from_slice(&[0x01]); + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("00112233445566778899aabbccddeeff".to_string()) + ); + } + + #[test] + fn extract_md5_uppercase_normalized_to_lower() { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"DEADBEEFCAFEBABE1234567890ABCDEF"); + // 上游/CI/本地 file md5 都是 lowercase;强制小写化避免大小写不一致导致命中失败 + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_returns_none_on_garbage() { + let blob = vec![0; 16]; + assert!(extract_md5_from_packed_info(&blob).is_none()); + } + + #[test] + 
fn three_month_candidates_includes_prev_curr_next() { + // 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09 + let ts = chrono::Local + .with_ymd_and_hms(2025, 8, 15, 12, 0, 0) + .unwrap() + .timestamp(); + let v = three_month_candidates(ts); + assert!(v.contains(&"2025-07".to_string())); + assert!(v.contains(&"2025-08".to_string())); + assert!(v.contains(&"2025-09".to_string())); + } + + #[test] + fn pick_best_prefers_full_then_h_then_t() { + let tmp = tempdir_for_test(); + let img = tmp.join("Img"); + std::fs::create_dir_all(&img).unwrap(); + let md5 = "abcd1234"; + std::fs::write(img.join(format!("{}_t.dat", md5)), b"thumb").unwrap(); + std::fs::write(img.join(format!("{}_h.dat", md5)), b"hd").unwrap(); + // 只有 _t / _h 时取 _h + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}_h.dat", md5).as_str() + ); + // 加 full 后取 full + std::fs::write(img.join(format!("{}.dat", md5)), b"full").unwrap(); + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}.dat", md5).as_str() + ); + } + + fn tempdir_for_test() -> PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-attach-test-{}-{}", pid, nanos)); + std::fs::create_dir_all(&p).unwrap(); + p + } +} diff --git a/src/main.rs b/src/main.rs index 6c3f9a2..e6385fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod crypto; mod scanner; mod daemon; mod cli; +mod attachment; fn main() { if std::env::var("WX_DAEMON_MODE").is_ok() { From bf8d0d934af2afda963ed79909a11f8dde532934 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:34:38 +0800 Subject: [PATCH 14/20] feat(attachment): implement V2 image key providers --- src/attachment/image_key/linux.rs | 11 + src/attachment/image_key/macos.rs | 427 +++++++++++++++++++++++++++- src/attachment/image_key/mod.rs | 332 
++++++++++++++++++++- src/attachment/image_key/windows.rs | 244 +++++++++++++++- 4 files changed, 987 insertions(+), 27 deletions(-) create mode 100644 src/attachment/image_key/linux.rs diff --git a/src/attachment/image_key/linux.rs b/src/attachment/image_key/linux.rs new file mode 100644 index 0000000..4100ab2 --- /dev/null +++ b/src/attachment/image_key/linux.rs @@ -0,0 +1,11 @@ +use anyhow::{bail, Result}; + +use super::{ImageKeyMaterial, ImageKeyProvider}; + +pub struct LinuxImageKeyProvider; + +impl ImageKeyProvider for LinuxImageKeyProvider { + fn get_key(&self, _wxid: &str) -> Result { + bail!("Linux V2 图片 key 当前未实现;请先用 legacy/V1 图片或在 README 中标注 unsupported") + } +} diff --git a/src/attachment/image_key/macos.rs b/src/attachment/image_key/macos.rs index 234d4e5..127d81c 100644 --- a/src/attachment/image_key/macos.rs +++ b/src/attachment/image_key/macos.rs @@ -1,10 +1,423 @@ //! macOS V2 image AES key 提取。 //! -//! 主路径:从 `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/key__*.statistic` -//! 文件名拿 uin,然后 `md5(str(uin) + sanitize(wxid)).hex()[:16]` 派生 AES key。 +//! 主路径:从 `key__*.statistic` 文件名拿 uin,然后 +//! `md5(str(uin) + normalize(wxid)).hex()[:16]` 派生 AES key。 //! -//! Fallback:枚举 uin 候选 2^24 个(`uint32`,但 wxid 4-byte 前缀只看后 24 bit), -//! 通过 `md5(str(uin))[:4] == wxid 后 4 字节` 匹配。 -//! 上游 `find_image_key_macos.py` 实测 1-2s 完成。 -//! -//! ⚠️ codex 落实现。 +//! fallback:通过 `md5(str(uin))[:4] == wxid_suffix` + `uin & 0xff == xor_key` +//! 
把搜索空间压到 2^24,再用 V2 模板反验 AES key。 + +use anyhow::{bail, Context, Result}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; + +use crate::config; + +use super::{ + attach_root_for_db_dir, configured_db_dir_for_wxid, derive_xor_key_from_v2_dat, + find_v2_template_ciphertexts, join_components, normalize_wxid, verify_aes_key, wxid_from_db_dir, + ImageKeyMaterial, ImageKeyProvider, +}; + +pub struct MacosImageKeyProvider { + configured_db_dir: Result, + cache: Mutex>, +} + +impl MacosImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for MacosImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = normalize_wxid(wxid); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = derive_key_for_paths(&db_dir, &attach_dir)?; + self.cache.lock().unwrap().insert(cache_key, key); + Ok(key) + } +} + +fn derive_key_for_paths(db_dir: &Path, attach_dir: &Path) -> Result { + let templates = find_v2_template_ciphertexts(attach_dir, 3, 64)?; + if templates.is_empty() { + bail!("在 {} 下找不到 V2 模板文件", attach_dir.display()); + } + + if let Some(found) = find_via_kvcomm(db_dir, &templates)? { + return Ok(found); + } + + let (wxid_full, wxid_norm, suffix) = + extract_wxid_parts(db_dir).context("db_dir 不含可用于 fallback 的 wxid 4 位后缀")?; + let (xor_key, _votes, _total) = derive_xor_key_from_v2_dat(attach_dir, 10, 3)? 
+ .context("V2 .dat 样本不足,无法投票反推 xor_key")?; + + for wxid in preferred_wxid_candidates(&wxid_full, &wxid_norm) { + if let Some(aes_key) = bruteforce_aes_key(xor_key, &suffix, wxid, &templates)? { + return Ok(ImageKeyMaterial { aes_key, xor_key }); + } + } + + bail!("macOS V2 图片 key 派生失败") +} + +fn find_via_kvcomm(db_dir: &Path, templates: &[[u8; 16]]) -> Result> { + let Some(kvcomm_dir) = find_existing_kvcomm_dir(db_dir) else { + return Ok(None); + }; + + let codes = collect_kvcomm_codes(&kvcomm_dir)?; + if codes.is_empty() { + return Ok(None); + } + let wxids = collect_wxid_candidates(db_dir); + if wxids.is_empty() { + return Ok(None); + } + + for wxid in wxids { + for code in &codes { + let candidate = derive_image_key_material(*code, &wxid); + if verify_aes_key(&candidate.aes_key, templates) { + return Ok(Some(candidate)); + } + } + } + Ok(None) +} + +fn derive_image_key_material(code: u32, wxid: &str) -> ImageKeyMaterial { + let xor_key = (code & 0xFF) as u8; + let digest = format!("{:x}", md5::compute(format!("{}{}", code, wxid))); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + ImageKeyMaterial { aes_key, xor_key } +} + +fn collect_wxid_candidates(db_dir: &Path) -> Vec { + let Some(raw) = wxid_from_db_dir(db_dir) else { + return Vec::new(); + }; + let mut out = vec![raw.clone()]; + let normalized = normalize_wxid(&raw); + if normalized != raw { + out.push(normalized); + } + out +} + +fn extract_wxid_parts(db_dir: &Path) -> Option<(String, String, String)> { + let raw = wxid_from_db_dir(db_dir)?; + let idx = raw.rfind('_')?; + let suffix = &raw[idx + 1..]; + if suffix.len() != 4 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) { + return None; + } + Some((raw.clone(), normalize_wxid(&raw), suffix.to_ascii_lowercase())) +} + +fn preferred_wxid_candidates<'a>(raw: &'a str, normalized: &'a str) -> Vec<&'a str> { + if raw == normalized { + vec![raw] + } else { + vec![normalized, raw] + } +} + +fn 
derive_kvcomm_dir_candidates(db_dir: &Path) -> Vec { + let parts: Vec = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()) + .collect(); + + let mut candidates = Vec::new(); + if let Some(idx) = parts.iter().position(|part| part == "xwechat_files") { + let documents_root = join_components(&parts[..idx]); + candidates.push(documents_root.join("app_data/net/kvcomm")); + candidates.push(documents_root.join("xwechat/net/kvcomm")); + if idx >= 1 { + let container_root = join_components(&parts[..idx - 1]); + candidates.push( + container_root + .join("Library/Application Support/com.tencent.xinWeChat/xwechat/net/kvcomm"), + ); + candidates.push( + container_root.join("Library/Application Support/com.tencent.xinWeChat/net/kvcomm"), + ); + } + } + if let Some(home) = dirs::home_dir() { + candidates.push( + home.join("Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm"), + ); + } + + let mut dedup = Vec::new(); + for candidate in candidates { + if !dedup.contains(&candidate) { + dedup.push(candidate); + } + } + dedup +} + +fn find_existing_kvcomm_dir(db_dir: &Path) -> Option { + derive_kvcomm_dir_candidates(db_dir) + .into_iter() + .find(|path| path.is_dir()) +} + +fn collect_kvcomm_codes(kvcomm_dir: &Path) -> Result> { + let mut codes = std::collections::BTreeSet::new(); + for entry in std::fs::read_dir(kvcomm_dir)? 
{ + let entry = entry?; + let Some(name) = entry.file_name().to_str().map(|value| value.to_string()) else { + continue; + }; + let Some(rest) = name.strip_prefix("key_") else { + continue; + }; + let Some((code, _)) = rest.split_once('_') else { + continue; + }; + if let Ok(code) = code.parse::() { + codes.insert(code); + } + } + Ok(codes.into_iter().collect()) +} + +fn bruteforce_aes_key( + xor_key: u8, + suffix_hex: &str, + wxid: &str, + templates: &[[u8; 16]], +) -> Result> { + let suffix = hex_prefix_to_bytes(suffix_hex)?; + let workers = std::thread::available_parallelism() + .map(|count| count.get()) + .unwrap_or(1) + .max(1); + let total = 1u32 << 24; + let chunk = total / workers as u32; + let stop = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel(); + let wxid = Arc::new(wxid.as_bytes().to_vec()); + let templates = Arc::new(templates.to_vec()); + + std::thread::scope(|scope| { + for idx in 0..workers { + let start = idx as u32 * chunk; + let end = if idx + 1 == workers { + total + } else { + (idx as u32 + 1) * chunk + }; + let stop = Arc::clone(&stop); + let tx = tx.clone(); + let wxid = Arc::clone(&wxid); + let templates = Arc::clone(&templates); + scope.spawn(move || { + for upper in start..end { + if stop.load(Ordering::Relaxed) { + break; + } + let uin = (upper << 8) | xor_key as u32; + let uin_ascii = uin.to_string(); + let digest = md5::compute(uin_ascii.as_bytes()); + if digest.0[0] != suffix[0] || digest.0[1] != suffix[1] { + continue; + } + + let mut input = Vec::with_capacity(uin_ascii.len() + wxid.len()); + input.extend_from_slice(uin_ascii.as_bytes()); + input.extend_from_slice(&wxid); + let aes_hex = format!("{:x}", md5::compute(input)); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&aes_hex.as_bytes()[..16]); + if verify_aes_key(&aes_key, &templates) { + stop.store(true, Ordering::Relaxed); + let _ = tx.send(aes_key); + break; + } + } + }); + } + }); + drop(tx); + Ok(rx.try_iter().next()) +} + +fn 
hex_prefix_to_bytes(hex: &str) -> Result<[u8; 2]> { + if hex.len() != 4 { + bail!("wxid suffix 不是 4 位 hex: {}", hex); + } + let hi = u8::from_str_radix(&hex[..2], 16)?; + let lo = u8::from_str_radix(&hex[2..], 16)?; + Ok([hi, lo]) +} + +#[cfg(test)] +mod tests { + use super::{derive_key_for_paths, find_existing_kvcomm_dir}; + use super::collect_wxid_candidates; + use crate::attachment::image_key::normalize_wxid; + use aes::cipher::{generic_array::GenericArray, BlockEncrypt, KeyInit}; + use aes::Aes128; + use std::fs; + use std::path::Path; + + fn temp_dir(label: &str) -> std::path::PathBuf { + let mut dir = std::env::temp_dir(); + dir.push(format!( + "wx-cli-image-key-macos-{}-{:?}", + label, + std::thread::current().id() + )); + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn write_v2_template(path: &Path, aes_key: &[u8; 16], xor_key: u8, plaintext: &[u8; 16]) { + let cipher = Aes128::new(aes_key.into()); + let mut block = GenericArray::clone_from_slice(plaintext); + cipher.encrypt_block(&mut block); + + let mut data = Vec::new(); + data.extend_from_slice(&crate::attachment::decoder::V2_MAGIC); + data.extend_from_slice(&0u32.to_le_bytes()); + data.extend_from_slice(&0u32.to_le_bytes()); + data.push(0); + data.extend_from_slice(&block); + data.push(0); + data.push(0xD9 ^ xor_key); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, data).unwrap(); + } + + #[test] + fn normalize_wxid_matches_expected_shapes() { + assert_eq!(normalize_wxid("wxid_abc_def"), "wxid_abc"); + assert_eq!(normalize_wxid("your_wxid_a1b2"), "your_wxid"); + assert_eq!(normalize_wxid("plain"), "plain"); + } + + #[test] + fn kvcomm_path_detection_works() { + let dir = temp_dir("kvcomm"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + let kvcomm = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm", + ); + 
fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + assert_eq!(find_existing_kvcomm_dir(&db_dir), Some(kvcomm)); + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_kvcomm() { + let dir = temp_dir("via-kvcomm"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + let attach = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/msg/attach/chat/2026-05/Img", + ); + let kvcomm = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm", + ); + fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + fs::write(kvcomm.join("key_42_x.statistic"), b"").unwrap(); + + let digest = format!("{:x}", md5::compute("42your_wxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + write_v2_template( + &attach.join("sample_t.dat"), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_bruteforce_fallback() { + let dir = temp_dir("via-fallback"); + let suffix = format!("{:x}", md5::compute("42")) + .chars() + .take(4) + .collect::(); + let raw_wxid = format!("mywxid_{}", suffix); + let db_dir = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/db_storage", + raw_wxid + )); + let attach = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/msg/attach/chat/2026-05/Img", + raw_wxid + )); + fs::create_dir_all(&db_dir).unwrap(); + + let digest = format!("{:x}", md5::compute("42mywxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + for idx 
in 0..3 { + write_v2_template( + &attach.join(format!("sample{}_t.dat", idx)), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + } + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn collects_raw_and_normalized_wxid() { + let dir = temp_dir("wxid"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + fs::create_dir_all(&db_dir).unwrap(); + let wxids = collect_wxid_candidates(&db_dir); + assert_eq!(wxids, vec!["your_wxid_a1b2".to_string(), "your_wxid".to_string()]); + let _ = fs::remove_dir_all(dir); + } +} diff --git a/src/attachment/image_key/mod.rs b/src/attachment/image_key/mod.rs index ec4f8ad..74eee30 100644 --- a/src/attachment/image_key/mod.rs +++ b/src/attachment/image_key/mod.rs @@ -1,7 +1,5 @@ //! V2 image AES key 提取 — 平台相关。 //! -//! ⚠️ 此模块由 codex 落地。本文件只放公共 trait + 平台 dispatch 占位。 -//! //! 路径: //! - macOS:磁盘派生(`key__*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`) //! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24) @@ -9,26 +7,336 @@ //! 反验(`find_image_key.py` / `find_image_key.c` 已写实) //! 
- Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误 -#[allow(dead_code)] +#[cfg(target_os = "linux")] +pub mod linux; +#[cfg(target_os = "macos")] pub mod macos; -#[allow(dead_code)] +#[cfg(target_os = "windows")] pub mod windows; use anyhow::Result; +use regex::bytes::Regex; +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use crate::attachment::decoder::{detect_image_format, V2_MAGIC}; + +/// V2 图片真正需要的是两份材料: +/// - 16 字节 ASCII AES key +/// - XOR key(macOS 上来自 uin & 0xff,不是总能硬编码成 0x88) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ImageKeyMaterial { + pub aes_key: [u8; 16], + pub xor_key: u8, +} /// 单个 wxid 的 V2 image key 提取接口。 /// -/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时是稳定的)。 +/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时通常稳定)。 pub trait ImageKeyProvider { - /// 返回当前 wxid 的 16 字节 AES key。失败要带可执行的诊断(例如「macOS 没找到 - /// kvcomm cache,请确认微信已登录」/「Windows 进程不在跑」)。 - fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]>; + fn get_key(&self, wxid: &str) -> Result; + + fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]> { + Ok(self.get_key(wxid)?.aes_key) + } + + fn get_xor_key(&self, wxid: &str) -> Result { + Ok(self.get_key(wxid)?.xor_key) + } } -/// 平台默认实现(codex 后续填)。 -/// -/// 调用方目前可以直接传 `None`,让 resolver 在遇到 V2 .dat 时报「image key 未提取」错。 +/// 平台默认实现。 pub fn default_provider() -> Option> { - // TODO(codex): 按 cfg(target_os) 返回 macOS / Windows / 不支持 + #[cfg(target_os = "macos")] + { + return Some(Box::new(macos::MacosImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "windows")] + { + return Some(Box::new(windows::WindowsImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "linux")] + { + return Some(Box::new(linux::LinuxImageKeyProvider)); + } + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] + { + None + } +} + +pub(crate) fn configured_db_dir_for_wxid(configured_db_dir: &Path, requested_wxid: &str) -> PathBuf { + if 
requested_wxid.trim().is_empty() { + return configured_db_dir.to_path_buf(); + } + + let configured_leaf = wxid_from_db_dir(configured_db_dir); + if let Some(leaf) = configured_leaf.as_deref() { + if same_wxid(leaf, requested_wxid) { + return configured_db_dir.to_path_buf(); + } + } + + xwechat_files_root(configured_db_dir) + .map(|root| root.join(requested_wxid).join("db_storage")) + .unwrap_or_else(|| configured_db_dir.to_path_buf()) +} + +pub(crate) fn wxid_from_db_dir(db_dir: &Path) -> Option { + let mut components = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()); + while let Some(component) = components.next() { + if component == "xwechat_files" { + return components.next(); + } + } None } + +pub(crate) fn xwechat_files_root(db_dir: &Path) -> Option { + let parts: Vec<_> = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()) + .collect(); + let idx = parts.iter().position(|part| part == "xwechat_files")?; + Some(join_components(&parts[..=idx])) +} + +pub(crate) fn normalize_wxid(raw: &str) -> String { + let raw = raw.trim(); + if raw.is_empty() { + return String::new(); + } + if let Some(stripped) = raw.strip_prefix("wxid_") { + let head = stripped.split('_').next().unwrap_or(stripped); + return format!("wxid_{}", head); + } + if let Some((base, suffix)) = raw.rsplit_once('_') { + if suffix.len() == 4 && suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) { + return base.to_string(); + } + } + raw.to_string() +} + +pub(crate) fn same_wxid(a: &str, b: &str) -> bool { + a == b || normalize_wxid(a) == normalize_wxid(b) +} + +pub(crate) fn join_components(parts: &[String]) -> PathBuf { + let mut out = if parts.first().map(|part| part.is_empty()).unwrap_or(false) { + PathBuf::from("/") + } else { + PathBuf::new() + }; + for part in parts { + if part.is_empty() { + continue; + } + out.push(part); + } + out +} + +pub(crate) fn attach_root_for_db_dir(db_dir: &Path) -> PathBuf { 
+ db_dir + .parent() + .map(|base| base.join("msg").join("attach")) + .unwrap_or_else(|| PathBuf::from("msg/attach")) +} + +pub(crate) fn find_v2_template_ciphertexts( + attach_dir: &Path, + max_templates: usize, + max_files: usize, +) -> Result> { + if !attach_dir.is_dir() { + return Ok(Vec::new()); + } + + let mut out = collect_templates_with_suffix(attach_dir, "_t.dat", max_templates, max_files)?; + if out.is_empty() { + out = collect_templates_with_suffix(attach_dir, ".dat", max_templates, max_files)?; + } + Ok(out) +} + +pub(crate) fn derive_xor_key_from_v2_dat( + attach_dir: &Path, + sample: usize, + min_samples: usize, +) -> Result> { + if !attach_dir.is_dir() { + return Ok(None); + } + let mut votes = Vec::new(); + visit_files(attach_dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(".dat") { + return Ok(false); + } + + let meta = fs::metadata(path)?; + if meta.len() < 0x20 { + return Ok(false); + } + + let bytes = fs::read(path)?; + if bytes.starts_with(&V2_MAGIC) { + let last = *bytes.last().unwrap(); + votes.push(last ^ 0xD9); + if votes.len() >= sample { + return Ok(true); + } + } + Ok(false) + })?; + + if votes.len() < min_samples { + return Ok(None); + } + + let mut counts = [0usize; 256]; + for vote in &votes { + counts[*vote as usize] += 1; + } + let (xor_key, top_votes) = counts + .iter() + .enumerate() + .max_by_key(|(_, count)| *count) + .map(|(idx, count)| (idx as u8, *count)) + .expect("votes 非空"); + Ok(Some((xor_key, top_votes, votes.len()))) +} + +pub(crate) fn verify_aes_key(aes_key: &[u8; 16], templates: &[[u8; 16]]) -> bool { + !templates.is_empty() + && templates + .iter() + .all(|template| decrypt_template_block(aes_key, template).is_some()) +} + +pub(crate) fn ascii_alnum_candidates<'a>(buf: &'a [u8], len: usize) -> Vec<&'a [u8]> { + let re = match len { + 16 => regex16(), + 32 => regex32(), + _ => return Vec::new(), + }; + + 
re.find_iter(buf) + .filter_map(|matched| { + let start = matched.start(); + let end = matched.end(); + let left_ok = start == 0 || !buf[start - 1].is_ascii_alphanumeric(); + let right_ok = end == buf.len() || !buf[end].is_ascii_alphanumeric(); + (left_ok && right_ok).then_some(&buf[start..end]) + }) + .collect() +} + +fn collect_templates_with_suffix( + dir: &Path, + suffix: &str, + max_templates: usize, + max_files: usize, +) -> Result> { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + let mut examined = 0usize; + visit_files(dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(suffix) { + return Ok(false); + } + examined += 1; + let bytes = fs::read(path)?; + if bytes.len() >= 0x1F && bytes.starts_with(&V2_MAGIC) { + let template: [u8; 16] = bytes[0x0F..0x1F].try_into().unwrap(); + if seen.insert(template) { + out.push(template); + if out.len() >= max_templates { + return Ok(true); + } + } + } + Ok(examined >= max_files && !out.is_empty()) + })?; + Ok(out) +} + +fn visit_files(dir: &Path, f: &mut F) -> Result +where + F: FnMut(&Path) -> Result, +{ + let mut entries: Vec = fs::read_dir(dir)? + .flatten() + .map(|entry| entry.path()) + .collect(); + entries.sort(); + + for path in entries { + if path.is_dir() { + if visit_files(&path, f)? { + return Ok(true); + } + continue; + } + if f(&path)? 
{ + return Ok(true); + } + } + Ok(false) +} + +fn decrypt_template_block(aes_key: &[u8; 16], ciphertext: &[u8; 16]) -> Option<&'static str> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + + let cipher = aes::Aes128::new(aes_key.into()); + let mut block = GenericArray::clone_from_slice(ciphertext); + cipher.decrypt_block(&mut block); + let block: [u8; 16] = block.as_slice().try_into().ok()?; + let format = detect_image_format(&block); + (format != "bin").then_some(format) +} + +fn regex16() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{16}").unwrap()) +} + +fn regex32() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{32}").unwrap()) +} + +#[cfg(test)] +mod tests { + use super::{ascii_alnum_candidates, normalize_wxid, same_wxid}; + + #[test] + fn regex_candidates_respect_boundaries() { + let buf = b"xx 0123456789ABCDef yy"; + let hits = ascii_alnum_candidates(buf, 16); + assert_eq!(hits, vec![&buf[3..19]]); + } + + #[test] + fn regex_candidates_ignore_embedded_runs() { + let buf = b"x0123456789ABCDefz"; + assert!(ascii_alnum_candidates(buf, 16).is_empty()); + } + + #[test] + fn wxid_normalization_matches_expected_forms() { + assert_eq!(normalize_wxid("wxid_abc_def"), "wxid_abc"); + assert_eq!(normalize_wxid("your_wxid_a1b2"), "your_wxid"); + assert!(same_wxid("your_wxid_a1b2", "your_wxid")); + } +} diff --git a/src/attachment/image_key/windows.rs b/src/attachment/image_key/windows.rs index 1a0080a..0b7acd8 100644 --- a/src/attachment/image_key/windows.rs +++ b/src/attachment/image_key/windows.rs @@ -1,10 +1,238 @@ //! Windows V2 image AES key 提取。 //! -//! 
扫 `Weixin.exe` 进程内存,匹配模式 `(?, + cache: Mutex>, +} + +impl WindowsImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for WindowsImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = wxid.trim().to_string(); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = derive_key_for_paths(&attach_dir)?; + self.cache.lock().unwrap().insert(cache_key, key); + Ok(key) + } +} + +fn derive_key_for_paths(attach_dir: &std::path::Path) -> Result { + let templates = find_v2_template_ciphertexts(attach_dir, 3, 64)?; + if templates.is_empty() { + bail!("在 {} 下找不到 V2 模板文件", attach_dir.display()); + } + let xor_key = derive_xor_key_from_v2_dat(attach_dir, 10, 3)? + .map(|(key, _, _)| key) + .unwrap_or(0x88); + + let pid = find_wechat_pid().context("找不到 Weixin.exe 进程,请确认微信正在运行")?; + let process = unsafe { + OpenProcess(PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, false, pid) + .context("OpenProcess 失败,请以管理员权限运行")? + }; + + let aes_key = scan_memory_for_key(process, &templates); + unsafe { + let _ = CloseHandle(process); + } + + Ok(ImageKeyMaterial { + aes_key: aes_key?, + xor_key, + }) +} + +fn find_wechat_pid() -> Option { + let snapshot = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? 
}; + let mut entry = PROCESSENTRY32 { + dwSize: std::mem::size_of::() as u32, + ..Default::default() + }; + + unsafe { + if Process32First(snapshot, &mut entry).is_err() { + let _ = CloseHandle(snapshot); + return None; + } + loop { + let name = + std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8).to_string_lossy(); + if name.eq_ignore_ascii_case("Weixin.exe") { + let pid = entry.th32ProcessID; + let _ = CloseHandle(snapshot); + return Some(pid); + } + if Process32Next(snapshot, &mut entry).is_err() { + break; + } + } + let _ = CloseHandle(snapshot); + } + None +} + +fn scan_memory_for_key(process: HANDLE, templates: &[[u8; 16]]) -> Result<[u8; 16]> { + let mut seen = HashSet::<[u8; 16]>::new(); + let mut address = 0usize; + + loop { + let mut mbi = MEMORY_BASIC_INFORMATION::default(); + let ret = unsafe { + VirtualQueryEx( + process, + Some(address as *const _), + &mut mbi, + std::mem::size_of::(), + ) + }; + if ret == 0 { + break; + } + + let base = mbi.BaseAddress as usize; + let size = mbi.RegionSize; + if mbi.State == MEM_COMMIT && is_candidate_page(mbi.Protect.0) && size <= MAX_REGION_SIZE { + if let Some(aes_key) = scan_region(process, base, size, templates, &mut seen)? 
{ + return Ok(aes_key); + } + } + + address = base.saturating_add(size); + if address == 0 { + break; + } + } + + bail!("Windows 进程内存里没有找到可验证的 V2 AES key") +} + +fn scan_region( + process: HANDLE, + base: usize, + size: usize, + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Result> { + let overlap = 31usize; + let mut offset = 0usize; + + while offset < size { + let chunk_size = std::cmp::min(CHUNK_SIZE, size - offset); + let addr = base + offset; + let mut buf = vec![0u8; chunk_size]; + let mut bytes_read = 0usize; + + let ok = unsafe { + ReadProcessMemory( + process, + addr as *const _, + buf.as_mut_ptr() as *mut _, + chunk_size, + Some(&mut bytes_read), + ) + .is_ok() + }; + + if ok && bytes_read > 0 { + buf.truncate(bytes_read); + if let Some(key) = scan_candidate_buffer(&buf, templates, seen) { + return Ok(Some(key)); + } + } + + offset += if chunk_size > overlap { + chunk_size - overlap + } else { + chunk_size + }; + } + + Ok(None) +} + +fn scan_candidate_buffer( + buf: &[u8], + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Option<[u8; 16]> { + for candidate in ascii_alnum_candidates(buf, 32) { + let mut key = [0u8; 16]; + key.copy_from_slice(&candidate[..16]); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + for candidate in ascii_alnum_candidates(buf, 16) { + let mut key = [0u8; 16]; + key.copy_from_slice(candidate); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + None +} + +fn is_candidate_page(protect: u32) -> bool { + if protect == PAGE_NOACCESS.0 || (protect & PAGE_GUARD.0) != 0 { + return false; + } + let base = protect & !(PAGE_GUARD.0 | PAGE_NOCACHE.0 | PAGE_WRITECOMBINE.0); + matches!( + base, + value if value == PAGE_READWRITE.0 + || value == PAGE_WRITECOPY.0 + || value == PAGE_EXECUTE_READWRITE.0 + || value == PAGE_EXECUTE_WRITECOPY.0 + ) +} From 2d88c9542dcba52c21405416c57b97ab47febe8b Mon Sep 17 00:00:00 2001 From: jackwener Date: 
Thu, 14 May 2026 18:40:57 +0800 Subject: [PATCH 15/20] feat(attachment): wire wx attachments / wx extract end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 把 V1 (legacy XOR + V1 fixed-AES) + 平台相关 V2 (macOS / Windows) image 解密能力一路接到 CLI: - ipc: 新增 Attachments / Extract 两个 Request variant - daemon/server: dispatch 路由到 query::q_attachments / q_extract - daemon/cache: DbCache::db_dir() 公开,让 resolver 推 wxchat_base - daemon/query: q_attachments 走 Msg_ 表按 (local_type & 0xFFFFFFFF) IN (...) 过滤、按 ts DESC 全局排序后分页,返回不透明 attachment_id; q_extract 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 按 magic 分发 v1/v2 解码 → 写盘。bridge 用 ImageKeyMaterial.{aes_key, xor_key}(codex 实测真实账号 xor_key=0xa2,不能硬编码 0x88) - cli: 新增 wx attachments / wx extract 两个子命令,flag 风格与现有 history / biz-articles 对齐 - README + SKILL: 加附件提取章节,含三档解码档位与 V2 image key 派生说明 --- README.md | 29 ++++ SKILL.md | 28 ++++ src/cli/attachments.rs | 42 ++++++ src/cli/extract.rs | 25 ++++ src/cli/mod.rs | 46 ++++++ src/daemon/cache.rs | 6 + src/daemon/query.rs | 312 +++++++++++++++++++++++++++++++++++++++++ src/daemon/server.rs | 12 ++ src/ipc.rs | 26 ++++ 9 files changed, 526 insertions(+) create mode 100644 src/cli/attachments.rs create mode 100644 src/cli/extract.rs diff --git a/README.md b/README.md index 8a8e23b..35589cd 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,35 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 +### 附件提取(图片 / 视频 / 文件 / 语音) + +聊天里的附件本体存在 `xwechat_files/<wxid>/msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 + +```bash +# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选) +wx attachments "张三" +wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) +wx extract <attachment_id> -o
~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。 + +`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取: +- **macOS**:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24);xor_key = `uin & 0xff`,**不是硬编码 0x88** +- **Windows**:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 +- **Linux**:上游空白,遇到 V2 .dat 会报 unsupported + ### 联系人 & 群组 ```bash diff --git a/SKILL.md b/SKILL.md index fe7418c..ddf02e1 100644 --- a/SKILL.md +++ b/SKILL.md @@ -242,6 +242,34 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 +### 附件提取(图片 / 视频 / 文件 / 语音) + +聊天里的图片/视频/文件本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: + +```bash +# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选) +wx attachments "张三" +wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 用 attachment_id 把单个资源解密写到指定路径 +wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。 + +`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / 
`format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取(macOS / Windows 自动;Linux 暂不支持): +- macOS:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback;`xor_key = uin & 0xff` +- Windows:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 + ### 收藏与统计 ```bash diff --git a/src/cli/attachments.rs b/src/cli/attachments.rs new file mode 100644 index 0000000..662c256 --- /dev/null +++ b/src/cli/attachments.rs @@ -0,0 +1,42 @@ +use anyhow::Result; + +use crate::ipc::Request; +use super::history::{parse_time, parse_time_end}; +use super::output::{print_value, resolve}; +use super::transport; + +/// `wx attachments` — 列出指定会话的附件消息(默认 image,可多选)。 +/// +/// 输出每条 `attachment_id`,再传给 `wx extract` 才真正读 message_resource.db +/// 与本地 .dat 解码。这一步只查 `Msg_` 表,几千条群聊也能秒返。 +pub fn cmd_attachments( + chat: String, + kinds: Vec, + limit: usize, + offset: usize, + since: Option, + until: Option, + json: bool, +) -> Result<()> { + let since_ts = since.as_deref().map(parse_time).transpose()?; + let until_ts = until.as_deref().map(parse_time_end).transpose()?; + + // CLI 收上来的 Vec 为空时按默认(image)走,让 daemon 决定 fallback。 + let kinds_param = if kinds.is_empty() { None } else { Some(kinds) }; + + let req = Request::Attachments { + chat, + kinds: kinds_param, + limit, + offset, + since: since_ts, + until: until_ts, + }; + let resp = transport::send(req)?; + let data = resp + .data + .get("attachments") + .cloned() + .unwrap_or(serde_json::Value::Array(vec![])); + print_value(&data, &resolve(json)) +} diff --git a/src/cli/extract.rs b/src/cli/extract.rs new file mode 100644 index 0000000..a0eba0d --- /dev/null +++ b/src/cli/extract.rs @@ -0,0 +1,25 @@ +use 
anyhow::Result; + +use crate::ipc::Request; +use super::output::{print_value, resolve}; +use super::transport; + +/// `wx extract` — 把单个 `attachment_id` 对应的资源解密写到指定路径。 +/// +/// daemon 端:解析 `attachment_id` → 查 `message_resource.db` 拿 file md5 → +/// 在 `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 解码器 → +/// 写出真实图片/文件。 +pub fn cmd_extract( + attachment_id: String, + output: String, + overwrite: bool, + json: bool, +) -> Result<()> { + let req = Request::Extract { + attachment_id, + output, + overwrite, + }; + let resp = transport::send(req)?; + print_value(&resp.data, &resolve(json)) +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index b9e71fd..5fe4e8c 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,5 +1,7 @@ mod init; +pub mod attachments; pub mod biz_articles; +pub mod extract; pub mod sessions; pub mod history; pub mod search; @@ -262,6 +264,44 @@ enum Commands { #[arg(long)] json: bool, }, + /// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id + Attachments { + /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) + chat: String, + /// 类型(多选,默认 image)。可选:image / voice / video / file + #[arg(long = "kind", value_name = "KIND", + value_parser = ["image", "voice", "video", "file", "audio", "img"])] + kinds: Vec, + /// 显示数量 + #[arg(short = 'n', long, default_value = "50")] + limit: usize, + /// 分页偏移 + #[arg(long, default_value = "0")] + offset: usize, + /// 起始时间 YYYY-MM-DD + #[arg(long)] + since: Option, + /// 结束时间 YYYY-MM-DD + #[arg(long)] + until: Option, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, + /// 把单个 attachment_id 对应的资源解密写到指定文件路径 + Extract { + /// 由 `wx attachments` 输出的不透明 ID(base64url 字符串) + attachment_id: String, + /// 输出文件路径(绝对或相对当前工作目录均可;扩展名建议保留为 .jpg 等) + #[arg(short = 'o', long)] + output: String, + /// 目标已存在时覆盖 + #[arg(long)] + overwrite: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 管理 wx-daemon Daemon { #[command(subcommand)] @@ -329,6 +369,12 @@ fn dispatch(cli: Cli) -> Result<()> { Commands::BizArticles { 
limit, account, since, until, unread, json } => { biz_articles::cmd_biz_articles(limit, account, since, until, unread, json) } + Commands::Attachments { chat, kinds, limit, offset, since, until, json } => { + attachments::cmd_attachments(chat, kinds, limit, offset, since, until, json) + } + Commands::Extract { attachment_id, output, overwrite, json } => { + extract::cmd_extract(attachment_id, output, overwrite, json) + } Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index 9801396..56e307c 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -54,6 +54,12 @@ impl DbCache { Ok(cache) } + /// 数据库根目录(即 `/db_storage`)。 + /// 上层(attachment resolver)需要 `db_dir.parent()` 来定位 `msg/attach/...` 解密图片。 + pub fn db_dir(&self) -> &Path { + &self.db_dir + } + fn cache_file_path(&self, rel_key: &str) -> PathBuf { let hash = format!("{:x}", md5::compute(rel_key.as_bytes())); self.cache_dir.join(format!("{}.db", hash)) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 167d88a..5a5d1b9 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3285,6 +3285,318 @@ pub async fn q_biz_articles( Ok(json!({ "count": results.len(), "articles": results })) } +// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ───────────────────────────────── +// +// 设计要点: +// - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 +// 出附件消息行,再编出 `attachment_id`。**不**去翻 `message_resource.db`,因为列出动作 +// 要可枚举几千条;resource lookup 留到 `q_extract` 才做。 +// - `q_extract` 走完整链:`AttachmentId` → `message_resource.db` 查 md5 → +// `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 decoder → 写盘。 +// - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 +// 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 + +/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`, +/// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 +pub async fn q_attachments( + db: &DbCache, + names: &Names, + chat: &str, + kinds: Option>, + limit: usize, + offset: usize, + 
since: Option, + until: Option, +) -> Result { + use crate::attachment::{AttachmentId, AttachmentKind}; + + let username = resolve_username(chat, names) + .with_context(|| format!("找不到联系人: {}", chat))?; + let display = names.display(&username); + let chat_type = chat_type_of(&username, names); + let is_group = chat_type == "group"; + + // 解析 kinds → 低 32 bit local_type 集合 + let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?; + if kind_filters.is_empty() { + anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice"); + } + let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); + // local_type → AttachmentKind 反查(mask 完后定 kind) + let type_to_kind: HashMap = kind_filters.iter() + .map(|(k, t)| (*t, *k)) + .collect(); + + let tables = find_msg_tables(db, names, &username).await?; + if tables.is_empty() { + anyhow::bail!("找不到 {} 的消息记录", display); + } + + // 群聊需要 sender 显示名 + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; + + let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new(); + // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx) + for (db_idx, (db_path, table_name)) in tables.iter().enumerate() { + let path = db_path.clone(); + let tname = table_name.clone(); + let uname = username.clone(); + let is_group2 = is_group; + let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); + let lo32_types2 = lo32_types.clone(); + let since2 = since; + let until2 = until; + // per-DB 软上限避免巨群全量加载 + let per_db_cap = (offset + limit).max(limit) * 2; + let db_idx2 = db_idx as i64; + + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path)?; + let id2u = load_id2u(&conn); + + // local_type 在 DB 里可能带高位 flag,过滤要 mask 低 32 bit + let placeholders = lo32_types2.iter().map(|_| 
"?").collect::>().join(","); + let mut clauses: Vec = vec![ + format!("(local_type & 4294967295) IN ({})", placeholders), + ]; + let mut params: Vec> = lo32_types2.iter() + .map(|t| Box::new(*t) as Box) + .collect(); + if let Some(s) = since2 { + clauses.push("create_time >= ?".into()); + params.push(Box::new(s)); + } + if let Some(u) = until2 { + clauses.push("create_time <= ?".into()); + params.push(Box::new(u)); + } + let where_clause = format!("WHERE {}", clauses.join(" AND ")); + + let sql = format!( + "SELECT local_id, local_type, create_time, real_sender_id, + message_content, WCDB_CT_message_content + FROM [{}] {} ORDER BY create_time DESC LIMIT ?", + tname, where_clause + ); + params.push(Box::new(per_db_cap as i64)); + + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); + let mut stmt = conn.prepare(&sql)?; + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt + .query_map(params_ref.as_slice(), |row| { + let local_id: i64 = row.get(0)?; + let raw_type: i64 = row.get(1)?; + let lo32 = (raw_type as u64 & 0xFFFFFFFF) as i64; + let ts: i64 = row.get(2)?; + let real_sender_id: i64 = row.get(3)?; + let content_bytes = get_content_bytes(row, 4); + let ct: i64 = row.get::<_, i64>(5).unwrap_or(0); + let content = decompress_message(&content_bytes, ct); + let sender = if is_group2 { + sender_label(real_sender_id, &content, true, &uname, + &id2u, &names_map, &group_nicknames2) + } else { + String::new() + }; + Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2)) + })? 
+ .filter_map(|r| r.ok()) + .collect(); + Ok::<_, anyhow::Error>(rows) + }) + .await??; + all_rows.extend(rows); + } + + // 全局按 ts DESC 排序后分页 + all_rows.sort_by_key(|r| std::cmp::Reverse(r.5)); + let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect(); + + // 翻成 JSON + let mut results: Vec = Vec::with_capacity(paged.len()); + for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged { + let kind = type_to_kind.get(&lo32).copied() + .unwrap_or(AttachmentKind::Image); // 理论不会 fallthrough + let id = AttachmentId { + v: 1, + chat: username.clone(), + local_id, + create_time: ts, + kind, + db: None, + }; + let id_str = id.encode()?; + + let mut row = json!({ + "attachment_id": id_str, + "kind": kind.as_str(), + "type": fmt_type(lo32), + "local_id": local_id, + "timestamp": ts, + "time": fmt_time(ts, "%Y-%m-%d %H:%M"), + }); + if is_group && !sender.is_empty() { + row["sender"] = Value::String(sender); + } + results.push(row); + } + + Ok(json!({ + "chat": display, + "username": username, + "is_group": is_group, + "chat_type": chat_type, + "count": results.len(), + "attachments": results, + })) +} + +/// 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 解密 → 写盘。 +pub async fn q_extract( + db: &DbCache, + _names: &Names, + attachment_id: &str, + output: &str, + overwrite: bool, +) -> Result { + use crate::attachment::{ + attachment_id::AttachmentId, + decoder::{self, V2KeyMaterial}, + image_key, + resolver, + }; + + let id = AttachmentId::decode(attachment_id) + .context("解析 attachment_id 失败(不是合法 base64url(json)?)")?; + + let output_path = std::path::PathBuf::from(output); + if output_path.exists() && !overwrite { + anyhow::bail!( + "目标已存在:{}(加 --overwrite 覆盖)", + output_path.display() + ); + } + if let Some(parent) = output_path.parent() { + if !parent.as_os_str().is_empty() { + tokio::fs::create_dir_all(parent).await + .with_context(|| format!("创建输出目录失败:{}", parent.display()))?; + } + } + + // 1) 拿 message_resource.db + let 
resource_path = db.get("message/message_resource.db").await? + .context("无法解密 message_resource.db(请确认 all_keys.json 包含该 DB 的密钥)")?; + + // 2) 推 wxchat_base = db_dir.parent(),再拼 attach_root + let wxchat_base = db.db_dir().parent() + .ok_or_else(|| anyhow::anyhow!("db_dir 没有 parent,无法推断 xwechat_files 根目录"))? + .to_path_buf(); + let attach_root = resolver::attach_root_for(&wxchat_base); + + // 3) blocking pool 跑 resolver + 读盘 + 解码 + let id_for_task = id.clone(); + let resource_path2 = resource_path.clone(); + let attach_root2 = attach_root.clone(); + let wxchat_base2 = wxchat_base.clone(); + let output_path2 = output_path.clone(); + + let report: Value = tokio::task::spawn_blocking(move || -> Result { + let resolved = resolver::resolve_blocking(&id_for_task, &resource_path2, &attach_root2)?; + + let dat_bytes = std::fs::read(&resolved.dat_path) + .with_context(|| format!("读取 .dat 失败:{}", resolved.dat_path.display()))?; + + // V2 image key — 平台相关。`ImageKeyMaterial` 同时给 aes_key + xor_key。 + // xor_key 不能硬编码 0x88:实测 macOS 真实账号上是 `uin & 0xff` 派生的(0xa2 等), + // 所以这里桥接时必须把 provider 的 xor_key 透传给 V2KeyMaterial。 + // 缺 key 时让 decoder 自己抛带诊断的错。 + let provider = image_key::default_provider(); + let key_material = if let Some(p) = provider.as_ref() { + // 从 wxchat_base 末段拿 wxid + let wxid = wxchat_base2.file_name() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_string(); + if wxid.is_empty() { + None + } else { + match p.get_key(&wxid) { + Ok(km) => Some(km), + Err(e) => { + eprintln!("[extract] image key 提取失败 (wxid={}): {} — V2 文件将无法解码", wxid, e); + None + } + } + } + } else { + None + }; + let v2_key = match key_material.as_ref() { + Some(km) => V2KeyMaterial { aes_key: Some(&km.aes_key), xor_key: km.xor_key }, + None => V2KeyMaterial::default(), + }; + + let decoded = decoder::dispatch(&dat_bytes, v2_key)?; + + // 写盘 + std::fs::write(&output_path2, &decoded.data) + .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + + Ok(json!({ + "ok": true, + 
"attachment_id": attachment_id_str(&id_for_task)?, + "kind": id_for_task.kind.as_str(), + "md5": resolved.md5, + "dat_path": resolved.dat_path.display().to_string(), + "dat_size": resolved.size, + "output": output_path2.display().to_string(), + "output_size": decoded.data.len(), + "format": decoded.format, + "decoder": decoded.decoder, + })) + }).await??; + + Ok(report) +} + +/// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。 +/// 缺省(None / 空)按 image 处理。 +fn parse_attachment_kinds( + kinds: Option<&[String]>, +) -> Result> { + use crate::attachment::AttachmentKind; + let raw = kinds.unwrap_or(&[]); + if raw.is_empty() { + return Ok(vec![(AttachmentKind::Image, 3)]); + } + let mut out: Vec<(AttachmentKind, i64)> = Vec::with_capacity(raw.len()); + let mut seen = HashSet::<&'static str>::new(); + for k in raw { + let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { + "image" | "img" => (AttachmentKind::Image, 3), + "voice" | "audio" => (AttachmentKind::Voice, 34), + "video" => (AttachmentKind::Video, 43), + "file" => (AttachmentKind::File, 49), + other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other), + }; + if seen.insert(kind.as_str()) { + out.push((kind, t)); + } + } + Ok(out) +} + +fn attachment_id_str(id: &crate::attachment::AttachmentId) -> Result { + id.encode() +} + #[cfg(test)] mod biz_tests { use super::*; diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 3b06727..9f54076 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -240,5 +240,17 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } + Attachments { chat, kinds, limit, offset, since, until } => { + match query::q_attachments(db, &names_arc, &chat, kinds, limit, offset, since, until).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } + Extract { attachment_id, output, overwrite } => { + match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await { + Ok(v) => 
Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } } } diff --git a/src/ipc.rs b/src/ipc.rs index c478ee4..78d6278 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -131,6 +131,32 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, + /// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音) + /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 + Attachments { + chat: String, + /// 类型过滤:image / video / file / voice,多选;缺省返回 image + #[serde(default, skip_serializing_if = "Option::is_none")] + kinds: Option>, + #[serde(default = "default_limit_50")] + limit: usize, + #[serde(default)] + offset: usize, + #[serde(skip_serializing_if = "Option::is_none")] + since: Option, + #[serde(skip_serializing_if = "Option::is_none")] + until: Option, + }, + /// 提取(解密)单个附件的本体到指定路径 + Extract { + /// `Attachments` 返回的不透明 ID + attachment_id: String, + /// 写入的绝对路径(daemon 直接写盘,不经 socket 传 binary) + output: String, + /// 已存在时是否覆盖 + #[serde(default)] + overwrite: bool, + }, } From 7feacc63714fd4f446c1548147dd813a57f1f9e6 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:48:46 +0800 Subject: [PATCH 16/20] fix(daemon): drop redundant `ok` from extract payload (collides with Response.ok) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Response 用 #[serde(flatten)] 把 q_* 返回的 Value 拼到 `{ok, error, ...data}` 里,q_extract 里再塞一个 `"ok": true` 就会在 wire 上写出两个同名 key,CLI 端 `serde_json::from_str::` 直接报「duplicate field `ok`」,对外 表现是「extract 失败 / 解析 daemon 响应失败」,但 daemon 实际已经把图解出来 了。其他 q_* 都没塞 ok(biz_articles / sessions / history 等),保持一致。 --- src/daemon/query.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 5a5d1b9..6bd46b2 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3549,9 +3549,11 @@ pub async fn q_extract( std::fs::write(&output_path2, &decoded.data) .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + // 
注意:不要在这里塞 `ok: true`。dispatch 会用 Response::ok(v) 包一层, + // Response 的 `data: Value` 字段是 #[serde(flatten)] 写出的,本 payload + // 的 `ok` 会和 Response 自带的 `ok` 在线上拼成两个同名 key,CLI 反序列化时 + // serde_json 直接报 "duplicate field",业务请求看上去像 daemon 解析失败。 Ok(json!({ - "ok": true, - "attachment_id": attachment_id_str(&id_for_task)?, "kind": id_for_task.kind.as_str(), "md5": resolved.md5, "dat_path": resolved.dat_path.display().to_string(), @@ -3593,10 +3595,6 @@ fn parse_attachment_kinds( Ok(out) } -fn attachment_id_str(id: &crate::attachment::AttachmentId) -> Result { - id.encode() -} - #[cfg(test)] mod biz_tests { use super::*; From b63589b368fc2d9c60018983231064fed026c8b1 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 19:10:03 +0800 Subject: [PATCH 17/20] review: tighten attachment extraction scope --- README.md | 8 ++-- SKILL.md | 10 ++-- src/attachment/resolver.rs | 94 ++++++++++++++++++++++++++++++++++++-- src/cli/mod.rs | 6 +-- src/daemon/query.rs | 16 +++---- src/ipc.rs | 4 +- 6 files changed, 112 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 35589cd..29c8736 100644 --- a/README.md +++ b/README.md @@ -211,14 +211,14 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) 聊天里的附件本体存在 `xwechat_files//msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 ```bash -# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选) +# 1) 列出会话里的图片附件,先拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) @@ -226,7 +226,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind` / `type` / 
`local_id` / `timestamp` / `time`,群聊里还有 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/SKILL.md b/SKILL.md index ddf02e1..6b79e0d 100644 --- a/SKILL.md +++ b/SKILL.md @@ -242,14 +242,14 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) -聊天里的图片/视频/文件本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: +聊天里的图片本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: ```bash -# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选) +# 1) 先列出图片附件,拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 用 attachment_id 把单个资源解密写到指定路径 @@ -257,7 +257,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/src/attachment/resolver.rs 
b/src/attachment/resolver.rs index c32d84e..8db4f41 100644 --- a/src/attachment/resolver.rs +++ b/src/attachment/resolver.rs @@ -48,6 +48,7 @@ pub fn lookup_md5_blocking( resource_db_path: &Path, chat: &str, local_id: i64, + create_time: i64, msg_local_type_lo32: i64, ) -> Result> { let conn = Connection::open_with_flags( @@ -68,9 +69,25 @@ pub fn lookup_md5_blocking( return Ok(None); }; - // 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新 + // 2) MessageResourceInfo: + // 同 chat 内 local_id 会复用,所以先用 create_time 精确命中; + // 若资源库里的时间戳跟 message_N.db 不完全对齐,再 fallback 到“同 local_id/type 取最新” // message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型 - let packed: Option> = conn + let packed_exact: Option> = conn + .query_row( + "SELECT packed_info FROM MessageResourceInfo + WHERE chat_id = ?1 + AND message_local_id = ?2 + AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3) + AND message_create_time = ?4 + ORDER BY rowid DESC + LIMIT 1", + rusqlite::params![chat_id, local_id, msg_local_type_lo32, create_time], + |row| row.get(0), + ) + .ok(); + + let packed: Option> = packed_exact.or_else(|| conn .query_row( "SELECT packed_info FROM MessageResourceInfo WHERE chat_id = ?1 @@ -81,7 +98,7 @@ pub fn lookup_md5_blocking( rusqlite::params![chat_id, local_id, msg_local_type_lo32], |row| row.get(0), ) - .ok(); + .ok()); let Some(blob) = packed else { return Ok(None); @@ -235,7 +252,13 @@ pub fn resolve_blocking( super::AttachmentKind::File => 49, }; - let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)? + let meta = lookup_md5_blocking( + resource_db_path, + &id.chat, + id.local_id, + id.create_time, + lo32_type, + )? 
.ok_or_else(|| { anyhow!( "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", @@ -306,6 +329,69 @@ mod tests { assert!(extract_md5_from_packed_info(&blob).is_none()); } + #[test] + fn lookup_md5_prefers_exact_create_time_over_latest_reuse() { + let dir = tempdir_for_test(); + let db_path = dir.join("message_resource.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE ChatName2Id (user_name TEXT)", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE MessageResourceInfo ( + chat_id INTEGER, + message_local_id INTEGER, + message_local_type INTEGER, + message_create_time INTEGER, + packed_info BLOB + )", + [], + ) + .unwrap(); + + let old_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"11111111111111111111111111111111"); + blob + }; + let new_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"22222222222222222222222222222222"); + blob + }; + + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 1000i64, old_blob], + ) + .unwrap(); + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 2000i64, new_blob], + ) + .unwrap(); + + let old = lookup_md5_blocking(&db_path, "room@chatroom", 7, 1000, 3) + .unwrap() + .unwrap(); + let new = lookup_md5_blocking(&db_path, "room@chatroom", 7, 2000, 3) + .unwrap() + .unwrap(); + assert_eq!(old.md5, "11111111111111111111111111111111"); + assert_eq!(new.md5, "22222222222222222222222222222222"); + } + #[test] fn three_month_candidates_includes_prev_curr_next() { // 2025-08-15 (mid-month) → 2025-07, 
2025-08, 2025-09 diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 5fe4e8c..2ec2476 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -264,13 +264,13 @@ enum Commands { #[arg(long)] json: bool, }, - /// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id + /// 列出某会话的图片附件,返回不透明 attachment_id Attachments { /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) chat: String, - /// 类型(多选,默认 image)。可选:image / voice / video / file + /// 类型(当前仅支持 image) #[arg(long = "kind", value_name = "KIND", - value_parser = ["image", "voice", "video", "file", "audio", "img"])] + value_parser = ["image", "img"])] kinds: Vec, /// 显示数量 #[arg(short = 'n', long, default_value = "50")] diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 6bd46b2..634ff2d 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3285,7 +3285,7 @@ pub async fn q_biz_articles( Ok(json!({ "count": results.len(), "articles": results })) } -// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ───────────────────────────────── +// ─── 附件(当前先支持图片)查询与提取 ───────────────────────────────── // // 设计要点: // - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 @@ -3296,7 +3296,7 @@ pub async fn q_biz_articles( // - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 // 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 -/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`, +/// 列出某会话内的附件消息(当前仅 image)。返回每条的 `attachment_id`, /// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 pub async fn q_attachments( db: &DbCache, @@ -3319,7 +3319,7 @@ pub async fn q_attachments( // 解析 kinds → 低 32 bit local_type 集合 let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?; if kind_filters.is_empty() { - anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice"); + anyhow::bail!("kinds 为空 — 当前至少传一种 image"); } let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); // local_type → AttachmentKind 反查(mask 完后定 kind) @@ -3569,7 +3569,7 @@ pub async fn q_extract( } /// 解析 `kinds` 参数到 
`(AttachmentKind, lo32_local_type)` 列表。 -/// 缺省(None / 空)按 image 处理。 +/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 fn parse_attachment_kinds( kinds: Option<&[String]>, ) -> Result> { @@ -3583,10 +3583,10 @@ fn parse_attachment_kinds( for k in raw { let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { "image" | "img" => (AttachmentKind::Image, 3), - "voice" | "audio" => (AttachmentKind::Voice, 34), - "video" => (AttachmentKind::Video, 43), - "file" => (AttachmentKind::File, 49), - other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other), + "voice" | "audio" | "video" | "file" => { + anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通") + } + other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), }; if seen.insert(kind.as_str()) { out.push((kind, t)); diff --git a/src/ipc.rs b/src/ipc.rs index 78d6278..a4615eb 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -131,11 +131,11 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, - /// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音) + /// 列出某个会话里的图片附件 /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 Attachments { chat: String, - /// 类型过滤:image / video / file / voice,多选;缺省返回 image + /// 类型过滤:当前仅支持 image #[serde(default, skip_serializing_if = "Option::is_none")] kinds: Option>, #[serde(default = "default_limit_50")] From b032b8be04411de15cdb12796c1a83696dcf696a Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 19:24:02 +0800 Subject: [PATCH 18/20] fix(cache): apply WAL incrementally instead of full re-decrypting on WAL mtime change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DbCache 之前只要 .db 或 .db-wal 任一 mtime 变就 full_decrypt。WeChat 在写消息 时会持续 append WAL(无 checkpoint 时),导致每次 attachments/extract 请求都 重新解密 1.8GB 的 message_0.db(实测 ~120s/次)。 改成三种 hit 路径: 1. db_mt + wal_mt 都不变 → 直接返回 cached path 2. 
db_mt 不变、wal_mt 变了 → 在 cached 产物上**再 apply 一次 WAL** (apply_wal 是幂等的:旧帧 redo 同样的 page 写入,新帧追加生效) 3. db_mt 变了 → 全量解密 + apply WAL(旧路径) 效果:典型 WAL(< 10MB)从 ~120s 压到 < 1s;100MB 大 WAL 也只在 ~7s。 SQLite 不会自发"主库不变 + WAL 清空",所以 path 2 的边角不需要特殊处理。 测试覆盖三条路径: - exact_mtime_hit_skips_decrypt - wal_only_change_uses_incremental_path - db_mtime_change_triggers_full_decrypt 区分手段:cached file 大小是否被 full_decrypt 重写到 PAGE_SZ 倍数。 --- src/daemon/cache.rs | 262 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 238 insertions(+), 24 deletions(-) diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index 56e307c..3780c25 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -30,6 +30,7 @@ struct CacheEntry { pub struct DbCache { db_dir: PathBuf, cache_dir: PathBuf, + mtime_file: PathBuf, all_keys: HashMap, // rel_key -> enc_key(hex) inner: Arc>>, } @@ -39,15 +40,24 @@ impl DbCache { db_dir: PathBuf, all_keys: HashMap, ) -> Result { - let cache_dir = config::cache_dir(); + Self::with_dirs(db_dir, config::cache_dir(), config::mtime_file(), all_keys).await + } + + /// 注入 `cache_dir` / `mtime_file`(测试用 + 生产 `new()` 复用) + pub(crate) async fn with_dirs( + db_dir: PathBuf, + cache_dir: PathBuf, + mtime_file: PathBuf, + all_keys: HashMap, + ) -> Result { tokio::fs::create_dir_all(&cache_dir).await?; - let inner: HashMap = HashMap::new(); let cache = DbCache { db_dir, cache_dir, + mtime_file, all_keys, - inner: Arc::new(Mutex::new(inner)), + inner: Arc::new(Mutex::new(HashMap::new())), }; cache.load_persistent().await; @@ -67,7 +77,7 @@ impl DbCache { /// 从持久化文件加载 mtime 记录,复用未过期的解密文件 async fn load_persistent(&self) { - let mtime_file = config::mtime_file(); + let mtime_file = &self.mtime_file; let content = match tokio::fs::read_to_string(&mtime_file).await { Ok(c) => c, Err(_) => return, @@ -106,7 +116,7 @@ impl DbCache { /// 持久化 mtime 记录 async fn save_persistent(&self) { - let mtime_file = config::mtime_file(); + let mtime_file = &self.mtime_file; let inner = 
self.inner.lock().await; let data: HashMap = inner.iter().map(|(k, v)| { (k.clone(), MtimeEntry { @@ -124,7 +134,14 @@ impl DbCache { /// 获取解密后的数据库路径 /// - /// 如果 mtime 未变,直接返回缓存路径;否则重新解密 + /// 三种命中路径: + /// 1. 主 `.db` 和 WAL mtime 都未变 → 直接返回缓存路径 + /// 2. 主 `.db` 未变、WAL mtime 变了 → 在已有 cached 产物上**增量** `apply_wal` + /// (apply_wal 是幂等的:旧帧 redo 同样的 page 写入,新帧追加生效;不重新 full_decrypt) + /// 3. 主 `.db` mtime 变了 → 重新 `full_decrypt` + `apply_wal` + /// + /// WeChat 在写消息时只 append WAL(除非触发 checkpoint),因此 path 2 是常态; + /// 这条路径把"每次请求都全量解密 ~1.8GB DB(~120s)"压到"只解 WAL 帧(典型 < 10s)"。 pub async fn get(&self, rel_key: &str) -> Result> { let enc_key_hex = match self.all_keys.get(rel_key) { Some(k) => k.clone(), @@ -140,28 +157,53 @@ impl DbCache { } let wal_path = wal_path_for(&db_path); - let db_mt = mtime_nanos(&db_path); let wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; - // 检查缓存 - { + let cached = { let inner = self.inner.lock().await; - if let Some(entry) = inner.get(rel_key) { - if entry.db_mtime == db_mt - && entry.wal_mtime == wal_mt - && entry.decrypted_path.exists() - { - return Ok(Some(entry.decrypted_path.clone())); - } - } - } + inner.get(rel_key).cloned() + }; - // 需要重新解密 - let out_path = self.cache_file_path(rel_key); let enc_key_bytes = hex_to_32bytes(&enc_key_hex) .with_context(|| format!("密钥格式错误: {}", rel_key))?; + // Path 1 / Path 2:主 .db mtime 未变且 cached 产物仍在 + if let Some(entry) = cached.as_ref() { + if entry.db_mtime == db_mt && entry.decrypted_path.exists() { + if entry.wal_mtime == wal_mt { + return Ok(Some(entry.decrypted_path.clone())); + } + + // Path 2: WAL-only 变化 → 在 cached 产物上重新 apply_wal + // 不存在的 WAL 也要更新 wal_mtime=0(虽然 SQLite 不会自发"主库不变 + WAL 清空") + let out_path = entry.decrypted_path.clone(); + let t0 = std::time::Instant::now(); + if wal_path.exists() { + let out_path2 = out_path.clone(); + let wal_path2 = wal_path.clone(); + let key_copy = enc_key_bytes; + tokio::task::spawn_blocking(move || { + wal::apply_wal(&wal_path2, 
&out_path2, &key_copy) + }).await??; + } + eprintln!("[cache] WAL 增量 {} ({}ms)", rel_key, t0.elapsed().as_millis()); + + { + let mut inner = self.inner.lock().await; + inner.insert(rel_key.to_string(), CacheEntry { + db_mtime: db_mt, + wal_mtime: wal_mt, + decrypted_path: out_path.clone(), + }); + } + self.save_persistent().await; + return Ok(Some(out_path)); + } + } + + // Path 3: 主 .db 变了 / 缓存 miss → 全量解密 + let out_path = self.cache_file_path(rel_key); let t0 = std::time::Instant::now(); let db_path2 = db_path.clone(); let out_path2 = out_path.clone(); @@ -170,7 +212,6 @@ impl DbCache { crypto::full_decrypt(&db_path2, &out_path2, &key_copy) }).await??; - // 应用 WAL if wal_path.exists() { let out_path3 = out_path.clone(); let wal_path3 = wal_path.clone(); @@ -180,10 +221,8 @@ impl DbCache { }).await??; } - let elapsed_ms = t0.elapsed().as_millis(); - eprintln!("[cache] 解密 {} ({}ms)", rel_key, elapsed_ms); + eprintln!("[cache] 全量解密 {} ({}ms)", rel_key, t0.elapsed().as_millis()); - // 更新内存缓存 { let mut inner = self.inner.lock().await; inner.insert(rel_key.to_string(), CacheEntry { @@ -223,3 +262,178 @@ fn hex_to_32bytes(s: &str) -> Result<[u8; 32]> { } Ok(out) } + +#[cfg(test)] +mod tests { + use super::*; + + /// 64 字符 hex(不需要是真 SQLCipher key — 仅用来证明"是否触发了 full_decrypt") + const FAKE_KEY_HEX: &str = + "0000000000000000000000000000000000000000000000000000000000000000"; + + /// 路径区分约定: + /// - 完全 hit / WAL 增量 → `decrypted_path` **内容不变** + /// - 全量解密 → `crypto::full_decrypt` 把 cached file **重写为 PAGE_SZ 倍数** + /// (fake key 解出 4096 字节垃圾,但仍写入 — 不验证内容合法性) + /// 因此用 cached file 的"size 是否被改"来判断走了哪条路径。 + const ORIGINAL_CACHED_BYTES: &[u8] = b"original cached contents"; + + fn unique_tmpdir(tag: &str) -> PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-cache-test-{}-{}-{}", tag, pid, nanos)); + 
std::fs::create_dir_all(&p).unwrap(); + p + } + + /// 准备一份 "DbCache 已经 reuse 了 cached 解密产物" 的初始状态。 + /// 返回 (cache, db_path, decrypted_path, mtime_file, rel_key)。 + async fn setup_seeded_cache(tag: &str) -> (DbCache, PathBuf, PathBuf, PathBuf, String) { + let root = unique_tmpdir(tag); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": 0u64, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file.clone(), all_keys) + .await + .unwrap(); + + (cache, db_path, decrypted_path, mtime_file, rel_key) + } + + #[tokio::test] + async fn exact_mtime_hit_skips_decrypt() { + let (cache, _db_path, decrypted_path, _mtime_file, rel_key) = + setup_seeded_cache("exact").await; + + let p = cache.get(&rel_key).await.unwrap().expect("cache should hit"); + assert_eq!(p, decrypted_path); + + // 完全 hit → cached file 内容不应被改 + let body = std::fs::read(&decrypted_path).unwrap(); + assert_eq!(body, ORIGINAL_CACHED_BYTES); + } + + #[tokio::test] + async fn wal_only_change_uses_incremental_path() { + // 自己构造(不走 setup_seeded_cache)以便初始 mtime.json 同时写 db_mt 和 wal_mt + let root = unique_tmpdir("walonly"); + let db_dir = 
root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let wal_path = wal_path_for(&db_path); + std::fs::write(&wal_path, [0u8; 31]).unwrap(); // ≤ WAL_HDR_SZ=32 → apply_wal noop + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let wal_mt0 = mtime_nanos(&wal_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": wal_mt0, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file, all_keys) + .await + .unwrap(); + + // 第一次:完全 hit + let p1 = cache.get(&rel_key).await.unwrap().expect("first get hits"); + assert_eq!(p1, decrypted_path); + assert_eq!(std::fs::read(&decrypted_path).unwrap(), ORIGINAL_CACHED_BYTES); + + // bump WAL mtime(重写仍 31 bytes,apply_wal 仍 noop) + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&wal_path, [0xffu8; 31]).unwrap(); + let wal_mt1 = mtime_nanos(&wal_path); + assert_ne!(wal_mt0, wal_mt1, "rewriting WAL should bump mtime"); + + // 第二次:WAL 增量路径 + // 如果错误地走 full_decrypt → cached file 大小会被重写为 ≥ PAGE_SZ + let p2 = cache + .get(&rel_key) + .await + .unwrap() + .expect("WAL-incremental path should produce path"); + assert_eq!(p2, decrypted_path); + + let body = std::fs::read(&decrypted_path).unwrap(); + assert_eq!( + body, ORIGINAL_CACHED_BYTES, + 
"WAL-incremental should NOT rewrite cached file" + ); + } + + #[tokio::test] + async fn db_mtime_change_triggers_full_decrypt() { + let (cache, db_path, decrypted_path, _mtime_file, rel_key) = + setup_seeded_cache("dbchange").await; + + // bump 主 .db 的 mtime(重写一份不同 bytes) + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&db_path, b"different fake encrypted bytes").unwrap(); + assert_ne!( + mtime_nanos(&db_path), + cache.inner.lock().await.get(&rel_key).unwrap().db_mtime, + "rewriting db file should bump mtime" + ); + + // 走 full_decrypt 路径 → fake key 不会让 full_decrypt 失败(它不验证内容), + // 但会把 cached file 重写为 PAGE_SZ 倍数。原始内容是 24 bytes,重写后应该 ≥ 4096 bytes。 + let p = cache + .get(&rel_key) + .await + .unwrap() + .expect("cache should produce path"); + assert_eq!(p, decrypted_path); + + let new_size = std::fs::metadata(&decrypted_path).unwrap().len() as usize; + assert!( + new_size >= crate::crypto::PAGE_SZ, + "expected full_decrypt to rewrite cached file to PAGE_SZ multiple, got size={}", + new_size, + ); + } +} From e9f65ba71bf5ccb34034c16f264c459cfb98df25 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 19:35:36 +0800 Subject: [PATCH 19/20] review: preserve wal incremental reuse across restart --- src/daemon/cache.rs | 64 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index 3780c25..d6e5892 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -98,12 +98,17 @@ impl DbCache { let wal_path = wal_path_for(&db_path); let db_mt = mtime_nanos(&db_path); - let wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; + let _wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; - if db_mt == entry.db_mt && wal_mt == entry.wal_mt { + // 只要主 .db 没变,就把 cached 产物载回来。 + // 如果 WAL mtime 变了,后续 `get()` 会自动走 Path 2:在已有 cached DB 上增量 apply_wal, + // 而不是 daemon 重启后第一条请求又退回全量解密。 + if db_mt == entry.db_mt { 
inner.insert(rel_key.clone(), CacheEntry { db_mtime: db_mt, - wal_mtime: wal_mt, + // 保留"cached 产物构建时看到的 wal_mtime",让 `get()` 去比较当前 WAL + // 是否发生了变化,从而决定 exact-hit 还是 WAL 增量。 + wal_mtime: entry.wal_mt, decrypted_path: dec_path, }); reused += 1; @@ -436,4 +441,57 @@ mod tests { new_size, ); } + + #[tokio::test] + async fn restart_with_wal_change_still_reuses_cached_db_then_applies_wal() { + let root = unique_tmpdir("restart-wal"); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let wal_path = wal_path_for(&db_path); + std::fs::write(&wal_path, [0u8; 31]).unwrap(); // WAL 增量仍是 noop + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let wal_mt0 = mtime_nanos(&wal_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": wal_mt0, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + // 模拟 daemon 重启前又有新消息写入 WAL + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&wal_path, [0xffu8; 31]).unwrap(); + let wal_mt1 = mtime_nanos(&wal_path); + assert_ne!(wal_mt0, wal_mt1); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file, all_keys) + .await + .unwrap(); + + let p = cache.get(&rel_key).await.unwrap().expect("cache should reuse persisted DB"); + assert_eq!(p, decrypted_path); + let body = 
std::fs::read(&decrypted_path).unwrap(); + assert_eq!( + body, ORIGINAL_CACHED_BYTES, + "restart + WAL-only change should still reuse cached DB and avoid full_decrypt" + ); + } } From 52cc39a55c907db222b7ec6090d29c987463d668 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 21:38:05 +0800 Subject: [PATCH 20/20] chore(release): bump version to 0.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要新增: - `wx attachments` / `wx extract`:从本地 chat 数据解密提取 V2 图片附件(macOS / Windows) - `DbCache` WAL 增量复用:daemon 请求路径从每次 ~120s 全量解密压到 < 1s(典型 WAL) 完整 changelog 见 #57 / #58。 --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/platforms/darwin-arm64/package.json | 2 +- npm/platforms/darwin-x64/package.json | 2 +- npm/platforms/linux-arm64/package.json | 2 +- npm/platforms/linux-x64/package.json | 2 +- npm/platforms/win32-x64/package.json | 2 +- npm/wx-cli/package.json | 12 ++++++------ 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5cc78b..56821e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1313,7 +1313,7 @@ checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "wx-cli" -version = "0.1.11" +version = "0.2.0" dependencies = [ "aes", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index a32b845..527b5e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wx-cli" -version = "0.1.11" +version = "0.2.0" edition = "2021" description = "WeChat 4.x (macOS/Linux) local data CLI — decrypt SQLCipher DBs, query chat history, watch new messages" license = "Apache-2.0" diff --git a/npm/platforms/darwin-arm64/package.json b/npm/platforms/darwin-arm64/package.json index d0661cf..3566a92 100644 --- a/npm/platforms/darwin-arm64/package.json +++ b/npm/platforms/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-arm64", - "version": "0.1.11", + "version": "0.2.0", "description": "wx-cli binary for macOS arm64", 
"os": ["darwin"], "cpu": ["arm64"], diff --git a/npm/platforms/darwin-x64/package.json b/npm/platforms/darwin-x64/package.json index badd091..d8a9068 100644 --- a/npm/platforms/darwin-x64/package.json +++ b/npm/platforms/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-x64", - "version": "0.1.11", + "version": "0.2.0", "description": "wx-cli binary for macOS x64", "os": ["darwin"], "cpu": ["x64"], diff --git a/npm/platforms/linux-arm64/package.json b/npm/platforms/linux-arm64/package.json index 26f73c4..19655cb 100644 --- a/npm/platforms/linux-arm64/package.json +++ b/npm/platforms/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-arm64", - "version": "0.1.11", + "version": "0.2.0", "description": "wx-cli binary for Linux arm64", "os": ["linux"], "cpu": ["arm64"], diff --git a/npm/platforms/linux-x64/package.json b/npm/platforms/linux-x64/package.json index 67d1c05..862c610 100644 --- a/npm/platforms/linux-x64/package.json +++ b/npm/platforms/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-x64", - "version": "0.1.11", + "version": "0.2.0", "description": "wx-cli binary for Linux x64", "os": ["linux"], "cpu": ["x64"], diff --git a/npm/platforms/win32-x64/package.json b/npm/platforms/win32-x64/package.json index d9edf8f..1b6f9f8 100644 --- a/npm/platforms/win32-x64/package.json +++ b/npm/platforms/win32-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-win32-x64", - "version": "0.1.11", + "version": "0.2.0", "description": "wx-cli binary for Windows x64", "os": ["win32"], "cpu": ["x64"], diff --git a/npm/wx-cli/package.json b/npm/wx-cli/package.json index 5befb5a..dc76619 100644 --- a/npm/wx-cli/package.json +++ b/npm/wx-cli/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli", - "version": "0.1.11", + "version": "0.2.0", "description": "Query your local WeChat data from the command line. 
Designed for LLM agent tool calls.", "bin": { "wx": "bin/wx.js" @@ -13,11 +13,11 @@ "install.js" ], "optionalDependencies": { - "@jackwener/wx-cli-darwin-arm64": "0.1.11", - "@jackwener/wx-cli-darwin-x64": "0.1.11", - "@jackwener/wx-cli-linux-x64": "0.1.11", - "@jackwener/wx-cli-linux-arm64": "0.1.11", - "@jackwener/wx-cli-win32-x64": "0.1.11" + "@jackwener/wx-cli-darwin-arm64": "0.2.0", + "@jackwener/wx-cli-darwin-x64": "0.2.0", + "@jackwener/wx-cli-linux-x64": "0.2.0", + "@jackwener/wx-cli-linux-arm64": "0.2.0", + "@jackwener/wx-cli-win32-x64": "0.2.0" }, "engines": { "node": ">=14" }, "keywords": ["wechat", "cli", "wx", "llm", "ai", "sqlite", "sqlcipher"],