From d750ef6e9fb014e642abbf07e0f5de3bdcf36f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E4=BC=A0=E4=BD=B3?= Date: Thu, 14 May 2026 13:50:04 +0800 Subject: [PATCH 1/6] =?UTF-8?q?fix(cli,config):=20=E4=BF=AE=E5=A4=8D=20sud?= =?UTF-8?q?o=20=E4=B8=8B=E5=88=9D=E5=A7=8B=E5=8C=96=E5=A4=B1=E8=B4=A5=20+?= =?UTF-8?q?=20daemon=20=E4=B8=8D=E9=87=8D=E8=BD=BD=E9=97=AE=E9=A2=98=20(#3?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(cli,config): 修复 sudo 下初始化失败 + daemon 不重载问题 - cli/transport: 新增 stop_daemon(),init 后自动停止旧 daemon - config: cli_dir() 优先读 SUDO_USER 环境变量,避免写到 /root/.wx-cli - config: auto_detect_db_dir() 按 .db 文件最新 mtime 排序,正确选最新目录 - daemon/server: dispatch 新增 ReloadConfig 命令(预留) - ipc: Request 新增 ReloadConfig 变体 - scanner/linux: 移除调试日志,清理 unused bail import * fix(config): resolve sudo home via passwd lookup --------- Co-authored-by: cjliu Co-authored-by: jackwener --- src/cli/init.rs | 4 ++ src/cli/transport.rs | 25 ++++++++++ src/config.rs | 109 ++++++++++++++++++++++++++++++++++--------- src/daemon/server.rs | 3 ++ src/ipc.rs | 2 + src/scanner/linux.rs | 2 +- 6 files changed, 123 insertions(+), 22 deletions(-) diff --git a/src/cli/init.rs b/src/cli/init.rs index ece6af0..d7553b7 100644 --- a/src/cli/init.rs +++ b/src/cli/init.rs @@ -91,6 +91,10 @@ pub fn cmd_init(force: bool) -> Result<()> { std::fs::write(&config_path, serde_json::to_string_pretty(&cfg)?) .context("写入 config.json 失败")?; println!("配置已保存: {}", config_path.display()); + + // init 之后必须停掉旧 daemon(它用的是旧 config),下次调用会自动重启 + let _ = crate::cli::transport::stop_daemon(); + println!("初始化完成,可以使用 wx sessions / wx history 等命令了"); Ok(()) diff --git a/src/cli/transport.rs b/src/cli/transport.rs index ab62da5..73c2f88 100644 --- a/src/cli/transport.rs +++ b/src/cli/transport.rs @@ -62,6 +62,31 @@ pub fn ensure_daemon() -> Result<()> { Ok(()) } +/// 停止 daemon(如果正在运行) +pub fn stop_daemon() -> Result<()> { + let pid_path = config::pid_path(); + if let Ok(pid_str) = std::fs::read_to_string(&pid_path) { + if let Ok(pid) = pid_str.trim().parse::() { + #[cfg(unix)] + { + let _ = std::process::Command::new("kill") + .arg("-TERM") + .arg(pid.to_string()) + .spawn(); + } + #[cfg(windows)] + { + let _ = std::process::Command::new("taskkill") + .args(["/F", "/PID", &pid.to_string()]) + .spawn(); + } + } + } + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(&pid_path); + Ok(()) +} + /// 启动 daemon 前检查 `~/.wx-cli/` 可写,给出比"超时"更明确的错误。 /// /// 典型坑:旧版本 `sudo wx init` 把目录留成 root 属主,非 root 的 daemon diff --git a/src/config.rs b/src/config.rs index 55a03ca..a488ca0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -71,7 +71,8 @@ fn find_config_file() -> Result { return Ok(cwd); } // 3. ~/.wx-cli/config.json - if let Some(home) = dirs::home_dir() { + let home = cli_home_dir(); + if home != PathBuf::from("/tmp") { let p = home.join(".wx-cli").join("config.json"); if p.exists() { return Ok(p); @@ -87,9 +88,44 @@ fn find_config_file() -> Result { } pub fn cli_dir() -> PathBuf { - dirs::home_dir() - .unwrap_or_else(|| PathBuf::from("/tmp")) - .join(".wx-cli") + cli_home_dir().join(".wx-cli") +} + +fn cli_home_dir() -> PathBuf { + resolve_cli_home( + dirs::home_dir().unwrap_or_else(|| PathBuf::from("/tmp")), + sudo_user_home_dir(), + ) +} + +fn resolve_cli_home(default_home: PathBuf, sudo_home: Option) -> PathBuf { + sudo_home.unwrap_or(default_home) +} + +#[cfg(unix)] +fn sudo_user_home_dir() -> Option { + use std::ffi::{CStr, CString}; + + let sudo_user = std::env::var("SUDO_USER").ok()?; + let sudo_user = sudo_user.trim(); + if sudo_user.is_empty() { + return None; + } + + let c_user = CString::new(sudo_user).ok()?; + unsafe { + let pwd = libc::getpwnam(c_user.as_ptr()); + if pwd.is_null() || (*pwd).pw_dir.is_null() { + return None; + } + let dir = CStr::from_ptr((*pwd).pw_dir).to_str().ok()?; + Some(PathBuf::from(dir)) + } +} + +#[cfg(not(unix))] +fn sudo_user_home_dir() -> Option { + None } pub fn sock_path() -> PathBuf { @@ -154,17 +190,7 @@ pub fn auto_detect_db_dir() -> Option { #[cfg(target_os = "macos")] fn detect_db_dir_impl() -> Option { - let home = dirs::home_dir()?; - // 支持 sudo 环境 - let home = if let Ok(sudo_user) = std::env::var("SUDO_USER") { - if !sudo_user.is_empty() { - PathBuf::from("/Users").join(&sudo_user) - } else { - home - } - } else { - home - }; + let home = sudo_user_home_dir().or_else(dirs::home_dir)?; let base = home.join("Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files"); if !base.exists() { @@ -190,9 +216,7 @@ fn detect_db_dir_impl() -> Option { #[cfg(target_os = "linux")] fn detect_db_dir_impl() -> Option { let home = dirs::home_dir()?; - let sudo_home = std::env::var("SUDO_USER").ok() - .filter(|s| !s.is_empty()) - .map(|u| PathBuf::from("/home").join(u)); + let sudo_home = sudo_user_home_dir(); let mut candidates: Vec = Vec::new(); for base_home in [Some(home.clone()), sudo_home].into_iter().flatten() { @@ -213,13 +237,32 @@ fn detect_db_dir_impl() -> Option { } } candidates.sort_by_key(|p| { - std::fs::metadata(p) - .and_then(|m| m.modified()) - .unwrap_or(std::time::SystemTime::UNIX_EPOCH) + // 排序:取 db_storage 目录下所有 .db 文件的最新 mtime,而非目录自身的 mtime + // 这样当收到新消息时(只有 .db 文件被更新),能正确识别最新目录 + latest_db_mtime(p).unwrap_or(std::time::SystemTime::UNIX_EPOCH) }); candidates.into_iter().next_back() } +/// 递归查找 db_storage 目录下所有 .db 文件的最新 mtime +fn latest_db_mtime(dir: &Path) -> Option { + let mut latest = None; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + let mtime = if path.is_dir() { + latest_db_mtime(&path).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else if path.extension().and_then(|s| s.to_str()) == Some("db") { + entry.metadata().and_then(|m| m.modified()).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else { + continue; + }; + latest = Some(latest.map_or(mtime, |cur| if mtime > cur { mtime } else { cur })); + } + } + latest +} + #[cfg(target_os = "windows")] fn detect_db_dir_impl() -> Option { let appdata = std::env::var("APPDATA").ok()?; @@ -257,3 +300,27 @@ fn detect_db_dir_impl() -> Option { fn detect_db_dir_impl() -> Option { None } + +#[cfg(test)] +mod tests { + use super::resolve_cli_home; + use std::path::PathBuf; + + #[test] + fn resolve_cli_home_prefers_sudo_home_when_present() { + let home = resolve_cli_home( + PathBuf::from("/root"), + Some(PathBuf::from("/Users/alice")), + ); + assert_eq!(home, PathBuf::from("/Users/alice")); + } + + #[test] + fn resolve_cli_home_falls_back_to_default_home() { + let home = resolve_cli_home( + PathBuf::from("/root"), + None, + ); + assert_eq!(home, PathBuf::from("/root")); + } +} diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 896a08e..4d7fd54 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -231,5 +231,8 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } + ReloadConfig => { + Response::ok(serde_json::json!({ "reloading": true })) + } } } diff --git a/src/ipc.rs b/src/ipc.rs index 873e2d4..32e0a8f 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -114,6 +114,8 @@ pub enum Request { #[serde(skip_serializing_if = "Option::is_none")] user: Option, }, + /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) + ReloadConfig, } diff --git a/src/scanner/linux.rs b/src/scanner/linux.rs index ba6f97b..d6f4ee9 100644 --- a/src/scanner/linux.rs +++ b/src/scanner/linux.rs @@ -3,7 +3,7 @@ /// 通过 /proc//maps 枚举内存区域, /// 通过 /proc//mem 读取内存内容, /// 搜索 x'<64hex><32hex>' 格式的 SQLCipher 密钥 -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use std::io::{Read, Seek, SeekFrom}; use std::path::Path; From 35a8f0e94b5fc350c692042dffefb25af91bcdec Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 14:22:55 +0800 Subject: [PATCH 2/6] =?UTF-8?q?feat(group):=20=E6=94=AF=E6=8C=81=E7=BE=A4?= =?UTF-8?q?=E6=98=B5=E7=A7=B0/=E7=BE=A4=E5=90=8D=E7=89=87=E5=B1=95?= =?UTF-8?q?=E7=A4=BA=20(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support group nicknames * fix(group): keep duplicate nickname senders separate in stats --------- Co-authored-by: jackwener --- README.md | 12 +- SKILL.md | 13 + src/daemon/query.rs | 589 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 583 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e0f06da..d084301 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Platform](https://img.shields.io/badge/platform-macOS%20%7C%20Linux%20%7C%20Windows-lightgrey.svg)](#安装) [![Rust](https://img.shields.io/badge/built%20with-Rust-orange.svg)](https://www.rust-lang.org) -会话 · 聊天记录 · 搜索 · 联系人 · 群成员 · 收藏 · 统计 · 导出 +会话 · 聊天记录 · 搜索 · 联系人 · 群成员 · 群昵称 · 收藏 · 统计 · 导出 @@ -156,6 +156,8 @@ wx search "会议" --in "工作群" --since 2026-01-01 会话/消息输出里都带 `chat_type` 字段,取值为 `private` / `group` / `official_account` / `folded`。`official_account` 涵盖公众号、订阅号、服务号及 `mphelper` / `qqsafe` 等系统通知;`folded` 对应微信里的"订阅号折叠"和"折叠群聊"两个聚合入口。 +群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 + ### 朋友圈(SNS) 三个独立命令,区分"通知"和"帖子": @@ -185,6 +187,14 @@ wx contacts --query "李" # 按名字搜索 wx members "AI交流群" # 群成员列表 ``` +`wx members --json` 返回的成员字段包括: + +- `username`:微信内部 username +- `display`:用于展示的名称,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;本地没有记录时为空字符串 +- `is_owner`:是否群主 + ### 收藏 & 统计 ```bash diff --git a/SKILL.md b/SKILL.md index 4ce28c3..386816f 100644 --- a/SKILL.md +++ b/SKILL.md @@ -11,6 +11,7 @@ description: "wx-cli — 从本地微信数据库查询聊天记录、联系人 - 微信消息历史 - 微信联系人 - 微信群成员 +- 微信群昵称 / 群名片 - 微信收藏 - wechat history / messages / contacts - wx-cli @@ -137,6 +138,8 @@ wx search "会议" --in "工作群" --since 2026-01-01 `wx unread --filter` 支持 `private` / `group` / `official` / `folded` / `all`,逗号分隔多选。默认 `all`。 +群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 + ### 联系人与群组 ```bash @@ -148,6 +151,16 @@ wx contacts --query "李" wx members "AI交流群" ``` +`wx members --json` 每个成员包含: + +- `username`:微信内部 username +- `display`:推荐展示名,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;没有记录时为空字符串 +- `is_owner`:是否群主 + +Agent 展示群成员时优先用 `display`。需要区分群昵称和联系人名时,再读取 `group_nickname` 与 `contact_display`。 + ### 朋友圈(SNS) 三个命令,作用各不同: diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 18cf28e..041ff0b 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -4,8 +4,8 @@ use regex::Regex; use roxmltree::{Document, Node}; use rusqlite::Connection; use serde_json::{json, Value}; -use std::collections::HashMap; -use std::sync::OnceLock; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, OnceLock}; use super::cache::DbCache; @@ -141,6 +141,7 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let display = names.display(&username); let chat_type = chat_type_of(&username, names); @@ -151,9 +152,13 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result = Vec::new(); + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let since2 = since; let until2 = until; let limit2 = limit; @@ -211,7 +222,7 @@ pub async fn q_history( let msgs: Vec = tokio::task::spawn_blocking(move || { // per-DB 软上限:offset + limit 已足够全局分页,避免大群全量加载 let per_db_cap = offset2 + limit2; - query_messages(&path, &tname, &uname, is_group2, &names_map, since2, until2, msg_type, per_db_cap, 0) + query_messages(&path, &tname, &uname, is_group2, &names_map, &group_nicknames2, since2, until2, msg_type, per_db_cap, 0) }).await??; all_msgs.extend(msgs); @@ -311,6 +322,19 @@ pub async fn q_search( by_path.entry(p).or_default().push((t, d, u)); } + let mut group_usernames = HashSet::new(); + for table_list in by_path.values() { + for (_, _, uname) in table_list { + if uname.contains("@chatroom") { + group_usernames.insert(uname.clone()); + } + } + } + let group_nicknames_by_chat = load_group_nickname_maps(db, group_usernames) + .await + .unwrap_or_default(); + let group_nicknames_by_chat = Arc::new(group_nicknames_by_chat); + let mut results: Vec = Vec::new(); let kw = keyword.to_string(); for (db_path, table_list) in by_path { @@ -320,13 +344,18 @@ pub async fn q_search( let limit2 = limit * 3; let names_map2 = names.map.clone(); + let group_nicknames_by_chat2 = Arc::clone(&group_nicknames_by_chat); let found: Vec = match tokio::task::spawn_blocking(move || { let conn = Connection::open(&db_path)?; let mut all = Vec::new(); + let empty_group_nicknames = HashMap::new(); for (tname, display, uname) in &table_list { let is_group = uname.contains("@chatroom"); + let group_nicknames = group_nicknames_by_chat2 + .get(uname) + .unwrap_or(&empty_group_nicknames); match search_in_table(&conn, tname, &uname, is_group, - &names_map2, &kw2, since2, until2, msg_type, limit2) + &names_map2, group_nicknames, &kw2, since2, until2, msg_type, limit2) { Ok(rows) => { for mut row in rows { @@ -461,6 +490,7 @@ fn query_messages( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, since: Option, until: Option, msg_type: Option, @@ -518,7 +548,7 @@ fn query_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); result.push(json!({ @@ -539,6 +569,7 @@ fn search_in_table( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, keyword: &str, since: Option, until: Option, @@ -589,7 +620,7 @@ fn search_in_table( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); result.push(json!({ @@ -618,6 +649,368 @@ fn load_id2u(conn: &Connection) -> HashMap { map } +async fn load_group_nicknames( + db: &DbCache, + chat_username: &str, +) -> Result> { + if !chat_username.contains("@chatroom") { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? else { + return Ok(HashMap::new()); + }; + let chat = chat_username.to_string(); + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + Ok::<_, anyhow::Error>(load_group_nickname_map_from_conn(&conn, &chat, None)) + }).await? +} + +async fn load_group_nickname_maps( + db: &DbCache, + chat_usernames: HashSet, +) -> Result>> { + if chat_usernames.is_empty() { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? else { + return Ok(HashMap::new()); + }; + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + let mut out = HashMap::new(); + for chat in chat_usernames { + let nicknames = load_group_nickname_map_from_conn(&conn, &chat, None); + if !nicknames.is_empty() { + out.insert(chat, nicknames); + } + } + Ok::<_, anyhow::Error>(out) + }).await? +} + +fn load_group_nickname_map_from_conn( + conn: &Connection, + chat_username: &str, + targets: Option<&HashSet>, +) -> HashMap { + if !chat_username.contains("@chatroom") { + return HashMap::new(); + } + let ext = load_group_ext_buffer(conn, chat_username); + + let owned_targets = if targets.is_none() { + load_group_member_username_set(conn, chat_username) + } else { + None + }; + let targets = targets.or(owned_targets.as_ref()); + + ext.as_deref() + .map(|buf| parse_group_nickname_map(buf, targets)) + .unwrap_or_default() +} + +fn load_group_ext_buffer( + conn: &Connection, + chat_username: &str, +) -> Option> { + [ + "SELECT ext_buffer FROM chat_room WHERE username = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE chat_room_name = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE name = ? LIMIT 1", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, Option>>(0)) + .ok() + .flatten() + }) +} + +fn load_group_member_username_set( + conn: &Connection, + chat_username: &str, +) -> Option> { + let room_id: i64 = [ + "SELECT id FROM chat_room WHERE username = ?", + "SELECT id FROM chat_room WHERE chat_room_name = ?", + "SELECT id FROM chat_room WHERE name = ?", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, i64>(0)).ok() + }).unwrap_or(0); + + if room_id == 0 { + return None; + } + + let mut stmt = conn.prepare( + "SELECT c.username + FROM chatroom_member cm + LEFT JOIN contact c ON c.id = cm.member_id + WHERE cm.room_id = ?" + ).ok()?; + let usernames: HashSet = stmt.query_map([room_id], |row| { + row.get::<_, String>(0) + }).ok()? + .filter_map(|r| r.ok()) + .filter(|uid| !uid.is_empty()) + .collect(); + + if usernames.is_empty() { None } else { Some(usernames) } +} + +fn decode_proto_varint(raw: &[u8], offset: usize) -> Option<(u64, usize)> { + let mut value = 0u64; + let mut shift = 0u32; + let mut pos = offset; + while pos < raw.len() { + let byte = raw[pos]; + pos += 1; + value |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some((value, pos)); + } + shift += 7; + if shift > 63 { + return None; + } + } + None +} + +fn proto_len_fields<'a>(raw: &'a [u8]) -> Vec<(u64, &'a [u8])> { + let mut fields = Vec::new(); + let mut idx = 0usize; + while idx < raw.len() { + let Some((tag, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + let field_no = tag >> 3; + let wire_type = tag & 0x07; + match wire_type { + 0 => { + let Some((_, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + } + 1 => { + let Some(next) = idx.checked_add(8) else { break; }; + if next > raw.len() { break; } + idx = next; + } + 2 => { + let Some((size, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + let Ok(size) = usize::try_from(size) else { break; }; + let Some(end) = idx.checked_add(size) else { break; }; + if end > raw.len() { break; } + fields.push((field_no, &raw[idx..end])); + idx = end; + } + 5 => { + let Some(next) = idx.checked_add(4) else { break; }; + if next > raw.len() { break; } + idx = next; + } + _ => break, + } + } + fields +} + +fn proto_string_fields(raw: &[u8]) -> Vec<(u64, String)> { + proto_len_fields(raw) + .into_iter() + .filter_map(|(field_no, value)| { + if value.is_empty() || value.len() > 256 { + return None; + } + let text = std::str::from_utf8(value).ok()?.trim().to_string(); + if text.is_empty() || text.chars().any(char::is_control) { + return None; + } + Some((field_no, text)) + }) + .collect() +} + +fn is_strong_username_hint(value: &str) -> bool { + value.starts_with("wxid_") + || value.ends_with("@chatroom") + || value.starts_with("gh_") + || value.contains('@') +} + +fn looks_like_username(value: &str) -> bool { + let value = value.trim(); + if value.is_empty() { + return false; + } + if is_strong_username_hint(value) { + return true; + } + if value.len() < 6 || value.len() > 32 || value.chars().any(char::is_whitespace) { + return false; + } + let mut chars = value.chars(); + let Some(first) = chars.next() else { return false; }; + first.is_ascii_alphabetic() + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') +} + +fn pick_member_username( + strings: &[(u64, String)], + targets: Option<&HashSet>, +) -> Option { + if let Some(targets) = targets { + return strings + .iter() + .find(|(_, value)| targets.contains(value)) + .map(|(_, value)| value.clone()); + } + + for field_no in [1u64, 4u64] { + if let Some((_, value)) = strings + .iter() + .find(|(f, value)| *f == field_no && looks_like_username(value)) + { + return Some(value.clone()); + } + } + + strings + .iter() + .find(|(_, value)| is_strong_username_hint(value)) + .or_else(|| strings.iter().find(|(_, value)| looks_like_username(value))) + .map(|(_, value)| value.clone()) +} + +fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option { + let mut best_score = i64::MIN; + let mut best = String::new(); + + for (idx, (field_no, value)) in strings.iter().enumerate() { + let value = value.trim(); + if value.is_empty() + || value == username + || is_strong_username_hint(value) + || value.contains('\n') + || value.contains('\r') + || value.len() > 64 + { + continue; + } + + let mut score = 0i64; + if *field_no == 2 { + score += 100; + } + if !looks_like_username(value) { + score += 20; + } + score += (32usize.saturating_sub(value.len())) as i64; + score = score * 1000 - idx as i64; + + if score > best_score { + best_score = score; + best = value.to_string(); + } + } + + if best.is_empty() { None } else { Some(best) } +} + +fn parse_group_nickname_map( + ext_buffer: &[u8], + targets: Option<&HashSet>, +) -> HashMap { + let mut out = HashMap::new(); + if ext_buffer.is_empty() { + return out; + } + + for (_, chunk) in proto_len_fields(ext_buffer) { + let strings = proto_string_fields(chunk); + if strings.is_empty() { + continue; + } + let Some(username) = pick_member_username(&strings, targets) else { + continue; + }; + if out.contains_key(&username) { + continue; + } + if let Some(nickname) = pick_group_nickname(&strings, &username) { + out.insert(username, nickname); + } + } + + out +} + +fn contact_display( + uid: &str, + nick: &str, + remark: &str, + names_map: &HashMap, +) -> String { + if !remark.is_empty() { + remark.to_string() + } else if !nick.is_empty() { + nick.to_string() + } else { + names_map.get(uid).cloned().unwrap_or_else(|| uid.to_string()) + } +} + +fn sender_display( + username: &str, + fallback_sender_name: &str, + names: &HashMap, + group_nicknames: &HashMap, +) -> String { + if username.is_empty() { + return String::new(); + } + group_nicknames + .get(username) + .filter(|s| !s.is_empty()) + .cloned() + .or_else(|| names.get(username).cloned()) + .or_else(|| { + if fallback_sender_name.is_empty() { + None + } else { + Some(fallback_sender_name.to_string()) + } + }) + .unwrap_or_else(|| username.to_string()) +} + +fn group_top_senders( + sender_counts: &HashMap, + names: &HashMap, + group_nicknames: &HashMap, + limit: usize, +) -> Vec { + let mut top_senders: Vec = sender_counts.iter() + .map(|(username, count)| json!({ + "sender": sender_display(username, "", names, group_nicknames), + "count": count, + })) + .collect(); + top_senders.sort_by(|a, b| { + b["count"].as_i64().unwrap_or(0) + .cmp(&a["count"].as_i64().unwrap_or(0)) + .then_with(|| { + a["sender"].as_str().unwrap_or("") + .cmp(b["sender"].as_str().unwrap_or("")) + }) + }); + top_senders.truncate(limit); + top_senders +} + fn sender_label( real_sender_id: i64, content: &str, @@ -625,15 +1018,16 @@ fn sender_label( chat_username: &str, id2u: &HashMap, names: &HashMap, + group_nicknames: &HashMap, ) -> String { let sender_uname = id2u.get(&real_sender_id).cloned().unwrap_or_default(); if is_group { if !sender_uname.is_empty() && sender_uname != chat_username { - return names.get(&sender_uname).cloned().unwrap_or(sender_uname); + return sender_display(&sender_uname, "", names, group_nicknames); } if content.contains(":\n") { let raw = content.splitn(2, ":\n").next().unwrap_or(""); - return names.get(raw).cloned().unwrap_or_else(|| raw.to_string()); + return sender_display(raw, "", names, group_nicknames); } return String::new(); } @@ -904,6 +1298,7 @@ pub async fn q_unread( }).await??; let mut results = Vec::new(); + let mut group_nickname_cache: HashMap> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let chat_type = chat_type_of(&username, names); if let Some(ref set) = filter_set { @@ -916,9 +1311,13 @@ pub async fn q_unread( let summary = decompress_or_str(&summary_bytes); let summary = strip_group_prefix(&summary); let sender_display = if is_group && !sender.is_empty() { - names.map.get(&sender).cloned().unwrap_or_else(|| { - if !sender_name.is_empty() { sender_name.clone() } else { sender.clone() } - }) + if !group_nickname_cache.contains_key(&username) { + let nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); + group_nickname_cache.insert(username.clone(), nicknames); + } + let empty = HashMap::new(); + let group_nicknames = group_nickname_cache.get(&username).unwrap_or(&empty); + sender_display(&sender, &sender_name, &names.map, group_nicknames) } else { String::new() }; @@ -955,7 +1354,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result // 优先路径:contact.db → chatroom_member + chat_room(完整成员列表) if let Some(contact_p) = db.get("contact/contact.db").await? { let uname2 = username.clone(); - let display2 = display.clone(); let names_map2 = names_map.clone(); let members_opt: Option> = tokio::task::spawn_blocking(move || { @@ -1008,12 +1406,31 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result return Ok(None); } + let target_usernames: HashSet = raw.iter() + .map(|(uid, _, _)| uid.clone()) + .collect(); + let group_nicknames = load_group_nickname_map_from_conn( + &conn, + &uname2, + Some(&target_usernames), + ); + let mut members: Vec = raw.iter().map(|(uid, nick, remark)| { - let disp = if !remark.is_empty() { remark.clone() } - else if !nick.is_empty() { nick.clone() } - else { names_map2.get(uid).cloned().unwrap_or_else(|| uid.clone()) }; + let contact_display = contact_display(uid, nick, remark, &names_map2); + let group_nickname = group_nicknames.get(uid).cloned().unwrap_or_default(); + let disp = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; let is_owner = uid == &owner && !owner.is_empty(); - json!({ "username": uid, "display": disp, "is_owner": is_owner }) + json!({ + "username": uid, + "display": disp, + "contact_display": contact_display, + "group_nickname": group_nickname, + "is_owner": is_owner, + }) }).collect(); // 群主排首位,其余按 display 字典序 @@ -1024,7 +1441,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) }); - let _ = display2; // 不在此 closure 内使用 Ok(Some(members)) }).await??; @@ -1075,10 +1491,20 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result sender_set.extend(senders); } + let group_nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); let mut members: Vec = sender_set.iter().map(|u| { + let contact_display = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); + let group_nickname = group_nicknames.get(u).cloned().unwrap_or_default(); + let display = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; json!({ "username": u, - "display": names_map.get(u).cloned().unwrap_or_else(|| u.clone()), + "display": display, + "contact_display": contact_display, + "group_nickname": group_nickname, "is_owner": false, }) }).collect(); @@ -1163,6 +1589,11 @@ pub async fn q_new_messages( let display = names.display(uname); let chat_type = chat_type_of(uname, names); let is_group = chat_type == "group"; + let group_nicknames = if is_group { + load_group_nicknames(db, uname).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); @@ -1170,6 +1601,7 @@ pub async fn q_new_messages( let uname2 = uname.clone(); let display2 = display.clone(); let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let tname_for_log = tname.clone(); let msgs: Vec = match tokio::task::spawn_blocking(move || { @@ -1201,7 +1633,7 @@ pub async fn q_new_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map); + let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = fmt_content(local_id, local_type, &content, is_group); result.push(json!({ "chat": display2, @@ -1376,13 +1808,17 @@ pub async fn q_stats( let mut type_counts: HashMap = HashMap::new(); let mut sender_counts: HashMap = HashMap::new(); let mut hour_counts = [0i64; 24]; + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; - let names_map = names.map.clone(); // 用 SQL GROUP BY 在数据库侧聚合,避免把全量消息内容加载进内存 let result: (i64, HashMap, HashMap, [i64; 24]) = @@ -1469,8 +1905,7 @@ pub async fn q_stats( for (id, cnt) in rows.flatten() { if let Some(u) = id2u.get(&id) { if u != &uname { - let name = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); - *sender_c.entry(name).or_insert(0) += cnt; + *sender_c.entry(u.clone()).or_insert(0) += cnt; } } } @@ -1495,11 +1930,7 @@ pub async fn q_stats( by_type.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); // 发言排行,Top 10 - let mut top_senders: Vec = sender_counts.iter() - .map(|(s, c)| json!({ "sender": s, "count": c })) - .collect(); - top_senders.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); - top_senders.truncate(10); + let top_senders = group_top_senders(&sender_counts, &names.map, &group_nicknames, 10); // 24小时分布 let by_hour: Vec = hour_counts.iter().enumerate() @@ -2001,6 +2432,104 @@ pub async fn q_sns_search( Ok(json!({ "keyword": keyword, "posts": posts, "total": total })) } +#[cfg(test)] +mod group_nickname_tests { + use super::*; + + fn varint(mut value: u64) -> Vec { + let mut out = Vec::new(); + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + out.push(byte); + if value == 0 { + return out; + } + } + } + + fn len_field(field_no: u64, bytes: &[u8]) -> Vec { + let mut out = varint((field_no << 3) | 2); + out.extend(varint(bytes.len() as u64)); + out.extend(bytes); + out + } + + fn string_field(field_no: u64, value: &str) -> Vec { + len_field(field_no, value.as_bytes()) + } + + fn member_chunk(username: &str, group_nickname: &str) -> Vec { + let mut member = Vec::new(); + member.extend(string_field(1, username)); + member.extend(string_field(2, group_nickname)); + len_field(1, &member) + } + + #[test] + fn parses_group_nickname_member_chunks() { + let mut ext_buffer = Vec::new(); + ext_buffer.extend(member_chunk("wxid_alice", "Alice In Group")); + ext_buffer.extend(member_chunk("bob_123456", "Bob Card")); + + let nicknames = parse_group_nickname_map(&ext_buffer, None); + + assert_eq!( + nicknames.get("wxid_alice").map(String::as_str), + Some("Alice In Group") + ); + assert_eq!( + nicknames.get("bob_123456").map(String::as_str), + Some("Bob Card") + ); + } + + #[test] + fn target_filter_anchors_member_username_choice() { + let mut member = Vec::new(); + member.extend(string_field(3, "candidate_name")); + member.extend(string_field(4, "wxid_target")); + member.extend(string_field(2, "Target Card")); + let ext_buffer = len_field(1, &member); + let targets = HashSet::from(["wxid_target".to_string()]); + + let nicknames = parse_group_nickname_map(&ext_buffer, Some(&targets)); + + assert_eq!( + nicknames.get("wxid_target").map(String::as_str), + Some("Target Card") + ); + assert!(!nicknames.contains_key("candidate_name")); + } + + #[test] + fn group_top_senders_keeps_duplicate_display_names_separate() { + let sender_counts = HashMap::from([ + ("wxid_alice".to_string(), 7), + ("wxid_bob".to_string(), 3), + ]); + let names = HashMap::from([ + ("wxid_alice".to_string(), "Alice Contact".to_string()), + ("wxid_bob".to_string(), "Bob Contact".to_string()), + ]); + let group_nicknames = HashMap::from([ + ("wxid_alice".to_string(), "同名".to_string()), + ("wxid_bob".to_string(), "同名".to_string()), + ]); + + let top = group_top_senders(&sender_counts, &names, &group_nicknames, 10); + + assert_eq!(top.len(), 2); + assert_eq!(top[0]["sender"].as_str(), Some("同名")); + assert_eq!(top[0]["count"].as_i64(), Some(7)); + assert_eq!(top[1]["sender"].as_str(), Some("同名")); + assert_eq!(top[1]["count"].as_i64(), Some(3)); + } +} + #[cfg(test)] mod sns_tests { use super::*; From b0431352ce009f8646bd18256e22884b718d5ed3 Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 14:42:03 +0800 Subject: [PATCH 3/6] =?UTF-8?q?feat(appmsg):=20=E6=94=AF=E6=8C=81=E5=BC=95?= =?UTF-8?q?=E7=94=A8=E6=B6=88=E6=81=AF=E5=8E=9F=E6=96=87=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=20(#28)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(appmsg): parse quoted message content * docs(appmsg): document quote message output --- README.md | 9 ++ SKILL.md | 9 ++ src/daemon/query.rs | 327 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 317 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index d084301..bec8f65 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,15 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 +引用消息会在 `history` / `search` / `new-messages` 输出中显示当前回复和被引用原文: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会包含微信 appmsg 里的链接、文件、合并聊天记录和引用消息等变体;搜索时也会匹配解压后可见的引用原文。 + ### 朋友圈(SNS) 三个独立命令,区分"通知"和"帖子": diff --git a/SKILL.md b/SKILL.md index 386816f..ec02ce8 100644 --- a/SKILL.md +++ b/SKILL.md @@ -140,6 +140,15 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 +引用消息(appmsg `type=57`)在 `history` / `search` / `new-messages` 输出里会展开为两行:第一行是当前回复,第二行以 `↳` 开头显示被引用原文,例如: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会覆盖微信 appmsg 的链接、文件、合并聊天记录和引用消息等变体;`search --type link` 也会匹配解压并格式化后的引用原文。 + ### 联系人与群组 ```bash diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 041ff0b..0c4b106 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -500,19 +500,18 @@ fn query_messages( let conn = Connection::open(db_path)?; let id2u = load_id2u(&conn); - let mut clauses = Vec::new(); + let mut clauses: Vec = Vec::new(); let mut params: Vec> = Vec::new(); if let Some(s) = since { - clauses.push("create_time >= ?"); + clauses.push("create_time >= ?".into()); params.push(Box::new(s)); } if let Some(u) = until { - clauses.push("create_time <= ?"); + clauses.push("create_time <= ?".into()); params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?"); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } let where_clause = if clauses.is_empty() { String::new() @@ -579,8 +578,14 @@ fn search_in_table( let id2u = load_id2u(conn); // 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符 let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); - let mut clauses = vec!["message_content LIKE ? ESCAPE '\\'".to_string()]; - let mut params: Vec> = vec![Box::new(format!("%{}%", escaped_kw))]; + let search_decoded_content = msg_type == Some(49); + let keyword_lower = keyword.to_lowercase(); + let mut clauses: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + if !search_decoded_content { + clauses.push("message_content LIKE ? ESCAPE '\\'".to_string()); + params.push(Box::new(format!("%{}%", escaped_kw))); + } if let Some(s) = since { clauses.push("create_time >= ?".into()); params.push(Box::new(s)); @@ -590,17 +595,23 @@ fn search_in_table( params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?".into()); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } - let where_clause = format!("WHERE {}", clauses.join(" AND ")); + let where_clause = if clauses.is_empty() { + String::new() + } else { + format!("WHERE {}", clauses.join(" AND ")) + }; + let limit_clause = if search_decoded_content { "" } else { " LIMIT ?" }; let sql = format!( "SELECT local_id, local_type, create_time, real_sender_id, message_content, WCDB_CT_message_content - FROM [{}] {} ORDER BY create_time DESC LIMIT ?", - table, where_clause + FROM [{}] {} ORDER BY create_time DESC{}", + table, where_clause, limit_clause ); - params.push(Box::new(limit as i64)); + if !search_decoded_content { + params.push(Box::new(limit as i64)); + } let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; @@ -622,6 +633,9 @@ fn search_in_table( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { + continue; + } result.push(json!({ "timestamp": ts, @@ -631,10 +645,32 @@ fn search_in_table( "content": text, "type": fmt_type(local_type), })); + if search_decoded_content && result.len() >= limit { + break; + } } Ok(result) } +fn push_msg_type_filter( + clauses: &mut Vec, + params: &mut Vec>, + msg_type: i64, +) { + clauses.push("(local_type & 4294967295) = ?".into()); + params.push(Box::new(msg_type)); +} + +fn matches_search_text(raw: &str, formatted: &str, keyword: &str, keyword_lower: &str) -> bool { + contains_search_text(raw, keyword, keyword_lower) + || contains_search_text(formatted, keyword, keyword_lower) +} + +fn contains_search_text(haystack: &str, keyword: &str, keyword_lower: &str) -> bool { + haystack.contains(keyword) + || (!keyword_lower.is_empty() && haystack.to_lowercase().contains(keyword_lower)) +} + fn load_id2u(conn: &Connection) -> HashMap { let mut map = HashMap::new(); if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") { @@ -1163,21 +1199,8 @@ fn parse_appmsg(text: &str) -> Option { match atype.as_str() { "6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }), "57" => { - let ref_content = extract_xml_text(text, "content") - .map(|s| { - // content 可能是 HTML 转义的 XML(被引用的消息是 appmsg 时) - let unescaped = unescape_html(&s); - // 如果解转义后是 XML,尝试递归解析 - if unescaped.contains(">().join(" "); - if s.chars().count() > 40 { - format!("{}...", s.chars().take(40).collect::()) - } else { s } - }) + let ref_content = quote_refermsg_content(text) + .or_else(|| extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40))) .unwrap_or_default(); let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() }; if !ref_content.is_empty() { @@ -1191,6 +1214,56 @@ fn parse_appmsg(text: &str) -> Option { } } +fn quote_refermsg_content(text: &str) -> Option { + let refer = extract_xml_text(text, "refermsg")?; + let content = extract_xml_text(&refer, "content") + .and_then(|s| quote_content_text(&s, 80)) + .or_else(|| { + extract_xml_text(&refer, "type") + .and_then(|t| quote_refermsg_type_label(&t).map(str::to_string)) + })?; + match extract_xml_text(&refer, "displayname") { + Some(name) if !name.is_empty() => Some(format!("{}: {}", name, content)), + _ => Some(content), + } +} + +fn quote_content_text(raw: &str, max_chars: usize) -> Option { + let unescaped = unescape_html(raw); + if unescaped.contains(" Option<&'static str> { + match t { + "1" => None, + "3" => Some("[图片]"), + "34" => Some("[语音]"), + "43" => Some("[视频]"), + "47" => Some("[表情]"), + "49" => Some("[链接/文件]"), + _ => None, + } +} + +fn collapse_text(text: &str, max_chars: usize) -> String { + let collapsed = text.split_whitespace().collect::>().join(" "); + if collapsed.chars().count() > max_chars { + format!("{}...", collapsed.chars().take(max_chars).collect::()) + } else { + collapsed + } +} + fn extract_xml_text(xml: &str, tag: &str) -> Option { let open = format!("<{}>", tag); let close = format!("", tag); @@ -1223,6 +1296,204 @@ fn unescape_html(s: &str) -> String { .replace("'", "'") } +#[cfg(test)] +mod appmsg_tests { + use super::*; + + #[test] + fn parse_quote_appmsg_reads_refermsg_content() { + let xml = r#" + + + 我也没有用ai啊 + 57 + + + 1 + 不再熬夜 + 昨天用 claude 爬小红书数据来着 + + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn query_messages_filters_appmsg_by_base_type() { + let path = temp_db_path("query_messages_filters_appmsg_by_base_type"); + { + let conn = Connection::open(&path).expect("open temp db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + } + + let rows = query_messages( + &path, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + None, + None, + Some(49), + 10, + 0, + ) + .expect("query messages"); + + let _ = std::fs::remove_file(&path); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_filters_appmsg_by_base_type() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, + ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_matches_decompressed_formatted_appmsg_content() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content BLOB, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + let xml = r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#; + let compressed = zstd::encode_all(xml.as_bytes(), 0).expect("compress appmsg xml"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + compressed, + 4_i64 + ], + ) + .expect("insert compressed quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, + ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + fn temp_db_path(name: &str) -> std::path::PathBuf { + let unique = format!( + "wx-cli-{}-{}-{}.db", + name, + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock before unix epoch") + .as_nanos() + ); + std::env::temp_dir().join(unique) + } +} + fn fmt_time(ts: i64, fmt: &str) -> String { Local.timestamp_opt(ts, 0) .single() From 1b00d04598017392994fc09ea1009a7de4b5c990 Mon Sep 17 00:00:00 2001 From: Tsing Date: Thu, 14 May 2026 14:46:34 +0800 Subject: [PATCH 4/6] feat: expose url field for link/appmsg messages (#18) * feat: expose url field for link/appmsg messages Extract from appmsg XML in type-49 messages and append it as a 'url' field in history/search output. The field is omitted when the message has no valid URL (non-link types, empty, non-http). * fix: normalize appmsg urls across query outputs --------- Co-authored-by: tsinghu Co-authored-by: jackwener --- src/daemon/query.rs | 123 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 0c4b106..8aa14c1 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -549,15 +549,20 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -636,15 +641,20 @@ fn search_in_table( if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { continue; } + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); if search_decoded_content && result.len() >= limit { break; } @@ -1273,6 +1283,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + +/// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + let xml = strip_group_prefix(text); + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; @@ -1906,7 +1947,8 @@ pub async fn q_new_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = fmt_content(local_id, local_type, &content, is_group); - result.push(json!({ + let url = appmsg_url_for_message(local_type, &content); + let mut msg = json!({ "chat": display2, "username": uname2, "is_group": is_group, @@ -1916,7 +1958,11 @@ pub async fn q_new_messages( "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok::<_, anyhow::Error>(result) }).await { @@ -2926,6 +2972,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") } From 9d5a78ac04b74d7210819f26793ec9569a4dadef Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 15:13:50 +0800 Subject: [PATCH 5/6] docs(macOS): document TCC csreq invalidation after re-signing WeChat (#48) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS TCC binds permissions to (bundle id, csreq) where csreq encodes the app's code signature. `codesign --force --deep --sign -` on WeChat changes the csreq, silently invalidating every existing TCC grant for com.tencent.xinWeChat — yet System Settings still paints each toggle as ON because the UI only checks bundle id, hiding the drift. WeChat then reprompts for screen recording / camera / microphone / file access despite "looking allowed". Three doc-only updates, no code changes: - README.md quick start: add the `tccutil reset` loop right after the codesign step, plus a one-line callout pointing at the deep-dive section. - SKILL.md macOS init flow: same loop in the agent-readable order, so agents executing the steps don't skip it. - docs/macos-permission-guide.md: new section 五 with first-principles root cause, the reset loop, the macOS 26 "录屏与系统录音 / 仅系统 录音" UI split footgun, and ad-hoc signature verification. Builds on the BobbyCat PR #29 — keeps the symptom description and the macOS 26 UI split note, expands scope from ScreenCapture-only to all TCC services that re-signing actually breaks (Camera / Microphone / AppleEvents / AddressBook / Documents / Downloads / Desktop), drops the misleading TCC.db sqlite query (path varies by macOS version, can need FDA, and is no more useful than just trying WeChat's screenshot again), and explicitly leaves the reset as a manual step rather than auto-running it from `wx init` because it would wipe currently-working grants. Co-authored-by: BobbyCat <114374951+BobbyCats@users.noreply.github.com> --- README.md | 12 +++++- SKILL.md | 23 +++++++++- docs/macos-permission-guide.md | 76 ++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bec8f65..b9783ed 100644 --- a/README.md +++ b/README.md @@ -100,10 +100,16 @@ cargo build --release # 1. 签名(只需做一次,WeChat 更新后重做) codesign --force --deep --sign - /Applications/WeChat.app -# 2. 重启微信,等待完全登录 +# 2. 清理旧 TCC 授权记录(重签名后必做,否则微信截图/通话权限可能 silent 失效) +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done + +# 3. 重启微信,等待完全登录 killall WeChat && open /Applications/WeChat.app -# 3. 初始化 +# 4. 初始化 sudo wx init ``` @@ -112,6 +118,8 @@ sudo wx init > codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_plugins/librtp_mpeg4_plugin.dylib" > codesign --force --deep --sign - /Applications/WeChat.app > ``` +> +> 重签名后 macOS 的 TCC 隐私授权按新 code signature 重新校验,旧记录会失效。如果跳过 `tccutil reset`,微信截图/视频通话/麦克风等权限可能"看起来已开启但实际拒绝"。详见 [macOS 权限与签名指南](docs/macos-permission-guide.md#五重签名后微信权限-silent-失效)。 **Linux** diff --git a/SKILL.md b/SKILL.md index ec02ce8..7d587af 100644 --- a/SKILL.md +++ b/SKILL.md @@ -66,14 +66,33 @@ codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_pl codesign --force --deep --sign - /Applications/WeChat.app ``` -**第二步:重启 WeChat** +**第二步:清理 WeChat 在 macOS TCC 隐私数据库里的旧授权记录**(重签名后必做) + +macOS TCC 按 `bundle id + csreq` 联合校验权限;csreq 编码自代码签名。重签名后旧 csreq 和新签名不再匹配,旧授权记录会 silent 失效(System Settings 仍把开关画成"已允许",运行时实际拒绝)。把 WeChat 在 TCC 里的旧记录抹掉,让 macOS 在下次微信请求权限时按新签名重新生成 csreq: + +```bash +tccutil reset ScreenCapture com.tencent.xinWeChat # 截图 / 屏幕共享 +tccutil reset Camera com.tencent.xinWeChat # 视频通话 / 扫码 +tccutil reset Microphone com.tencent.xinWeChat # 语音消息 / 通话 +tccutil reset AppleEvents com.tencent.xinWeChat # 自动化 / 输入法 +tccutil reset AddressBook com.tencent.xinWeChat # 通讯录 +tccutil reset SystemPolicyDocumentsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDownloadsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDesktopFolder com.tencent.xinWeChat +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",是 no-op,不影响其他 service 的 reset。 + +**第三步:重启 WeChat** ```bash killall WeChat && open /Applications/WeChat.app # 等待微信完全登录后再继续 ``` -**第三步:初始化** +之后微信触发权限请求时按 GUI 提示重新允许即可。在 macOS 26 上,把 WeChat 加进 **隐私与安全 → 录屏与系统录音** 的上半区,**不要**只勾下半区的"仅系统录音"——后者不能授予截图权限。 + +**第四步:初始化** ```bash sudo wx init diff --git a/docs/macos-permission-guide.md b/docs/macos-permission-guide.md index e5ee463..322cb90 100644 --- a/docs/macos-permission-guide.md +++ b/docs/macos-permission-guide.md @@ -196,3 +196,79 @@ open /Applications/WeChat.app | "SIP 阻止了调试微信" | ❌ SIP 只保护系统进程,微信不受 SIP 保护 | | "加了 sshd 到 FDA 就行" | ❌ 还需要加 `sshd-keygen-wrapper`,且要重连 SSH | | "微信开着也能重签名" | ❌ 运行中的 binary/dylib 被占用,codesign 会失败 | + +--- + +## 五、重签名后微信权限 silent 失效 + +### 现象 + +完成 ad-hoc 重签名后,微信任意以下功能都可能"看起来已授权但实际被拒绝": + +- 截图 / 屏幕共享(`ScreenCapture`) +- 视频通话 / 扫码(`Camera`) +- 语音消息 / 通话(`Microphone`) +- 自动化、第三方输入法(`AppleEvents`) +- 同步通讯录(`AddressBook`) +- 文件发送 / 接收(`SystemPolicyDocumentsFolder` / `Downloads` / `Desktop`) + +System Settings 里通常仍看到"微信.app"开关是 ON,但运行时权限校验失败。微信会反复弹"需要开启 X 权限"。 + +### 根因(第一性原理) + +macOS TCC(Transparency, Consent, and Control)按 **bundle id + csreq** 联合校验权限。`csreq`(code requirement)是从 app 的 code signature 推导出的二进制 blob,存在 `/Library/Application Support/com.apple.TCC/TCC.db` 的 `access` 表里,每条 ~160 字节。 + +`codesign --force --deep --sign -` 把 WeChat 从官方签名换成 ad-hoc 签名(甚至 ad-hoc → ad-hoc 重签也会变),新进程的 csreq 跟旧记录里那条对不上 —— tccd 拒绝。 + +System Settings UI 只按 client 显示开关、不重算 csreq,所以视觉上是"已授权",运行时实际拒绝。这是 silent drift。 + +### 修复步骤 + +把 WeChat 在 TCC 里的旧记录全部抹掉,让 macOS 在下次微信请求权限时按新签名重新生成 csreq: + +```bash +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",这是 no-op,不影响其他 service 的 reset。 + +之后退出并重新打开微信,按 GUI 提示重新允许: + +```bash +killall WeChat +open /Applications/WeChat.app +``` + +> 这一步**应当由用户/agent 手动执行**,不在 `wx init` 里自动跑——TCC 重置会让用户的现有授权失效,需要由人决定时机。 + +#### macOS 26 的 UI 拆分 + +在 macOS 26 上,**隐私与安全 → 录屏与系统录音** 显示为两块,容易踩坑: + +| 区域 | 作用 | +|------|------| +| **录屏与系统录音**(上半区) | 录制屏幕内容 + 系统音频;微信截图、屏幕共享需要这一项 | +| **仅系统录音**(下半区) | 只录系统音频;只打开这一项**不能**修复微信截图 | + +把 WeChat 加进上半区;只勾下半区的"仅系统录音"无效。 + +### 验证 + +确认 WeChat 当前是 ad-hoc 签名(这是修复前提): + +```bash +codesign -dv --verbose=4 /Applications/WeChat.app 2>&1 | grep -E "Signature|flags|TeamIdentifier" +``` + +期望看到: + +```text +flags=0x2(adhoc) +Signature=adhoc +TeamIdentifier=not set +``` + +最直接的功能验证:在微信里使用截图、视频通话、麦克风等功能,按 GUI 弹窗的"允许"重新授权一次,之后正常工作。 From c284b4ade6690c6c4e5d6559db1d5f3f8d380450 Mon Sep 17 00:00:00 2001 From: Haoqing Wang <78337154+hqhq1025@users.noreply.github.com> Date: Thu, 14 May 2026 15:29:01 +0800 Subject: [PATCH 6/6] fix: parse appmsg subtypes from type 49 messages (#24) --- src/daemon/query.rs | 200 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 2 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 8aa14c1..2d33e97 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -1202,8 +1202,25 @@ fn parse_sysmsg(xml: &str) -> Option { } fn parse_appmsg(text: &str) -> Option { - // 简单 XML 解析,避免引入重量级 XML 库(或直接用 minidom) - // 这里用基本字符串搜索实现 + if let Some(parsed) = parse_appmsg_dom(text) { + return Some(parsed); + } + parse_appmsg_legacy(text) +} + +fn parse_appmsg_dom(text: &str) -> Option { + let doc = Document::parse(text).ok()?; + let appmsg = doc.descendants().find(|node| node.has_tag_name("appmsg"))?; + let title = xml_text(xml_child(appmsg, "title")).unwrap_or_default(); + let atype = xml_text(xml_child(appmsg, "type")).unwrap_or_default(); + match atype.as_str() { + "6" => Some(format_file_appmsg(appmsg, &title)), + "19" => Some(format_record_appmsg(appmsg, &title)), + _ => None, + } +} + +fn parse_appmsg_legacy(text: &str) -> Option { let title = extract_xml_text(text, "title")?; let atype = extract_xml_text(text, "type").unwrap_or_default(); match atype.as_str() { @@ -1224,6 +1241,119 @@ fn parse_appmsg(text: &str) -> Option { } } +fn format_file_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let mut meta = Vec::new(); + if let Some(size) = xml_child(appmsg, "appattach") + .and_then(|attach| xml_text(xml_child(attach, "totallen"))) + .and_then(|value| value.parse::().ok()) + .filter(|size| *size > 0) + { + meta.push(format_byte_size(size)); + } + if let Some(ext) = xml_child(appmsg, "appattach") + .and_then(|attach| xml_text(xml_child(attach, "fileext"))) + .filter(|ext| !ext.is_empty()) + { + meta.push(ext); + } + + let base = if !title.is_empty() { + format!("[文件] {}", title) + } else { + "[文件]".into() + }; + if meta.is_empty() { + base + } else { + format!("{} ({})", base, meta.join(", ")) + } +} + +fn format_record_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let items = record_item_lines(appmsg); + let mut header = if !title.is_empty() { + format!("[合并聊天记录] {}", title) + } else { + "[合并聊天记录]".into() + }; + if !items.is_empty() { + header.push_str(&format!(" ({}条)", items.len())); + } + + let mut lines = vec![header]; + if items.is_empty() { + if let Some(desc) = xml_text(xml_child(appmsg, "des")).filter(|desc| !desc.is_empty()) { + lines.push(format!(" {}", collapse_text(&desc, 120))); + } + } else { + for item in items.iter().take(10) { + lines.push(format!(" - {}", item)); + } + if items.len() > 10 { + lines.push(format!(" - ... 还有{}条", items.len() - 10)); + } + } + lines.join("\n") +} + +fn record_item_lines<'a, 'input>(appmsg: Node<'a, 'input>) -> Vec { + let mut lines = record_item_lines_from_node(appmsg); + if !lines.is_empty() { + return lines; + } + + let Some(record_xml) = xml_text(xml_child(appmsg, "recorditem")).filter(|value| !value.is_empty()) else { + return Vec::new(); + }; + let unescaped = unescape_html(&record_xml); + for candidate in [&record_xml, &unescaped] { + if let Ok(doc) = Document::parse(candidate) { + lines = record_item_lines_from_node(doc.root_element()); + if !lines.is_empty() { + break; + } + } + } + lines +} + +fn record_item_lines_from_node<'a, 'input>(node: Node<'a, 'input>) -> Vec { + node.descendants() + .filter(|child| child.has_tag_name("dataitem")) + .filter_map(format_record_item) + .collect() +} + +fn format_record_item<'a, 'input>(item: Node<'a, 'input>) -> Option { + let name = first_child_text(item, &["sourcename", "datasrcname", "sourceusername"]); + let desc = first_child_text(item, &["datadesc", "datatitle", "datafmt"]) + .or_else(|| item.attribute("datatype").and_then(record_datatype_label).map(str::to_string))?; + let desc = collapse_text(&desc, 100); + if let Some(name) = name.filter(|value| !value.is_empty()) { + Some(format!("{}: {}", name, desc)) + } else { + Some(desc) + } +} + +fn first_child_text<'a, 'input>(node: Node<'a, 'input>, tags: &[&str]) -> Option { + tags.iter() + .find_map(|tag| xml_text(xml_child(node, tag))) + .filter(|value| !value.is_empty()) +} + +fn record_datatype_label(datatype: &str) -> Option<&'static str> { + match datatype { + "1" => Some("[文本]"), + "2" => Some("[图片]"), + "3" => Some("[语音]"), + "4" => Some("[视频]"), + "6" => Some("[文件]"), + "17" => Some("[链接]"), + _ => None, + } +} + fn quote_refermsg_content(text: &str) -> Option { let refer = extract_xml_text(text, "refermsg")?; let content = extract_xml_text(&refer, "content") @@ -1274,6 +1404,30 @@ fn collapse_text(text: &str, max_chars: usize) -> String { } } +fn format_byte_size(bytes: u64) -> String { + const KB: f64 = 1024.0; + const MB: f64 = KB * 1024.0; + const GB: f64 = MB * 1024.0; + let bytes_f = bytes as f64; + if bytes_f >= GB { + format_decimal_unit(bytes_f / GB, "GB") + } else if bytes_f >= MB { + format_decimal_unit(bytes_f / MB, "MB") + } else if bytes_f >= KB { + format_decimal_unit(bytes_f / KB, "KB") + } else { + format!("{} B", bytes) + } +} + +fn format_decimal_unit(value: f64, unit: &str) -> String { + let mut s = format!("{:.1}", value); + if s.ends_with(".0") { + s.truncate(s.len() - 2); + } + format!("{} {}", s, unit) +} + fn extract_xml_text(xml: &str, tag: &str) -> Option { let open = format!("<{}>", tag); let close = format!("", tag); @@ -1341,6 +1495,48 @@ fn unescape_html(s: &str) -> String { mod appmsg_tests { use super::*; + #[test] + fn parse_forwarded_chat_record_expands_record_items() { + let xml = r#" + + + 群聊的聊天记录 + 张三: 早上好 +李四: 收到 + 19 + <recordinfo><datalist count="2"><dataitem datatype="1"><sourcename>张三</sourcename><sourcetime>1710000000</sourcetime><datadesc>早上好 &amp; coffee</datadesc></dataitem><dataitem datatype="2"><sourcename>李四</sourcename><sourcetime>1710000060</sourcetime><datafmt>图片</datafmt><datadesc>[图片]</datadesc></dataitem></datalist></recordinfo> + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[合并聊天记录] 群聊的聊天记录 (2条)\n - 张三: 早上好 & coffee\n - 李四: [图片]") + ); + } + + #[test] + fn parse_file_appmsg_includes_attachment_metadata() { + let xml = r#" + + + report.pdf + 6 + + 1536 + pdf + + abcdef123456 + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[文件] report.pdf (1.5 KB, pdf)") + ); + } + #[test] fn parse_quote_appmsg_reads_refermsg_content() { let xml = r#"