From 94fcc36ffef594f2a740da6bad24ec7c758e1215 Mon Sep 17 00:00:00 2001 From: jakevin Date: Tue, 19 May 2026 01:44:03 +0800 Subject: [PATCH] feat(attachments): expose stable group sender identity (#77) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `q_attachments` 群聊场景下两个昵称同名的成员,原本只输出 `sender` 字段(取群名片),无法在 JSON 消费侧区分谁发的图。 跟 #68 把 `sender_username / sender_contact_display / sender_group_nickname` 一起追加到 attachment row 上,复用 PR68 引入的 `add_sender_identity` / `sender_username` helper, 保持 4 处出口 (history / search / new-messages / stats.top_senders) + attachments 的字段语义完全一致。 调整: - `q_attachments` 元组从 7 字段扩到 8 字段(多带一个稳定 wxid) - spawn_blocking 内部多算一次 `sender_username`,per-row 复杂度 O(1) - JSON build 处调用 `add_sender_identity`,行为对齐:非群 / 解析不到 wxid 时三字段不输出 测试 / 文档: - 新增 `attachment_row_gets_stable_group_sender_identity_via_helper`, 锁住"两同名成员可被 sender_username 区分" + "非群 / 未知 sender 不追加伪字段" - README + SKILL.md 在 `attachments` 段和顶部 "sender 选择策略" 段 同时记录新字段,标明 wxid 解析不到时的不输出语义 closes #23 --- README.md | 10 +++- SKILL.md | 4 +- src/daemon/query.rs | 111 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 34a605f..ea7c22f 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,14 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 +`history` / `search` / `new-messages` / `attachments` 以及 `stats.top_senders`,在群聊上下文里还会附带稳定身份三件套: + +- `sender_username`:稳定 wxid,用来区分两个昵称同名的成员 +- `sender_contact_display`:通讯录里的显示名(备注 > 昵称 > wxid 兜底) +- `sender_group_nickname`:群名片本身(同 `sender` 的来源,方便机器读取时不必再解析) + +解析不到 wxid 时(id2u 没命中且老格式 `wxid_xxx:\n...` 前缀也不存在)这三字段不会输出,避免伪造空字段污染下游过滤。 + `history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`: - `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed` @@ -237,7 +245,7 
@@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/SKILL.md b/SKILL.md index be3dd63..f75cadc 100644 --- a/SKILL.md +++ b/SKILL.md @@ -159,6 +159,8 @@ wx search "会议" --in "工作群" --since 2026-01-01 群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 +`history` / `search` / `new-messages` / `attachments` 和 `stats.top_senders` 在群上下文里同时输出稳定身份三件套:`sender_username`(稳定 wxid,用来区分同名成员)/ `sender_contact_display`(备注 > 昵称 > wxid 兜底)/ `sender_group_nickname`(群名片,等价于 `sender` 的来源,免去再做字符串解析)。当 wxid 解析不到时,这三字段不会输出,避免空字符串污染下游过滤。 + `sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper: ```json @@ -280,7 +282,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / 
png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/src/daemon/query.rs b/src/daemon/query.rs index d02c13c..0de0eca 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -2285,6 +2285,68 @@ mod appmsg_tests { assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名")); } + /// q_attachments 是异步 + 依赖 DbCache,无法直接 unit-test 整条 pipeline。 + /// 这里锁住 attachment row 复用 `add_sender_identity` 后的最终 JSON 形状: + /// 两个 group nickname 同为 "同名" 的成员,attachment 行可以通过 sender_username 区分。 + #[test] + fn attachment_row_gets_stable_group_sender_identity_via_helper() { + let names: HashMap = HashMap::from([ + ("wxid_alice".to_string(), "Alice Contact".to_string()), + ("wxid_bob".to_string(), "Bob Contact".to_string()), + ]); + let group_nicknames: HashMap = HashMap::from([ + ("wxid_alice".to_string(), "同名".to_string()), + ("wxid_bob".to_string(), "同名".to_string()), + ]); + + let mut alice_row = json!({ + "attachment_id": "abc", + "kind": "image", + "type": "Image", + "local_id": 1, + "timestamp": 1775146911, + "time": "2026-04-30 12:00", + "sender": "同名", + }); + add_sender_identity(&mut alice_row, true, "wxid_alice", &names, &group_nicknames); + assert_eq!(alice_row["sender"].as_str(), Some("同名")); + assert_eq!(alice_row["sender_username"].as_str(), Some("wxid_alice")); + assert_eq!(alice_row["sender_contact_display"].as_str(), Some("Alice Contact")); + assert_eq!(alice_row["sender_group_nickname"].as_str(), Some("同名")); + + let mut bob_row = json!({ + "attachment_id": "def", + "kind": "image", + "type": "Image", + "local_id": 2, + "timestamp": 1775146922, + "time": "2026-04-30 12:00", + "sender": "同名", + }); + add_sender_identity(&mut bob_row, true, "wxid_bob", &names, &group_nicknames); + assert_eq!(bob_row["sender_username"].as_str(), Some("wxid_bob")); + // 同样 sender_group_nickname 都是 "同名",但 sender_username 能区分 + assert_ne!( + alice_row["sender_username"], bob_row["sender_username"], + "sender_username 必须区分两位同名成员" + ); + + // 
非群 chat 不该追加 identity 字段(行为对齐 history/search/new-messages) + let mut private_row = json!({"attachment_id": "ghi", "sender": ""}); + add_sender_identity(&mut private_row, false, "wxid_alice", &names, &group_nicknames); + assert!(private_row.get("sender_username").is_none()); + assert!(private_row.get("sender_contact_display").is_none()); + assert!(private_row.get("sender_group_nickname").is_none()); + + // group 但 sender_username 解析为空(非常老的格式、id2u 没命中、content 也没 wxid_xxx:\n 前缀): + // 不要伪造空字段,整段 identity 也不追加 + let mut unknown_row = json!({"attachment_id": "jkl", "sender": ""}); + add_sender_identity(&mut unknown_row, true, "", &names, &group_nicknames); + assert!(unknown_row.get("sender_username").is_none()); + assert!(unknown_row.get("sender_contact_display").is_none()); + assert!(unknown_row.get("sender_group_nickname").is_none()); + } + #[test] fn search_in_table_filters_appmsg_by_base_type() { let conn = Connection::open_in_memory().expect("open in-memory db"); @@ -4236,9 +4298,12 @@ pub async fn q_attachments( HashMap::new() }; - let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new(); + let mut all_rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = Vec::new(); let mut shard_hits = 0usize; - // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx) + // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, + // sender_username, ts_for_sort, db_idx) + // sender_username 是稳定 wxid,用来让 sender_contact_display / sender_group_nickname + // 落在 attachment row 上(消除"两个同名成员的图分不清谁发的"歧义)。 for (db_idx, shard) in shards.iter().enumerate() { let path = shard.path.clone(); let tname = shard.table.clone(); @@ -4253,7 +4318,7 @@ pub async fn q_attachments( let per_db_cap = (offset + limit).max(limit) * 2; let db_idx2 = db_idx as i64; - let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = + let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = tokio::task::spawn_blocking(move || { 
let conn = Connection::open(&path)?; let id2u = load_id2u(&conn); @@ -4291,7 +4356,7 @@ pub async fn q_attachments( let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; - let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt + let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = stmt .query_map(params_ref.as_slice(), |row| { let local_id: i64 = row.get(0)?; let raw_type: i64 = row.get(1)?; @@ -4301,20 +4366,29 @@ pub async fn q_attachments( let content_bytes = get_content_bytes(row, 4); let ct: i64 = row.get::<_, i64>(5).unwrap_or(0); let content = decompress_message(&content_bytes, ct); - let sender = if is_group2 { - sender_label( - real_sender_id, - &content, - true, - &uname, - &id2u, - &names_map, - &group_nicknames2, + let (sender, sender_uname) = if is_group2 { + ( + sender_label( + real_sender_id, + &content, + true, + &uname, + &id2u, + &names_map, + &group_nicknames2, + ), + sender_username( + real_sender_id, + &content, + true, + &uname, + &id2u, + ), ) } else { - String::new() + (String::new(), String::new()) }; - Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2)) + Ok((local_id, lo32, ts, real_sender_id, sender, sender_uname, ts, db_idx2)) })? 
.filter_map(|r| r.ok()) .collect(); @@ -4327,13 +4401,13 @@ pub async fn q_attachments( all_rows.extend(rows); } - // 全局按 ts DESC 排序后分页 - all_rows.sort_by_key(|r| std::cmp::Reverse(r.5)); + // 全局按 ts DESC 排序后分页(ts_for_sort 在 tuple index 6) + all_rows.sort_by_key(|r| std::cmp::Reverse(r.6)); let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect(); // 翻成 JSON let mut results: Vec<Value> = Vec::with_capacity(paged.len()); - for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged { + for (local_id, lo32, ts, _real_sender_id, sender, sender_uname, _ts2, _db_idx) in paged { let kind = type_to_kind .get(&lo32) .copied() @@ -4359,6 +4433,7 @@ pub async fn q_attachments( if is_group && !sender.is_empty() { row["sender"] = Value::String(sender); } + add_sender_identity(&mut row, is_group, &sender_uname, &names.map, &group_nicknames); results.push(row); } let unknown_shards = current_unknown_shards(db, names);