feat(attachments): expose stable group sender identity (#77)

`q_attachments` 群聊场景下两个昵称同名的成员,原本只输出
`sender` 字段(取群名片),无法在 JSON 消费侧区分谁发的图。

跟 #68 把 `sender_username / sender_contact_display /
sender_group_nickname` 一起追加到 attachment row 上,复用
PR68 引入的 `add_sender_identity` / `sender_username` helper,
保持 4 处出口 (history / search / new-messages / stats.top_senders)
+ attachments 的字段语义完全一致。

调整:
- `q_attachments` 元组从 7 字段扩到 8 字段(多带一个稳定 wxid)
- spawn_blocking 内部多算一次 `sender_username`,per-row 复杂度 O(1)
- JSON build 处调用 `add_sender_identity`,行为对齐:非群 / 解析不到
  wxid 时三字段不输出

测试 / 文档:
- 新增 `attachment_row_gets_stable_group_sender_identity_via_helper`,
  锁住"两同名成员可被 sender_username 区分" + "非群 / 未知 sender
  不追加伪字段"
- README + SKILL.md 在 `attachments` 段和顶部 "sender 选择策略" 段
  同时记录新字段,标明 wxid 解析不到时的不输出语义

closes #23
pull/81/head
jakevin 2026-05-19 01:44:03 +08:00 committed by GitHub
parent 0612789d19
commit 94fcc36ffe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 105 additions and 20 deletions

View File

@ -168,6 +168,14 @@ wx search "会议" --in "工作群" --since 2026-01-01
群聊里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
`history` / `search` / `new-messages` / `attachments` 以及 `stats.top_senders`,在群聊上下文里还会附带稳定身份三件套:
- `sender_username`:稳定 wxid,用来区分两个昵称同名的成员
- `sender_contact_display`:通讯录里的显示名(备注 > 昵称 > wxid 兜底)
- `sender_group_nickname`:群名片本身(同 `sender` 的来源,方便机器读取时不必再解析)
解析不到 wxid 时(id2u 没命中且老格式 `wxid_xxx:\n...` 前缀也不存在)这三字段不会输出,避免伪造空字段污染下游过滤。
`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`
- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed`
@ -237,7 +245,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
```
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`;`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。

View File

@ -159,6 +159,8 @@ wx search "会议" --in "工作群" --since 2026-01-01
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
`history` / `search` / `new-messages` / `attachments` 以及 `stats.top_senders` 在群上下文里同时输出稳定身份三件套:`sender_username`(稳定 wxid,用来区分同名成员)/ `sender_contact_display`(备注 > 昵称 > wxid 兜底)/ `sender_group_nickname`(群名片,等价于 `sender` 的来源,免去再做字符串解析)。当 wxid 解析不到时,这三字段不会输出,避免空字符串污染下游过滤。
`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper
```json
@ -280,7 +282,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
```
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。

View File

@ -2285,6 +2285,68 @@ mod appmsg_tests {
assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名"));
}
/// `q_attachments` is async and depends on `DbCache`, so the whole pipeline cannot be
/// unit-tested directly. Instead this pins the final JSON shape of an attachment row once
/// it has gone through `add_sender_identity`: two members sharing the group nickname
/// "同名" must still be distinguishable on attachment rows via `sender_username`.
#[test]
fn attachment_row_gets_stable_group_sender_identity_via_helper() {
    // The three identity keys that must be absent whenever no identity is attached.
    const IDENTITY_FIELDS: [&str; 3] = [
        "sender_username",
        "sender_contact_display",
        "sender_group_nickname",
    ];

    // Contact display names, keyed by wxid.
    let mut names: HashMap<String, String> = HashMap::new();
    names.insert("wxid_alice".to_string(), "Alice Contact".to_string());
    names.insert("wxid_bob".to_string(), "Bob Contact".to_string());

    // Both members deliberately share the same group nickname.
    let mut group_nicknames: HashMap<String, String> = HashMap::new();
    group_nicknames.insert("wxid_alice".to_string(), "同名".to_string());
    group_nicknames.insert("wxid_bob".to_string(), "同名".to_string());

    // Builds a group attachment row as it looks just before identity fields are added.
    let image_row = |attachment_id: &str, local_id: i64, timestamp: i64| {
        json!({
            "attachment_id": attachment_id,
            "kind": "image",
            "type": "Image",
            "local_id": local_id,
            "timestamp": timestamp,
            "time": "2026-04-30 12:00",
            "sender": "同名",
        })
    };

    let mut alice_row = image_row("abc", 1, 1775146911);
    add_sender_identity(&mut alice_row, true, "wxid_alice", &names, &group_nicknames);
    assert_eq!(alice_row["sender"].as_str(), Some("同名"));
    assert_eq!(alice_row["sender_username"].as_str(), Some("wxid_alice"));
    assert_eq!(alice_row["sender_contact_display"].as_str(), Some("Alice Contact"));
    assert_eq!(alice_row["sender_group_nickname"].as_str(), Some("同名"));

    let mut bob_row = image_row("def", 2, 1775146922);
    add_sender_identity(&mut bob_row, true, "wxid_bob", &names, &group_nicknames);
    assert_eq!(bob_row["sender_username"].as_str(), Some("wxid_bob"));

    // Same `sender_group_nickname` ("同名") on both rows, yet `sender_username` differs.
    assert_ne!(
        alice_row["sender_username"], bob_row["sender_username"],
        "sender_username 必须区分两位同名成员"
    );

    // Non-group chats must not get identity fields (aligned with
    // history / search / new-messages).
    let mut private_row = json!({"attachment_id": "ghi", "sender": ""});
    add_sender_identity(&mut private_row, false, "wxid_alice", &names, &group_nicknames);
    for field in IDENTITY_FIELDS {
        assert!(private_row.get(field).is_none());
    }

    // Group chat where the sender username resolved to empty (very old format, no id2u
    // hit, and no `wxid_xxx:\n` prefix in the content): no empty placeholder fields are
    // fabricated, the whole identity section is left out.
    let mut unknown_row = json!({"attachment_id": "jkl", "sender": ""});
    add_sender_identity(&mut unknown_row, true, "", &names, &group_nicknames);
    for field in IDENTITY_FIELDS {
        assert!(unknown_row.get(field).is_none());
    }
}
#[test]
fn search_in_table_filters_appmsg_by_base_type() {
let conn = Connection::open_in_memory().expect("open in-memory db");
@ -4236,9 +4298,12 @@ pub async fn q_attachments(
HashMap::new()
};
let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new();
let mut all_rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = Vec::new();
let mut shard_hits = 0usize;
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx)
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label,
// sender_username, ts_for_sort, db_idx)
// sender_username 是稳定 wxid用来让 sender_contact_display / sender_group_nickname
// 落在 attachment row 上(消除"两个同名成员的图分不清谁发的"歧义)。
for (db_idx, shard) in shards.iter().enumerate() {
let path = shard.path.clone();
let tname = shard.table.clone();
@ -4253,7 +4318,7 @@ pub async fn q_attachments(
let per_db_cap = (offset + limit).max(limit) * 2;
let db_idx2 = db_idx as i64;
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> =
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> =
tokio::task::spawn_blocking(move || {
let conn = Connection::open(&path)?;
let id2u = load_id2u(&conn);
@ -4291,7 +4356,7 @@ pub async fn q_attachments(
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
params.iter().map(|p| p.as_ref()).collect();
let mut stmt = conn.prepare(&sql)?;
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = stmt
.query_map(params_ref.as_slice(), |row| {
let local_id: i64 = row.get(0)?;
let raw_type: i64 = row.get(1)?;
@ -4301,7 +4366,8 @@ pub async fn q_attachments(
let content_bytes = get_content_bytes(row, 4);
let ct: i64 = row.get::<_, i64>(5).unwrap_or(0);
let content = decompress_message(&content_bytes, ct);
let sender = if is_group2 {
let (sender, sender_uname) = if is_group2 {
(
sender_label(
real_sender_id,
&content,
@ -4310,11 +4376,19 @@ pub async fn q_attachments(
&id2u,
&names_map,
&group_nicknames2,
),
sender_username(
real_sender_id,
&content,
true,
&uname,
&id2u,
),
)
} else {
String::new()
(String::new(), String::new())
};
Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2))
Ok((local_id, lo32, ts, real_sender_id, sender, sender_uname, ts, db_idx2))
})?
.filter_map(|r| r.ok())
.collect();
@ -4327,13 +4401,13 @@ pub async fn q_attachments(
all_rows.extend(rows);
}
// 全局按 ts DESC 排序后分页
all_rows.sort_by_key(|r| std::cmp::Reverse(r.5));
// 全局按 ts DESC 排序后分页ts_for_sort 在 tuple index 6
all_rows.sort_by_key(|r| std::cmp::Reverse(r.6));
let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect();
// 翻成 JSON
let mut results: Vec<Value> = Vec::with_capacity(paged.len());
for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged {
for (local_id, lo32, ts, _real_sender_id, sender, sender_uname, _ts2, _db_idx) in paged {
let kind = type_to_kind
.get(&lo32)
.copied()
@ -4359,6 +4433,7 @@ pub async fn q_attachments(
if is_group && !sender.is_empty() {
row["sender"] = Value::String(sender);
}
add_sender_identity(&mut row, is_group, &sender_uname, &names.map, &group_nicknames);
results.push(row);
}
let unknown_shards = current_unknown_shards(db, names);