mirror of https://github.com/jackwener/wx-cli.git
feat(attachments): expose stable group sender identity (#77)
`q_attachments` 群聊场景下两个昵称同名的成员,原本只输出 `sender` 字段(取群名片),无法在 JSON 消费侧区分谁发的图。
沿用 #68 的做法,把 `sender_username` / `sender_contact_display` / `sender_group_nickname` 一起追加到 attachment row 上,复用 #68 引入的 `add_sender_identity` / `sender_username` helper,保持 4 处出口 (history / search / new-messages / stats.top_senders) + attachments 的字段语义完全一致。

调整:
- `q_attachments` 元组从 7 字段扩到 8 字段(多带一个稳定 wxid)
- spawn_blocking 内部多算一次 `sender_username`,per-row 复杂度 O(1)
- JSON build 处调用 `add_sender_identity`,行为对齐:非群 / 解析不到 wxid 时三字段不输出

测试 / 文档:
- 新增 `attachment_row_gets_stable_group_sender_identity_via_helper`,锁住"两同名成员可被 sender_username 区分" + "非群 / 未知 sender 不追加伪字段"
- README + SKILL.md 在 `attachments` 段和顶部 "sender 选择策略" 段同时记录新字段,标明 wxid 解析不到时的不输出语义

closes #23
pull/81/head
parent
0612789d19
commit
94fcc36ffe
10
README.md
10
README.md
|
|
@ -168,6 +168,14 @@ wx search "会议" --in "工作群" --since 2026-01-01
|
||||||
|
|
||||||
群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
|
群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
|
||||||
|
|
||||||
|
`history` / `search` / `new-messages` / `attachments` 以及 `stats.top_senders`,在群聊上下文里还会附带稳定身份三件套:
|
||||||
|
|
||||||
|
- `sender_username`:稳定 wxid,用来区分两个昵称同名的成员
|
||||||
|
- `sender_contact_display`:通讯录里的显示名(备注 > 昵称 > wxid 兜底)
|
||||||
|
- `sender_group_nickname`:群名片本身(同 `sender` 的来源,方便机器读取时不必再解析)
|
||||||
|
|
||||||
|
解析不到 wxid 时(id2u 没命中且老格式 `wxid_xxx:\n...` 前缀也不存在)这三字段不会输出,避免伪造空字段污染下游过滤。
|
||||||
|
|
||||||
`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`:
|
`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`:
|
||||||
|
|
||||||
- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed`
|
- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed`
|
||||||
|
|
@ -237,7 +245,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||||
|
|
||||||
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
4
SKILL.md
4
SKILL.md
|
|
@ -159,6 +159,8 @@ wx search "会议" --in "工作群" --since 2026-01-01
|
||||||
|
|
||||||
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
|
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
|
||||||
|
|
||||||
|
`history` / `search` / `new-messages` / `attachments` 和 `stats.top_senders` 在群上下文里同时输出稳定身份三件套:`sender_username`(稳定 wxid,用来区分同名成员)/ `sender_contact_display`(备注 > 昵称 > wxid 兜底)/ `sender_group_nickname`(群名片,等价于 `sender` 的来源,免去再做字符串解析)。当 wxid 解析不到时,这三字段不会输出,避免空字符串污染下游过滤。
|
||||||
|
|
||||||
`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper:
|
`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
|
@ -280,7 +282,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||||
|
|
||||||
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2285,6 +2285,68 @@ mod appmsg_tests {
|
||||||
assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名"));
|
assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// q_attachments 是异步 + 依赖 DbCache,无法直接 unit-test 整条 pipeline。
|
||||||
|
/// 这里锁住 attachment row 复用 `add_sender_identity` 后的最终 JSON 形状:
|
||||||
|
/// 两个 group nickname 同为 "同名" 的成员,attachment 行可以通过 sender_username 区分。
|
||||||
|
#[test]
|
||||||
|
fn attachment_row_gets_stable_group_sender_identity_via_helper() {
|
||||||
|
let names: HashMap<String, String> = HashMap::from([
|
||||||
|
("wxid_alice".to_string(), "Alice Contact".to_string()),
|
||||||
|
("wxid_bob".to_string(), "Bob Contact".to_string()),
|
||||||
|
]);
|
||||||
|
let group_nicknames: HashMap<String, String> = HashMap::from([
|
||||||
|
("wxid_alice".to_string(), "同名".to_string()),
|
||||||
|
("wxid_bob".to_string(), "同名".to_string()),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let mut alice_row = json!({
|
||||||
|
"attachment_id": "abc",
|
||||||
|
"kind": "image",
|
||||||
|
"type": "Image",
|
||||||
|
"local_id": 1,
|
||||||
|
"timestamp": 1775146911,
|
||||||
|
"time": "2026-04-30 12:00",
|
||||||
|
"sender": "同名",
|
||||||
|
});
|
||||||
|
add_sender_identity(&mut alice_row, true, "wxid_alice", &names, &group_nicknames);
|
||||||
|
assert_eq!(alice_row["sender"].as_str(), Some("同名"));
|
||||||
|
assert_eq!(alice_row["sender_username"].as_str(), Some("wxid_alice"));
|
||||||
|
assert_eq!(alice_row["sender_contact_display"].as_str(), Some("Alice Contact"));
|
||||||
|
assert_eq!(alice_row["sender_group_nickname"].as_str(), Some("同名"));
|
||||||
|
|
||||||
|
let mut bob_row = json!({
|
||||||
|
"attachment_id": "def",
|
||||||
|
"kind": "image",
|
||||||
|
"type": "Image",
|
||||||
|
"local_id": 2,
|
||||||
|
"timestamp": 1775146922,
|
||||||
|
"time": "2026-04-30 12:00",
|
||||||
|
"sender": "同名",
|
||||||
|
});
|
||||||
|
add_sender_identity(&mut bob_row, true, "wxid_bob", &names, &group_nicknames);
|
||||||
|
assert_eq!(bob_row["sender_username"].as_str(), Some("wxid_bob"));
|
||||||
|
// 同样 sender_group_nickname 都是 "同名",但 sender_username 能区分
|
||||||
|
assert_ne!(
|
||||||
|
alice_row["sender_username"], bob_row["sender_username"],
|
||||||
|
"sender_username 必须区分两位同名成员"
|
||||||
|
);
|
||||||
|
|
||||||
|
// 非群 chat 不该追加 identity 字段(行为对齐 history/search/new-messages)
|
||||||
|
let mut private_row = json!({"attachment_id": "ghi", "sender": ""});
|
||||||
|
add_sender_identity(&mut private_row, false, "wxid_alice", &names, &group_nicknames);
|
||||||
|
assert!(private_row.get("sender_username").is_none());
|
||||||
|
assert!(private_row.get("sender_contact_display").is_none());
|
||||||
|
assert!(private_row.get("sender_group_nickname").is_none());
|
||||||
|
|
||||||
|
// group 但 sender_username 解析为空(非常老的格式、id2u 没命中、content 也没 wxid_xxx:\n 前缀):
|
||||||
|
// 不要伪造空字段,整段 identity 也不追加
|
||||||
|
let mut unknown_row = json!({"attachment_id": "jkl", "sender": ""});
|
||||||
|
add_sender_identity(&mut unknown_row, true, "", &names, &group_nicknames);
|
||||||
|
assert!(unknown_row.get("sender_username").is_none());
|
||||||
|
assert!(unknown_row.get("sender_contact_display").is_none());
|
||||||
|
assert!(unknown_row.get("sender_group_nickname").is_none());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn search_in_table_filters_appmsg_by_base_type() {
|
fn search_in_table_filters_appmsg_by_base_type() {
|
||||||
let conn = Connection::open_in_memory().expect("open in-memory db");
|
let conn = Connection::open_in_memory().expect("open in-memory db");
|
||||||
|
|
@ -4236,9 +4298,12 @@ pub async fn q_attachments(
|
||||||
HashMap::new()
|
HashMap::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new();
|
let mut all_rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = Vec::new();
|
||||||
let mut shard_hits = 0usize;
|
let mut shard_hits = 0usize;
|
||||||
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx)
|
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label,
|
||||||
|
// sender_username, ts_for_sort, db_idx)
|
||||||
|
// sender_username 是稳定 wxid,用来让 sender_contact_display / sender_group_nickname
|
||||||
|
// 落在 attachment row 上(消除"两个同名成员的图分不清谁发的"歧义)。
|
||||||
for (db_idx, shard) in shards.iter().enumerate() {
|
for (db_idx, shard) in shards.iter().enumerate() {
|
||||||
let path = shard.path.clone();
|
let path = shard.path.clone();
|
||||||
let tname = shard.table.clone();
|
let tname = shard.table.clone();
|
||||||
|
|
@ -4253,7 +4318,7 @@ pub async fn q_attachments(
|
||||||
let per_db_cap = (offset + limit).max(limit) * 2;
|
let per_db_cap = (offset + limit).max(limit) * 2;
|
||||||
let db_idx2 = db_idx as i64;
|
let db_idx2 = db_idx as i64;
|
||||||
|
|
||||||
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> =
|
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> =
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&path)?;
|
let conn = Connection::open(&path)?;
|
||||||
let id2u = load_id2u(&conn);
|
let id2u = load_id2u(&conn);
|
||||||
|
|
@ -4291,7 +4356,7 @@ pub async fn q_attachments(
|
||||||
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
||||||
params.iter().map(|p| p.as_ref()).collect();
|
params.iter().map(|p| p.as_ref()).collect();
|
||||||
let mut stmt = conn.prepare(&sql)?;
|
let mut stmt = conn.prepare(&sql)?;
|
||||||
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt
|
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = stmt
|
||||||
.query_map(params_ref.as_slice(), |row| {
|
.query_map(params_ref.as_slice(), |row| {
|
||||||
let local_id: i64 = row.get(0)?;
|
let local_id: i64 = row.get(0)?;
|
||||||
let raw_type: i64 = row.get(1)?;
|
let raw_type: i64 = row.get(1)?;
|
||||||
|
|
@ -4301,20 +4366,29 @@ pub async fn q_attachments(
|
||||||
let content_bytes = get_content_bytes(row, 4);
|
let content_bytes = get_content_bytes(row, 4);
|
||||||
let ct: i64 = row.get::<_, i64>(5).unwrap_or(0);
|
let ct: i64 = row.get::<_, i64>(5).unwrap_or(0);
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
let sender = if is_group2 {
|
let (sender, sender_uname) = if is_group2 {
|
||||||
sender_label(
|
(
|
||||||
real_sender_id,
|
sender_label(
|
||||||
&content,
|
real_sender_id,
|
||||||
true,
|
&content,
|
||||||
&uname,
|
true,
|
||||||
&id2u,
|
&uname,
|
||||||
&names_map,
|
&id2u,
|
||||||
&group_nicknames2,
|
&names_map,
|
||||||
|
&group_nicknames2,
|
||||||
|
),
|
||||||
|
sender_username(
|
||||||
|
real_sender_id,
|
||||||
|
&content,
|
||||||
|
true,
|
||||||
|
&uname,
|
||||||
|
&id2u,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
String::new()
|
(String::new(), String::new())
|
||||||
};
|
};
|
||||||
Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2))
|
Ok((local_id, lo32, ts, real_sender_id, sender, sender_uname, ts, db_idx2))
|
||||||
})?
|
})?
|
||||||
.filter_map(|r| r.ok())
|
.filter_map(|r| r.ok())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
@ -4327,13 +4401,13 @@ pub async fn q_attachments(
|
||||||
all_rows.extend(rows);
|
all_rows.extend(rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 全局按 ts DESC 排序后分页
|
// 全局按 ts DESC 排序后分页(ts_for_sort 在 tuple index 6)
|
||||||
all_rows.sort_by_key(|r| std::cmp::Reverse(r.5));
|
all_rows.sort_by_key(|r| std::cmp::Reverse(r.6));
|
||||||
let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect();
|
let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect();
|
||||||
|
|
||||||
// 翻成 JSON
|
// 翻成 JSON
|
||||||
let mut results: Vec<Value> = Vec::with_capacity(paged.len());
|
let mut results: Vec<Value> = Vec::with_capacity(paged.len());
|
||||||
for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged {
|
for (local_id, lo32, ts, _real_sender_id, sender, sender_uname, _ts2, _db_idx) in paged {
|
||||||
let kind = type_to_kind
|
let kind = type_to_kind
|
||||||
.get(&lo32)
|
.get(&lo32)
|
||||||
.copied()
|
.copied()
|
||||||
|
|
@ -4359,6 +4433,7 @@ pub async fn q_attachments(
|
||||||
if is_group && !sender.is_empty() {
|
if is_group && !sender.is_empty() {
|
||||||
row["sender"] = Value::String(sender);
|
row["sender"] = Value::String(sender);
|
||||||
}
|
}
|
||||||
|
add_sender_identity(&mut row, is_group, &sender_uname, &names.map, &group_nicknames);
|
||||||
results.push(row);
|
results.push(row);
|
||||||
}
|
}
|
||||||
let unknown_shards = current_unknown_shards(db, names);
|
let unknown_shards = current_unknown_shards(db, names);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue