diff --git a/README.md b/README.md index 35589cd..29c8736 100644 --- a/README.md +++ b/README.md @@ -211,14 +211,14 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) 聊天里的附件本体存在 `xwechat_files//msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 ```bash -# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选) +# 1) 列出会话里的图片附件,先拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) @@ -226,7 +226,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/SKILL.md b/SKILL.md index ddf02e1..6b79e0d 100644 --- a/SKILL.md +++ b/SKILL.md @@ -242,14 +242,14 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) -聊天里的图片/视频/文件本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: +聊天里的图片本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: ```bash -# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选) +# 1) 先列出图片附件,拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 用 attachment_id 把单个资源解密写到指定路径 @@ -257,7 +257,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/src/attachment/resolver.rs b/src/attachment/resolver.rs index c32d84e..8db4f41 100644 --- a/src/attachment/resolver.rs +++ b/src/attachment/resolver.rs @@ -48,6 +48,7 @@ pub fn lookup_md5_blocking( resource_db_path: &Path, chat: &str, local_id: i64, + create_time: i64, msg_local_type_lo32: i64, ) -> Result> { let conn = Connection::open_with_flags( @@ -68,9 +69,25 @@ pub fn lookup_md5_blocking( return Ok(None); }; - // 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新 + // 2) MessageResourceInfo: + // 同 chat 内 local_id 会复用,所以先用 create_time 精确命中; + // 若资源库里的时间戳跟 message_N.db 不完全对齐,再 fallback 到“同 local_id/type 取最新” // message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型 - let packed: Option> = conn + let packed_exact: Option> = conn + .query_row( + "SELECT packed_info FROM MessageResourceInfo + WHERE chat_id = ?1 + AND message_local_id = ?2 + AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3) + AND message_create_time = ?4 + ORDER BY rowid DESC + LIMIT 1", + rusqlite::params![chat_id, local_id, msg_local_type_lo32, create_time], + |row| row.get(0), + ) + .ok(); + + let packed: Option> = packed_exact.or_else(|| conn .query_row( "SELECT packed_info FROM MessageResourceInfo WHERE chat_id = ?1 @@ -81,7 +98,7 @@ pub fn lookup_md5_blocking( rusqlite::params![chat_id, local_id, msg_local_type_lo32], |row| row.get(0), ) - .ok(); + .ok()); let Some(blob) = packed else { return Ok(None); @@ -235,7 +252,13 @@ pub fn resolve_blocking( super::AttachmentKind::File => 49, }; - let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)? + let meta = lookup_md5_blocking( + resource_db_path, + &id.chat, + id.local_id, + id.create_time, + lo32_type, + )? .ok_or_else(|| { anyhow!( "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", @@ -306,6 +329,69 @@ mod tests { assert!(extract_md5_from_packed_info(&blob).is_none()); } + #[test] + fn lookup_md5_prefers_exact_create_time_over_latest_reuse() { + let dir = tempdir_for_test(); + let db_path = dir.join("message_resource.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE ChatName2Id (user_name TEXT)", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE MessageResourceInfo ( + chat_id INTEGER, + message_local_id INTEGER, + message_local_type INTEGER, + message_create_time INTEGER, + packed_info BLOB + )", + [], + ) + .unwrap(); + + let old_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"11111111111111111111111111111111"); + blob + }; + let new_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"22222222222222222222222222222222"); + blob + }; + + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 1000i64, old_blob], + ) + .unwrap(); + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 2000i64, new_blob], + ) + .unwrap(); + + let old = lookup_md5_blocking(&db_path, "room@chatroom", 7, 1000, 3) + .unwrap() + .unwrap(); + let new = lookup_md5_blocking(&db_path, "room@chatroom", 7, 2000, 3) + .unwrap() + .unwrap(); + assert_eq!(old.md5, "11111111111111111111111111111111"); + assert_eq!(new.md5, "22222222222222222222222222222222"); + } + #[test] fn three_month_candidates_includes_prev_curr_next() { // 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09 diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 5fe4e8c..2ec2476 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -264,13 +264,13 @@ enum Commands { #[arg(long)] json: bool, }, - /// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id + /// 列出某会话的图片附件,返回不透明 attachment_id Attachments { /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) chat: String, - /// 类型(多选,默认 image)。可选:image / voice / video / file + /// 类型(当前仅支持 image) #[arg(long = "kind", value_name = "KIND", - value_parser = ["image", "voice", "video", "file", "audio", "img"])] + value_parser = ["image", "img"])] kinds: Vec, /// 显示数量 #[arg(short = 'n', long, default_value = "50")] diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 6bd46b2..634ff2d 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3285,7 +3285,7 @@ pub async fn q_biz_articles( Ok(json!({ "count": results.len(), "articles": results })) } -// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ───────────────────────────────── +// ─── 附件(当前先支持图片)查询与提取 ───────────────────────────────── // // 设计要点: // - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 @@ -3296,7 +3296,7 @@ pub async fn q_biz_articles( // - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 // 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 -/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`, +/// 列出某会话内的附件消息(当前仅 image)。返回每条的 `attachment_id`, /// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 pub async fn q_attachments( db: &DbCache, @@ -3319,7 +3319,7 @@ pub async fn q_attachments( // 解析 kinds → 低 32 bit local_type 集合 let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?; if kind_filters.is_empty() { - anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice"); + anyhow::bail!("kinds 为空 — 当前至少传一种 image"); } let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); // local_type → AttachmentKind 反查(mask 完后定 kind) @@ -3569,7 +3569,7 @@ pub async fn q_extract( } /// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。 -/// 缺省(None / 空)按 image 处理。 +/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 fn parse_attachment_kinds( kinds: Option<&[String]>, ) -> Result> { @@ -3583,10 +3583,10 @@ fn parse_attachment_kinds( for k in raw { let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { "image" | "img" => (AttachmentKind::Image, 3), - "voice" | "audio" => (AttachmentKind::Voice, 34), - "video" => (AttachmentKind::Video, 43), - "file" => (AttachmentKind::File, 49), - other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other), + "voice" | "audio" | "video" | "file" => { + anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通") + } + other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), }; if seen.insert(kind.as_str()) { out.push((kind, t)); diff --git a/src/ipc.rs b/src/ipc.rs index 78d6278..a4615eb 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -131,11 +131,11 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, - /// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音) + /// 列出某个会话里的图片附件 /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 Attachments { chat: String, - /// 类型过滤:image / video / file / voice,多选;缺省返回 image + /// 类型过滤:当前仅支持 image #[serde(default, skip_serializing_if = "Option::is_none")] kinds: Option>, #[serde(default = "default_limit_50")]