From 14fdfde1d36debd90781bce24b0568f72897476c Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:25:32 +0800 Subject: [PATCH 1/5] feat(attachment): scaffold module + V1 decoders + resource resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lays down the skeleton for 聊天附件 (chat attachment) extraction. This commit introduces the `attachment` module with: - `attachment_id`: opaque base64url(json) round-trip handle for CLI/IPC. Carries `(chat, local_id, create_time, kind)` — `local_id` alone is not unique (实测同 chat 内最多 7 条同 local_id 的记录), so create_time is required for disambiguation. - `decoder/`: dispatch by 6B header magic. Three branches: - `V2_MAGIC` → AES-128-ECB + raw + XOR (need image AES key) - `V1_MAGIC` → AES-128-ECB with fixed key `cfcd208495d565ef` (= md5("0")[:16]) - else → legacy single-byte XOR with magic auto-detect Manual ECB + PKCS7 unpad to avoid pulling in another crate. - `resolver`: `message_resource.db` lookup chain `username → ChatName2Id.rowid → MessageResourceInfo.packed_info → md5` + on-disk `.dat` selection (full > _h > _t) under `<wxchat_base>/msg/attach/<md5(chat)>/<YYYY-MM>/Img/<file_md5>[_t|_h].dat`. Honors `message_local_type % 2^32` to strip the high flag bits, and orders by `message_create_time DESC` to handle local_id reuse. - `image_key/`: stub trait + macOS / Windows placeholders. To be filled by codex with the V2 image key extraction (kvcomm + brute-force on macOS, memory scan on Windows). V1 decoder ships with 6 unit tests covering every supported magic + the BMP extra validation; resolver ships with packed_info parser + dat-file selection tests; v2 decoder ships with header validation tests. 21 tests pass. `cargo check` and `cargo check --target x86_64-pc-windows-gnu` both clean. 
--- Cargo.lock | 7 + Cargo.toml | 3 + src/attachment/attachment_id.rs | 153 ++++++++++++ src/attachment/decoder/mod.rs | 122 ++++++++++ src/attachment/decoder/v1_xor.rs | 166 +++++++++++++ src/attachment/decoder/v2.rs | 130 ++++++++++ src/attachment/image_key/macos.rs | 10 + src/attachment/image_key/mod.rs | 34 +++ src/attachment/image_key/windows.rs | 10 + src/attachment/mod.rs | 28 +++ src/attachment/resolver.rs | 353 ++++++++++++++++++++++++++++ src/main.rs | 1 + 12 files changed, 1017 insertions(+) create mode 100644 src/attachment/attachment_id.rs create mode 100644 src/attachment/decoder/mod.rs create mode 100644 src/attachment/decoder/v1_xor.rs create mode 100644 src/attachment/decoder/v2.rs create mode 100644 src/attachment/image_key/macos.rs create mode 100644 src/attachment/image_key/mod.rs create mode 100644 src/attachment/image_key/windows.rs create mode 100644 src/attachment/mod.rs create mode 100644 src/attachment/resolver.rs diff --git a/Cargo.lock b/Cargo.lock index 912068a..a5cc78b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.1" @@ -1311,6 +1317,7 @@ version = "0.1.11" dependencies = [ "aes", "anyhow", + "base64", "cbc", "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 58c3224..a32b845 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,9 @@ dirs = "5" # MD5 (联系人表名 Msg_) md5 = "0.7" +# 附件 ID 编码(base64url) +base64 = "0.22" + # 正则表达式 regex = "1" roxmltree = "0.20" diff --git a/src/attachment/attachment_id.rs b/src/attachment/attachment_id.rs new file mode 100644 index 0000000..8af569e --- /dev/null +++ 
b/src/attachment/attachment_id.rs @@ -0,0 +1,153 @@ +//! 不透明附件 ID — 跨 CLI / IPC 的圆 trip 句柄。 +//! +//! 编码:`base64url_no_pad(serde_json(payload))`。 +//! 选择 base64url(json) 而不是紧凑 bit-pack: +//! - phase 1 求稳,不发明二进制协议 +//! - 后面加字段(`resource_md5` / `decoder_hint` 之类)老 CLI 不 break +//! - debug 直接 base64 -d | jq 看字段 +//! +//! ⚠️ `local_id` 在同一 chat 内会被 WeChat 复用(实测同 chat 最多 7 条同 local_id), +//! 所以 `(chat, local_id, create_time)` 三元组才是定位资源行的最小集。 + +use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AttachmentKind { + Image, + Video, + File, + Voice, +} + +impl AttachmentKind { + /// 从 message.local_type 推 attachment kind(只覆盖 phase 1 关心的几种)。 + /// 高 32 bit 是版本/会话 flag,要先 mask 到低 32 bit。 + pub fn from_local_type(local_type: i64) -> Option { + let lo = (local_type as u64) & 0xFFFF_FFFF; + match lo { + 3 => Some(AttachmentKind::Image), + 34 => Some(AttachmentKind::Voice), + 43 => Some(AttachmentKind::Video), + // type=49 是 appmsg,里面 subtype=6 才是文件;这里偏宽松返回 File, + // 由 resolver 进一步根据 appmsg subtype 决定是否真的能 extract + 49 => Some(AttachmentKind::File), + _ => None, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + AttachmentKind::Image => "image", + AttachmentKind::Video => "video", + AttachmentKind::File => "file", + AttachmentKind::Voice => "voice", + } + } +} + +/// 附件 ID payload(序列化后 base64url 编码)。 +/// +/// `v` 是版本字段,将来 schema 变了可以走分支兼容。当前 v=1。 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AttachmentId { + /// payload schema version + pub v: u32, + /// 会话 username(同时用于 ChatName2Id 查 chat_id 和拼 attach 路径) + pub chat: String, + /// 消息行的 local_id + pub local_id: i64, + /// 消息行的 create_time(unix 秒)— 用于 disambiguate 同 chat 内 local_id 复用 + pub create_time: i64, + /// 附件类别 + pub kind: AttachmentKind, + /// 可选 hint:消息所在 message_N.db 的 N。给定时 
resolver 可跳过 shard 扫描; + /// 缺省时 resolver 会按 `find_msg_tables` 逻辑全量扫 + #[serde(default, skip_serializing_if = "Option::is_none")] + pub db: Option, +} + +impl AttachmentId { + pub fn encode(&self) -> Result { + let json = serde_json::to_vec(self).context("序列化 AttachmentId")?; + Ok(URL_SAFE_NO_PAD.encode(json)) + } + + pub fn decode(s: &str) -> Result { + let bytes = URL_SAFE_NO_PAD + .decode(s.trim()) + .map_err(|e| anyhow!("attachment_id 不是合法 base64url: {}", e))?; + let id: AttachmentId = + serde_json::from_slice(&bytes).context("attachment_id payload 非合法 JSON")?; + if id.v != 1 { + return Err(anyhow!("不支持的 attachment_id 版本 v={}", id.v)); + } + Ok(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_minimal() { + let id = AttachmentId { + v: 1, + chat: "wxid_abc".to_string(), + local_id: 12345, + create_time: 1_715_678_901, + kind: AttachmentKind::Image, + db: None, + }; + let s = id.encode().unwrap(); + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.chat, id.chat); + assert_eq!(back.local_id, id.local_id); + assert_eq!(back.create_time, id.create_time); + assert_eq!(back.kind, id.kind); + assert_eq!(back.db, id.db); + } + + #[test] + fn round_trip_with_db_hint() { + let id = AttachmentId { + v: 1, + chat: "1234@chatroom".to_string(), + local_id: 42, + create_time: 1, + kind: AttachmentKind::Image, + db: Some(2), + }; + let s = id.encode().unwrap(); + assert!(!s.contains('=')); // base64url no-pad + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.db, Some(2)); + } + + #[test] + fn local_type_mask_high_bits() { + // monitor_web.py 里 image push 路径:高位带 flag,低 32 bit 是 3 + let high_flag = (0xDEAD_BEEFu64 << 32) as i64 | 3; + assert_eq!( + AttachmentKind::from_local_type(high_flag), + Some(AttachmentKind::Image) + ); + } + + #[test] + fn rejects_unknown_version() { + let id = AttachmentId { + v: 99, + chat: "x".to_string(), + local_id: 0, + create_time: 0, + kind: AttachmentKind::Image, + db: None, + }; + 
let s = id.encode().unwrap(); + assert!(AttachmentId::decode(&s).is_err()); + } +} diff --git a/src/attachment/decoder/mod.rs b/src/attachment/decoder/mod.rs new file mode 100644 index 0000000..a5723c5 --- /dev/null +++ b/src/attachment/decoder/mod.rs @@ -0,0 +1,122 @@ +//! `.dat` 文件解码:根据 6B header magic 分发到具体 decoder。 +//! +//! 三档: +//! | header[0..6] | decoder | 备注 | +//! |-------------------------|-------------------|-----------------------------------------| +//! | `07 08 V2 08 07` | `v2` | AES-128-ECB + XOR 混合,需要 image AES key | +//! | `07 08 V1 08 07` | `v1_aes` | 固定 AES key `cfcd208495d565ef` | +//! | (其他, 通常无 magic) | `v1_xor` | legacy single-byte XOR,magic 自动探测 | +//! +//! 决策点放在 `dispatch`,让上层(`resolver` / CLI extract 命令)只跟一个入口打交道。 + +use anyhow::{anyhow, Result}; + +pub mod v1_xor; +pub mod v2; + +/// 完整 V2 magic:`\x07\x08V2\x08\x07` +pub const V2_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'2', 0x08, 0x07]; +/// 完整 V1 magic:`\x07\x08V1\x08\x07` +pub const V1_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'1', 0x08, 0x07]; + +/// 解码后的产物 + 探测出的图片格式 +#[derive(Debug)] +pub struct DecodedImage { + pub data: Vec, + /// 推断出的图片扩展名(不带点),由 magic 决定。例如 "jpg" / "png" / "gif" / "webp" / + /// "tif" / "bmp" / "hevc"(wxgf 容器)/ "bin"(未识别) + pub format: &'static str, + /// 解码器名称("legacy_xor" / "v1_aes" / "v2"),用于 CLI 调试输出 + pub decoder: &'static str, +} + +/// 由 caller 提供的 V2 image AES key(codex 的 `image_key` 模块负责拿到)。 +/// 缺省时遇到 V2 文件会返回 `Err`,caller 可以拿到具体错误信息再处理。 +#[derive(Debug, Clone, Copy, Default)] +pub struct V2KeyMaterial<'a> { + pub aes_key: Option<&'a [u8; 16]>, + /// XOR key — WeChat 4.x 默认 0x88,可 override + pub xor_key: u8, +} + +impl<'a> V2KeyMaterial<'a> { + pub fn with_aes(key: &'a [u8; 16]) -> Self { + Self { aes_key: Some(key), xor_key: 0x88 } + } +} + +/// 根据 `dat_bytes` 头部 magic 自动分发到对应 decoder。 +/// +/// `v2_key` 仅在文件是 V2 magic 时被消费。 +pub fn dispatch(dat_bytes: &[u8], v2_key: V2KeyMaterial<'_>) -> Result { + if dat_bytes.len() >= 6 { + let head: &[u8; 6] = 
dat_bytes[..6].try_into().unwrap(); + if head == &V2_MAGIC { + return v2::decode(dat_bytes, v2_key); + } + if head == &V1_MAGIC { + // V1 fixed-AES: 固定 key = md5("0")[:16] = "cfcd208495d565ef" + let fixed_key: [u8; 16] = *b"cfcd208495d565ef"; + return v2::decode( + dat_bytes, + V2KeyMaterial { aes_key: Some(&fixed_key), xor_key: v2_key.xor_key }, + ) + .map(|mut d| { + d.decoder = "v1_aes"; + d + }); + } + } + if dat_bytes.is_empty() { + return Err(anyhow!("空 .dat 文件")); + } + v1_xor::decode(dat_bytes) +} + +/// 从解密后的字节流头部探测图片格式扩展名。 +/// +/// 与上游 `decode_image.py::detect_image_format` 一致;新增 wxgf (HEVC 裸流) 的探测, +/// 因为 V2 解码后产物可能直接是 wxgf 容器。 +pub fn detect_image_format(bytes: &[u8]) -> &'static str { + if bytes.len() >= 4 && &bytes[..4] == b"wxgf" { + return "hevc"; + } + if bytes.len() >= 3 && bytes[..3] == [0xFF, 0xD8, 0xFF] { + return "jpg"; + } + if bytes.len() >= 4 && bytes[..4] == [0x89, 0x50, 0x4E, 0x47] { + return "png"; + } + if bytes.len() >= 3 && &bytes[..3] == b"GIF" { + return "gif"; + } + if bytes.len() >= 12 && &bytes[..4] == b"RIFF" && &bytes[8..12] == b"WEBP" { + return "webp"; + } + if bytes.len() >= 4 && bytes[..4] == [0x49, 0x49, 0x2A, 0x00] { + return "tif"; + } + if bytes.len() >= 2 && &bytes[..2] == b"BM" { + return "bmp"; + } + "bin" +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detect_basic_formats() { + assert_eq!(detect_image_format(&[0xFF, 0xD8, 0xFF, 0xE0]), "jpg"); + assert_eq!(detect_image_format(&[0x89, 0x50, 0x4E, 0x47]), "png"); + assert_eq!(detect_image_format(b"GIF89a"), "gif"); + assert_eq!(detect_image_format(b"BM\0\0\0\0\0\0\0\0\0\0\0\0"), "bmp"); + let mut webp = b"RIFF\0\0\0\0WEBP".to_vec(); + webp.extend_from_slice(&[0; 4]); + assert_eq!(detect_image_format(&webp), "webp"); + assert_eq!(detect_image_format(&[0x49, 0x49, 0x2A, 0x00]), "tif"); + assert_eq!(detect_image_format(b"wxgfXXXX"), "hevc"); + assert_eq!(detect_image_format(&[0, 0, 0, 0]), "bin"); + } +} diff --git a/src/attachment/decoder/v1_xor.rs 
b/src/attachment/decoder/v1_xor.rs new file mode 100644 index 0000000..788383e --- /dev/null +++ b/src/attachment/decoder/v1_xor.rs @@ -0,0 +1,166 @@ +//! Legacy single-byte XOR decoder(无 magic 头的旧 .dat) +//! +//! 算法:用已知图片 magic 反推 XOR key —— `key = file[0] ^ magic[0]`。 +//! 然后用同一个 key 校验 `file[i] ^ key == magic[i]`,全部命中才接受这个 key。 +//! +//! 优先级(按 magic 长度降序,避免短 magic 假阳性): +//! PNG (4) > GIF (4) > TIF (4) > WEBP (4, RIFF) > JPG (3) > BMP (2, 需额外校验) +//! +//! BMP 只有 2 字节 magic,假阳性高;额外用 BMP file header 里的 +//! `bf_size`(offset 2, u32 LE)和 `bf_offset`(offset 10, u32 LE)做合理性校验: +//! - `|bf_size - file_size| < 1024`(允许微小 padding 差) +//! - `14 <= bf_offset <= 1078`(最大调色板 256*4 + header 14 = 1038,留点余量) + +use anyhow::{anyhow, Result}; + +use super::{detect_image_format, DecodedImage}; + +const PNG: &[u8] = &[0x89, 0x50, 0x4E, 0x47]; +const GIF: &[u8] = &[0x47, 0x49, 0x46, 0x38]; +const TIF: &[u8] = &[0x49, 0x49, 0x2A, 0x00]; +const WEBP_RIFF: &[u8] = &[0x52, 0x49, 0x46, 0x46]; +const JPG: &[u8] = &[0xFF, 0xD8, 0xFF]; +const BMP: &[u8] = &[0x42, 0x4D]; + +/// 在 `header` 上尝试一个固定 magic:返回 `Some(key)` 当且仅当所有字节都对得上。 +fn try_magic(header: &[u8], magic: &[u8]) -> Option { + if header.len() < magic.len() { + return None; + } + let key = header[0] ^ magic[0]; + for i in 1..magic.len() { + if header[i] ^ key != magic[i] { + return None; + } + } + Some(key) +} + +/// 探测 XOR key。失败返回 `None`(caller 决定是不是错)。 +pub fn detect_key(file_bytes: &[u8]) -> Option { + if file_bytes.len() < 4 { + return None; + } + let header = &file_bytes[..file_bytes.len().min(16)]; + + // 先试 3+ 字节 magic + for magic in [PNG, GIF, TIF, WEBP_RIFF, JPG] { + if let Some(k) = try_magic(header, magic) { + return Some(k); + } + } + + // 最后试 BMP(只有 2B magic,需额外校验) + if let Some(k) = try_magic(header, BMP) { + if header.len() >= 14 { + // 解 BMP file header 14 字节 + let mut dec = [0u8; 14]; + for i in 0..14 { + dec[i] = header[i] ^ k; + } + let bmp_size = u32::from_le_bytes([dec[2], dec[3], dec[4], dec[5]]); + let 
bmp_offset = u32::from_le_bytes([dec[10], dec[11], dec[12], dec[13]]); + let file_size = file_bytes.len() as u32; + // 允许 1024 字节 padding 差;offset 在合理范围 + if file_size.abs_diff(bmp_size) < 1024 && (14..=1078).contains(&bmp_offset) { + return Some(k); + } + } + } + + None +} + +/// XOR 解码整个 `.dat` 内容。 +pub fn decode(file_bytes: &[u8]) -> Result { + let key = + detect_key(file_bytes).ok_or_else(|| anyhow!("legacy XOR: 无法识别图片 magic(key 探测失败)"))?; + let data: Vec = file_bytes.iter().map(|b| b ^ key).collect(); + let format = detect_image_format(&data); + if format == "bin" { + return Err(anyhow!("legacy XOR: 解出 key=0x{:02x} 但产物 magic 不识别", key)); + } + Ok(DecodedImage { data, format, decoder: "legacy_xor" }) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// 把一段 plaintext 用单字节 key XOR 加密,模拟 .dat 文件 + fn xor_encrypt(plain: &[u8], key: u8) -> Vec { + plain.iter().map(|b| b ^ key).collect() + } + + #[test] + fn detect_jpg_key() { + let plain = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46]; + let enc = xor_encrypt(&plain, 0x3C); + assert_eq!(detect_key(&enc), Some(0x3C)); + } + + #[test] + fn detect_png_key() { + let mut plain = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xA5); + assert_eq!(detect_key(&enc), Some(0xA5)); + } + + #[test] + fn detect_gif_key() { + let mut plain = b"GIF89a".to_vec(); + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0x77); + assert_eq!(detect_key(&enc), Some(0x77)); + } + + #[test] + fn detect_webp_riff_key() { + let mut plain = b"RIFF\x00\x00\x00\x00WEBP".to_vec(); + plain.extend_from_slice(&[0; 8]); + let enc = xor_encrypt(&plain, 0x12); + assert_eq!(detect_key(&enc), Some(0x12)); + } + + #[test] + fn detect_tif_key() { + let mut plain = vec![0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xC3); + assert_eq!(detect_key(&enc), Some(0xC3)); + } + + 
#[test] + fn detect_bmp_with_valid_header() { + // BMP 14B header: 'BM' + size(u32 LE) + reserved(2*u16) + offset(u32 LE) + let mut plain = Vec::new(); + plain.extend_from_slice(b"BM"); + plain.extend_from_slice(&100u32.to_le_bytes()); // file_size = 100 + plain.extend_from_slice(&[0; 4]); // reserved + plain.extend_from_slice(&54u32.to_le_bytes()); // pixel data offset = 54 + plain.resize(100, 0); // 整个文件 100 字节,匹配 file_size + let enc = xor_encrypt(&plain, 0x55); + assert_eq!(detect_key(&enc), Some(0x55)); + } + + #[test] + fn reject_random_bytes() { + // 全 0 文件:BMP 检测会算出 key = 0x42 ^ 0 = 0x42, + // 但解密出的 BMP file_size = 0 vs file_size = 100,差距 > 1024 → + // 应该 reject + let bytes = vec![0u8; 100]; + assert_eq!(detect_key(&bytes), None); + } + + #[test] + fn decode_round_trip_jpg() { + let mut plain = vec![0xFF, 0xD8, 0xFF, 0xE0]; + plain.extend_from_slice(b"JFIF padding here"); + let enc = xor_encrypt(&plain, 0xAB); + let out = decode(&enc).unwrap(); + assert_eq!(out.format, "jpg"); + assert_eq!(out.decoder, "legacy_xor"); + assert_eq!(out.data, plain); + } +} diff --git a/src/attachment/decoder/v2.rs b/src/attachment/decoder/v2.rs new file mode 100644 index 0000000..1c90f29 --- /dev/null +++ b/src/attachment/decoder/v2.rs @@ -0,0 +1,130 @@ +//! V2 .dat 解码:`AES-128-ECB(PKCS7) + raw + XOR` 三段拼接。 +//! +//! 文件结构(来自上游 `decode_image.py::v2_decrypt_file`): +//! `[6B magic V2/V1] [4B aes_size LE] [4B xor_size LE] [1B padding]` +//! `[aligned_aes_size bytes AES-ECB ciphertext]` +//! `[len - aligned_aes_size - xor_size bytes raw_data (不加密)]` +//! `[xor_size bytes XOR (单字节 key)]` +//! +//! `aligned_aes_size`:把 `aes_size` 向上对齐到 16 的倍数;当 `aes_size` 本身是 +//! 16 的倍数时,PKCS7 还会再加一整块 padding,所以再 +16。等价于 +//! `aes_size + (16 - aes_size % 16)`。 +//! +//! ⚠️ 此模块由 codex 落地完整 V2 实现 + image key 模块。当前只提供一个 +//! `decode` 入口骨架,方便 v1_aes 路径(固定 key)和 dispatch 一起编译过。 +//! 
`aes_key=None` 时返回带具体诊断信息的错误。 + +use anyhow::{anyhow, bail, Result}; + +use super::{detect_image_format, DecodedImage, V2KeyMaterial, V1_MAGIC, V2_MAGIC}; + +const HEADER_SIZE: usize = 15; + +pub fn decode(file_bytes: &[u8], key: V2KeyMaterial<'_>) -> Result { + if file_bytes.len() < HEADER_SIZE { + bail!("V2 .dat: 文件过短({} < {} 字节)", file_bytes.len(), HEADER_SIZE); + } + let magic: &[u8; 6] = file_bytes[..6].try_into().unwrap(); + if magic != &V2_MAGIC && magic != &V1_MAGIC { + bail!("V2 .dat: header magic 不匹配 V1/V2"); + } + + let aes_key = key.aes_key.ok_or_else(|| { + anyhow!("V2 .dat: 需要 image AES key(codex 的 image_key 模块尚未填充)") + })?; + + let aes_size = u32::from_le_bytes(file_bytes[6..10].try_into().unwrap()) as usize; + let xor_size = u32::from_le_bytes(file_bytes[10..14].try_into().unwrap()) as usize; + + // PKCS7 对齐:aes_size 不是 16 的倍数 → 向上对齐;是 16 的倍数 → 再加一整块 + let aligned_aes_size = aes_size + (16 - (aes_size % 16)); + + let aes_end = HEADER_SIZE.checked_add(aligned_aes_size).ok_or_else(|| anyhow!("aes 段长度溢出"))?; + if aes_end > file_bytes.len() { + bail!( + "V2 .dat: 头部宣称 aes_size={} (aligned={}) 超过文件长度 {}", + aes_size, + aligned_aes_size, + file_bytes.len() + ); + } + let raw_end = file_bytes.len().checked_sub(xor_size).ok_or_else(|| { + anyhow!("V2 .dat: 头部宣称 xor_size={} 超过文件长度 {}", xor_size, file_bytes.len()) + })?; + if aes_end > raw_end { + bail!( + "V2 .dat: aes_end={} > raw_end={}(aes/xor 段重叠)", + aes_end, + raw_end + ); + } + + // === AES-128-ECB 解密 + PKCS7 unpad === + let aes_data = &file_bytes[HEADER_SIZE..aes_end]; + let dec_aes = aes_ecb_decrypt_pkcs7(aes_key, aes_data)?; + + // === Raw 段(未加密) === + let raw_data = &file_bytes[aes_end..raw_end]; + + // === XOR 段 === + let xor_data: Vec = file_bytes[raw_end..].iter().map(|b| b ^ key.xor_key).collect(); + + let mut out = Vec::with_capacity(dec_aes.len() + raw_data.len() + xor_data.len()); + out.extend_from_slice(&dec_aes); + out.extend_from_slice(raw_data); + out.extend_from_slice(&xor_data); + + 
let format = detect_image_format(&out); + if format == "bin" { + bail!("V2 .dat: AES 解密成功但产物 magic 不识别(key 可能错)"); + } + Ok(DecodedImage { data: out, format, decoder: "v2" }) +} + +/// AES-128-ECB 解密 + PKCS7 unpad。失败时返回 `Err`,不返回半结果。 +/// +/// 不引第三方 ECB 包;ECB 本身就是 block-by-block,手工跑就行。 +/// PKCS7 padding 由本函数最后一段做 strict 校验:长度 1..=16,且尾部全是同值字节。 +fn aes_ecb_decrypt_pkcs7(key: &[u8; 16], cipher: &[u8]) -> Result> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + if cipher.is_empty() || cipher.len() % 16 != 0 { + bail!("AES 输入长度 {} 不是 16 的倍数", cipher.len()); + } + let aes = aes::Aes128::new(key.into()); + let mut out = Vec::with_capacity(cipher.len()); + for chunk in cipher.chunks_exact(16) { + let mut block = GenericArray::clone_from_slice(chunk); + aes.decrypt_block(&mut block); + out.extend_from_slice(&block); + } + let pad = *out.last().ok_or_else(|| anyhow!("AES PKCS7: 空输出"))? as usize; + if pad == 0 || pad > 16 || pad > out.len() { + bail!("AES PKCS7: 非法 padding 长度 {}", pad); + } + let tail = &out[out.len() - pad..]; + if !tail.iter().all(|&b| b as usize == pad) { + bail!("AES PKCS7: padding 字节不一致"); + } + out.truncate(out.len() - pad); + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rejects_short_file() { + let r = decode(&[0u8; 4], V2KeyMaterial::default()); + assert!(r.is_err()); + } + + #[test] + fn rejects_v2_without_key() { + let mut buf = V2_MAGIC.to_vec(); + buf.extend_from_slice(&[0u8; HEADER_SIZE - 6]); + let r = decode(&buf, V2KeyMaterial::default()); + let err = r.unwrap_err().to_string(); + assert!(err.contains("AES key"), "{}", err); + } +} diff --git a/src/attachment/image_key/macos.rs b/src/attachment/image_key/macos.rs new file mode 100644 index 0000000..234d4e5 --- /dev/null +++ b/src/attachment/image_key/macos.rs @@ -0,0 +1,10 @@ +//! macOS V2 image AES key 提取。 +//! +//! 主路径:从 `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/key__*.statistic` +//! 
文件名拿 uin,然后 `md5(str(uin) + sanitize(wxid)).hex()[:16]` 派生 AES key。 +//! +//! Fallback:枚举 uin 候选 2^24 个(`uint32`,但 wxid 4-byte 前缀只看后 24 bit), +//! 通过 `md5(str(uin))[:4] == wxid 后 4 字节` 匹配。 +//! 上游 `find_image_key_macos.py` 实测 1-2s 完成。 +//! +//! ⚠️ codex 落实现。 diff --git a/src/attachment/image_key/mod.rs b/src/attachment/image_key/mod.rs new file mode 100644 index 0000000..ec4f8ad --- /dev/null +++ b/src/attachment/image_key/mod.rs @@ -0,0 +1,34 @@ +//! V2 image AES key 提取 — 平台相关。 +//! +//! ⚠️ 此模块由 codex 落地。本文件只放公共 trait + 平台 dispatch 占位。 +//! +//! 路径: +//! - macOS:磁盘派生(`key__*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`) +//! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24) +//! - Windows:扫 `Weixin.exe` 内存,匹配 `[a-zA-Z0-9]{32}` 候选,按已知 AES ciphertext-block +//! 反验(`find_image_key.py` / `find_image_key.c` 已写实) +//! - Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误 + +#[allow(dead_code)] +pub mod macos; +#[allow(dead_code)] +pub mod windows; + +use anyhow::Result; + +/// 单个 wxid 的 V2 image key 提取接口。 +/// +/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时是稳定的)。 +pub trait ImageKeyProvider { + /// 返回当前 wxid 的 16 字节 AES key。失败要带可执行的诊断(例如「macOS 没找到 + /// kvcomm cache,请确认微信已登录」/「Windows 进程不在跑」)。 + fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]>; +} + +/// 平台默认实现(codex 后续填)。 +/// +/// 调用方目前可以直接传 `None`,让 resolver 在遇到 V2 .dat 时报「image key 未提取」错。 +pub fn default_provider() -> Option> { + // TODO(codex): 按 cfg(target_os) 返回 macOS / Windows / 不支持 + None +} diff --git a/src/attachment/image_key/windows.rs b/src/attachment/image_key/windows.rs new file mode 100644 index 0000000..1a0080a --- /dev/null +++ b/src/attachment/image_key/windows.rs @@ -0,0 +1,10 @@ +//! Windows V2 image AES key 提取。 +//! +//! 扫 `Weixin.exe` 进程内存,匹配模式 `(?) → message_resource.db (ChatName2Id + MessageResourceInfo) +//! → packed_info protobuf md5 提取 → xwechat_files//msg/attach/.../Img/[_t|_h].dat +//! → magic 分发 (legacy XOR / V1 fixed-AES / V2 AES+XOR) → 写出实际图片 +//! +//! 模块切分: +//! 
- `attachment_id`:跨 IPC / CLI 的不透明 ID(base64url(json)) +//! - `resolver`:从 `attachment_id` 反查 message_resource.db,定位本地 .dat +//! - `decoder`:根据文件 magic 分发到具体解码器(V1 / V2 等) +//! - `image_key`:V2 image AES key 提取(macOS / Windows) +//! +//! V2 / image_key 模块由 codex 落地,先放空 stub 以便 V1 / resolver / CLI 不被 block。 + +// 此模块由分多个 PR/commit 增量启用: +// 1) 先落 attachment_id / decoder / resolver / image_key 骨架(本 commit) +// 2) IPC + CLI + daemon route 把它们串起来(后续 commit) +// 3) image_key 平台实现(codex 后续 commit) +// 在 step 1 完成、step 2 未到时,大量公开 API 仍未被引用,#[allow(dead_code)] 抑制噪音 +#![allow(dead_code)] + +pub mod attachment_id; +pub mod decoder; +pub mod resolver; +pub mod image_key; + +pub use attachment_id::{AttachmentId, AttachmentKind}; diff --git a/src/attachment/resolver.rs b/src/attachment/resolver.rs new file mode 100644 index 0000000..c32d84e --- /dev/null +++ b/src/attachment/resolver.rs @@ -0,0 +1,353 @@ +//! 把 `AttachmentId` 翻译成本地 `.dat` 路径。 +//! +//! 流程: +//! 1. `chat` username → `ChatName2Id.rowid`(资源库) +//! 2. `(chat_id, local_id)` + `ORDER BY message_create_time DESC LIMIT 1` → +//! `MessageResourceInfo.packed_info` +//! 3. 从 `packed_info` (protobuf) 提取 32 字节 ASCII hex MD5 +//! 4. 在 `/msg/attach///Img/[_t|_h].dat` +//! 下找对应文件,按 full > _h > _t 优先级选一个 +//! +//! `` 由 daemon 已知(同 `db_dir` 的父目录),路径 layout 平台差异: +//! - Linux: `~/Documents/xwechat_files/` +//! - macOS: `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/` +//! ⚠️ msg/attach/... 子树 layout 待我用真实账号验证;上游 docstring 只写了 Windows +//! 
- Windows: `\xwechat_files\`(root 从 `%APPDATA%\Tencent\xwechat\config\*.ini` 读) + +use anyhow::{anyhow, Context, Result}; +use chrono::TimeZone; +use rusqlite::Connection; +use std::path::{Path, PathBuf}; + +use super::AttachmentId; + +/// 单条 attachment 在资源库 + 本地 attach 树下的解析结果。 +#[derive(Debug, Clone)] +pub struct ResolvedAttachment { + pub id: AttachmentId, + /// 从 `packed_info` 提取出的资源 MD5(小写 hex) + pub md5: String, + /// 命中的本地 .dat 路径(按 full > _h > _t 优先级选一个) + pub dat_path: PathBuf, + /// 文件 size(字节) + pub size: u64, +} + +/// 仅 schema lookup(不去找本地 .dat)。 +/// 用于 `wx attachments` 列表时填 `md5` 字段——文件可能根本不在本地。 +#[derive(Debug, Clone)] +pub struct AttachmentMetadata { + pub md5: String, +} + +/// 用 `(chat, local_id)` 查 message_resource.db 拿 file md5。 +/// +/// 调用方传已经解密好的 `message_resource.db` 路径(由 daemon 的 `DBCache` 准备)。 +/// 同步函数 — caller 在 `spawn_blocking` 里跑。 +pub fn lookup_md5_blocking( + resource_db_path: &Path, + chat: &str, + local_id: i64, + msg_local_type_lo32: i64, +) -> Result> { + let conn = Connection::open_with_flags( + resource_db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI, + ) + .with_context(|| format!("打开 message_resource.db {:?}", resource_db_path))?; + + // 1) ChatName2Id: user_name -> rowid + let chat_id: Option = conn + .query_row( + "SELECT rowid FROM ChatName2Id WHERE user_name = ?1", + [chat], + |row| row.get(0), + ) + .ok(); + let Some(chat_id) = chat_id else { + return Ok(None); + }; + + // 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新 + // message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型 + let packed: Option> = conn + .query_row( + "SELECT packed_info FROM MessageResourceInfo + WHERE chat_id = ?1 + AND message_local_id = ?2 + AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3) + ORDER BY message_create_time DESC + LIMIT 1", + rusqlite::params![chat_id, local_id, msg_local_type_lo32], + |row| row.get(0), + ) + .ok(); + + let Some(blob) = packed 
else { + return Ok(None); + }; + Ok(extract_md5_from_packed_info(&blob).map(|md5| AttachmentMetadata { md5 })) +} + +/// 从 `MessageResourceInfo.packed_info` (protobuf) 提取 32 字节 ASCII hex md5。 +/// +/// 主路径:搜 4 字节 marker `12 22 0a 20`(field=2 LEN, length=34, sub field=1 LEN, length=32), +/// 紧跟 32 字节 ASCII hex。 +/// Fallback:扫整个 blob 找连续 32 字节合法 hex 字符。 +pub fn extract_md5_from_packed_info(blob: &[u8]) -> Option { + const MARKER: &[u8; 4] = &[0x12, 0x22, 0x0A, 0x20]; + + // 主路径 + if let Some(pos) = find_subslice(blob, MARKER) { + let start = pos + MARKER.len(); + if start + 32 <= blob.len() { + if let Ok(s) = std::str::from_utf8(&blob[start..start + 32]) { + if s.chars().all(|c| c.is_ascii_hexdigit()) { + return Some(s.to_ascii_lowercase()); + } + } + } + } + + // Fallback:连续 32 字节合法 hex + if blob.len() >= 32 { + for start in 0..=blob.len() - 32 { + let chunk = &blob[start..start + 32]; + if let Ok(s) = std::str::from_utf8(chunk) { + if s.chars().all(|c| c.is_ascii_hexdigit()) { + return Some(s.to_ascii_lowercase()); + } + } + } + } + None +} + +/// 简单的子串扫描(避免拉 memchr/memmem 依赖;blob 通常 < 1KB) +fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() || needle.len() > haystack.len() { + return None; + } + haystack + .windows(needle.len()) + .position(|w| w == needle) +} + +/// 在 `///Img/[_t|_h].dat` 下找文件。 +/// +/// 优先级:full > `_h`(HD thumbnail)> `_t`(thumbnail)。返回最优的一个; +/// 找不到返回 None。 +/// +/// `attach_root` = `/msg/attach`。 +/// `create_time` 用于先定位 `` 子目录;找不到时再 fallback 全月份扫描, +/// 因为 WeChat 的 `YYYY-MM` 目录有时跟消息时间差 1 个月(按收到时间归档)。 +pub fn find_dat_file( + attach_root: &Path, + chat: &str, + file_md5: &str, + create_time: i64, +) -> Option { + let chat_hash = format!("{:x}", md5::compute(chat.as_bytes())); + let chat_dir = attach_root.join(&chat_hash); + if !chat_dir.is_dir() { + return None; + } + + // 第一步:试 create_time 当月 + 前后各一个月(共 3 个候选目录) + let candidates_ym: Vec = three_month_candidates(create_time); + for ym in &candidates_ym { + let 
img_dir = chat_dir.join(ym).join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + + // 第二步 fallback:扫整个 chat_dir 的所有月份子目录 + let entries = std::fs::read_dir(&chat_dir).ok()?; + let mut all_months: Vec = entries + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.is_dir()) + .collect(); + // 已经试过的 3 个候选可以跳过,但成本极小;保留全量扫 + all_months.sort(); + for month_dir in all_months { + let img_dir = month_dir.join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + None +} + +fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option { + if !img_dir.is_dir() { + return None; + } + let full = img_dir.join(format!("{}.dat", file_md5)); + if full.is_file() { + return Some(full); + } + let hd = img_dir.join(format!("{}_h.dat", file_md5)); + if hd.is_file() { + return Some(hd); + } + let thumb = img_dir.join(format!("{}_t.dat", file_md5)); + if thumb.is_file() { + return Some(thumb); + } + None +} + +fn three_month_candidates(unix_ts: i64) -> Vec { + use chrono::{Datelike, Duration}; + let dt = match chrono::Local.timestamp_opt(unix_ts, 0).single() { + Some(d) => d, + None => return Vec::new(), + }; + let prev = dt - Duration::days(31); + let next = dt + Duration::days(31); + [prev, dt, next] + .iter() + .map(|d| format!("{:04}-{:02}", d.year(), d.month())) + .collect() +} + +/// 把 `` (即 `db_storage` 父目录)拼成 `/msg/attach`。 +pub fn attach_root_for(wxchat_base: &Path) -> PathBuf { + wxchat_base.join("msg").join("attach") +} + +/// 完整流程:用 `attachment_id` 拿 md5 + 找 .dat。失败返回带具体诊断信息的 `Err`。 +/// +/// `resource_db_path` 由 daemon 提供(DBCache 已经解密好); +/// `attach_root` 由 caller 拼好(`attach_root_for(wxchat_base)`)。 +/// 同步函数 — caller 在 `spawn_blocking` 里跑。 +pub fn resolve_blocking( + id: &AttachmentId, + resource_db_path: &Path, + attach_root: &Path, +) -> Result { + let lo32_type: i64 = match id.kind { + super::AttachmentKind::Image => 3, + super::AttachmentKind::Voice => 34, + 
super::AttachmentKind::Video => 43, + super::AttachmentKind::File => 49, + }; + + let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)? + .ok_or_else(|| { + anyhow!( + "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", + id.chat, + id.local_id, + lo32_type + ) + })?; + + let dat_path = find_dat_file(attach_root, &id.chat, &meta.md5, id.create_time).ok_or_else( + || { + anyhow!( + "找不到本地 .dat(md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理", + meta.md5, + id.chat, + id.create_time + ) + }, + )?; + let size = std::fs::metadata(&dat_path).map(|m| m.len()).unwrap_or(0); + + Ok(ResolvedAttachment { id: id.clone(), md5: meta.md5, dat_path, size }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_md5_main_path() { + // 构造一段含 12 22 0a 20 marker 的 blob + let mut blob = vec![0xAA, 0xBB, 0xCC]; + blob.extend_from_slice(&[0x12, 0x22, 0x0A, 0x20]); + blob.extend_from_slice(b"deadbeefcafebabe1234567890abcdef"); + blob.extend_from_slice(&[0xFF, 0xFF]); + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_fallback_no_marker() { + // 没有 marker,但 blob 里有合法 32 字节 hex + let mut blob = vec![0xFF, 0x00]; + blob.extend_from_slice(b"00112233445566778899aabbccddeeff"); + blob.extend_from_slice(&[0x01]); + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("00112233445566778899aabbccddeeff".to_string()) + ); + } + + #[test] + fn extract_md5_uppercase_normalized_to_lower() { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"DEADBEEFCAFEBABE1234567890ABCDEF"); + // 上游/CI/本地 file md5 都是 lowercase;强制小写化避免大小写不一致导致命中失败 + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_returns_none_on_garbage() { + let blob = vec![0; 16]; + assert!(extract_md5_from_packed_info(&blob).is_none()); + } + + #[test] + 
fn three_month_candidates_includes_prev_curr_next() { + // 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09 + let ts = chrono::Local + .with_ymd_and_hms(2025, 8, 15, 12, 0, 0) + .unwrap() + .timestamp(); + let v = three_month_candidates(ts); + assert!(v.contains(&"2025-07".to_string())); + assert!(v.contains(&"2025-08".to_string())); + assert!(v.contains(&"2025-09".to_string())); + } + + #[test] + fn pick_best_prefers_full_then_h_then_t() { + let tmp = tempdir_for_test(); + let img = tmp.join("Img"); + std::fs::create_dir_all(&img).unwrap(); + let md5 = "abcd1234"; + std::fs::write(img.join(format!("{}_t.dat", md5)), b"thumb").unwrap(); + std::fs::write(img.join(format!("{}_h.dat", md5)), b"hd").unwrap(); + // 只有 _t / _h 时取 _h + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}_h.dat", md5).as_str() + ); + // 加 full 后取 full + std::fs::write(img.join(format!("{}.dat", md5)), b"full").unwrap(); + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}.dat", md5).as_str() + ); + } + + fn tempdir_for_test() -> PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-attach-test-{}-{}", pid, nanos)); + std::fs::create_dir_all(&p).unwrap(); + p + } +} diff --git a/src/main.rs b/src/main.rs index 6c3f9a2..e6385fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod crypto; mod scanner; mod daemon; mod cli; +mod attachment; fn main() { if std::env::var("WX_DAEMON_MODE").is_ok() { From bf8d0d934af2afda963ed79909a11f8dde532934 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:34:38 +0800 Subject: [PATCH 2/5] feat(attachment): implement V2 image key providers --- src/attachment/image_key/linux.rs | 11 + src/attachment/image_key/macos.rs | 427 +++++++++++++++++++++++++++- src/attachment/image_key/mod.rs | 332 
++++++++++++++++++++- src/attachment/image_key/windows.rs | 244 +++++++++++++++- 4 files changed, 987 insertions(+), 27 deletions(-) create mode 100644 src/attachment/image_key/linux.rs diff --git a/src/attachment/image_key/linux.rs b/src/attachment/image_key/linux.rs new file mode 100644 index 0000000..4100ab2 --- /dev/null +++ b/src/attachment/image_key/linux.rs @@ -0,0 +1,11 @@ +use anyhow::{bail, Result}; + +use super::{ImageKeyMaterial, ImageKeyProvider}; + +pub struct LinuxImageKeyProvider; + +impl ImageKeyProvider for LinuxImageKeyProvider { + fn get_key(&self, _wxid: &str) -> Result { + bail!("Linux V2 图片 key 当前未实现;请先用 legacy/V1 图片或在 README 中标注 unsupported") + } +} diff --git a/src/attachment/image_key/macos.rs b/src/attachment/image_key/macos.rs index 234d4e5..127d81c 100644 --- a/src/attachment/image_key/macos.rs +++ b/src/attachment/image_key/macos.rs @@ -1,10 +1,423 @@ //! macOS V2 image AES key 提取。 //! -//! 主路径:从 `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/key__*.statistic` -//! 文件名拿 uin,然后 `md5(str(uin) + sanitize(wxid)).hex()[:16]` 派生 AES key。 +//! 主路径:从 `key__*.statistic` 文件名拿 uin,然后 +//! `md5(str(uin) + normalize(wxid)).hex()[:16]` 派生 AES key。 //! -//! Fallback:枚举 uin 候选 2^24 个(`uint32`,但 wxid 4-byte 前缀只看后 24 bit), -//! 通过 `md5(str(uin))[:4] == wxid 后 4 字节` 匹配。 -//! 上游 `find_image_key_macos.py` 实测 1-2s 完成。 -//! -//! ⚠️ codex 落实现。 +//! fallback:通过 `md5(str(uin))[:4] == wxid_suffix` + `uin & 0xff == xor_key` +//! 
把搜索空间压到 2^24,再用 V2 模板反验 AES key。 + +use anyhow::{bail, Context, Result}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; + +use crate::config; + +use super::{ + attach_root_for_db_dir, configured_db_dir_for_wxid, derive_xor_key_from_v2_dat, + find_v2_template_ciphertexts, join_components, normalize_wxid, verify_aes_key, wxid_from_db_dir, + ImageKeyMaterial, ImageKeyProvider, +}; + +pub struct MacosImageKeyProvider { + configured_db_dir: Result, + cache: Mutex>, +} + +impl MacosImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for MacosImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = normalize_wxid(wxid); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = derive_key_for_paths(&db_dir, &attach_dir)?; + self.cache.lock().unwrap().insert(cache_key, key); + Ok(key) + } +} + +fn derive_key_for_paths(db_dir: &Path, attach_dir: &Path) -> Result { + let templates = find_v2_template_ciphertexts(attach_dir, 3, 64)?; + if templates.is_empty() { + bail!("在 {} 下找不到 V2 模板文件", attach_dir.display()); + } + + if let Some(found) = find_via_kvcomm(db_dir, &templates)? { + return Ok(found); + } + + let (wxid_full, wxid_norm, suffix) = + extract_wxid_parts(db_dir).context("db_dir 不含可用于 fallback 的 wxid 4 位后缀")?; + let (xor_key, _votes, _total) = derive_xor_key_from_v2_dat(attach_dir, 10, 3)? 
+ .context("V2 .dat 样本不足,无法投票反推 xor_key")?; + + for wxid in preferred_wxid_candidates(&wxid_full, &wxid_norm) { + if let Some(aes_key) = bruteforce_aes_key(xor_key, &suffix, wxid, &templates)? { + return Ok(ImageKeyMaterial { aes_key, xor_key }); + } + } + + bail!("macOS V2 图片 key 派生失败") +} + +fn find_via_kvcomm(db_dir: &Path, templates: &[[u8; 16]]) -> Result> { + let Some(kvcomm_dir) = find_existing_kvcomm_dir(db_dir) else { + return Ok(None); + }; + + let codes = collect_kvcomm_codes(&kvcomm_dir)?; + if codes.is_empty() { + return Ok(None); + } + let wxids = collect_wxid_candidates(db_dir); + if wxids.is_empty() { + return Ok(None); + } + + for wxid in wxids { + for code in &codes { + let candidate = derive_image_key_material(*code, &wxid); + if verify_aes_key(&candidate.aes_key, templates) { + return Ok(Some(candidate)); + } + } + } + Ok(None) +} + +fn derive_image_key_material(code: u32, wxid: &str) -> ImageKeyMaterial { + let xor_key = (code & 0xFF) as u8; + let digest = format!("{:x}", md5::compute(format!("{}{}", code, wxid))); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + ImageKeyMaterial { aes_key, xor_key } +} + +fn collect_wxid_candidates(db_dir: &Path) -> Vec { + let Some(raw) = wxid_from_db_dir(db_dir) else { + return Vec::new(); + }; + let mut out = vec![raw.clone()]; + let normalized = normalize_wxid(&raw); + if normalized != raw { + out.push(normalized); + } + out +} + +fn extract_wxid_parts(db_dir: &Path) -> Option<(String, String, String)> { + let raw = wxid_from_db_dir(db_dir)?; + let idx = raw.rfind('_')?; + let suffix = &raw[idx + 1..]; + if suffix.len() != 4 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) { + return None; + } + Some((raw.clone(), normalize_wxid(&raw), suffix.to_ascii_lowercase())) +} + +fn preferred_wxid_candidates<'a>(raw: &'a str, normalized: &'a str) -> Vec<&'a str> { + if raw == normalized { + vec![raw] + } else { + vec![normalized, raw] + } +} + +fn 
derive_kvcomm_dir_candidates(db_dir: &Path) -> Vec { + let parts: Vec = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()) + .collect(); + + let mut candidates = Vec::new(); + if let Some(idx) = parts.iter().position(|part| part == "xwechat_files") { + let documents_root = join_components(&parts[..idx]); + candidates.push(documents_root.join("app_data/net/kvcomm")); + candidates.push(documents_root.join("xwechat/net/kvcomm")); + if idx >= 1 { + let container_root = join_components(&parts[..idx - 1]); + candidates.push( + container_root + .join("Library/Application Support/com.tencent.xinWeChat/xwechat/net/kvcomm"), + ); + candidates.push( + container_root.join("Library/Application Support/com.tencent.xinWeChat/net/kvcomm"), + ); + } + } + if let Some(home) = dirs::home_dir() { + candidates.push( + home.join("Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm"), + ); + } + + let mut dedup = Vec::new(); + for candidate in candidates { + if !dedup.contains(&candidate) { + dedup.push(candidate); + } + } + dedup +} + +fn find_existing_kvcomm_dir(db_dir: &Path) -> Option { + derive_kvcomm_dir_candidates(db_dir) + .into_iter() + .find(|path| path.is_dir()) +} + +fn collect_kvcomm_codes(kvcomm_dir: &Path) -> Result> { + let mut codes = std::collections::BTreeSet::new(); + for entry in std::fs::read_dir(kvcomm_dir)? 
{ + let entry = entry?; + let Some(name) = entry.file_name().to_str().map(|value| value.to_string()) else { + continue; + }; + let Some(rest) = name.strip_prefix("key_") else { + continue; + }; + let Some((code, _)) = rest.split_once('_') else { + continue; + }; + if let Ok(code) = code.parse::() { + codes.insert(code); + } + } + Ok(codes.into_iter().collect()) +} + +fn bruteforce_aes_key( + xor_key: u8, + suffix_hex: &str, + wxid: &str, + templates: &[[u8; 16]], +) -> Result> { + let suffix = hex_prefix_to_bytes(suffix_hex)?; + let workers = std::thread::available_parallelism() + .map(|count| count.get()) + .unwrap_or(1) + .max(1); + let total = 1u32 << 24; + let chunk = total / workers as u32; + let stop = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel(); + let wxid = Arc::new(wxid.as_bytes().to_vec()); + let templates = Arc::new(templates.to_vec()); + + std::thread::scope(|scope| { + for idx in 0..workers { + let start = idx as u32 * chunk; + let end = if idx + 1 == workers { + total + } else { + (idx as u32 + 1) * chunk + }; + let stop = Arc::clone(&stop); + let tx = tx.clone(); + let wxid = Arc::clone(&wxid); + let templates = Arc::clone(&templates); + scope.spawn(move || { + for upper in start..end { + if stop.load(Ordering::Relaxed) { + break; + } + let uin = (upper << 8) | xor_key as u32; + let uin_ascii = uin.to_string(); + let digest = md5::compute(uin_ascii.as_bytes()); + if digest.0[0] != suffix[0] || digest.0[1] != suffix[1] { + continue; + } + + let mut input = Vec::with_capacity(uin_ascii.len() + wxid.len()); + input.extend_from_slice(uin_ascii.as_bytes()); + input.extend_from_slice(&wxid); + let aes_hex = format!("{:x}", md5::compute(input)); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&aes_hex.as_bytes()[..16]); + if verify_aes_key(&aes_key, &templates) { + stop.store(true, Ordering::Relaxed); + let _ = tx.send(aes_key); + break; + } + } + }); + } + }); + drop(tx); + Ok(rx.try_iter().next()) +} + +fn 
hex_prefix_to_bytes(hex: &str) -> Result<[u8; 2]> { + if hex.len() != 4 { + bail!("wxid suffix 不是 4 位 hex: {}", hex); + } + let hi = u8::from_str_radix(&hex[..2], 16)?; + let lo = u8::from_str_radix(&hex[2..], 16)?; + Ok([hi, lo]) +} + +#[cfg(test)] +mod tests { + use super::{derive_key_for_paths, find_existing_kvcomm_dir}; + use super::collect_wxid_candidates; + use crate::attachment::image_key::normalize_wxid; + use aes::cipher::{generic_array::GenericArray, BlockEncrypt, KeyInit}; + use aes::Aes128; + use std::fs; + use std::path::Path; + + fn temp_dir(label: &str) -> std::path::PathBuf { + let mut dir = std::env::temp_dir(); + dir.push(format!( + "wx-cli-image-key-macos-{}-{:?}", + label, + std::thread::current().id() + )); + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + dir + } + + fn write_v2_template(path: &Path, aes_key: &[u8; 16], xor_key: u8, plaintext: &[u8; 16]) { + let cipher = Aes128::new(aes_key.into()); + let mut block = GenericArray::clone_from_slice(plaintext); + cipher.encrypt_block(&mut block); + + let mut data = Vec::new(); + data.extend_from_slice(&crate::attachment::decoder::V2_MAGIC); + data.extend_from_slice(&0u32.to_le_bytes()); + data.extend_from_slice(&0u32.to_le_bytes()); + data.push(0); + data.extend_from_slice(&block); + data.push(0); + data.push(0xD9 ^ xor_key); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, data).unwrap(); + } + + #[test] + fn normalize_wxid_matches_expected_shapes() { + assert_eq!(normalize_wxid("wxid_abc_def"), "wxid_abc"); + assert_eq!(normalize_wxid("your_wxid_a1b2"), "your_wxid"); + assert_eq!(normalize_wxid("plain"), "plain"); + } + + #[test] + fn kvcomm_path_detection_works() { + let dir = temp_dir("kvcomm"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + let kvcomm = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm", + ); + 
fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + assert_eq!(find_existing_kvcomm_dir(&db_dir), Some(kvcomm)); + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_kvcomm() { + let dir = temp_dir("via-kvcomm"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + let attach = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/msg/attach/chat/2026-05/Img", + ); + let kvcomm = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm", + ); + fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + fs::write(kvcomm.join("key_42_x.statistic"), b"").unwrap(); + + let digest = format!("{:x}", md5::compute("42your_wxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + write_v2_template( + &attach.join("sample_t.dat"), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_bruteforce_fallback() { + let dir = temp_dir("via-fallback"); + let suffix = format!("{:x}", md5::compute("42")) + .chars() + .take(4) + .collect::(); + let raw_wxid = format!("mywxid_{}", suffix); + let db_dir = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/db_storage", + raw_wxid + )); + let attach = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/msg/attach/chat/2026-05/Img", + raw_wxid + )); + fs::create_dir_all(&db_dir).unwrap(); + + let digest = format!("{:x}", md5::compute("42mywxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + for idx 
in 0..3 { + write_v2_template( + &attach.join(format!("sample{}_t.dat", idx)), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + } + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn collects_raw_and_normalized_wxid() { + let dir = temp_dir("wxid"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + fs::create_dir_all(&db_dir).unwrap(); + let wxids = collect_wxid_candidates(&db_dir); + assert_eq!(wxids, vec!["your_wxid_a1b2".to_string(), "your_wxid".to_string()]); + let _ = fs::remove_dir_all(dir); + } +} diff --git a/src/attachment/image_key/mod.rs b/src/attachment/image_key/mod.rs index ec4f8ad..74eee30 100644 --- a/src/attachment/image_key/mod.rs +++ b/src/attachment/image_key/mod.rs @@ -1,7 +1,5 @@ //! V2 image AES key 提取 — 平台相关。 //! -//! ⚠️ 此模块由 codex 落地。本文件只放公共 trait + 平台 dispatch 占位。 -//! //! 路径: //! - macOS:磁盘派生(`key__*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`) //! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24) @@ -9,26 +7,336 @@ //! 反验(`find_image_key.py` / `find_image_key.c` 已写实) //! 
- Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误 -#[allow(dead_code)] +#[cfg(target_os = "linux")] +pub mod linux; +#[cfg(target_os = "macos")] pub mod macos; -#[allow(dead_code)] +#[cfg(target_os = "windows")] pub mod windows; use anyhow::Result; +use regex::bytes::Regex; +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use crate::attachment::decoder::{detect_image_format, V2_MAGIC}; + +/// V2 图片真正需要的是两份材料: +/// - 16 字节 ASCII AES key +/// - XOR key(macOS 上来自 uin & 0xff,不是总能硬编码成 0x88) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ImageKeyMaterial { + pub aes_key: [u8; 16], + pub xor_key: u8, +} /// 单个 wxid 的 V2 image key 提取接口。 /// -/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时是稳定的)。 +/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时通常稳定)。 pub trait ImageKeyProvider { - /// 返回当前 wxid 的 16 字节 AES key。失败要带可执行的诊断(例如「macOS 没找到 - /// kvcomm cache,请确认微信已登录」/「Windows 进程不在跑」)。 - fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]>; + fn get_key(&self, wxid: &str) -> Result; + + fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]> { + Ok(self.get_key(wxid)?.aes_key) + } + + fn get_xor_key(&self, wxid: &str) -> Result { + Ok(self.get_key(wxid)?.xor_key) + } } -/// 平台默认实现(codex 后续填)。 -/// -/// 调用方目前可以直接传 `None`,让 resolver 在遇到 V2 .dat 时报「image key 未提取」错。 +/// 平台默认实现。 pub fn default_provider() -> Option> { - // TODO(codex): 按 cfg(target_os) 返回 macOS / Windows / 不支持 + #[cfg(target_os = "macos")] + { + return Some(Box::new(macos::MacosImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "windows")] + { + return Some(Box::new(windows::WindowsImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "linux")] + { + return Some(Box::new(linux::LinuxImageKeyProvider)); + } + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] + { + None + } +} + +pub(crate) fn configured_db_dir_for_wxid(configured_db_dir: &Path, requested_wxid: &str) -> PathBuf { + if 
requested_wxid.trim().is_empty() { + return configured_db_dir.to_path_buf(); + } + + let configured_leaf = wxid_from_db_dir(configured_db_dir); + if let Some(leaf) = configured_leaf.as_deref() { + if same_wxid(leaf, requested_wxid) { + return configured_db_dir.to_path_buf(); + } + } + + xwechat_files_root(configured_db_dir) + .map(|root| root.join(requested_wxid).join("db_storage")) + .unwrap_or_else(|| configured_db_dir.to_path_buf()) +} + +pub(crate) fn wxid_from_db_dir(db_dir: &Path) -> Option { + let mut components = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()); + while let Some(component) = components.next() { + if component == "xwechat_files" { + return components.next(); + } + } None } + +pub(crate) fn xwechat_files_root(db_dir: &Path) -> Option { + let parts: Vec<_> = db_dir + .components() + .map(|component| component.as_os_str().to_string_lossy().into_owned()) + .collect(); + let idx = parts.iter().position(|part| part == "xwechat_files")?; + Some(join_components(&parts[..=idx])) +} + +pub(crate) fn normalize_wxid(raw: &str) -> String { + let raw = raw.trim(); + if raw.is_empty() { + return String::new(); + } + if let Some(stripped) = raw.strip_prefix("wxid_") { + let head = stripped.split('_').next().unwrap_or(stripped); + return format!("wxid_{}", head); + } + if let Some((base, suffix)) = raw.rsplit_once('_') { + if suffix.len() == 4 && suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) { + return base.to_string(); + } + } + raw.to_string() +} + +pub(crate) fn same_wxid(a: &str, b: &str) -> bool { + a == b || normalize_wxid(a) == normalize_wxid(b) +} + +pub(crate) fn join_components(parts: &[String]) -> PathBuf { + let mut out = if parts.first().map(|part| part.is_empty()).unwrap_or(false) { + PathBuf::from("/") + } else { + PathBuf::new() + }; + for part in parts { + if part.is_empty() { + continue; + } + out.push(part); + } + out +} + +pub(crate) fn attach_root_for_db_dir(db_dir: &Path) -> PathBuf { 
+ db_dir + .parent() + .map(|base| base.join("msg").join("attach")) + .unwrap_or_else(|| PathBuf::from("msg/attach")) +} + +pub(crate) fn find_v2_template_ciphertexts( + attach_dir: &Path, + max_templates: usize, + max_files: usize, +) -> Result> { + if !attach_dir.is_dir() { + return Ok(Vec::new()); + } + + let mut out = collect_templates_with_suffix(attach_dir, "_t.dat", max_templates, max_files)?; + if out.is_empty() { + out = collect_templates_with_suffix(attach_dir, ".dat", max_templates, max_files)?; + } + Ok(out) +} + +pub(crate) fn derive_xor_key_from_v2_dat( + attach_dir: &Path, + sample: usize, + min_samples: usize, +) -> Result> { + if !attach_dir.is_dir() { + return Ok(None); + } + let mut votes = Vec::new(); + visit_files(attach_dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(".dat") { + return Ok(false); + } + + let meta = fs::metadata(path)?; + if meta.len() < 0x20 { + return Ok(false); + } + + let bytes = fs::read(path)?; + if bytes.starts_with(&V2_MAGIC) { + let last = *bytes.last().unwrap(); + votes.push(last ^ 0xD9); + if votes.len() >= sample { + return Ok(true); + } + } + Ok(false) + })?; + + if votes.len() < min_samples { + return Ok(None); + } + + let mut counts = [0usize; 256]; + for vote in &votes { + counts[*vote as usize] += 1; + } + let (xor_key, top_votes) = counts + .iter() + .enumerate() + .max_by_key(|(_, count)| *count) + .map(|(idx, count)| (idx as u8, *count)) + .expect("votes 非空"); + Ok(Some((xor_key, top_votes, votes.len()))) +} + +pub(crate) fn verify_aes_key(aes_key: &[u8; 16], templates: &[[u8; 16]]) -> bool { + !templates.is_empty() + && templates + .iter() + .all(|template| decrypt_template_block(aes_key, template).is_some()) +} + +pub(crate) fn ascii_alnum_candidates<'a>(buf: &'a [u8], len: usize) -> Vec<&'a [u8]> { + let re = match len { + 16 => regex16(), + 32 => regex32(), + _ => return Vec::new(), + }; + + 
re.find_iter(buf) + .filter_map(|matched| { + let start = matched.start(); + let end = matched.end(); + let left_ok = start == 0 || !buf[start - 1].is_ascii_alphanumeric(); + let right_ok = end == buf.len() || !buf[end].is_ascii_alphanumeric(); + (left_ok && right_ok).then_some(&buf[start..end]) + }) + .collect() +} + +fn collect_templates_with_suffix( + dir: &Path, + suffix: &str, + max_templates: usize, + max_files: usize, +) -> Result> { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + let mut examined = 0usize; + visit_files(dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(suffix) { + return Ok(false); + } + examined += 1; + let bytes = fs::read(path)?; + if bytes.len() >= 0x1F && bytes.starts_with(&V2_MAGIC) { + let template: [u8; 16] = bytes[0x0F..0x1F].try_into().unwrap(); + if seen.insert(template) { + out.push(template); + if out.len() >= max_templates { + return Ok(true); + } + } + } + Ok(examined >= max_files && !out.is_empty()) + })?; + Ok(out) +} + +fn visit_files(dir: &Path, f: &mut F) -> Result +where + F: FnMut(&Path) -> Result, +{ + let mut entries: Vec = fs::read_dir(dir)? + .flatten() + .map(|entry| entry.path()) + .collect(); + entries.sort(); + + for path in entries { + if path.is_dir() { + if visit_files(&path, f)? { + return Ok(true); + } + continue; + } + if f(&path)? 
{ + return Ok(true); + } + } + Ok(false) +} + +fn decrypt_template_block(aes_key: &[u8; 16], ciphertext: &[u8; 16]) -> Option<&'static str> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + + let cipher = aes::Aes128::new(aes_key.into()); + let mut block = GenericArray::clone_from_slice(ciphertext); + cipher.decrypt_block(&mut block); + let block: [u8; 16] = block.as_slice().try_into().ok()?; + let format = detect_image_format(&block); + (format != "bin").then_some(format) +} + +fn regex16() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{16}").unwrap()) +} + +fn regex32() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{32}").unwrap()) +} + +#[cfg(test)] +mod tests { + use super::{ascii_alnum_candidates, normalize_wxid, same_wxid}; + + #[test] + fn regex_candidates_respect_boundaries() { + let buf = b"xx 0123456789ABCDef yy"; + let hits = ascii_alnum_candidates(buf, 16); + assert_eq!(hits, vec![&buf[3..19]]); + } + + #[test] + fn regex_candidates_ignore_embedded_runs() { + let buf = b"x0123456789ABCDefz"; + assert!(ascii_alnum_candidates(buf, 16).is_empty()); + } + + #[test] + fn wxid_normalization_matches_expected_forms() { + assert_eq!(normalize_wxid("wxid_abc_def"), "wxid_abc"); + assert_eq!(normalize_wxid("your_wxid_a1b2"), "your_wxid"); + assert!(same_wxid("your_wxid_a1b2", "your_wxid")); + } +} diff --git a/src/attachment/image_key/windows.rs b/src/attachment/image_key/windows.rs index 1a0080a..0b7acd8 100644 --- a/src/attachment/image_key/windows.rs +++ b/src/attachment/image_key/windows.rs @@ -1,10 +1,238 @@ //! Windows V2 image AES key 提取。 //! -//! 
扫 `Weixin.exe` 进程内存,匹配模式 `(?, + cache: Mutex>, +} + +impl WindowsImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for WindowsImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = wxid.trim().to_string(); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = derive_key_for_paths(&attach_dir)?; + self.cache.lock().unwrap().insert(cache_key, key); + Ok(key) + } +} + +fn derive_key_for_paths(attach_dir: &std::path::Path) -> Result { + let templates = find_v2_template_ciphertexts(attach_dir, 3, 64)?; + if templates.is_empty() { + bail!("在 {} 下找不到 V2 模板文件", attach_dir.display()); + } + let xor_key = derive_xor_key_from_v2_dat(attach_dir, 10, 3)? + .map(|(key, _, _)| key) + .unwrap_or(0x88); + + let pid = find_wechat_pid().context("找不到 Weixin.exe 进程,请确认微信正在运行")?; + let process = unsafe { + OpenProcess(PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, false, pid) + .context("OpenProcess 失败,请以管理员权限运行")? + }; + + let aes_key = scan_memory_for_key(process, &templates); + unsafe { + let _ = CloseHandle(process); + } + + Ok(ImageKeyMaterial { + aes_key: aes_key?, + xor_key, + }) +} + +fn find_wechat_pid() -> Option { + let snapshot = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? 
}; + let mut entry = PROCESSENTRY32 { + dwSize: std::mem::size_of::() as u32, + ..Default::default() + }; + + unsafe { + if Process32First(snapshot, &mut entry).is_err() { + let _ = CloseHandle(snapshot); + return None; + } + loop { + let name = + std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8).to_string_lossy(); + if name.eq_ignore_ascii_case("Weixin.exe") { + let pid = entry.th32ProcessID; + let _ = CloseHandle(snapshot); + return Some(pid); + } + if Process32Next(snapshot, &mut entry).is_err() { + break; + } + } + let _ = CloseHandle(snapshot); + } + None +} + +fn scan_memory_for_key(process: HANDLE, templates: &[[u8; 16]]) -> Result<[u8; 16]> { + let mut seen = HashSet::<[u8; 16]>::new(); + let mut address = 0usize; + + loop { + let mut mbi = MEMORY_BASIC_INFORMATION::default(); + let ret = unsafe { + VirtualQueryEx( + process, + Some(address as *const _), + &mut mbi, + std::mem::size_of::(), + ) + }; + if ret == 0 { + break; + } + + let base = mbi.BaseAddress as usize; + let size = mbi.RegionSize; + if mbi.State == MEM_COMMIT && is_candidate_page(mbi.Protect.0) && size <= MAX_REGION_SIZE { + if let Some(aes_key) = scan_region(process, base, size, templates, &mut seen)? 
{ + return Ok(aes_key); + } + } + + address = base.saturating_add(size); + if address == 0 { + break; + } + } + + bail!("Windows 进程内存里没有找到可验证的 V2 AES key") +} + +fn scan_region( + process: HANDLE, + base: usize, + size: usize, + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Result> { + let overlap = 31usize; + let mut offset = 0usize; + + while offset < size { + let chunk_size = std::cmp::min(CHUNK_SIZE, size - offset); + let addr = base + offset; + let mut buf = vec![0u8; chunk_size]; + let mut bytes_read = 0usize; + + let ok = unsafe { + ReadProcessMemory( + process, + addr as *const _, + buf.as_mut_ptr() as *mut _, + chunk_size, + Some(&mut bytes_read), + ) + .is_ok() + }; + + if ok && bytes_read > 0 { + buf.truncate(bytes_read); + if let Some(key) = scan_candidate_buffer(&buf, templates, seen) { + return Ok(Some(key)); + } + } + + offset += if chunk_size > overlap { + chunk_size - overlap + } else { + chunk_size + }; + } + + Ok(None) +} + +fn scan_candidate_buffer( + buf: &[u8], + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Option<[u8; 16]> { + for candidate in ascii_alnum_candidates(buf, 32) { + let mut key = [0u8; 16]; + key.copy_from_slice(&candidate[..16]); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + for candidate in ascii_alnum_candidates(buf, 16) { + let mut key = [0u8; 16]; + key.copy_from_slice(candidate); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + None +} + +fn is_candidate_page(protect: u32) -> bool { + if protect == PAGE_NOACCESS.0 || (protect & PAGE_GUARD.0) != 0 { + return false; + } + let base = protect & !(PAGE_GUARD.0 | PAGE_NOCACHE.0 | PAGE_WRITECOMBINE.0); + matches!( + base, + value if value == PAGE_READWRITE.0 + || value == PAGE_WRITECOPY.0 + || value == PAGE_EXECUTE_READWRITE.0 + || value == PAGE_EXECUTE_WRITECOPY.0 + ) +} From 2d88c9542dcba52c21405416c57b97ab47febe8b Mon Sep 17 00:00:00 2001 From: jackwener Date: 
Thu, 14 May 2026 18:40:57 +0800 Subject: [PATCH 3/5] feat(attachment): wire wx attachments / wx extract end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 把 V1 (legacy XOR + V1 fixed-AES) + 平台相关 V2 (macOS / Windows) image 解密能力一路接到 CLI: - ipc: 新增 Attachments / Extract 两个 Request variant - daemon/server: dispatch 路由到 query::q_attachments / q_extract - daemon/cache: DbCache::db_dir() 公开,让 resolver 推 wxchat_base - daemon/query: q_attachments 走 Msg_<md5> 表按 (local_type & 0xFFFFFFFF) IN (...) 过滤、按 ts DESC 全局排序后分页,返回不透明 attachment_id; q_extract 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 按 magic 分发 v1/v2 解码 → 写盘。bridge 用 ImageKeyMaterial.{aes_key, xor_key}(codex 实测真实账号 xor_key=0xa2,不能硬编码 0x88) - cli: 新增 wx attachments / wx extract 两个子命令,flag 风格与现有 history / biz-articles 对齐 - README + SKILL: 加附件提取章节,含三档解码档位与 V2 image key 派生说明 --- README.md | 29 ++++ SKILL.md | 28 ++++ src/cli/attachments.rs | 42 ++++++ src/cli/extract.rs | 25 ++++ src/cli/mod.rs | 46 ++++++ src/daemon/cache.rs | 6 + src/daemon/query.rs | 312 +++++++++++++++++++++++++++++++++++++++++ src/daemon/server.rs | 12 ++ src/ipc.rs | 26 ++++ 9 files changed, 526 insertions(+) create mode 100644 src/cli/attachments.rs create mode 100644 src/cli/extract.rs diff --git a/README.md b/README.md index 8a8e23b..35589cd 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,35 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 +### 附件提取(图片 / 视频 / 文件 / 语音) + +聊天里的附件本体存在 `xwechat_files/<wxid>/msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 + +```bash +# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选) +wx attachments "张三" +wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) +wx extract <attachment_id> -o
~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。 + +`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取: +- **macOS**:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24);xor_key = `uin & 0xff`,**不是硬编码 0x88** +- **Windows**:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 +- **Linux**:上游空白,遇到 V2 .dat 会报 unsupported + ### 联系人 & 群组 ```bash diff --git a/SKILL.md b/SKILL.md index fe7418c..ddf02e1 100644 --- a/SKILL.md +++ b/SKILL.md @@ -242,6 +242,34 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 +### 附件提取(图片 / 视频 / 文件 / 语音) + +聊天里的图片/视频/文件本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: + +```bash +# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选) +wx attachments "张三" +wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 用 attachment_id 把单个资源解密写到指定路径 +wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。 + +`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / 
`format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取(macOS / Windows 自动;Linux 暂不支持): +- macOS:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback;`xor_key = uin & 0xff` +- Windows:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 + ### 收藏与统计 ```bash
diff --git a/src/cli/attachments.rs b/src/cli/attachments.rs
new file mode 100644
index 0000000..662c256
--- /dev/null
+++ b/src/cli/attachments.rs
@@ -0,0 +1,53 @@
+use anyhow::Result;
+
+use crate::ipc::Request;
+use super::history::{parse_time, parse_time_end};
+use super::output::{print_value, resolve};
+use super::transport;
+
+/// `wx attachments` — list the attachment messages of one chat.
+///
+/// Forwards the query to the daemon and prints the `attachments` array of the
+/// reply (an empty array when the reply has no such field). Every entry carries
+/// an opaque `attachment_id`; the file itself is only resolved and decoded when
+/// that id is passed to `wx extract`.
+///
+/// * `kinds` — values of `--kind`; an empty list is sent as `None` so the
+///   daemon applies its own default.
+/// * `since` / `until` — optional time bounds, parsed by `parse_time` and
+///   `parse_time_end`.
+/// * `json` — passed to `resolve` to select the output format.
+///
+/// # Errors
+/// Returns an error when a time bound fails to parse, when `transport::send`
+/// fails, or when printing fails.
+pub fn cmd_attachments(
+    chat: String,
+    kinds: Vec<String>,
+    limit: usize,
+    offset: usize,
+    since: Option<String>,
+    until: Option<String>,
+    json: bool,
+) -> Result<()> {
+    let since = since.as_deref().map(parse_time).transpose()?;
+    let until = until.as_deref().map(parse_time_end).transpose()?;
+
+    let req = Request::Attachments {
+        chat,
+        // An empty Vec means "not specified": let the daemon choose the fallback.
+        kinds: (!kinds.is_empty()).then_some(kinds),
+        limit,
+        offset,
+        since,
+        until,
+    };
+    let resp = transport::send(req)?;
+
+    let data = resp
+        .data
+        .get("attachments")
+        .cloned()
+        .unwrap_or_else(|| serde_json::Value::Array(Vec::new()));
+    print_value(&data, &resolve(json))
+}
diff --git a/src/cli/extract.rs b/src/cli/extract.rs
new file mode 100644
index 0000000..a0eba0d
--- /dev/null
+++ b/src/cli/extract.rs
@@ -0,0 +1,49 @@
+use anyhow::Result;
+
+use crate::ipc::Request;
+use super::output::{print_value, resolve};
+use super::transport;
+
+/// `wx extract` — decrypt the resource behind one `attachment_id` and write it
+/// to `output`.
+///
+/// The work happens in the daemon: it parses the id, looks the file md5 up in
+/// `message_resource.db`, locates the `.dat` under `msg/attach/...`, picks the
+/// v1/v2 decoder from the file magic and writes the decoded file.
+///
+/// The daemon opens `output` exactly as it receives it, so a relative path is
+/// resolved here, against the working directory of the CLI process, before the
+/// request is sent.
+///
+/// # Errors
+/// Returns an error when the working directory cannot be determined, when the
+/// resolved path is not valid UTF-8, when `transport::send` fails, or when
+/// printing fails.
+pub fn cmd_extract(
+    attachment_id: String,
+    output: String,
+    overwrite: bool,
+    json: bool,
+) -> Result<()> {
+    let req = Request::Extract {
+        attachment_id,
+        output: absolute_output_path(output)?,
+        overwrite,
+    };
+    let resp = transport::send(req)?;
+    print_value(&resp.data, &resolve(json))
+}
+
+/// Returns `output` unchanged when it is already absolute, otherwise joins it
+/// onto the current working directory.
+fn absolute_output_path(output: String) -> Result<String> {
+    if std::path::Path::new(&output).is_absolute() {
+        return Ok(output);
+    }
+    let cwd = std::env::current_dir()
+        .map_err(|e| anyhow::anyhow!("无法获取当前工作目录:{}", e))?;
+    cwd.join(&output)
+        .into_os_string()
+        .into_string()
+        .map_err(|raw| anyhow::anyhow!("输出路径不是合法 UTF-8:{}", raw.to_string_lossy()))
+}
diff --git a/src/cli/mod.rs b/src/cli/mod.rs index b9e71fd..5fe4e8c 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,5 +1,7 @@ mod init; +pub mod attachments; pub mod biz_articles; +pub mod extract; pub mod sessions; pub mod history; pub mod search; @@ -262,6 +264,44 @@ enum Commands { #[arg(long)] json: bool, }, + /// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id + Attachments { + /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) + chat: String, + /// 类型(多选,默认 image)。可选:image / voice / video / file + #[arg(long = "kind", value_name = "KIND", + value_parser = ["image", "voice", "video", "file", "audio", "img"])] + kinds: Vec, + /// 显示数量 + #[arg(short = 'n', long, default_value = "50")] + limit: usize, + /// 分页偏移 + #[arg(long, default_value = "0")] + offset: usize, + /// 起始时间 YYYY-MM-DD + #[arg(long)] + since: Option, + /// 结束时间 YYYY-MM-DD + #[arg(long)] + until: Option, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, + /// 把单个 attachment_id 对应的资源解密写到指定文件路径 + Extract { + /// 由 `wx attachments` 输出的不透明 ID(base64url 字符串) + attachment_id: String, + /// 输出文件路径(绝对或相对当前工作目录均可;扩展名建议保留为 .jpg 等) + #[arg(short = 'o', long)] + output: String, + /// 目标已存在时覆盖 + #[arg(long)] + overwrite: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 管理 wx-daemon Daemon { #[command(subcommand)] @@ -329,6 +369,12 @@ fn dispatch(cli: Cli) -> Result<()> { Commands::BizArticles { 
limit, account, since, until, unread, json } => { biz_articles::cmd_biz_articles(limit, account, since, until, unread, json) } + Commands::Attachments { chat, kinds, limit, offset, since, until, json } => { + attachments::cmd_attachments(chat, kinds, limit, offset, since, until, json) + } + Commands::Extract { attachment_id, output, overwrite, json } => { + extract::cmd_extract(attachment_id, output, overwrite, json) + } Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index 9801396..56e307c 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -54,6 +54,12 @@ impl DbCache { Ok(cache) } + /// 数据库根目录(即 `/db_storage`)。 + /// 上层(attachment resolver)需要 `db_dir.parent()` 来定位 `msg/attach/...` 解密图片。 + pub fn db_dir(&self) -> &Path { + &self.db_dir + } + fn cache_file_path(&self, rel_key: &str) -> PathBuf { let hash = format!("{:x}", md5::compute(rel_key.as_bytes())); self.cache_dir.join(format!("{}.db", hash)) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 167d88a..5a5d1b9 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3285,6 +3285,318 @@ pub async fn q_biz_articles( Ok(json!({ "count": results.len(), "articles": results })) } +// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ───────────────────────────────── +// +// 设计要点: +// - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 +// 出附件消息行,再编出 `attachment_id`。**不**去翻 `message_resource.db`,因为列出动作 +// 要可枚举几千条;resource lookup 留到 `q_extract` 才做。 +// - `q_extract` 走完整链:`AttachmentId` → `message_resource.db` 查 md5 → +// `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 decoder → 写盘。 +// - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 +// 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 + +/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`, +/// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 +pub async fn q_attachments( + db: &DbCache, + names: &Names, + chat: &str, + kinds: Option>, + limit: usize, + offset: usize, + 
since: Option, + until: Option, +) -> Result { + use crate::attachment::{AttachmentId, AttachmentKind}; + + let username = resolve_username(chat, names) + .with_context(|| format!("找不到联系人: {}", chat))?; + let display = names.display(&username); + let chat_type = chat_type_of(&username, names); + let is_group = chat_type == "group"; + + // 解析 kinds → 低 32 bit local_type 集合 + let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?; + if kind_filters.is_empty() { + anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice"); + } + let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); + // local_type → AttachmentKind 反查(mask 完后定 kind) + let type_to_kind: HashMap = kind_filters.iter() + .map(|(k, t)| (*t, *k)) + .collect(); + + let tables = find_msg_tables(db, names, &username).await?; + if tables.is_empty() { + anyhow::bail!("找不到 {} 的消息记录", display); + } + + // 群聊需要 sender 显示名 + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; + + let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new(); + // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx) + for (db_idx, (db_path, table_name)) in tables.iter().enumerate() { + let path = db_path.clone(); + let tname = table_name.clone(); + let uname = username.clone(); + let is_group2 = is_group; + let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); + let lo32_types2 = lo32_types.clone(); + let since2 = since; + let until2 = until; + // per-DB 软上限避免巨群全量加载 + let per_db_cap = (offset + limit).max(limit) * 2; + let db_idx2 = db_idx as i64; + + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path)?; + let id2u = load_id2u(&conn); + + // local_type 在 DB 里可能带高位 flag,过滤要 mask 低 32 bit + let placeholders = lo32_types2.iter().map(|_| 
"?").collect::>().join(","); + let mut clauses: Vec = vec![ + format!("(local_type & 4294967295) IN ({})", placeholders), + ]; + let mut params: Vec> = lo32_types2.iter() + .map(|t| Box::new(*t) as Box) + .collect(); + if let Some(s) = since2 { + clauses.push("create_time >= ?".into()); + params.push(Box::new(s)); + } + if let Some(u) = until2 { + clauses.push("create_time <= ?".into()); + params.push(Box::new(u)); + } + let where_clause = format!("WHERE {}", clauses.join(" AND ")); + + let sql = format!( + "SELECT local_id, local_type, create_time, real_sender_id, + message_content, WCDB_CT_message_content + FROM [{}] {} ORDER BY create_time DESC LIMIT ?", + tname, where_clause + ); + params.push(Box::new(per_db_cap as i64)); + + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); + let mut stmt = conn.prepare(&sql)?; + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt + .query_map(params_ref.as_slice(), |row| { + let local_id: i64 = row.get(0)?; + let raw_type: i64 = row.get(1)?; + let lo32 = (raw_type as u64 & 0xFFFFFFFF) as i64; + let ts: i64 = row.get(2)?; + let real_sender_id: i64 = row.get(3)?; + let content_bytes = get_content_bytes(row, 4); + let ct: i64 = row.get::<_, i64>(5).unwrap_or(0); + let content = decompress_message(&content_bytes, ct); + let sender = if is_group2 { + sender_label(real_sender_id, &content, true, &uname, + &id2u, &names_map, &group_nicknames2) + } else { + String::new() + }; + Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2)) + })? 
+ .filter_map(|r| r.ok()) + .collect(); + Ok::<_, anyhow::Error>(rows) + }) + .await??; + all_rows.extend(rows); + } + + // 全局按 ts DESC 排序后分页 + all_rows.sort_by_key(|r| std::cmp::Reverse(r.5)); + let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect(); + + // 翻成 JSON + let mut results: Vec = Vec::with_capacity(paged.len()); + for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged { + let kind = type_to_kind.get(&lo32).copied() + .unwrap_or(AttachmentKind::Image); // 理论不会 fallthrough + let id = AttachmentId { + v: 1, + chat: username.clone(), + local_id, + create_time: ts, + kind, + db: None, + }; + let id_str = id.encode()?; + + let mut row = json!({ + "attachment_id": id_str, + "kind": kind.as_str(), + "type": fmt_type(lo32), + "local_id": local_id, + "timestamp": ts, + "time": fmt_time(ts, "%Y-%m-%d %H:%M"), + }); + if is_group && !sender.is_empty() { + row["sender"] = Value::String(sender); + } + results.push(row); + } + + Ok(json!({ + "chat": display, + "username": username, + "is_group": is_group, + "chat_type": chat_type, + "count": results.len(), + "attachments": results, + })) +} + +/// 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 解密 → 写盘。 +pub async fn q_extract( + db: &DbCache, + _names: &Names, + attachment_id: &str, + output: &str, + overwrite: bool, +) -> Result { + use crate::attachment::{ + attachment_id::AttachmentId, + decoder::{self, V2KeyMaterial}, + image_key, + resolver, + }; + + let id = AttachmentId::decode(attachment_id) + .context("解析 attachment_id 失败(不是合法 base64url(json)?)")?; + + let output_path = std::path::PathBuf::from(output); + if output_path.exists() && !overwrite { + anyhow::bail!( + "目标已存在:{}(加 --overwrite 覆盖)", + output_path.display() + ); + } + if let Some(parent) = output_path.parent() { + if !parent.as_os_str().is_empty() { + tokio::fs::create_dir_all(parent).await + .with_context(|| format!("创建输出目录失败:{}", parent.display()))?; + } + } + + // 1) 拿 message_resource.db + let 
resource_path = db.get("message/message_resource.db").await? + .context("无法解密 message_resource.db(请确认 all_keys.json 包含该 DB 的密钥)")?; + + // 2) 推 wxchat_base = db_dir.parent(),再拼 attach_root + let wxchat_base = db.db_dir().parent() + .ok_or_else(|| anyhow::anyhow!("db_dir 没有 parent,无法推断 xwechat_files 根目录"))? + .to_path_buf(); + let attach_root = resolver::attach_root_for(&wxchat_base); + + // 3) blocking pool 跑 resolver + 读盘 + 解码 + let id_for_task = id.clone(); + let resource_path2 = resource_path.clone(); + let attach_root2 = attach_root.clone(); + let wxchat_base2 = wxchat_base.clone(); + let output_path2 = output_path.clone(); + + let report: Value = tokio::task::spawn_blocking(move || -> Result { + let resolved = resolver::resolve_blocking(&id_for_task, &resource_path2, &attach_root2)?; + + let dat_bytes = std::fs::read(&resolved.dat_path) + .with_context(|| format!("读取 .dat 失败:{}", resolved.dat_path.display()))?; + + // V2 image key — 平台相关。`ImageKeyMaterial` 同时给 aes_key + xor_key。 + // xor_key 不能硬编码 0x88:实测 macOS 真实账号上是 `uin & 0xff` 派生的(0xa2 等), + // 所以这里桥接时必须把 provider 的 xor_key 透传给 V2KeyMaterial。 + // 缺 key 时让 decoder 自己抛带诊断的错。 + let provider = image_key::default_provider(); + let key_material = if let Some(p) = provider.as_ref() { + // 从 wxchat_base 末段拿 wxid + let wxid = wxchat_base2.file_name() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_string(); + if wxid.is_empty() { + None + } else { + match p.get_key(&wxid) { + Ok(km) => Some(km), + Err(e) => { + eprintln!("[extract] image key 提取失败 (wxid={}): {} — V2 文件将无法解码", wxid, e); + None + } + } + } + } else { + None + }; + let v2_key = match key_material.as_ref() { + Some(km) => V2KeyMaterial { aes_key: Some(&km.aes_key), xor_key: km.xor_key }, + None => V2KeyMaterial::default(), + }; + + let decoded = decoder::dispatch(&dat_bytes, v2_key)?; + + // 写盘 + std::fs::write(&output_path2, &decoded.data) + .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + + Ok(json!({ + "ok": true, + 
"attachment_id": attachment_id_str(&id_for_task)?, + "kind": id_for_task.kind.as_str(), + "md5": resolved.md5, + "dat_path": resolved.dat_path.display().to_string(), + "dat_size": resolved.size, + "output": output_path2.display().to_string(), + "output_size": decoded.data.len(), + "format": decoded.format, + "decoder": decoded.decoder, + })) + }).await??; + + Ok(report) +} + +/// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。 +/// 缺省(None / 空)按 image 处理。 +fn parse_attachment_kinds( + kinds: Option<&[String]>, +) -> Result> { + use crate::attachment::AttachmentKind; + let raw = kinds.unwrap_or(&[]); + if raw.is_empty() { + return Ok(vec![(AttachmentKind::Image, 3)]); + } + let mut out: Vec<(AttachmentKind, i64)> = Vec::with_capacity(raw.len()); + let mut seen = HashSet::<&'static str>::new(); + for k in raw { + let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { + "image" | "img" => (AttachmentKind::Image, 3), + "voice" | "audio" => (AttachmentKind::Voice, 34), + "video" => (AttachmentKind::Video, 43), + "file" => (AttachmentKind::File, 49), + other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other), + }; + if seen.insert(kind.as_str()) { + out.push((kind, t)); + } + } + Ok(out) +} + +fn attachment_id_str(id: &crate::attachment::AttachmentId) -> Result { + id.encode() +} + #[cfg(test)] mod biz_tests { use super::*; diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 3b06727..9f54076 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -240,5 +240,17 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } + Attachments { chat, kinds, limit, offset, since, until } => { + match query::q_attachments(db, &names_arc, &chat, kinds, limit, offset, since, until).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } + Extract { attachment_id, output, overwrite } => { + match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await { + Ok(v) => 
Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } } } diff --git a/src/ipc.rs b/src/ipc.rs index c478ee4..78d6278 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -131,6 +131,32 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, + /// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音) + /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 + Attachments { + chat: String, + /// 类型过滤:image / video / file / voice,多选;缺省返回 image + #[serde(default, skip_serializing_if = "Option::is_none")] + kinds: Option>, + #[serde(default = "default_limit_50")] + limit: usize, + #[serde(default)] + offset: usize, + #[serde(skip_serializing_if = "Option::is_none")] + since: Option, + #[serde(skip_serializing_if = "Option::is_none")] + until: Option, + }, + /// 提取(解密)单个附件的本体到指定路径 + Extract { + /// `Attachments` 返回的不透明 ID + attachment_id: String, + /// 写入的绝对路径(daemon 直接写盘,不经 socket 传 binary) + output: String, + /// 已存在时是否覆盖 + #[serde(default)] + overwrite: bool, + }, } From 7feacc63714fd4f446c1548147dd813a57f1f9e6 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 18:48:46 +0800 Subject: [PATCH 4/5] fix(daemon): drop redundant `ok` from extract payload (collides with Response.ok) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Response 用 #[serde(flatten)] 把 q_* 返回的 Value 拼到 `{ok, error, ...data}` 里,q_extract 里再塞一个 `"ok": true` 就会在 wire 上写出两个同名 key,CLI 端 `serde_json::from_str::` 直接报「duplicate field `ok`」,对外 表现是「extract 失败 / 解析 daemon 响应失败」,但 daemon 实际已经把图解出来 了。其他 q_* 都没塞 ok(biz_articles / sessions / history 等),保持一致。 --- src/daemon/query.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 5a5d1b9..6bd46b2 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3549,9 +3549,11 @@ pub async fn q_extract( std::fs::write(&output_path2, &decoded.data) .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + // 
注意:不要在这里塞 `ok: true`。dispatch 会用 Response::ok(v) 包一层, + // Response 的 `data: Value` 字段是 #[serde(flatten)] 写出的,本 payload + // 的 `ok` 会和 Response 自带的 `ok` 在线上拼成两个同名 key,CLI 反序列化时 + // serde_json 直接报 "duplicate field",业务请求看上去像 daemon 解析失败。 Ok(json!({ - "ok": true, - "attachment_id": attachment_id_str(&id_for_task)?, "kind": id_for_task.kind.as_str(), "md5": resolved.md5, "dat_path": resolved.dat_path.display().to_string(), @@ -3593,10 +3595,6 @@ fn parse_attachment_kinds( Ok(out) } -fn attachment_id_str(id: &crate::attachment::AttachmentId) -> Result { - id.encode() -} - #[cfg(test)] mod biz_tests { use super::*; From b63589b368fc2d9c60018983231064fed026c8b1 Mon Sep 17 00:00:00 2001 From: jackwener Date: Thu, 14 May 2026 19:10:03 +0800 Subject: [PATCH 5/5] review: tighten attachment extraction scope --- README.md | 8 ++-- SKILL.md | 10 ++-- src/attachment/resolver.rs | 94 ++++++++++++++++++++++++++++++++++++-- src/cli/mod.rs | 6 +-- src/daemon/query.rs | 16 +++---- src/ipc.rs | 4 +- 6 files changed, 112 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 35589cd..29c8736 100644 --- a/README.md +++ b/README.md @@ -211,14 +211,14 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) 聊天里的附件本体存在 `xwechat_files//msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 ```bash -# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选) +# 1) 列出会话里的图片附件,先拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) @@ -226,7 +226,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` 
/ `timestamp` / `time`,群聊里还有 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/SKILL.md b/SKILL.md index ddf02e1..6b79e0d 100644 --- a/SKILL.md +++ b/SKILL.md @@ -242,14 +242,14 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 -### 附件提取(图片 / 视频 / 文件 / 语音) +### 附件提取(图片) -聊天里的图片/视频/文件本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: +聊天里的图片本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: ```bash -# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选) +# 1) 先列出图片附件,拿到不透明的 attachment_id wx attachments "张三" -wx attachments "AI群" --kind image --kind video -n 100 +wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 # 2) 用 attachment_id 把单个资源解密写到指定路径 @@ -257,7 +257,7 @@ wx extract -o ~/Desktop/photo.jpg wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。 +`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 `extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 diff --git a/src/attachment/resolver.rs 
b/src/attachment/resolver.rs index c32d84e..8db4f41 100644 --- a/src/attachment/resolver.rs +++ b/src/attachment/resolver.rs @@ -48,6 +48,7 @@ pub fn lookup_md5_blocking( resource_db_path: &Path, chat: &str, local_id: i64, + create_time: i64, msg_local_type_lo32: i64, ) -> Result> { let conn = Connection::open_with_flags( @@ -68,9 +69,25 @@ pub fn lookup_md5_blocking( return Ok(None); }; - // 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新 + // 2) MessageResourceInfo: + // 同 chat 内 local_id 会复用,所以先用 create_time 精确命中; + // 若资源库里的时间戳跟 message_N.db 不完全对齐,再 fallback 到“同 local_id/type 取最新” // message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型 - let packed: Option> = conn + let packed_exact: Option> = conn + .query_row( + "SELECT packed_info FROM MessageResourceInfo + WHERE chat_id = ?1 + AND message_local_id = ?2 + AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3) + AND message_create_time = ?4 + ORDER BY rowid DESC + LIMIT 1", + rusqlite::params![chat_id, local_id, msg_local_type_lo32, create_time], + |row| row.get(0), + ) + .ok(); + + let packed: Option> = packed_exact.or_else(|| conn .query_row( "SELECT packed_info FROM MessageResourceInfo WHERE chat_id = ?1 @@ -81,7 +98,7 @@ pub fn lookup_md5_blocking( rusqlite::params![chat_id, local_id, msg_local_type_lo32], |row| row.get(0), ) - .ok(); + .ok()); let Some(blob) = packed else { return Ok(None); @@ -235,7 +252,13 @@ pub fn resolve_blocking( super::AttachmentKind::File => 49, }; - let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)? + let meta = lookup_md5_blocking( + resource_db_path, + &id.chat, + id.local_id, + id.create_time, + lo32_type, + )? 
.ok_or_else(|| { anyhow!( "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", @@ -306,6 +329,69 @@ mod tests { assert!(extract_md5_from_packed_info(&blob).is_none()); } + #[test] + fn lookup_md5_prefers_exact_create_time_over_latest_reuse() { + let dir = tempdir_for_test(); + let db_path = dir.join("message_resource.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE ChatName2Id (user_name TEXT)", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE MessageResourceInfo ( + chat_id INTEGER, + message_local_id INTEGER, + message_local_type INTEGER, + message_create_time INTEGER, + packed_info BLOB + )", + [], + ) + .unwrap(); + + let old_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"11111111111111111111111111111111"); + blob + }; + let new_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"22222222222222222222222222222222"); + blob + }; + + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 1000i64, old_blob], + ) + .unwrap(); + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 2000i64, new_blob], + ) + .unwrap(); + + let old = lookup_md5_blocking(&db_path, "room@chatroom", 7, 1000, 3) + .unwrap() + .unwrap(); + let new = lookup_md5_blocking(&db_path, "room@chatroom", 7, 2000, 3) + .unwrap() + .unwrap(); + assert_eq!(old.md5, "11111111111111111111111111111111"); + assert_eq!(new.md5, "22222222222222222222222222222222"); + } + #[test] fn three_month_candidates_includes_prev_curr_next() { // 2025-08-15 (mid-month) → 2025-07, 
2025-08, 2025-09 diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 5fe4e8c..2ec2476 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -264,13 +264,13 @@ enum Commands { #[arg(long)] json: bool, }, - /// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id + /// 列出某会话的图片附件,返回不透明 attachment_id Attachments { /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) chat: String, - /// 类型(多选,默认 image)。可选:image / voice / video / file + /// 类型(当前仅支持 image) #[arg(long = "kind", value_name = "KIND", - value_parser = ["image", "voice", "video", "file", "audio", "img"])] + value_parser = ["image", "img"])] kinds: Vec, /// 显示数量 #[arg(short = 'n', long, default_value = "50")] diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 6bd46b2..634ff2d 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -3285,7 +3285,7 @@ pub async fn q_biz_articles( Ok(json!({ "count": results.len(), "articles": results })) } -// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ───────────────────────────────── +// ─── 附件(当前先支持图片)查询与提取 ───────────────────────────────── // // 设计要点: // - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 @@ -3296,7 +3296,7 @@ pub async fn q_biz_articles( // - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 // 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 -/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`, +/// 列出某会话内的附件消息(当前仅 image)。返回每条的 `attachment_id`, /// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 pub async fn q_attachments( db: &DbCache, @@ -3319,7 +3319,7 @@ pub async fn q_attachments( // 解析 kinds → 低 32 bit local_type 集合 let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?; if kind_filters.is_empty() { - anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice"); + anyhow::bail!("kinds 为空 — 当前至少传一种 image"); } let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); // local_type → AttachmentKind 反查(mask 完后定 kind) @@ -3569,7 +3569,7 @@ pub async fn q_extract( } /// 解析 `kinds` 参数到 
`(AttachmentKind, lo32_local_type)` 列表。 -/// 缺省(None / 空)按 image 处理。 +/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 fn parse_attachment_kinds( kinds: Option<&[String]>, ) -> Result> { @@ -3583,10 +3583,10 @@ fn parse_attachment_kinds( for k in raw { let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { "image" | "img" => (AttachmentKind::Image, 3), - "voice" | "audio" => (AttachmentKind::Voice, 34), - "video" => (AttachmentKind::Video, 43), - "file" => (AttachmentKind::File, 49), - other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other), + "voice" | "audio" | "video" | "file" => { + anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通") + } + other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), }; if seen.insert(kind.as_str()) { out.push((kind, t)); diff --git a/src/ipc.rs b/src/ipc.rs index 78d6278..a4615eb 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -131,11 +131,11 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, - /// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音) + /// 列出某个会话里的图片附件 /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 Attachments { chat: String, - /// 类型过滤:image / video / file / voice,多选;缺省返回 image + /// 类型过滤:当前仅支持 image #[serde(default, skip_serializing_if = "Option::is_none")] kinds: Option>, #[serde(default = "default_limit_50")]