mirror of https://github.com/jackwener/wx-cli.git
feat(attachment): scaffold module + V1 decoders + resource resolver
Lays down the skeleton for 聊天附件 (chat attachment) extraction. This commit
introduces the `attachment` module with:
- `attachment_id`: opaque base64url(json) round-trip handle for CLI/IPC. Carries
`(chat, local_id, create_time, kind)` — `local_id` alone is not unique
(实测同 chat 内最多 7 条同 local_id 的记录), so create_time is required for
disambiguation.
- `decoder/`: dispatch by 6B header magic. Three branches:
- `V2_MAGIC` → AES-128-ECB + raw + XOR (need image AES key)
- `V1_MAGIC` → AES-128-ECB with fixed key `cfcd208495d565ef` (= md5("0")[:16])
- else → legacy single-byte XOR with magic auto-detect
Manual ECB + PKCS7 unpad to avoid pulling in another crate.
- `resolver`: `message_resource.db` lookup chain
`username → ChatName2Id.rowid → MessageResourceInfo.packed_info → md5`
+ on-disk `.dat` selection (full > _h > _t) under
`<wxchat_base>/msg/attach/<md5(chat)>/<YYYY-MM>/Img/<md5>[_t|_h].dat`.
Honors `message_local_type % 2^32` to strip the high flag bits, and orders by
`message_create_time DESC` to handle local_id reuse.
- `image_key/`: stub trait + macOS / Windows placeholders. To be filled by
codex with the V2 image key extraction (kvcomm + brute-force on macOS, memory
scan on Windows).
V1 decoder ships with 6 unit tests covering every supported magic + the BMP
extra validation; resolver ships with packed_info parser + dat-file selection
tests; v2 decoder ships with header validation tests. 21 tests pass.
`cargo check` and `cargo check --target x86_64-pc-windows-gnu` both clean.
pull/57/head
parent
5c001b18be
commit
14fdfde1d3
|
|
@ -105,6 +105,12 @@ version = "1.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.22.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.1"
|
||||
|
|
@ -1311,6 +1317,7 @@ version = "0.1.11"
|
|||
dependencies = [
|
||||
"aes",
|
||||
"anyhow",
|
||||
"base64",
|
||||
"cbc",
|
||||
"chrono",
|
||||
"clap",
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ dirs = "5"
|
|||
# MD5 (联系人表名 Msg_<md5>)
|
||||
md5 = "0.7"
|
||||
|
||||
# 附件 ID 编码(base64url)
|
||||
base64 = "0.22"
|
||||
|
||||
# 正则表达式
|
||||
regex = "1"
|
||||
roxmltree = "0.20"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,153 @@
|
|||
//! 不透明附件 ID — 跨 CLI / IPC 的 round-trip 句柄。
|
||||
//!
|
||||
//! 编码:`base64url_no_pad(serde_json(payload))`。
|
||||
//! 选择 base64url(json) 而不是紧凑 bit-pack:
|
||||
//! - phase 1 求稳,不发明二进制协议
|
||||
//! - 后面加字段(`resource_md5` / `decoder_hint` 之类)老 CLI 不 break
|
||||
//! - debug 直接 base64 -d | jq 看字段
|
||||
//!
|
||||
//! ⚠️ `local_id` 在同一 chat 内会被 WeChat 复用(实测同 chat 最多 7 条同 local_id),
|
||||
//! 所以 `(chat, local_id, create_time)` 三元组才是定位资源行的最小集。
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum AttachmentKind {
|
||||
Image,
|
||||
Video,
|
||||
File,
|
||||
Voice,
|
||||
}
|
||||
|
||||
impl AttachmentKind {
|
||||
/// 从 message.local_type 推 attachment kind(只覆盖 phase 1 关心的几种)。
|
||||
/// 高 32 bit 是版本/会话 flag,要先 mask 到低 32 bit。
|
||||
pub fn from_local_type(local_type: i64) -> Option<Self> {
|
||||
let lo = (local_type as u64) & 0xFFFF_FFFF;
|
||||
match lo {
|
||||
3 => Some(AttachmentKind::Image),
|
||||
34 => Some(AttachmentKind::Voice),
|
||||
43 => Some(AttachmentKind::Video),
|
||||
// type=49 是 appmsg,里面 subtype=6 才是文件;这里偏宽松返回 File,
|
||||
// 由 resolver 进一步根据 appmsg subtype 决定是否真的能 extract
|
||||
49 => Some(AttachmentKind::File),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
AttachmentKind::Image => "image",
|
||||
AttachmentKind::Video => "video",
|
||||
AttachmentKind::File => "file",
|
||||
AttachmentKind::Voice => "voice",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 附件 ID payload(序列化后 base64url 编码)。
|
||||
///
|
||||
/// `v` 是版本字段,将来 schema 变了可以走分支兼容。当前 v=1。
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AttachmentId {
|
||||
/// payload schema version
|
||||
pub v: u32,
|
||||
/// 会话 username(同时用于 ChatName2Id 查 chat_id 和拼 attach 路径)
|
||||
pub chat: String,
|
||||
/// 消息行的 local_id
|
||||
pub local_id: i64,
|
||||
/// 消息行的 create_time(unix 秒)— 用于 disambiguate 同 chat 内 local_id 复用
|
||||
pub create_time: i64,
|
||||
/// 附件类别
|
||||
pub kind: AttachmentKind,
|
||||
/// 可选 hint:消息所在 message_N.db 的 N。给定时 resolver 可跳过 shard 扫描;
|
||||
/// 缺省时 resolver 会按 `find_msg_tables` 逻辑全量扫
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub db: Option<u8>,
|
||||
}
|
||||
|
||||
impl AttachmentId {
|
||||
pub fn encode(&self) -> Result<String> {
|
||||
let json = serde_json::to_vec(self).context("序列化 AttachmentId")?;
|
||||
Ok(URL_SAFE_NO_PAD.encode(json))
|
||||
}
|
||||
|
||||
pub fn decode(s: &str) -> Result<Self> {
|
||||
let bytes = URL_SAFE_NO_PAD
|
||||
.decode(s.trim())
|
||||
.map_err(|e| anyhow!("attachment_id 不是合法 base64url: {}", e))?;
|
||||
let id: AttachmentId =
|
||||
serde_json::from_slice(&bytes).context("attachment_id payload 非合法 JSON")?;
|
||||
if id.v != 1 {
|
||||
return Err(anyhow!("不支持的 attachment_id 版本 v={}", id.v));
|
||||
}
|
||||
Ok(id)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an image attachment ID from the fields the tests vary.
    fn image_id(
        v: u32,
        chat: &str,
        local_id: i64,
        create_time: i64,
        db: Option<u8>,
    ) -> AttachmentId {
        AttachmentId {
            v,
            chat: chat.to_string(),
            local_id,
            create_time,
            kind: AttachmentKind::Image,
            db,
        }
    }

    #[test]
    fn round_trip_minimal() {
        let original = image_id(1, "wxid_abc", 12345, 1_715_678_901, None);
        let encoded = original.encode().unwrap();
        let decoded = AttachmentId::decode(&encoded).unwrap();
        assert_eq!(decoded.chat, original.chat);
        assert_eq!(decoded.local_id, original.local_id);
        assert_eq!(decoded.create_time, original.create_time);
        assert_eq!(decoded.kind, original.kind);
        assert_eq!(decoded.db, original.db);
    }

    #[test]
    fn round_trip_with_db_hint() {
        let encoded = image_id(1, "1234@chatroom", 42, 1, Some(2)).encode().unwrap();
        // Unpadded base64url must never contain '='.
        assert!(!encoded.contains('='));
        let decoded = AttachmentId::decode(&encoded).unwrap();
        assert_eq!(decoded.db, Some(2));
    }

    #[test]
    fn local_type_mask_high_bits() {
        // Image push path seen in monitor_web.py: flags in the high bits,
        // the low 32 bits are 3.
        let high_flag = (0xDEAD_BEEFu64 << 32) as i64 | 3;
        assert_eq!(
            AttachmentKind::from_local_type(high_flag),
            Some(AttachmentKind::Image)
        );
    }

    #[test]
    fn rejects_unknown_version() {
        let encoded = image_id(99, "x", 0, 0, None).encode().unwrap();
        assert!(AttachmentId::decode(&encoded).is_err());
    }
}
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
//! `.dat` 文件解码:根据 6B header magic 分发到具体 decoder。
|
||||
//!
|
||||
//! 三档:
|
||||
//! | header[0..6] | decoder | 备注 |
|
||||
//! |-------------------------|-------------------|-----------------------------------------|
|
||||
//! | `07 08 V2 08 07` | `v2` | AES-128-ECB + XOR 混合,需要 image AES key |
|
||||
//! | `07 08 V1 08 07` | `v1_aes` | 固定 AES key `cfcd208495d565ef` |
|
||||
//! | (其他, 通常无 magic) | `v1_xor` | legacy single-byte XOR,magic 自动探测 |
|
||||
//!
|
||||
//! 决策点放在 `dispatch`,让上层(`resolver` / CLI extract 命令)只跟一个入口打交道。
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
pub mod v1_xor;
|
||||
pub mod v2;
|
||||
|
||||
/// Full V2 header magic: `\x07\x08V2\x08\x07`.
pub const V2_MAGIC: [u8; 6] = *b"\x07\x08V2\x08\x07";
/// Full V1 header magic: `\x07\x08V1\x08\x07`.
pub const V1_MAGIC: [u8; 6] = *b"\x07\x08V1\x08\x07";
|
||||
|
||||
/// Output of a decoder together with the image format detected from it.
#[derive(Debug)]
pub struct DecodedImage {
    /// Decoded file content.
    pub data: Vec<u8>,
    /// File extension (without the dot) inferred from the content magic,
    /// e.g. "jpg" / "png" / "gif" / "webp" / "tif" / "bmp" /
    /// "hevc" (wxgf container) / "bin" (unrecognized).
    pub format: &'static str,
    /// Name of the decoder that produced the data
    /// ("legacy_xor" / "v1_aes" / "v2"); meant for CLI debug output.
    pub decoder: &'static str,
}
|
||||
|
||||
/// Key material for V2 `.dat` files, supplied by the caller.
///
/// When `aes_key` is `None` and a V2 file is encountered, decoding returns an
/// `Err`, so the caller can inspect the concrete message and react.
///
/// `Default` yields no AES key and the standard XOR key
/// ([`V2KeyMaterial::DEFAULT_XOR_KEY`]). The previous derived `Default` set
/// `xor_key` to 0, which contradicted the documented default and left the XOR
/// segment undecoded for callers passing `V2KeyMaterial::default()`.
#[derive(Debug, Clone, Copy)]
pub struct V2KeyMaterial<'a> {
    /// 16-byte image AES key, if one has been obtained.
    pub aes_key: Option<&'a [u8; 16]>,
    /// Single-byte XOR key. WeChat 4.x uses 0x88 by default; can be overridden.
    pub xor_key: u8,
}

impl<'a> V2KeyMaterial<'a> {
    /// XOR key used unless the caller overrides it.
    pub const DEFAULT_XOR_KEY: u8 = 0x88;

    /// Builds key material from an AES key, using the default XOR key.
    pub fn with_aes(key: &'a [u8; 16]) -> Self {
        Self { aes_key: Some(key), xor_key: Self::DEFAULT_XOR_KEY }
    }
}

impl Default for V2KeyMaterial<'_> {
    /// No AES key, default XOR key.
    fn default() -> Self {
        Self { aes_key: None, xor_key: Self::DEFAULT_XOR_KEY }
    }
}
|
||||
|
||||
/// 根据 `dat_bytes` 头部 magic 自动分发到对应 decoder。
|
||||
///
|
||||
/// `v2_key` 仅在文件是 V2 magic 时被消费。
|
||||
pub fn dispatch(dat_bytes: &[u8], v2_key: V2KeyMaterial<'_>) -> Result<DecodedImage> {
|
||||
if dat_bytes.len() >= 6 {
|
||||
let head: &[u8; 6] = dat_bytes[..6].try_into().unwrap();
|
||||
if head == &V2_MAGIC {
|
||||
return v2::decode(dat_bytes, v2_key);
|
||||
}
|
||||
if head == &V1_MAGIC {
|
||||
// V1 fixed-AES: 固定 key = md5("0")[:16] = "cfcd208495d565ef"
|
||||
let fixed_key: [u8; 16] = *b"cfcd208495d565ef";
|
||||
return v2::decode(
|
||||
dat_bytes,
|
||||
V2KeyMaterial { aes_key: Some(&fixed_key), xor_key: v2_key.xor_key },
|
||||
)
|
||||
.map(|mut d| {
|
||||
d.decoder = "v1_aes";
|
||||
d
|
||||
});
|
||||
}
|
||||
}
|
||||
if dat_bytes.is_empty() {
|
||||
return Err(anyhow!("空 .dat 文件"));
|
||||
}
|
||||
v1_xor::decode(dat_bytes)
|
||||
}
|
||||
|
||||
/// Detects the image format of a decrypted byte stream from its leading bytes.
///
/// Follows upstream `decode_image.py::detect_image_format`, plus a probe for
/// wxgf (raw HEVC stream), because V2 decoding may yield a wxgf container
/// directly. Returns the extension without the dot, or `"bin"` when no
/// signature matches.
pub fn detect_image_format(bytes: &[u8]) -> &'static str {
    if bytes.starts_with(b"wxgf") {
        "hevc"
    } else if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
        "jpg"
    } else if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
        "png"
    } else if bytes.starts_with(b"GIF") {
        "gif"
    } else if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        // RIFF alone is not enough: the form type at offset 8 must be WEBP.
        "webp"
    } else if bytes.starts_with(&[0x49, 0x49, 0x2A, 0x00]) {
        "tif"
    } else if bytes.starts_with(b"BM") {
        "bmp"
    } else {
        "bin"
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `bytes` is detected as `format`.
    fn expect_format(bytes: &[u8], format: &str) {
        assert_eq!(detect_image_format(bytes), format);
    }

    #[test]
    fn detect_basic_formats() {
        expect_format(&[0xFF, 0xD8, 0xFF, 0xE0], "jpg");
        expect_format(&[0x89, 0x50, 0x4E, 0x47], "png");
        expect_format(b"GIF89a", "gif");
        expect_format(b"BM\0\0\0\0\0\0\0\0\0\0\0\0", "bmp");
        // RIFF header, 4 size bytes, WEBP form type, then 4 filler bytes.
        expect_format(b"RIFF\0\0\0\0WEBP\0\0\0\0", "webp");
        expect_format(&[0x49, 0x49, 0x2A, 0x00], "tif");
        expect_format(b"wxgfXXXX", "hevc");
        expect_format(&[0, 0, 0, 0], "bin");
    }
}
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
//! Legacy single-byte XOR decoder(无 magic 头的旧 .dat)
|
||||
//!
|
||||
//! 算法:用已知图片 magic 反推 XOR key —— `key = file[0] ^ magic[0]`。
|
||||
//! 然后用同一个 key 校验 `file[i] ^ key == magic[i]`,全部命中才接受这个 key。
|
||||
//!
|
||||
//! 优先级(按 magic 长度降序,避免短 magic 假阳性):
|
||||
//! PNG (4) > GIF (4) > TIF (4) > WEBP (4, RIFF) > JPG (3) > BMP (2, 需额外校验)
|
||||
//!
|
||||
//! BMP 只有 2 字节 magic,假阳性高;额外用 BMP file header 里的
|
||||
//! `bf_size`(offset 2, u32 LE)和 `bf_offset`(offset 10, u32 LE)做合理性校验:
|
||||
//! - `|bf_size - file_size| < 1024`(允许微小 padding 差)
|
||||
//! - `14 <= bf_offset <= 1078`(file header 14 + BITMAPINFOHEADER 40 + 最大调色板 256*4 = 1078)
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
use super::{detect_image_format, DecodedImage};
|
||||
|
||||
/// PNG signature prefix (`\x89PNG`).
const PNG: &[u8] = b"\x89PNG";
/// GIF signature prefix (`GIF8`).
const GIF: &[u8] = b"GIF8";
/// Little-endian TIFF signature (`II*\0`).
const TIF: &[u8] = b"II*\0";
/// RIFF container tag, used to probe for WebP.
const WEBP_RIFF: &[u8] = b"RIFF";
/// JPEG start-of-image marker plus the first byte of the next marker.
const JPG: &[u8] = b"\xFF\xD8\xFF";
/// BMP signature (`BM`).
const BMP: &[u8] = b"BM";
|
||||
|
||||
/// Tests one fixed `magic` against `header` under a single-byte XOR key.
///
/// The key is derived as `header[0] ^ magic[0]`. `Some(key)` is returned only
/// if every byte of `magic` equals the corresponding header byte XOR that key.
/// Returns `None` when `header` is shorter than `magic`, and also when
/// `magic` is empty (the previous implementation panicked on `magic[0]`).
fn try_magic(header: &[u8], magic: &[u8]) -> Option<u8> {
    let candidate = header.get(..magic.len())?;
    let key = candidate.first()? ^ magic.first()?;
    candidate
        .iter()
        .zip(magic)
        .all(|(h, m)| h ^ key == *m)
        .then_some(key)
}
|
||||
|
||||
/// 探测 XOR key。失败返回 `None`(caller 决定是不是错)。
|
||||
pub fn detect_key(file_bytes: &[u8]) -> Option<u8> {
|
||||
if file_bytes.len() < 4 {
|
||||
return None;
|
||||
}
|
||||
let header = &file_bytes[..file_bytes.len().min(16)];
|
||||
|
||||
// 先试 3+ 字节 magic
|
||||
for magic in [PNG, GIF, TIF, WEBP_RIFF, JPG] {
|
||||
if let Some(k) = try_magic(header, magic) {
|
||||
return Some(k);
|
||||
}
|
||||
}
|
||||
|
||||
// 最后试 BMP(只有 2B magic,需额外校验)
|
||||
if let Some(k) = try_magic(header, BMP) {
|
||||
if header.len() >= 14 {
|
||||
// 解 BMP file header 14 字节
|
||||
let mut dec = [0u8; 14];
|
||||
for i in 0..14 {
|
||||
dec[i] = header[i] ^ k;
|
||||
}
|
||||
let bmp_size = u32::from_le_bytes([dec[2], dec[3], dec[4], dec[5]]);
|
||||
let bmp_offset = u32::from_le_bytes([dec[10], dec[11], dec[12], dec[13]]);
|
||||
let file_size = file_bytes.len() as u32;
|
||||
// 允许 1024 字节 padding 差;offset 在合理范围
|
||||
if file_size.abs_diff(bmp_size) < 1024 && (14..=1078).contains(&bmp_offset) {
|
||||
return Some(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// XOR 解码整个 `.dat` 内容。
|
||||
pub fn decode(file_bytes: &[u8]) -> Result<DecodedImage> {
|
||||
let key =
|
||||
detect_key(file_bytes).ok_or_else(|| anyhow!("legacy XOR: 无法识别图片 magic(key 探测失败)"))?;
|
||||
let data: Vec<u8> = file_bytes.iter().map(|b| b ^ key).collect();
|
||||
let format = detect_image_format(&data);
|
||||
if format == "bin" {
|
||||
return Err(anyhow!("legacy XOR: 解出 key=0x{:02x} 但产物 magic 不识别", key));
|
||||
}
|
||||
Ok(DecodedImage { data, format, decoder: "legacy_xor" })
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Simulates a legacy `.dat` file by XOR-ing `plain` with a one-byte key.
    fn xor_encrypt(plain: &[u8], key: u8) -> Vec<u8> {
        let mut out = plain.to_vec();
        out.iter_mut().for_each(|b| *b ^= key);
        out
    }

    /// Appends `padding` zero bytes to `prefix`, encrypts the result with
    /// `key` and asserts that `detect_key` recovers exactly that key.
    fn assert_key_recovered(prefix: &[u8], padding: usize, key: u8) {
        let mut plain = prefix.to_vec();
        plain.resize(prefix.len() + padding, 0);
        assert_eq!(detect_key(&xor_encrypt(&plain, key)), Some(key));
    }

    #[test]
    fn detect_jpg_key() {
        let plain = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46];
        assert_key_recovered(&plain, 0, 0x3C);
    }

    #[test]
    fn detect_png_key() {
        assert_key_recovered(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], 16, 0xA5);
    }

    #[test]
    fn detect_gif_key() {
        assert_key_recovered(b"GIF89a", 16, 0x77);
    }

    #[test]
    fn detect_webp_riff_key() {
        assert_key_recovered(b"RIFF\x00\x00\x00\x00WEBP", 8, 0x12);
    }

    #[test]
    fn detect_tif_key() {
        assert_key_recovered(&[0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00], 16, 0xC3);
    }

    #[test]
    fn detect_bmp_with_valid_header() {
        // 14-byte BMP file header:
        // 'BM' + size (u32 LE) + reserved (2 x u16) + pixel offset (u32 LE).
        let mut header = Vec::with_capacity(14);
        header.extend_from_slice(b"BM");
        header.extend_from_slice(&100u32.to_le_bytes()); // declared file size = 100
        header.extend_from_slice(&[0; 4]); // reserved
        header.extend_from_slice(&54u32.to_le_bytes()); // pixel data offset = 54
        // Pad to 100 bytes in total so the real size matches the declared one.
        assert_key_recovered(&header, 100 - 14, 0x55);
    }

    #[test]
    fn reject_random_bytes() {
        // All-zero input: every candidate key is simply magic[0], so the
        // second decoded byte equals magic[0] instead of magic[1] (for TIF the
        // mismatch shows up at the third byte). No magic matches, BMP
        // included, so detection must fail before any BMP size check runs.
        let bytes = vec![0u8; 100];
        assert_eq!(detect_key(&bytes), None);
    }

    #[test]
    fn decode_round_trip_jpg() {
        let mut plain = vec![0xFF, 0xD8, 0xFF, 0xE0];
        plain.extend_from_slice(b"JFIF padding here");
        let decoded = decode(&xor_encrypt(&plain, 0xAB)).unwrap();
        assert_eq!(decoded.format, "jpg");
        assert_eq!(decoded.decoder, "legacy_xor");
        assert_eq!(decoded.data, plain);
    }
}
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
//! V2 .dat 解码:`AES-128-ECB(PKCS7) + raw + XOR` 三段拼接。
|
||||
//!
|
||||
//! 文件结构(来自上游 `decode_image.py::v2_decrypt_file`):
|
||||
//! `[6B magic V2/V1] [4B aes_size LE] [4B xor_size LE] [1B padding]`
|
||||
//! `[aligned_aes_size bytes AES-ECB ciphertext]`
|
||||
//! `[len - aligned_aes_size - xor_size bytes raw_data (不加密)]`
|
||||
//! `[xor_size bytes XOR (单字节 key)]`
|
||||
//!
|
||||
//! `aligned_aes_size`:把 `aes_size` 向上对齐到 16 的倍数;当 `aes_size` 本身是
|
||||
//! 16 的倍数时,PKCS7 还会再加一整块 padding,所以再 +16。等价于
|
||||
//! `aes_size + (16 - aes_size % 16)`。
|
||||
//!
|
||||
//! ⚠️ 此模块由 codex 落地完整 V2 实现 + image key 模块。当前只提供一个
|
||||
//! `decode` 入口骨架,方便 v1_aes 路径(固定 key)和 dispatch 一起编译过。
|
||||
//! `aes_key=None` 时返回带具体诊断信息的错误。
|
||||
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
|
||||
use super::{detect_image_format, DecodedImage, V2KeyMaterial, V1_MAGIC, V2_MAGIC};
|
||||
|
||||
const HEADER_SIZE: usize = 15;
|
||||
|
||||
pub fn decode(file_bytes: &[u8], key: V2KeyMaterial<'_>) -> Result<DecodedImage> {
|
||||
if file_bytes.len() < HEADER_SIZE {
|
||||
bail!("V2 .dat: 文件过短({} < {} 字节)", file_bytes.len(), HEADER_SIZE);
|
||||
}
|
||||
let magic: &[u8; 6] = file_bytes[..6].try_into().unwrap();
|
||||
if magic != &V2_MAGIC && magic != &V1_MAGIC {
|
||||
bail!("V2 .dat: header magic 不匹配 V1/V2");
|
||||
}
|
||||
|
||||
let aes_key = key.aes_key.ok_or_else(|| {
|
||||
anyhow!("V2 .dat: 需要 image AES key(codex 的 image_key 模块尚未填充)")
|
||||
})?;
|
||||
|
||||
let aes_size = u32::from_le_bytes(file_bytes[6..10].try_into().unwrap()) as usize;
|
||||
let xor_size = u32::from_le_bytes(file_bytes[10..14].try_into().unwrap()) as usize;
|
||||
|
||||
// PKCS7 对齐:aes_size 不是 16 的倍数 → 向上对齐;是 16 的倍数 → 再加一整块
|
||||
let aligned_aes_size = aes_size + (16 - (aes_size % 16));
|
||||
|
||||
let aes_end = HEADER_SIZE.checked_add(aligned_aes_size).ok_or_else(|| anyhow!("aes 段长度溢出"))?;
|
||||
if aes_end > file_bytes.len() {
|
||||
bail!(
|
||||
"V2 .dat: 头部宣称 aes_size={} (aligned={}) 超过文件长度 {}",
|
||||
aes_size,
|
||||
aligned_aes_size,
|
||||
file_bytes.len()
|
||||
);
|
||||
}
|
||||
let raw_end = file_bytes.len().checked_sub(xor_size).ok_or_else(|| {
|
||||
anyhow!("V2 .dat: 头部宣称 xor_size={} 超过文件长度 {}", xor_size, file_bytes.len())
|
||||
})?;
|
||||
if aes_end > raw_end {
|
||||
bail!(
|
||||
"V2 .dat: aes_end={} > raw_end={}(aes/xor 段重叠)",
|
||||
aes_end,
|
||||
raw_end
|
||||
);
|
||||
}
|
||||
|
||||
// === AES-128-ECB 解密 + PKCS7 unpad ===
|
||||
let aes_data = &file_bytes[HEADER_SIZE..aes_end];
|
||||
let dec_aes = aes_ecb_decrypt_pkcs7(aes_key, aes_data)?;
|
||||
|
||||
// === Raw 段(未加密) ===
|
||||
let raw_data = &file_bytes[aes_end..raw_end];
|
||||
|
||||
// === XOR 段 ===
|
||||
let xor_data: Vec<u8> = file_bytes[raw_end..].iter().map(|b| b ^ key.xor_key).collect();
|
||||
|
||||
let mut out = Vec::with_capacity(dec_aes.len() + raw_data.len() + xor_data.len());
|
||||
out.extend_from_slice(&dec_aes);
|
||||
out.extend_from_slice(raw_data);
|
||||
out.extend_from_slice(&xor_data);
|
||||
|
||||
let format = detect_image_format(&out);
|
||||
if format == "bin" {
|
||||
bail!("V2 .dat: AES 解密成功但产物 magic 不识别(key 可能错)");
|
||||
}
|
||||
Ok(DecodedImage { data: out, format, decoder: "v2" })
|
||||
}
|
||||
|
||||
/// AES-128-ECB 解密 + PKCS7 unpad。失败时返回 `Err`,不返回半结果。
|
||||
///
|
||||
/// 不引第三方 ECB 包;ECB 本身就是 block-by-block,手工跑就行。
|
||||
/// PKCS7 padding 由本函数最后一段做 strict 校验:长度 1..=16,且尾部全是同值字节。
|
||||
fn aes_ecb_decrypt_pkcs7(key: &[u8; 16], cipher: &[u8]) -> Result<Vec<u8>> {
|
||||
use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit};
|
||||
if cipher.is_empty() || cipher.len() % 16 != 0 {
|
||||
bail!("AES 输入长度 {} 不是 16 的倍数", cipher.len());
|
||||
}
|
||||
let aes = aes::Aes128::new(key.into());
|
||||
let mut out = Vec::with_capacity(cipher.len());
|
||||
for chunk in cipher.chunks_exact(16) {
|
||||
let mut block = GenericArray::clone_from_slice(chunk);
|
||||
aes.decrypt_block(&mut block);
|
||||
out.extend_from_slice(&block);
|
||||
}
|
||||
let pad = *out.last().ok_or_else(|| anyhow!("AES PKCS7: 空输出"))? as usize;
|
||||
if pad == 0 || pad > 16 || pad > out.len() {
|
||||
bail!("AES PKCS7: 非法 padding 长度 {}", pad);
|
||||
}
|
||||
let tail = &out[out.len() - pad..];
|
||||
if !tail.iter().all(|&b| b as usize == pad) {
|
||||
bail!("AES PKCS7: padding 字节不一致");
|
||||
}
|
||||
out.truncate(out.len() - pad);
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rejects_short_file() {
        let too_short = [0u8; 4];
        assert!(decode(&too_short, V2KeyMaterial::default()).is_err());
    }

    #[test]
    fn rejects_v2_without_key() {
        // A header-sized buffer: V2 magic followed by zeroed size fields.
        let mut buf = vec![0u8; HEADER_SIZE];
        buf[..6].copy_from_slice(&V2_MAGIC);
        let message = decode(&buf, V2KeyMaterial::default()).unwrap_err().to_string();
        assert!(message.contains("AES key"), "{}", message);
    }
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
//! macOS V2 image AES key 提取。
|
||||
//!
|
||||
//! 主路径:从 `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/key_<uin>_*.statistic`
|
||||
//! 文件名拿 uin,然后 `md5(str(uin) + sanitize(wxid)).hex()[:16]` 派生 AES key。
|
||||
//!
|
||||
//! Fallback:枚举 uin 候选 2^24 个(`uint32`,但 wxid 4-byte 前缀只看后 24 bit),
|
||||
//! 通过 `md5(str(uin))[:4] == wxid 后 4 字节` 匹配。
|
||||
//! 上游 `find_image_key_macos.py` 实测 1-2s 完成。
|
||||
//!
|
||||
//! ⚠️ codex 落实现。
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
//! V2 image AES key 提取 — 平台相关。
|
||||
//!
|
||||
//! ⚠️ 此模块由 codex 落地。本文件只放公共 trait + 平台 dispatch 占位。
|
||||
//!
|
||||
//! 路径:
|
||||
//! - macOS:磁盘派生(`key_<uin>_*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`)
|
||||
//! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24)
|
||||
//! - Windows:扫 `Weixin.exe` 内存,匹配 `[a-zA-Z0-9]{32}` 候选,按已知 AES ciphertext-block
|
||||
//! 反验(`find_image_key.py` / `find_image_key.c` 已写实)
|
||||
//! - Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub mod macos;
|
||||
#[allow(dead_code)]
|
||||
pub mod windows;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
/// 单个 wxid 的 V2 image key 提取接口。
|
||||
///
|
||||
/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时是稳定的)。
|
||||
pub trait ImageKeyProvider {
|
||||
/// 返回当前 wxid 的 16 字节 AES key。失败要带可执行的诊断(例如「macOS 没找到
|
||||
/// kvcomm cache,请确认微信已登录」/「Windows 进程不在跑」)。
|
||||
fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]>;
|
||||
}
|
||||
|
||||
/// 平台默认实现(codex 后续填)。
|
||||
///
|
||||
/// 调用方目前可以直接传 `None`,让 resolver 在遇到 V2 .dat 时报「image key 未提取」错。
|
||||
pub fn default_provider() -> Option<Box<dyn ImageKeyProvider + Send + Sync>> {
|
||||
// TODO(codex): 按 cfg(target_os) 返回 macOS / Windows / 不支持
|
||||
None
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
//! Windows V2 image AES key 提取。
|
||||
//!
|
||||
//! 扫 `Weixin.exe` 进程内存,匹配模式 `(?<![a-zA-Z0-9])[a-zA-Z0-9]{32}(?![a-zA-Z0-9])`
|
||||
//! 取候选 key,然后用已知 AES ciphertext-block 反验:每个 candidate 用 AES-128-ECB
|
||||
//! 解一段已知 ciphertext,看产物是否落在合理图片 magic 上。
|
||||
//!
|
||||
//! 上游 `find_image_key.py` / `find_image_key.c` 已经把 signature scan + false-positive
|
||||
//! 控制写实,可以直接对照。
|
||||
//!
|
||||
//! ⚠️ codex 落实现。
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
//! 聊天附件提取链路(图片 / 视频 / 语音 / 文件本体的本地解码)
|
||||
//!
|
||||
//! 整条链:
|
||||
//! message_N.db (Msg_<md5>) → message_resource.db (ChatName2Id + MessageResourceInfo)
|
||||
//! → packed_info protobuf md5 提取 → xwechat_files/<wxid>/msg/attach/.../Img/<md5>[_t|_h].dat
|
||||
//! → magic 分发 (legacy XOR / V1 fixed-AES / V2 AES+XOR) → 写出实际图片
|
||||
//!
|
||||
//! 模块切分:
|
||||
//! - `attachment_id`:跨 IPC / CLI 的不透明 ID(base64url(json))
|
||||
//! - `resolver`:从 `attachment_id` 反查 message_resource.db,定位本地 .dat
|
||||
//! - `decoder`:根据文件 magic 分发到具体解码器(V1 / V2 等)
|
||||
//! - `image_key`:V2 image AES key 提取(macOS / Windows)
|
||||
//!
|
||||
//! V2 / image_key 模块由 codex 落地,先放空 stub 以便 V1 / resolver / CLI 不被 block。
|
||||
|
||||
// 此模块分多个 PR/commit 增量启用:
|
||||
// 1) 先落 attachment_id / decoder / resolver / image_key 骨架(本 commit)
|
||||
// 2) IPC + CLI + daemon route 把它们串起来(后续 commit)
|
||||
// 3) image_key 平台实现(codex 后续 commit)
|
||||
// 在 step 1 完成、step 2 未到时,大量公开 API 仍未被引用,#[allow(dead_code)] 抑制噪音
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub mod attachment_id;
|
||||
pub mod decoder;
|
||||
pub mod resolver;
|
||||
pub mod image_key;
|
||||
|
||||
pub use attachment_id::{AttachmentId, AttachmentKind};
|
||||
|
|
@ -0,0 +1,353 @@
|
|||
//! 把 `AttachmentId` 翻译成本地 `.dat` 路径。
|
||||
//!
|
||||
//! 流程:
|
||||
//! 1. `chat` username → `ChatName2Id.rowid`(资源库)
|
||||
//! 2. `(chat_id, local_id)` + `ORDER BY message_create_time DESC LIMIT 1` →
|
||||
//! `MessageResourceInfo.packed_info`
|
||||
//! 3. 从 `packed_info` (protobuf) 提取 32 字节 ASCII hex MD5
|
||||
//! 4. 在 `<wxchat_base>/msg/attach/<md5(chat)>/<YYYY-MM>/Img/<md5>[_t|_h].dat`
|
||||
//! 下找对应文件,按 full > _h > _t 优先级选一个
|
||||
//!
|
||||
//! `<wxchat_base>` 由 daemon 已知(同 `db_dir` 的父目录),路径 layout 平台差异:
|
||||
//! - Linux: `~/Documents/xwechat_files/<wxid>`
|
||||
//! - macOS: `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/<wxid>`
|
||||
//! ⚠️ msg/attach/... 子树 layout 待我用真实账号验证;上游 docstring 只写了 Windows
|
||||
//! - Windows: `<root>\xwechat_files\<wxid>`(root 从 `%APPDATA%\Tencent\xwechat\config\*.ini` 读)
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use chrono::TimeZone;
|
||||
use rusqlite::Connection;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use super::AttachmentId;
|
||||
|
||||
/// 单条 attachment 在资源库 + 本地 attach 树下的解析结果。
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ResolvedAttachment {
|
||||
pub id: AttachmentId,
|
||||
/// 从 `packed_info` 提取出的资源 MD5(小写 hex)
|
||||
pub md5: String,
|
||||
/// 命中的本地 .dat 路径(按 full > _h > _t 优先级选一个)
|
||||
pub dat_path: PathBuf,
|
||||
/// 文件 size(字节)
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
/// Result of a schema-only lookup, without searching for the local `.dat`.
///
/// Used to fill the `md5` field when listing with `wx attachments`, where the
/// file itself may not exist locally at all.
#[derive(Debug, Clone)]
pub struct AttachmentMetadata {
    /// Resource MD5 as a hex string.
    pub md5: String,
}
|
||||
|
||||
/// 用 `(chat, local_id)` 查 message_resource.db 拿 file md5。
|
||||
///
|
||||
/// 调用方传已经解密好的 `message_resource.db` 路径(由 daemon 的 `DBCache` 准备)。
|
||||
/// 同步函数 — caller 在 `spawn_blocking` 里跑。
|
||||
pub fn lookup_md5_blocking(
|
||||
resource_db_path: &Path,
|
||||
chat: &str,
|
||||
local_id: i64,
|
||||
msg_local_type_lo32: i64,
|
||||
) -> Result<Option<AttachmentMetadata>> {
|
||||
let conn = Connection::open_with_flags(
|
||||
resource_db_path,
|
||||
rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
|
||||
)
|
||||
.with_context(|| format!("打开 message_resource.db {:?}", resource_db_path))?;
|
||||
|
||||
// 1) ChatName2Id: user_name -> rowid
|
||||
let chat_id: Option<i64> = conn
|
||||
.query_row(
|
||||
"SELECT rowid FROM ChatName2Id WHERE user_name = ?1",
|
||||
[chat],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.ok();
|
||||
let Some(chat_id) = chat_id else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
// 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新
|
||||
// message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型
|
||||
let packed: Option<Vec<u8>> = conn
|
||||
.query_row(
|
||||
"SELECT packed_info FROM MessageResourceInfo
|
||||
WHERE chat_id = ?1
|
||||
AND message_local_id = ?2
|
||||
AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3)
|
||||
ORDER BY message_create_time DESC
|
||||
LIMIT 1",
|
||||
rusqlite::params![chat_id, local_id, msg_local_type_lo32],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.ok();
|
||||
|
||||
let Some(blob) = packed else {
|
||||
return Ok(None);
|
||||
};
|
||||
Ok(extract_md5_from_packed_info(&blob).map(|md5| AttachmentMetadata { md5 }))
|
||||
}
|
||||
|
||||
/// Extracts the 32-character ASCII hex MD5 from a
/// `MessageResourceInfo.packed_info` protobuf blob.
///
/// Main path: locate the first 4-byte marker `12 22 0a 20` (field 2 LEN with
/// length 34, then sub-field 1 LEN with length 32) and take the 32 bytes that
/// follow it, provided they are all hex digits.
/// Fallback: scan the whole blob for the first run of 32 hex-digit bytes.
///
/// The result is always lowercase. Returns `None` if neither path succeeds.
pub fn extract_md5_from_packed_info(blob: &[u8]) -> Option<String> {
    const MARKER: &[u8; 4] = &[0x12, 0x22, 0x0A, 0x20];

    find_subslice(blob, MARKER)
        .and_then(|pos| {
            let start = pos + MARKER.len();
            blob.get(start..start + 32)
        })
        .and_then(hex32_to_lowercase)
        // Fallback: any 32-byte window made up of hex digits only.
        .or_else(|| blob.windows(32).find_map(hex32_to_lowercase))
}

/// Returns `candidate` as a lowercase string if every byte is an ASCII hex
/// digit, `None` otherwise.
fn hex32_to_lowercase(candidate: &[u8]) -> Option<String> {
    candidate.iter().all(u8::is_ascii_hexdigit).then(|| {
        candidate
            .iter()
            .map(|b| char::from(b.to_ascii_lowercase()))
            .collect()
    })
}

/// Naive subslice search; returns the offset of the first match.
///
/// Hand-rolled to avoid a memchr/memmem dependency (blobs are usually under
/// 1 KB). An empty needle or one longer than the haystack yields `None`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle.len() {
        0 => None,
        n if n > haystack.len() => None,
        n => (0..=haystack.len() - n).find(|&at| &haystack[at..at + n] == needle),
    }
}
|
||||
|
||||
/// 在 `<attach_root>/<md5(chat)>/<YYYY-MM>/Img/<md5>[_t|_h].dat` 下找文件。
|
||||
///
|
||||
/// 优先级:full > `_h`(HD thumbnail)> `_t`(thumbnail)。返回最优的一个;
|
||||
/// 找不到返回 None。
|
||||
///
|
||||
/// `attach_root` = `<wxchat_base>/msg/attach`。
|
||||
/// `create_time` 用于先定位 `<YYYY-MM>` 子目录;找不到时再 fallback 全月份扫描,
|
||||
/// 因为 WeChat 的 `YYYY-MM` 目录有时跟消息时间差 1 个月(按收到时间归档)。
|
||||
pub fn find_dat_file(
|
||||
attach_root: &Path,
|
||||
chat: &str,
|
||||
file_md5: &str,
|
||||
create_time: i64,
|
||||
) -> Option<PathBuf> {
|
||||
let chat_hash = format!("{:x}", md5::compute(chat.as_bytes()));
|
||||
let chat_dir = attach_root.join(&chat_hash);
|
||||
if !chat_dir.is_dir() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// 第一步:试 create_time 当月 + 前后各一个月(共 3 个候选目录)
|
||||
let candidates_ym: Vec<String> = three_month_candidates(create_time);
|
||||
for ym in &candidates_ym {
|
||||
let img_dir = chat_dir.join(ym).join("Img");
|
||||
if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) {
|
||||
return Some(p);
|
||||
}
|
||||
}
|
||||
|
||||
// 第二步 fallback:扫整个 chat_dir 的所有月份子目录
|
||||
let entries = std::fs::read_dir(&chat_dir).ok()?;
|
||||
let mut all_months: Vec<PathBuf> = entries
|
||||
.filter_map(|e| e.ok())
|
||||
.map(|e| e.path())
|
||||
.filter(|p| p.is_dir())
|
||||
.collect();
|
||||
// 已经试过的 3 个候选可以跳过,但成本极小;保留全量扫
|
||||
all_months.sort();
|
||||
for month_dir in all_months {
|
||||
let img_dir = month_dir.join("Img");
|
||||
if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) {
|
||||
return Some(p);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Picks the best available `.dat` variant for `file_md5` inside `img_dir`.
///
/// Variants are probed in priority order: `<md5>.dat` (full), `<md5>_h.dat`
/// (HD thumbnail), `<md5>_t.dat` (thumbnail). Returns `None` when `img_dir` is
/// not a directory or none of the three files exists.
fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option<PathBuf> {
    // Suffixes listed from most to least preferred.
    const SUFFIX_PRIORITY: [&str; 3] = ["", "_h", "_t"];

    if !img_dir.is_dir() {
        return None;
    }
    SUFFIX_PRIORITY
        .iter()
        .map(|suffix| img_dir.join(format!("{}{}.dat", file_md5, suffix)))
        .find(|candidate| candidate.is_file())
}
|
||||
|
||||
fn three_month_candidates(unix_ts: i64) -> Vec<String> {
|
||||
use chrono::{Datelike, Duration};
|
||||
let dt = match chrono::Local.timestamp_opt(unix_ts, 0).single() {
|
||||
Some(d) => d,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let prev = dt - Duration::days(31);
|
||||
let next = dt + Duration::days(31);
|
||||
[prev, dt, next]
|
||||
.iter()
|
||||
.map(|d| format!("{:04}-{:02}", d.year(), d.month()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Builds `<base>/msg/attach` from `<wxchat_base>` (the parent directory of
/// `db_storage`).
pub fn attach_root_for(wxchat_base: &Path) -> PathBuf {
    let mut root = wxchat_base.to_path_buf();
    root.push("msg");
    root.push("attach");
    root
}
|
||||
|
||||
/// 完整流程:用 `attachment_id` 拿 md5 + 找 .dat。失败返回带具体诊断信息的 `Err`。
|
||||
///
|
||||
/// `resource_db_path` 由 daemon 提供(DBCache 已经解密好);
|
||||
/// `attach_root` 由 caller 拼好(`attach_root_for(wxchat_base)`)。
|
||||
/// 同步函数 — caller 在 `spawn_blocking` 里跑。
|
||||
pub fn resolve_blocking(
|
||||
id: &AttachmentId,
|
||||
resource_db_path: &Path,
|
||||
attach_root: &Path,
|
||||
) -> Result<ResolvedAttachment> {
|
||||
let lo32_type: i64 = match id.kind {
|
||||
super::AttachmentKind::Image => 3,
|
||||
super::AttachmentKind::Voice => 34,
|
||||
super::AttachmentKind::Video => 43,
|
||||
super::AttachmentKind::File => 49,
|
||||
};
|
||||
|
||||
let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)?
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)",
|
||||
id.chat,
|
||||
id.local_id,
|
||||
lo32_type
|
||||
)
|
||||
})?;
|
||||
|
||||
let dat_path = find_dat_file(attach_root, &id.chat, &meta.md5, id.create_time).ok_or_else(
|
||||
|| {
|
||||
anyhow!(
|
||||
"找不到本地 .dat(md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理",
|
||||
meta.md5,
|
||||
id.chat,
|
||||
id.create_time
|
||||
)
|
||||
},
|
||||
)?;
|
||||
let size = std::fs::metadata(&dat_path).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
Ok(ResolvedAttachment { id: id.clone(), md5: meta.md5, dat_path, size })
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch directory for a single test. It is removed when the guard is
    /// dropped, so nothing is left behind in the system temp dir even when an
    /// assertion panics.
    struct TempDirGuard(PathBuf);

    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Creates a uniquely named scratch directory (pid + nanosecond timestamp).
    fn tempdir_for_test() -> TempDirGuard {
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let p = std::env::temp_dir().join(format!("wx-cli-attach-test-{}-{}", pid, nanos));
        std::fs::create_dir_all(&p).unwrap();
        TempDirGuard(p)
    }

    #[test]
    fn extract_md5_main_path() {
        // Build a blob that contains the `12 22 0a 20` marker.
        let mut blob = vec![0xAA, 0xBB, 0xCC];
        blob.extend_from_slice(&[0x12, 0x22, 0x0A, 0x20]);
        blob.extend_from_slice(b"deadbeefcafebabe1234567890abcdef");
        blob.extend_from_slice(&[0xFF, 0xFF]);
        assert_eq!(
            extract_md5_from_packed_info(&blob),
            Some("deadbeefcafebabe1234567890abcdef".to_string())
        );
    }

    #[test]
    fn extract_md5_fallback_no_marker() {
        // No marker, but the blob still holds 32 bytes of valid hex.
        let mut blob = vec![0xFF, 0x00];
        blob.extend_from_slice(b"00112233445566778899aabbccddeeff");
        blob.extend_from_slice(&[0x01]);
        assert_eq!(
            extract_md5_from_packed_info(&blob),
            Some("00112233445566778899aabbccddeeff".to_string())
        );
    }

    #[test]
    fn extract_md5_uppercase_normalized_to_lower() {
        let mut blob = vec![0x12, 0x22, 0x0A, 0x20];
        blob.extend_from_slice(b"DEADBEEFCAFEBABE1234567890ABCDEF");
        // File md5s upstream, in CI and locally are all lowercase; force
        // lowercase so a case mismatch cannot cause a lookup miss.
        assert_eq!(
            extract_md5_from_packed_info(&blob),
            Some("deadbeefcafebabe1234567890abcdef".to_string())
        );
    }

    #[test]
    fn extract_md5_returns_none_on_garbage() {
        let blob = vec![0; 16];
        assert!(extract_md5_from_packed_info(&blob).is_none());
    }

    #[test]
    fn three_month_candidates_includes_prev_curr_next() {
        // 2025-08-15 (mid-month) -> 2025-07, 2025-08, 2025-09
        let ts = chrono::Local
            .with_ymd_and_hms(2025, 8, 15, 12, 0, 0)
            .unwrap()
            .timestamp();
        let v = three_month_candidates(ts);
        assert!(v.contains(&"2025-07".to_string()));
        assert!(v.contains(&"2025-08".to_string()));
        assert!(v.contains(&"2025-09".to_string()));
    }

    #[test]
    fn pick_best_prefers_full_then_h_then_t() {
        let tmp = tempdir_for_test();
        let img = tmp.0.join("Img");
        std::fs::create_dir_all(&img).unwrap();
        let md5 = "abcd1234";
        std::fs::write(img.join(format!("{}_t.dat", md5)), b"thumb").unwrap();
        std::fs::write(img.join(format!("{}_h.dat", md5)), b"hd").unwrap();
        // With only `_t` / `_h` present, `_h` is chosen.
        assert_eq!(
            pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(),
            format!("{}_h.dat", md5).as_str()
        );
        // Once the full image exists, it is chosen.
        std::fs::write(img.join(format!("{}.dat", md5)), b"full").unwrap();
        assert_eq!(
            pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(),
            format!("{}.dat", md5).as_str()
        );
    }
}
|
||||
|
|
@ -4,6 +4,7 @@ mod crypto;
|
|||
mod scanner;
|
||||
mod daemon;
|
||||
mod cli;
|
||||
mod attachment;
|
||||
|
||||
fn main() {
|
||||
if std::env::var("WX_DAEMON_MODE").is_ok() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue