mirror of https://github.com/jackwener/wx-cli.git
Merge c3cb372c8c into 08af894594
commit
c3611225d8
|
|
@ -243,11 +243,14 @@ wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
|||
# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等)
|
||||
wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||
wx extract <attachment_id> -o /tmp/raw.wxgf --raw # 保留原始 WXGF/HEVC 容器
|
||||
```
|
||||
|
||||
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||
|
||||
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际写出的图片格式:jpg / png / gif / webp 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`;WXGF 经 ffmpeg 转码时会追加 `+wxgf_ffmpeg` 后缀,例如 `v2+wxgf_ffmpeg`)。
|
||||
|
||||
微信 4 会把部分图片保存成内部 `WXGF/WXAM` 容器(解码后头部为 `wxgf`,报告里的 `source_format` 为 `hevc`)。默认 `wx extract` 会从 WXGF 中提取最大的 HEVC partition,并调用 `ffmpeg` 转成 JPG;报告会额外带 `source_format` / `source_size` / `transcoder` / `wxgf_partition_*`。如果本机没有 `ffmpeg`,请安装后重试,或用 `WX_FFMPEG=/path/to/ffmpeg` 指定路径;确实需要原始容器时传 `--raw`。如果微信里从未点开过该图片,本地通常只有 `_t.dat` 缩略图,先在微信客户端点开图片让它下载完整 `.dat`,再重新执行 `wx extract`。
|
||||
|
||||
支持的解码档位:
|
||||
- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推)
|
||||
|
|
|
|||
5
SKILL.md
5
SKILL.md
|
|
@ -280,11 +280,14 @@ wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
|||
# 2) 用 attachment_id 把单个资源解密写到指定路径
|
||||
wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||
wx extract <attachment_id> -o /tmp/raw.wxgf --raw # 保留原始 WXGF/HEVC 容器
|
||||
```
|
||||
|
||||
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||
|
||||
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际写出的图片格式:jpg / png / gif / webp 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`;WXGF 经 ffmpeg 转码时会追加 `+wxgf_ffmpeg` 后缀,例如 `v2+wxgf_ffmpeg`)。
|
||||
|
||||
微信 4 图片可能解码成 `WXGF/WXAM` 容器(头部 `wxgf`,内部是 HEVC)。默认 `wx extract` 会自动提取 WXGF 里的最大 HEVC partition 并用 `ffmpeg` 转 JPG;报告会带 `source_format: hevc`、`source_size`、`transcoder` 和 `wxgf_partition_*`。如果只需要原始容器,传 `--raw`。如果本地只拿到 `_t.dat` 缩略图,输出会很小且文字图片不可读;让用户先在微信客户端点开图片,等完整 `.dat` 下载到本地后再重新 `wx extract`。
|
||||
|
||||
支持的解码档位:
|
||||
- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use anyhow::{anyhow, Result};
|
|||
|
||||
pub mod v1_xor;
|
||||
pub mod v2;
|
||||
pub mod wxgf;
|
||||
|
||||
/// 完整 V2 magic:`\x07\x08V2\x08\x07`
|
||||
pub const V2_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'2', 0x08, 0x07];
|
||||
|
|
|
|||
|
|
@ -0,0 +1,199 @@
|
|||
//! WeChat 4 WXGF/WXAM image container support.
|
||||
//!
|
||||
//! `wxgf` is not a normal image format. It is a private WeChat container whose
|
||||
//! largest data partition is usually an Annex B HEVC bitstream. We keep the
|
||||
//! parser tiny: find HEVC start codes after the WXGF header, validate the
|
||||
//! 4-byte big-endian length immediately before the start code, then hand the
|
||||
//! largest partition to ffmpeg.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Four-byte signature expected at the very start of a WXGF/WXAM container.
const WXGF_MAGIC: &[u8; 4] = b"wxgf";
/// Name of the environment variable that overrides which ffmpeg executable is run.
const FFMPEG_ENV: &str = "WX_FFMPEG";
|
||||
|
||||
/// Location of one length-prefixed HEVC partition inside a WXGF container.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct WxgfPartition {
    /// Byte index, within the whole container, of the partition's HEVC start code.
    pub offset: usize,
    /// Partition byte length, including the HEVC start code at `offset`.
    pub size: usize,
    /// `size` divided by the total container length.
    pub ratio: f64,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct WxgfJpeg {
|
||||
pub data: Vec<u8>,
|
||||
pub partition: WxgfPartition,
|
||||
pub ffmpeg: String,
|
||||
}
|
||||
|
||||
/// Pair of scratch files used for one ffmpeg run; both are removed on drop.
struct TempPaths {
    /// File the raw HEVC bitstream is written to.
    input: PathBuf,
    /// File ffmpeg is asked to write the JPEG to.
    output: PathBuf,
}

impl Drop for TempPaths {
    /// Best-effort cleanup: removal errors (e.g. a file that was never created) are ignored.
    fn drop(&mut self) {
        for scratch in [&self.input, &self.output] {
            let _ = std::fs::remove_file(scratch);
        }
    }
}
|
||||
|
||||
/// Return the largest HEVC Annex B partition inside a WXGF/WXAM container.
|
||||
pub fn largest_partition(data: &[u8]) -> Result<WxgfPartition> {
|
||||
if data.len() < 15 || &data[..4] != WXGF_MAGIC {
|
||||
bail!("invalid WXGF image container");
|
||||
}
|
||||
|
||||
let header_len = data[4] as usize;
|
||||
if header_len >= data.len() {
|
||||
bail!("invalid WXGF header length {}", header_len);
|
||||
}
|
||||
|
||||
for pattern in [&[0x00, 0x00, 0x00, 0x01][..], &[0x00, 0x00, 0x01][..]] {
|
||||
let mut partitions = Vec::new();
|
||||
let mut rel_offset = 0usize;
|
||||
|
||||
while header_len + rel_offset < data.len() {
|
||||
let search_from = header_len + rel_offset;
|
||||
let Some(idx) = find_subslice(&data[search_from..], pattern) else {
|
||||
break;
|
||||
};
|
||||
let abs_idx = search_from + idx;
|
||||
if abs_idx < 4 {
|
||||
rel_offset = rel_offset.saturating_add(idx + 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
let size = u32::from_be_bytes(data[abs_idx - 4..abs_idx].try_into().unwrap()) as usize;
|
||||
if size > 0 && abs_idx.checked_add(size).is_some_and(|end| end <= data.len()) {
|
||||
partitions.push(WxgfPartition {
|
||||
offset: abs_idx,
|
||||
size,
|
||||
ratio: size as f64 / data.len() as f64,
|
||||
});
|
||||
rel_offset = abs_idx - header_len + size;
|
||||
} else {
|
||||
rel_offset = abs_idx - header_len + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max) = partitions.into_iter().max_by_key(|p| p.size) {
|
||||
return Ok(max);
|
||||
}
|
||||
}
|
||||
|
||||
bail!("WXGF image has no valid HEVC partition")
|
||||
}
|
||||
|
||||
/// Convert a WXGF/WXAM image to JPEG through ffmpeg.
|
||||
///
|
||||
/// The ffmpeg path is resolved from `WX_FFMPEG`, then falls back to `ffmpeg` in
|
||||
/// PATH. This avoids adding Python or native HEVC decoder dependencies.
|
||||
pub fn transcode_to_jpeg(data: &[u8]) -> Result<WxgfJpeg> {
|
||||
let partition = largest_partition(data)?;
|
||||
let hevc = &data[partition.offset..partition.offset + partition.size];
|
||||
let ffmpeg = std::env::var(FFMPEG_ENV).unwrap_or_else(|_| "ffmpeg".to_string());
|
||||
let paths = temp_paths();
|
||||
|
||||
std::fs::write(&paths.input, hevc)
|
||||
.with_context(|| format!("写出 WXGF/HEVC 临时输入失败:{}", paths.input.display()))?;
|
||||
|
||||
let output = Command::new(&ffmpeg)
|
||||
.arg("-y")
|
||||
.arg("-hide_banner")
|
||||
.arg("-loglevel")
|
||||
.arg("error")
|
||||
.arg("-f")
|
||||
.arg("hevc")
|
||||
.arg("-i")
|
||||
.arg(&paths.input)
|
||||
.arg("-vframes")
|
||||
.arg("1")
|
||||
.arg("-c:v")
|
||||
.arg("mjpeg")
|
||||
.arg("-q:v")
|
||||
.arg("4")
|
||||
.arg(&paths.output)
|
||||
.output()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"启动 ffmpeg 失败;请安装 ffmpeg 或用 {FFMPEG_ENV} 指定路径,或用 wx extract --raw 导出原始 WXGF"
|
||||
)
|
||||
})?;
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
bail!(
|
||||
"ffmpeg 转码 WXGF/HEVC 失败:{}",
|
||||
stderr.trim().chars().take(800).collect::<String>()
|
||||
);
|
||||
}
|
||||
|
||||
let data = std::fs::read(&paths.output)
|
||||
.with_context(|| format!("读取 ffmpeg 输出失败:{}", paths.output.display()))?;
|
||||
if data.is_empty() {
|
||||
bail!("ffmpeg 转码 WXGF/HEVC 成功但没有输出 JPEG 数据");
|
||||
}
|
||||
|
||||
Ok(WxgfJpeg {
|
||||
data,
|
||||
partition,
|
||||
ffmpeg,
|
||||
})
|
||||
}
|
||||
|
||||
fn temp_paths() -> TempPaths {
|
||||
static COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos())
|
||||
.unwrap_or(0);
|
||||
let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
let stem = format!("wx-cli-wxgf-{}-{}-{}", std::process::id(), nanos, seq);
|
||||
let dir = std::env::temp_dir();
|
||||
TempPaths {
|
||||
input: dir.join(format!("{}.hevc", stem)),
|
||||
output: dir.join(format!("{}.jpg", stem)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Index of the first occurrence of `needle` in `haystack`, if any.
///
/// An empty `needle`, or one longer than `haystack`, never matches.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle.len() {
        // `windows(0)` would panic, so the empty needle is rejected up front.
        0 => None,
        n if n > haystack.len() => None,
        n => haystack.windows(n).position(|window| window == needle),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode one partition: big-endian length (start code + payload), the
    /// 4-byte start code, then the payload.
    fn framed(payload: &[u8]) -> Vec<u8> {
        let declared = u32::try_from(payload.len() + 4).unwrap();
        let mut bytes = declared.to_be_bytes().to_vec();
        bytes.extend_from_slice(&[0, 0, 0, 1]);
        bytes.extend_from_slice(payload);
        bytes
    }

    #[test]
    fn finds_largest_partition() {
        // Signature plus header-length byte, zero-padded to the 19-byte header.
        let mut container = vec![b'w', b'x', b'g', b'f', 19];
        container.resize(19, 0);

        container.extend(framed(&[1, 2, 3, 4]));

        // The second partition's start code sits right after its length prefix.
        let expected_offset = container.len() + 4;
        container.extend(framed(&[5, 6, 7, 8, 9, 10, 11, 12]));

        let found = largest_partition(&container).unwrap();
        assert_eq!(found.offset, expected_offset);
        assert_eq!(found.size, 12);
    }

    #[test]
    fn rejects_non_wxgf() {
        let message = largest_partition(b"not wxgf").unwrap_err().to_string();
        assert!(message.contains("WXGF"));
    }
}
|
||||
|
|
@ -13,12 +13,14 @@ pub fn cmd_extract(
|
|||
attachment_id: String,
|
||||
output: String,
|
||||
overwrite: bool,
|
||||
raw: bool,
|
||||
json: bool,
|
||||
) -> Result<()> {
|
||||
let req = Request::Extract {
|
||||
attachment_id,
|
||||
output,
|
||||
overwrite,
|
||||
raw,
|
||||
};
|
||||
let resp = transport::send(req)?;
|
||||
print_value(&resp.data, &resolve(json))
|
||||
|
|
|
|||
|
|
@ -305,6 +305,9 @@ enum Commands {
|
|||
/// 目标已存在时覆盖
|
||||
#[arg(long)]
|
||||
overwrite: bool,
|
||||
/// 原样导出解码后的附件数据;WXGF/HEVC 图片不转 JPG
|
||||
#[arg(long)]
|
||||
raw: bool,
|
||||
/// 输出 JSON(默认 YAML)
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -518,8 +521,9 @@ fn dispatch(cli: Cli) -> Result<()> {
|
|||
attachment_id,
|
||||
output,
|
||||
overwrite,
|
||||
raw,
|
||||
json,
|
||||
} => extract::cmd_extract(attachment_id, output, overwrite, json),
|
||||
} => extract::cmd_extract(attachment_id, output, overwrite, raw, json),
|
||||
Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4483,6 +4483,7 @@ pub async fn q_extract(
|
|||
attachment_id: &str,
|
||||
output: &str,
|
||||
overwrite: bool,
|
||||
raw: bool,
|
||||
) -> Result<Value> {
|
||||
use crate::attachment::{
|
||||
attachment_id::AttachmentId,
|
||||
|
|
@ -4573,25 +4574,63 @@ pub async fn q_extract(
|
|||
};
|
||||
|
||||
let decoded = decoder::dispatch(&dat_bytes, v2_key)?;
|
||||
let source_format = decoded.format;
|
||||
let source_size = decoded.data.len();
|
||||
let mut output_format = source_format.to_string();
|
||||
let mut decoder_name = decoded.decoder.to_string();
|
||||
let mut output_data = decoded.data;
|
||||
let mut wxgf_partition_offset: Option<usize> = None;
|
||||
let mut wxgf_partition_size: Option<usize> = None;
|
||||
let mut wxgf_partition_ratio: Option<f64> = None;
|
||||
let mut transcoder: Option<String> = None;
|
||||
|
||||
if source_format == "hevc" && !raw {
|
||||
let jpg = decoder::wxgf::transcode_to_jpeg(&output_data)
|
||||
.context("WXGF/HEVC 图片转 JPG 失败;可安装 ffmpeg 或用 wx extract --raw 导出原始 WXGF")?;
|
||||
wxgf_partition_offset = Some(jpg.partition.offset);
|
||||
wxgf_partition_size = Some(jpg.partition.size);
|
||||
wxgf_partition_ratio = Some(jpg.partition.ratio);
|
||||
transcoder = Some(format!("ffmpeg:{}", jpg.ffmpeg));
|
||||
output_data = jpg.data;
|
||||
output_format = "jpg".to_string();
|
||||
decoder_name.push_str("+wxgf_ffmpeg");
|
||||
}
|
||||
|
||||
// 写盘
|
||||
std::fs::write(&output_path2, &decoded.data)
|
||||
std::fs::write(&output_path2, &output_data)
|
||||
.with_context(|| format!("写出文件失败:{}", output_path2.display()))?;
|
||||
|
||||
// 注意:不要在这里塞 `ok: true`。dispatch 会用 Response::ok(v) 包一层,
|
||||
// Response 的 `data: Value` 字段是 #[serde(flatten)] 写出的,本 payload
|
||||
// 的 `ok` 会和 Response 自带的 `ok` 在线上拼成两个同名 key,CLI 反序列化时
|
||||
// serde_json 直接报 "duplicate field",业务请求看上去像 daemon 解析失败。
|
||||
Ok(json!({
|
||||
let mut report = json!({
|
||||
"kind": id_for_task.kind.as_str(),
|
||||
"md5": resolved.md5,
|
||||
"dat_path": resolved.dat_path.display().to_string(),
|
||||
"dat_size": resolved.size,
|
||||
"output": output_path2.display().to_string(),
|
||||
"output_size": decoded.data.len(),
|
||||
"format": decoded.format,
|
||||
"decoder": decoded.decoder,
|
||||
}))
|
||||
"output_size": output_data.len(),
|
||||
"format": output_format,
|
||||
"decoder": decoder_name,
|
||||
});
|
||||
if source_format != report["format"].as_str().unwrap_or_default() {
|
||||
report["source_format"] = json!(source_format);
|
||||
report["source_size"] = json!(source_size);
|
||||
}
|
||||
if let Some(transcoder) = transcoder {
|
||||
report["transcoder"] = json!(transcoder);
|
||||
}
|
||||
if let Some(offset) = wxgf_partition_offset {
|
||||
report["wxgf_partition_offset"] = json!(offset);
|
||||
}
|
||||
if let Some(size) = wxgf_partition_size {
|
||||
report["wxgf_partition_size"] = json!(size);
|
||||
}
|
||||
if let Some(ratio) = wxgf_partition_ratio {
|
||||
report["wxgf_partition_ratio"] = json!(ratio);
|
||||
}
|
||||
Ok(report)
|
||||
})
|
||||
.await??;
|
||||
|
||||
|
|
|
|||
|
|
@ -353,7 +353,8 @@ async fn dispatch(req: Request, db: &DbCache, names: &tokio::sync::RwLock<Arc<Na
|
|||
attachment_id,
|
||||
output,
|
||||
overwrite,
|
||||
} => match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await {
|
||||
raw,
|
||||
} => match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite, raw).await {
|
||||
Ok(v) => Response::ok(v),
|
||||
Err(e) => Response::err(e.to_string()),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -184,6 +184,9 @@ pub enum Request {
|
|||
/// 已存在时是否覆盖
|
||||
#[serde(default)]
|
||||
overwrite: bool,
|
||||
/// 原样导出解码后的附件数据;图片为 WXGF/HEVC 时不调用 ffmpeg 转 JPG
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
raw: bool,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue