From d7bd351cecc356ee574c306242c37593af67ba1c Mon Sep 17 00:00:00 2001 From: Richard Liu <1625351+richardzone@users.noreply.github.com> Date: Tue, 9 Jun 2026 13:12:17 +0800 Subject: [PATCH] feat: transcribe WeChat voice attachments --- README.md | 33 +- SKILL.md | 30 +- src/attachment/resolver.rs | 618 +++++++++++++++++++++++++++++++++++-- src/cli/attachments.rs | 3 +- src/cli/extract.rs | 8 +- src/cli/mod.rs | 57 +++- src/cli/transcribe.rs | 467 ++++++++++++++++++++++++++++ src/daemon/query.rs | 246 ++++++++++++--- src/ipc.rs | 6 +- 9 files changed, 1373 insertions(+), 95 deletions(-) create mode 100644 src/cli/transcribe.rs diff --git a/README.md b/README.md index 1c1c7b5..af5ff9b 100644 --- a/README.md +++ b/README.md @@ -230,9 +230,9 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL 每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 -### 附件提取(图片) +### 附件提取(图片;语音 POC) -聊天里的附件本体存在 `xwechat_files//msg/attach/...` 下的 `.dat` 文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 +聊天里的附件本体存在本地数据库或 `xwechat_files//msg/attach/...` 下的资源文件。图片需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图;语音目前是 POC,优先从 `message/media_0.db::VoiceInfo` 导出 `voice_data`,未命中时再尝试本地文件缓存,只做原样复制,不做转码或转文字。 ```bash # 1) 列出会话里的图片附件,先拿到不透明的 attachment_id @@ -240,14 +240,37 @@ wx attachments "张三" wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 -# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) +# POC: 列出语音消息资源 +wx attachments "张三" --kind voice -n 20 + +# 2) 把单个 attachment_id 写出去(图片会解码;语音 POC 原样复制) wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/voice.aud wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。默认 `kind` 是 `image`;`--kind voice` / `--kind audio` 是实验能力,依赖本地 `media_0.db` 或语音文件缓存仍可读取。 -`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 +`extract` 输出报告里带:`output` / `output_size` / `format` / `decoder`;从本地附件文件命中时还带 `md5` / `dat_path` / `dat_size`。图片的 `format` 是实际识别出的图片格式(jpg / png / gif / webp / hevc 等),`decoder` 是 `legacy_xor` / `v1_aes` / `v2`;语音 POC 的 `decoder` 是 `media_0_voice_data` 或 `raw_copy`。 + +#### 语音转文字 POC + +`wx transcribe` 会把语音 `attachment_id` 走完整本地链路:导出 WeChat 原始语音 bytes → SILK v3 decoder 转 PCM → `ffmpeg` 转 16k mono WAV → `whisper.cpp` 本地 ASR。wx-cli 不内置模型,也不下载依赖;所有工具都在本机执行。`--keep-temp` 会保留中间音频文件,目录权限保持 `0700`,但这些文件仍然是私密语音数据,只应在调试时使用。 + +```bash +# 依赖示例: +# 1) kn007/silk-v3-decoder 编译得到 silk/decoder +# 2) whisper.cpp 编译得到 whisper-cli,并下载 ggml 多语种模型 +# 3) ffmpeg 在 PATH 中 + +wx transcribe \ + --silk-decoder /path/to/silk-v3-decoder/silk/decoder \ + --whisper-bin /path/to/whisper.cpp/build/bin/whisper-cli \ + --model /path/to/whisper.cpp/models/ggml-large-v3-turbo.bin \ + --language zh +``` + +也可用环境变量减少参数:`WX_SILK_DECODER` / `WX_WHISPER_BIN` / `WX_WHISPER_MODEL` / `WX_FFMPEG`。 支持的解码档位: - **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) diff --git a/SKILL.md b/SKILL.md index 61082fe..857a0e1 100644 --- a/SKILL.md +++ b/SKILL.md @@ -267,24 +267,42 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url' 每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 -### 附件提取(图片) +### 附件提取(图片;语音 POC) -聊天里的图片本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: +聊天里的附件本体存在本地数据库或 `xwechat_files//msg/attach/...` 下的资源文件。图片需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图;语音目前是 POC,优先从 `message/media_0.db::VoiceInfo` 导出 `voice_data`,未命中时再尝试本地文件缓存,只做原样复制,不做转码或转文字。 ```bash -# 1) 先列出图片附件,拿到不透明的 attachment_id +# 1) 先列出附件,拿到不透明的 attachment_id wx attachments "张三" wx attachments "AI群" --kind image -n 100 wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 -# 2) 用 attachment_id 把单个资源解密写到指定路径 +# POC: 列出语音消息资源 +wx attachments "张三" --kind voice -n 20 + +# 2) 用 attachment_id 把单个资源写到指定路径 wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/voice.aud wx extract -o /tmp/x.jpg --overwrite ``` -`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。默认 `kind` 是 `image`;`--kind voice` / `--kind audio` 是 POC,优先从 `message/media_0.db::VoiceInfo` 导出 `voice_data`,未命中时再尝试本地文件缓存,只做原样复制,不做转码或转文字。 -`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 +`extract` 报告里带:`output` / `output_size` / `format` / `decoder`;从本地附件文件命中时还带 `md5` / `dat_path` / `dat_size`。图片的 `decoder` 是 `legacy_xor` / `v1_aes` / `v2`;语音 POC 的 `decoder` 是 `media_0_voice_data` 或 `raw_copy`。 + +#### 语音转文字 POC + +`wx transcribe` 会把语音 `attachment_id` 走完整本地链路:导出 WeChat 原始语音 bytes → SILK v3 decoder 转 PCM → `ffmpeg` 转 16k mono WAV → `whisper.cpp` 本地 ASR。wx-cli 不内置模型,也不下载依赖;所有工具都在本机执行。`--keep-temp` 会保留中间音频文件,目录权限保持 `0700`,但这些文件仍然是私密语音数据,只应在调试时使用。 + +```bash +wx transcribe \ + --silk-decoder /path/to/silk-v3-decoder/silk/decoder \ + --whisper-bin /path/to/whisper.cpp/build/bin/whisper-cli \ + --model /path/to/whisper.cpp/models/ggml-large-v3-turbo.bin \ + --language zh +``` + +也可用环境变量减少参数:`WX_SILK_DECODER` / `WX_WHISPER_BIN` / `WX_WHISPER_MODEL` / `WX_FFMPEG`。 支持的解码档位: - **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) diff --git a/src/attachment/resolver.rs b/src/attachment/resolver.rs index 8db4f41..282eee9 100644 --- a/src/attachment/resolver.rs +++ b/src/attachment/resolver.rs @@ -17,9 +17,10 @@ use anyhow::{anyhow, Context, Result}; use chrono::TimeZone; use rusqlite::Connection; +use std::collections::HashSet; use std::path::{Path, PathBuf}; -use super::AttachmentId; +use super::{AttachmentId, AttachmentKind}; /// 单条 attachment 在资源库 + 本地 attach 树下的解析结果。 #[derive(Debug, Clone)] @@ -40,6 +41,14 @@ pub struct AttachmentMetadata { pub md5: String, } +/// `message/media_0.db::VoiceInfo` 中的一条语音资源。 +#[derive(Debug, Clone)] +pub struct ResolvedVoiceMedia { + pub data: Vec, + pub chunks: usize, + pub svr_id: Option, +} + /// 用 `(chat, local_id)` 查 message_resource.db 拿 file md5。 /// /// 调用方传已经解密好的 `message_resource.db` 路径(由 daemon 的 `DBCache` 准备)。 @@ -87,8 +96,8 @@ pub fn lookup_md5_blocking( ) .ok(); - let packed: Option> = packed_exact.or_else(|| conn - .query_row( + let packed: Option> = packed_exact.or_else(|| { + conn.query_row( "SELECT packed_info FROM MessageResourceInfo WHERE chat_id = ?1 AND message_local_id = ?2 @@ -98,7 +107,8 @@ pub fn lookup_md5_blocking( rusqlite::params![chat_id, local_id, msg_local_type_lo32], |row| row.get(0), ) - .ok()); + .ok() + }); let Some(blob) = packed else { return Ok(None); @@ -106,6 +116,170 @@ pub fn lookup_md5_blocking( Ok(extract_md5_from_packed_info(&blob).map(|md5| AttachmentMetadata { md5 })) } +/// 从 `message/media_0.db` 的 VoiceInfo 表读取语音 BLOB。 +/// +/// WeChat 4.x 语音不一定进入 `message_resource.db`,常见路径是: +/// `media_0.db::VoiceInfo(local_id, create_time, voice_data, data_index)`。 +/// `data_index` 预留分片能力,所以这里按 data_index 顺序拼接同一条语音的所有 chunk。 +pub fn lookup_voice_media_blocking( + media_db_path: &Path, + chat: &str, + local_id: i64, + create_time: i64, +) -> Result> { + let conn = Connection::open_with_flags( + media_db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI, + ) + .with_context(|| format!("打开 media_0.db {:?}", media_db_path))?; + + let has_voice_info: bool = conn + .query_row( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='VoiceInfo'", + [], + |_| Ok(()), + ) + .is_ok(); + if !has_voice_info { + return Ok(None); + } + + let columns = table_columns(&conn, "VoiceInfo")?; + if !columns.contains("voice_data") { + return Ok(None); + } + let data_index_expr = if columns.contains("data_index") { + "CAST(COALESCE(data_index, '0') AS INTEGER)" + } else { + "0" + }; + let svr_id_expr = if columns.contains("svr_id") { + "svr_id" + } else { + "NULL" + }; + + let mut rows = Vec::new(); + + if columns.contains("local_id") { + if columns.contains("chat_name_id") { + let chat_id: Option = conn + .query_row( + "SELECT rowid FROM Name2Id WHERE user_name = ?1", + [chat], + |row| row.get(0), + ) + .ok(); + + let Some(chat_id) = chat_id else { + return Ok(None); + }; + + if columns.contains("create_time") { + rows = query_voice_rows( + &conn, + "chat_name_id = ?1 AND local_id = ?2 AND create_time = ?3", + rusqlite::params![chat_id, local_id, create_time], + data_index_expr, + svr_id_expr, + )?; + } + if rows.is_empty() && !columns.contains("create_time") { + rows = query_voice_rows( + &conn, + "chat_name_id = ?1 AND local_id = ?2", + rusqlite::params![chat_id, local_id], + data_index_expr, + svr_id_expr, + )?; + } + } + } + + if rows.is_empty() && columns.contains("msgid") { + if !columns.contains("user_name") { + return Ok(None); + } + if columns.contains("msgtime") { + rows = query_voice_rows( + &conn, + "user_name = ?1 AND msgid = ?2 AND msgtime = ?3", + rusqlite::params![chat, local_id, create_time], + data_index_expr, + svr_id_expr, + )?; + } + if rows.is_empty() && !columns.contains("msgtime") { + rows = query_voice_rows( + &conn, + "user_name = ?1 AND msgid = ?2", + rusqlite::params![chat, local_id], + data_index_expr, + svr_id_expr, + )?; + } + } + + if rows.is_empty() { + return Ok(None); + } + + rows.sort_by_key(|row| row.0); + let svr_id = rows.iter().find_map(|row| row.2); + let chunks = rows.len(); + let total_len: usize = rows.iter().map(|row| row.1.len()).sum(); + if total_len == 0 { + return Ok(None); + } + let mut data = Vec::with_capacity(total_len); + for (_idx, chunk, _svr_id) in rows { + data.extend_from_slice(&chunk); + } + + Ok(Some(ResolvedVoiceMedia { + data, + chunks, + svr_id, + })) +} + +fn table_columns(conn: &Connection, table: &str) -> Result> { + let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?; + let columns = stmt + .query_map([], |row| row.get::<_, String>(1))? + .collect::>>()?; + Ok(columns) +} + +fn query_voice_rows

( + conn: &Connection, + where_clause: &str, + params: P, + data_index_expr: &str, + svr_id_expr: &str, +) -> Result, Option)>> +where + P: rusqlite::Params, +{ + let sql = format!( + "SELECT {data_index_expr} AS voice_index, voice_data, {svr_id_expr} AS voice_svr_id + FROM VoiceInfo + WHERE {where_clause} + ORDER BY voice_index, rowid" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt + .query_map(params, |row| { + Ok(( + row.get::<_, i64>(0).unwrap_or(0), + row.get::<_, Vec>(1).unwrap_or_default(), + row.get::<_, i64>(2).ok(), + )) + })? + .collect::>>()?; + Ok(rows) +} + /// 从 `MessageResourceInfo.packed_info` (protobuf) 提取 32 字节 ASCII hex md5。 /// /// 主路径:搜 4 字节 marker `12 22 0a 20`(field=2 LEN, length=34, sub field=1 LEN, length=32), @@ -145,12 +319,10 @@ fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option { if needle.is_empty() || needle.len() > haystack.len() { return None; } - haystack - .windows(needle.len()) - .position(|w| w == needle) + haystack.windows(needle.len()).position(|w| w == needle) } -/// 在 `///Img/[_t|_h].dat` 下找文件。 +/// 在 `///Img/[_t|_h].dat` 下找图片文件。 /// /// 优先级:full > `_h`(HD thumbnail)> `_t`(thumbnail)。返回最优的一个; /// 找不到返回 None。 @@ -163,18 +335,40 @@ pub fn find_dat_file( chat: &str, file_md5: &str, create_time: i64, +) -> Option { + find_media_file( + attach_root, + chat, + file_md5, + create_time, + AttachmentKind::Image, + ) +} + +/// 在本地附件树中定位指定 kind 的媒体文件。 +/// +/// image 走已经验证过的 `Img/[_h|_t].dat` 规则;voice 是 POC 路径,优先试 +/// `Voice` / `Audio` 目录里的 md5 同名文件,最后在 `msg/attach` 下按 md5 前缀递归兜底。 +pub fn find_media_file( + attach_root: &Path, + chat: &str, + file_md5: &str, + create_time: i64, + kind: AttachmentKind, ) -> Option { let chat_hash = format!("{:x}", md5::compute(chat.as_bytes())); let chat_dir = attach_root.join(&chat_hash); if !chat_dir.is_dir() { - return None; + return match kind { + AttachmentKind::Voice => find_by_md5_recursive(attach_root, file_md5, kind), + _ => None, + }; } // 第一步:试 create_time 当月 + 前后各一个月(共 3 个候选目录) let candidates_ym: Vec = three_month_candidates(create_time); for ym in &candidates_ym { - let img_dir = chat_dir.join(ym).join("Img"); - if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + if let Some(p) = pick_best_in_month_dir(&chat_dir.join(ym), file_md5, kind) { return Some(p); } } @@ -189,12 +383,37 @@ pub fn find_dat_file( // 已经试过的 3 个候选可以跳过,但成本极小;保留全量扫 all_months.sort(); for month_dir in all_months { - let img_dir = month_dir.join("Img"); - if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + if let Some(p) = pick_best_in_month_dir(&month_dir, file_md5, kind) { return Some(p); } } - None + + // POC fallback:Mac 4.x 的语音路径未完全验证。若上面的目录名猜错,仍按资源 md5 + // 在 attach 树下递归找一次,避免因为 `Voice`/`Audio` 布局差异直接失败。 + match kind { + AttachmentKind::Voice => find_by_md5_recursive(attach_root, file_md5, kind), + _ => None, + } +} + +fn pick_best_in_month_dir( + month_dir: &Path, + file_md5: &str, + kind: AttachmentKind, +) -> Option { + match kind { + AttachmentKind::Image => pick_best_in_img_dir(&month_dir.join("Img"), file_md5), + AttachmentKind::Voice => { + for subdir in ["Voice", "Audio", "Aud"] { + if let Some(p) = pick_best_media_file(&month_dir.join(subdir), file_md5, kind) { + return Some(p); + } + } + None + } + AttachmentKind::Video => pick_best_media_file(&month_dir.join("Video"), file_md5, kind), + AttachmentKind::File => pick_best_media_file(month_dir, file_md5, kind), + } } fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option { @@ -216,6 +435,94 @@ fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option { None } +fn pick_best_media_file(media_dir: &Path, file_md5: &str, kind: AttachmentKind) -> Option { + if !media_dir.is_dir() { + return None; + } + + for name in exact_media_names(file_md5, kind) { + let path = media_dir.join(name); + if path.is_file() { + return Some(path); + } + } + + let mut candidates = media_dir + .read_dir() + .ok()? + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| { + p.is_file() + && p.file_name() + .and_then(|s| s.to_str()) + .map(|name| name.starts_with(file_md5)) + .unwrap_or(false) + }) + .collect::>(); + candidates.sort_by_key(|p| { + let size = p.metadata().map(|m| m.len()).unwrap_or(0); + std::cmp::Reverse(size) + }); + candidates.into_iter().next() +} + +fn exact_media_names(file_md5: &str, kind: AttachmentKind) -> Vec { + match kind { + AttachmentKind::Image => vec![ + format!("{}.dat", file_md5), + format!("{}_h.dat", file_md5), + format!("{}_t.dat", file_md5), + ], + AttachmentKind::Voice => ["", ".aud", ".amr", ".silk", ".wav", ".m4a", ".mp3", ".dat"] + .iter() + .map(|ext| format!("{}{}", file_md5, ext)) + .collect(), + AttachmentKind::Video => [".mp4", ".mov", ".m4v", ".dat"] + .iter() + .map(|ext| format!("{}{}", file_md5, ext)) + .collect(), + AttachmentKind::File => vec![file_md5.to_string()], + } +} + +fn find_by_md5_recursive(root: &Path, file_md5: &str, kind: AttachmentKind) -> Option { + if !root.is_dir() { + return None; + } + let mut stack = vec![root.to_path_buf()]; + let mut matches = Vec::new(); + while let Some(dir) = stack.pop() { + let Ok(entries) = std::fs::read_dir(&dir) else { + continue; + }; + for entry in entries.filter_map(|e| e.ok()) { + let path = entry.path(); + if path.is_dir() { + stack.push(path); + continue; + } + if !path.is_file() { + continue; + } + let Some(name) = path.file_name().and_then(|s| s.to_str()) else { + continue; + }; + if name == file_md5 + || exact_media_names(file_md5, kind).iter().any(|n| n == name) + || name.starts_with(file_md5) + { + matches.push(path); + } + } + } + matches.sort_by_key(|p| { + let size = p.metadata().map(|m| m.len()).unwrap_or(0); + std::cmp::Reverse(size) + }); + matches.into_iter().next() +} + fn three_month_candidates(unix_ts: i64) -> Vec { use chrono::{Datelike, Duration}; let dt = match chrono::Local.timestamp_opt(unix_ts, 0).single() { @@ -268,19 +575,26 @@ pub fn resolve_blocking( ) })?; - let dat_path = find_dat_file(attach_root, &id.chat, &meta.md5, id.create_time).ok_or_else( - || { - anyhow!( - "找不到本地 .dat(md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理", - meta.md5, - id.chat, - id.create_time - ) - }, - )?; + let dat_path = + find_media_file(attach_root, &id.chat, &meta.md5, id.create_time, id.kind).ok_or_else( + || { + anyhow!( + "找不到本地附件文件(kind={} md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理", + id.kind.as_str(), + meta.md5, + id.chat, + id.create_time + ) + }, + )?; let size = std::fs::metadata(&dat_path).map(|m| m.len()).unwrap_or(0); - Ok(ResolvedAttachment { id: id.clone(), md5: meta.md5, dat_path, size }) + Ok(ResolvedAttachment { + id: id.clone(), + md5: meta.md5, + dat_path, + size, + }) } #[cfg(test)] @@ -334,11 +648,8 @@ mod tests { let dir = tempdir_for_test(); let db_path = dir.join("message_resource.db"); let conn = Connection::open(&db_path).unwrap(); - conn.execute( - "CREATE TABLE ChatName2Id (user_name TEXT)", - [], - ) - .unwrap(); + conn.execute("CREATE TABLE ChatName2Id (user_name TEXT)", []) + .unwrap(); conn.execute( "INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')", [], @@ -392,6 +703,208 @@ mod tests { assert_eq!(new.md5, "22222222222222222222222222222222"); } + #[test] + fn lookup_voice_media_reads_chunks_from_media_db() { + let dir = tempdir_for_test(); + let db_path = dir.join("media_0.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute("CREATE TABLE Name2Id (user_name TEXT)", []) + .unwrap(); + conn.execute( + "INSERT INTO Name2Id (rowid, user_name) VALUES (9, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE VoiceInfo ( + chat_name_id INTEGER, + create_time INTEGER, + local_id INTEGER, + svr_id INTEGER, + voice_data BLOB, + data_index TEXT DEFAULT '0' + )", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO VoiceInfo + (chat_name_id, create_time, local_id, svr_id, voice_data, data_index) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![9i64, 2000i64, 7i64, 123i64, b"two", "2"], + ) + .unwrap(); + conn.execute( + "INSERT INTO VoiceInfo + (chat_name_id, create_time, local_id, svr_id, voice_data, data_index) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![9i64, 2000i64, 7i64, 123i64, b"one", "1"], + ) + .unwrap(); + + let media = lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 2000) + .unwrap() + .unwrap(); + assert_eq!(media.data, b"onetwo"); + assert_eq!(media.chunks, 2); + assert_eq!(media.svr_id, Some(123)); + } + + #[test] + fn lookup_voice_media_keeps_rows_scoped_to_chat() { + let dir = tempdir_for_test(); + let db_path = dir.join("media_0.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute("CREATE TABLE Name2Id (user_name TEXT)", []) + .unwrap(); + conn.execute( + "INSERT INTO Name2Id (rowid, user_name) VALUES (9, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO Name2Id (rowid, user_name) VALUES (10, 'other@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE VoiceInfo ( + chat_name_id INTEGER, + create_time INTEGER, + local_id INTEGER, + svr_id INTEGER, + voice_data BLOB, + data_index TEXT DEFAULT '0' + )", + [], + ) + .unwrap(); + for (chat_id, data) in [(10i64, b"wrong".as_slice()), (9i64, b"right".as_slice())] { + conn.execute( + "INSERT INTO VoiceInfo + (chat_name_id, create_time, local_id, svr_id, voice_data, data_index) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![chat_id, 2000i64, 7i64, 123i64, data, "0"], + ) + .unwrap(); + } + + let media = lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 2000) + .unwrap() + .unwrap(); + assert_eq!(media.data, b"right"); + } + + #[test] + fn lookup_voice_media_uses_create_time_to_disambiguate_reused_local_id() { + let dir = tempdir_for_test(); + let db_path = dir.join("media_0.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute("CREATE TABLE Name2Id (user_name TEXT)", []) + .unwrap(); + conn.execute( + "INSERT INTO Name2Id (rowid, user_name) VALUES (9, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE VoiceInfo ( + chat_name_id INTEGER, + create_time INTEGER, + local_id INTEGER, + svr_id INTEGER, + voice_data BLOB, + data_index TEXT DEFAULT '0' + )", + [], + ) + .unwrap(); + for (create_time, data) in [(1000i64, b"old".as_slice()), (2000i64, b"new".as_slice())] { + conn.execute( + "INSERT INTO VoiceInfo + (chat_name_id, create_time, local_id, svr_id, voice_data, data_index) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![9i64, create_time, 7i64, 123i64, data, "0"], + ) + .unwrap(); + } + + let media = lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 2000) + .unwrap() + .unwrap(); + assert_eq!(media.data, b"new"); + assert!( + lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 3000) + .unwrap() + .is_none() + ); + } + + #[test] + fn lookup_voice_media_reads_legacy_schema_without_chunk_columns() { + let dir = tempdir_for_test(); + let db_path = dir.join("media_0.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE VoiceInfo ( + user_name TEXT, + msgid INTEGER, + msgtime INTEGER, + voice_data BLOB + )", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO VoiceInfo (user_name, msgid, msgtime, voice_data) + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["room@chatroom", 7i64, 2000i64, b"voice"], + ) + .unwrap(); + + let media = lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 2000) + .unwrap() + .unwrap(); + assert_eq!(media.data, b"voice"); + assert_eq!(media.chunks, 1); + assert_eq!(media.svr_id, None); + } + + #[test] + fn lookup_voice_media_legacy_schema_uses_msgtime_to_disambiguate_reused_msgid() { + let dir = tempdir_for_test(); + let db_path = dir.join("media_0.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE VoiceInfo ( + user_name TEXT, + msgid INTEGER, + msgtime INTEGER, + voice_data BLOB + )", + [], + ) + .unwrap(); + for (msgtime, data) in [(1000i64, b"old".as_slice()), (2000i64, b"new".as_slice())] { + conn.execute( + "INSERT INTO VoiceInfo (user_name, msgid, msgtime, voice_data) + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["room@chatroom", 7i64, msgtime, data], + ) + .unwrap(); + } + + let media = lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 2000) + .unwrap() + .unwrap(); + assert_eq!(media.data, b"new"); + assert!( + lookup_voice_media_blocking(&db_path, "room@chatroom", 7, 3000) + .unwrap() + .is_none() + ); + } + #[test] fn three_month_candidates_includes_prev_curr_next() { // 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09 @@ -415,17 +928,58 @@ mod tests { std::fs::write(img.join(format!("{}_h.dat", md5)), b"hd").unwrap(); // 只有 _t / _h 时取 _h assert_eq!( - pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + pick_best_in_img_dir(&img, md5) + .unwrap() + .file_name() + .unwrap(), format!("{}_h.dat", md5).as_str() ); // 加 full 后取 full std::fs::write(img.join(format!("{}.dat", md5)), b"full").unwrap(); assert_eq!( - pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + pick_best_in_img_dir(&img, md5) + .unwrap() + .file_name() + .unwrap(), format!("{}.dat", md5).as_str() ); } + #[test] + fn find_media_file_finds_voice_by_month_voice_dir() { + let tmp = tempdir_for_test(); + let chat = "room@chatroom"; + let chat_hash = format!("{:x}", md5::compute(chat.as_bytes())); + let ts = chrono::Local + .with_ymd_and_hms(2026, 6, 9, 12, 0, 0) + .unwrap() + .timestamp(); + let voice_dir = tmp.join(chat_hash).join("2026-06").join("Voice"); + std::fs::create_dir_all(&voice_dir).unwrap(); + let md5 = "00112233445566778899aabbccddeeff"; + std::fs::write(voice_dir.join(format!("{}.aud", md5)), b"voice").unwrap(); + + let found = find_media_file(&tmp, chat, md5, ts, AttachmentKind::Voice).unwrap(); + assert_eq!(found.file_name().unwrap(), format!("{}.aud", md5).as_str()); + } + + #[test] + fn find_media_file_voice_recurses_when_layout_unknown() { + let tmp = tempdir_for_test(); + let chat = "room@chatroom"; + let ts = chrono::Local + .with_ymd_and_hms(2026, 6, 9, 12, 0, 0) + .unwrap() + .timestamp(); + let odd_dir = tmp.join("somehash").join("2026-06").join("NotVoice"); + std::fs::create_dir_all(&odd_dir).unwrap(); + let md5 = "abcdefabcdefabcdefabcdefabcdefab"; + std::fs::write(odd_dir.join(format!("{}.silk", md5)), b"voice").unwrap(); + + let found = find_media_file(&tmp, chat, md5, ts, AttachmentKind::Voice).unwrap(); + assert_eq!(found.file_name().unwrap(), format!("{}.silk", md5).as_str()); + } + fn tempdir_for_test() -> PathBuf { let pid = std::process::id(); let nanos = std::time::SystemTime::now() diff --git a/src/cli/attachments.rs b/src/cli/attachments.rs index 87e4434..c693d81 100644 --- a/src/cli/attachments.rs +++ b/src/cli/attachments.rs @@ -8,7 +8,8 @@ use crate::ipc::Request; /// `wx attachments` — 列出指定会话的附件消息(默认 image,可多选)。 /// /// 输出每条 `attachment_id`,再传给 `wx extract` 才真正读 message_resource.db -/// 与本地 .dat 解码。这一步只查 `Msg_` 表,几千条群聊也能秒返。 +/// 与本地资源文件。POC 中 image 解码,voice/audio 原样复制;这一步只查 +/// `Msg_` 表,几千条群聊也能秒返。 pub fn cmd_attachments( chat: String, kinds: Vec, diff --git a/src/cli/extract.rs b/src/cli/extract.rs index a0eba0d..d0dd41a 100644 --- a/src/cli/extract.rs +++ b/src/cli/extract.rs @@ -1,14 +1,14 @@ use anyhow::Result; -use crate::ipc::Request; use super::output::{print_value, resolve}; use super::transport; +use crate::ipc::Request; -/// `wx extract` — 把单个 `attachment_id` 对应的资源解密写到指定路径。 +/// `wx extract` — 把单个 `attachment_id` 对应的资源写到指定路径。 /// /// daemon 端:解析 `attachment_id` → 查 `message_resource.db` 拿 file md5 → -/// 在 `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 解码器 → -/// 写出真实图片/文件。 +/// 在 `/msg/attach/...` 找资源文件。image 按 magic 分发到 v1/v2 +/// 解码器,voice/audio POC 原样复制。 pub fn cmd_extract( attachment_id: String, output: String, diff --git a/src/cli/mod.rs b/src/cli/mod.rs index b4d6cf4..c6d3ab3 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -16,12 +16,14 @@ pub mod sns_feed; pub mod sns_notifications; pub mod sns_search; pub mod stats; +pub mod transcribe; pub mod transport; pub mod unread; use self::output::OutputOpts; use anyhow::Result; use clap::{Parser, Subcommand}; +use std::path::PathBuf; /// wx — 微信本地数据 CLI #[derive(Parser)] @@ -271,13 +273,13 @@ enum Commands { #[arg(long)] json: bool, }, - /// 列出某会话的图片附件,返回不透明 attachment_id + /// 列出某会话的附件,返回不透明 attachment_id Attachments { /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) chat: String, - /// 类型(当前仅支持 image) + /// 类型(POC 支持 image / voice) #[arg(long = "kind", value_name = "KIND", - value_parser = ["image", "img"])] + value_parser = ["image", "img", "voice", "audio"])] kinds: Vec, /// 显示数量 #[arg(short = 'n', long, default_value = "50")] @@ -295,11 +297,11 @@ enum Commands { #[arg(long)] json: bool, }, - /// 把单个 attachment_id 对应的资源解密写到指定文件路径 + /// 把单个 attachment_id 对应的资源写到指定文件路径 Extract { /// 由 `wx attachments` 输出的不透明 ID(base64url 字符串) attachment_id: String, - /// 输出文件路径(绝对或相对当前工作目录均可;扩展名建议保留为 .jpg 等) + /// 输出文件路径(图片建议 .jpg/.png;语音 POC 建议先保留原始扩展名) #[arg(short = 'o', long)] output: String, /// 目标已存在时覆盖 @@ -309,6 +311,32 @@ enum Commands { #[arg(long)] json: bool, }, + /// 转写单个语音 attachment_id(SILK -> WAV -> whisper.cpp) + Transcribe { + /// 由 `wx attachments --kind voice` 输出的不透明 ID(base64url 字符串) + attachment_id: String, + /// whisper.cpp 模型路径;也可用 WX_WHISPER_MODEL + #[arg(long, value_name = "PATH")] + model: Option, + /// whisper.cpp 的 whisper-cli 路径;默认找 WX_WHISPER_BIN 或 PATH 里的 whisper-cli + #[arg(long = "whisper-bin", value_name = "PATH")] + whisper_bin: Option, + /// SILK v3 decoder 路径;默认找 WX_SILK_DECODER 或 PATH 里的 silk-decoder/silk_v3_decoder/silk_decoder + #[arg(long = "silk-decoder", value_name = "PATH")] + silk_decoder: Option, + /// ffmpeg 路径;默认找 WX_FFMPEG 或 PATH 里的 ffmpeg + #[arg(long, value_name = "PATH")] + ffmpeg: Option, + /// 语音语言,传给 whisper.cpp -l;普通话建议 zh,自动识别用 auto + #[arg(short = 'l', long = "language", default_value = "zh")] + language: String, + /// 保留中间文件(raw/silk/pcm/wav),用于调试转码质量;目录权限保持 0700 + #[arg(long)] + keep_temp: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 管理 wx-daemon Daemon { #[command(subcommand)] @@ -520,6 +548,25 @@ fn dispatch(cli: Cli) -> Result<()> { overwrite, json, } => extract::cmd_extract(attachment_id, output, overwrite, json), + Commands::Transcribe { + attachment_id, + model, + whisper_bin, + silk_decoder, + ffmpeg, + language, + keep_temp, + json, + } => transcribe::cmd_transcribe( + attachment_id, + model, + whisper_bin, + silk_decoder, + ffmpeg, + language, + keep_temp, + json, + ), Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/cli/transcribe.rs b/src/cli/transcribe.rs new file mode 100644 index 0000000..3d80635 --- /dev/null +++ b/src/cli/transcribe.rs @@ -0,0 +1,467 @@ +use anyhow::{anyhow, Context, Result}; +use serde_json::{json, Value}; +use std::ffi::OsStr; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use super::output::{print_value, resolve}; +use super::transport; +use crate::ipc::Request; + +/// `wx transcribe` — 从语音 attachment_id 导出音频并调用本机 ASR。 +/// +/// Pipeline: +/// 1. daemon `Extract` 导出 WeChat 原始语音 bytes +/// 2. SILK v3: 规整 `#!SILK` header → decoder 输出 s16le PCM +/// 3. ffmpeg 转为 whisper.cpp 需要的 16k mono WAV +/// 4. whisper-cli 做本地 ASR +pub fn cmd_transcribe( + attachment_id: String, + model: Option, + whisper_bin: Option, + silk_decoder: Option, + ffmpeg: Option, + language: String, + keep_temp: bool, + json_out: bool, +) -> Result<()> { + let model = resolve_required_model(model)?; + let whisper_bin = resolve_tool( + whisper_bin, + "WX_WHISPER_BIN", + &["whisper-cli"], + "找不到 whisper.cpp 的 whisper-cli;请用 --whisper-bin 指定路径,或设置 WX_WHISPER_BIN", + )?; + let ffmpeg = resolve_tool( + ffmpeg, + "WX_FFMPEG", + &["ffmpeg"], + "找不到 ffmpeg;请安装 ffmpeg,或用 --ffmpeg 指定路径", + )?; + + let work = WorkDir::new(keep_temp)?; + let raw_path = work.path.join("voice.aud"); + let silk_path = work.path.join("voice.silk"); + let pcm_path = work.path.join("voice.pcm"); + let wav_path = work.path.join("voice.wav"); + + let extract_report = extract_voice(&attachment_id, &raw_path)?; + let kind = extract_report + .get("kind") + .and_then(Value::as_str) + .unwrap_or(""); + if kind != "voice" { + return Err(anyhow!( + "attachment_id 不是语音资源(kind={}),请先用 `wx attachments CHAT --kind voice` 获取语音 ID", + kind + )); + } + + let raw_bytes = std::fs::read(&raw_path) + .with_context(|| format!("读取语音文件失败:{}", raw_path.display()))?; + let format = detect_audio_format( + extract_report + .get("format") + .and_then(Value::as_str) + .unwrap_or_default(), + &raw_bytes, + &raw_path, + ); + + let mut silk_header_offset: Option = None; + let decode_stage = if format == "silk" { + let silk_decoder = resolve_tool( + silk_decoder, + "WX_SILK_DECODER", + &["silk-decoder", "silk_v3_decoder", "silk_decoder"], + "找不到 SILK v3 decoder;请用 --silk-decoder 指定 kn007/silk-v3-decoder 的 silk/decoder 路径,或设置 WX_SILK_DECODER", + )?; + silk_header_offset = Some(write_normalized_silk(&raw_bytes, &silk_path)?); + run_silk_decoder(&silk_decoder, &silk_path, &pcm_path)?; + run_ffmpeg_pcm_to_wav(&ffmpeg, &pcm_path, &wav_path)?; + json!({ + "input_format": "silk", + "silk_header_offset": silk_header_offset, + "silk_decoder": silk_decoder.display().to_string(), + }) + } else { + run_ffmpeg_audio_to_wav(&ffmpeg, &raw_path, &wav_path)?; + json!({ + "input_format": format, + "silk_header_offset": silk_header_offset, + }) + }; + + let whisper = run_whisper(&whisper_bin, &model, &wav_path, &language)?; + let transcript = clean_whisper_stdout(&whisper.stdout); + + let mut report = json!({ + "transcript": transcript, + "language": language, + "engine": "whisper.cpp", + "model": model.display().to_string(), + "whisper_bin": whisper_bin.display().to_string(), + "ffmpeg": ffmpeg.display().to_string(), + "audio": { + "source": extract_report.get("source").cloned(), + "format": format, + "decoder": extract_report.get("decoder").cloned(), + "output_size": extract_report.get("output_size").cloned(), + }, + "decode": decode_stage, + "whisper": { + "stderr": whisper.stderr.trim(), + }, + "kept_temp": keep_temp, + }); + + if keep_temp { + report["temp_dir"] = json!(work.path.display().to_string()); + report["files"] = json!({ + "raw": raw_path.display().to_string(), + "silk": if silk_path.exists() { Some(silk_path.display().to_string()) } else { None }, + "pcm": if pcm_path.exists() { Some(pcm_path.display().to_string()) } else { None }, + "wav": wav_path.display().to_string(), + }); + } + + print_value(&report, &resolve(json_out)) +} + +fn extract_voice(attachment_id: &str, raw_path: &Path) -> Result { + let resp = transport::send(Request::Extract { + attachment_id: attachment_id.to_string(), + output: raw_path.display().to_string(), + overwrite: true, + })?; + set_private_file_permissions(raw_path)?; + Ok(resp.data) +} + +fn resolve_required_model(model: Option) -> Result { + if let Some(path) = model { + return require_existing_file(path, "--model"); + } + if let Ok(path) = std::env::var("WX_WHISPER_MODEL") { + return require_existing_file(PathBuf::from(path), "WX_WHISPER_MODEL"); + } + Err(anyhow!( + "缺少 whisper.cpp 模型路径;请传 --model /path/to/ggml-large-v3-turbo.bin,或设置 WX_WHISPER_MODEL" + )) +} + +fn resolve_tool( + explicit: Option, + env_name: &str, + candidates: &[&str], + missing_msg: &str, +) -> Result { + if let Some(path) = explicit { + return require_existing_file(path, env_name); + } + if let Ok(path) = std::env::var(env_name) { + return require_existing_file(PathBuf::from(path), env_name); + } + for candidate in candidates { + if let Some(path) = find_in_path(candidate) { + return Ok(path); + } + } + Err(anyhow!(missing_msg.to_string())) +} + +fn require_existing_file(path: PathBuf, label: &str) -> Result { + if path.is_file() { + Ok(path) + } else { + Err(anyhow!("{} 指向的文件不存在:{}", label, path.display())) + } +} + +fn find_in_path(name: &str) -> Option { + let candidate = Path::new(name); + if candidate.components().count() > 1 && candidate.is_file() { + return Some(candidate.to_path_buf()); + } + let paths = std::env::var_os("PATH")?; + for dir in std::env::split_paths(&paths) { + let path = dir.join(name); + if path.is_file() { + return Some(path); + } + } + None +} + +fn detect_audio_format<'a>(reported: &'a str, bytes: &[u8], path: &Path) -> &'a str { + if find_subslice_prefix(bytes, b"#!SILK", 8).is_some() { + return "silk"; + } + if bytes.starts_with(b"#!AMR") { + return "amr"; + } + if bytes.len() >= 12 && &bytes[..4] == b"RIFF" && &bytes[8..12] == b"WAVE" { + return "wav"; + } + if bytes.starts_with(b"ID3") || bytes.starts_with(&[0xFF, 0xFB]) { + return "mp3"; + } + if bytes.len() >= 12 && &bytes[4..8] == b"ftyp" { + return "m4a"; + } + if !reported.is_empty() && reported != "bin" && reported != "dat" { + return reported; + } + match path.extension().and_then(OsStr::to_str).unwrap_or_default() { + "amr" => "amr", + "wav" => "wav", + "m4a" => "m4a", + "mp3" => "mp3", + "silk" | "slk" => "silk", + _ => "bin", + } +} + +fn write_normalized_silk(bytes: &[u8], silk_path: &Path) -> Result { + let offset = find_subslice_prefix(bytes, b"#!SILK", 8).ok_or_else(|| { + anyhow!("语音报告为 SILK,但前 8 字节内找不到 #!SILK header,无法调用 SILK decoder") + })?; + write_private_file(silk_path, &bytes[offset..]) + .with_context(|| format!("写出 SILK 中间文件失败:{}", silk_path.display()))?; + Ok(offset) +} + +fn find_subslice_prefix(haystack: &[u8], needle: &[u8], max_offset: usize) -> Option { + if needle.is_empty() || haystack.len() < needle.len() { + return None; + } + let end = haystack.len().saturating_sub(needle.len()).min(max_offset); + (0..=end).find(|&idx| &haystack[idx..idx + needle.len()] == needle) +} + +fn run_silk_decoder(decoder: &Path, silk_path: &Path, pcm_path: &Path) -> Result<()> { + let output = Command::new(decoder) + .arg(silk_path) + .arg(pcm_path) + .output() + .with_context(|| format!("启动 SILK decoder 失败:{}", decoder.display()))?; + if !output.status.success() || !pcm_path.is_file() { + return Err(anyhow!( + "SILK decoder 失败:{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr).trim() + )); + } + set_private_file_permissions(pcm_path)?; + Ok(()) +} + +fn run_ffmpeg_pcm_to_wav(ffmpeg: &Path, pcm_path: &Path, wav_path: &Path) -> Result<()> { + run_command( + Command::new(ffmpeg) + .arg("-y") + .arg("-f") + .arg("s16le") + .arg("-ar") + .arg("24000") + .arg("-ac") + .arg("1") + .arg("-i") + .arg(pcm_path) + .arg("-ar") + .arg("16000") + .arg("-ac") + .arg("1") + .arg("-c:a") + .arg("pcm_s16le") + .arg(wav_path), + "ffmpeg PCM -> WAV", + )?; + set_private_file_permissions(wav_path) +} + +fn run_ffmpeg_audio_to_wav(ffmpeg: &Path, input_path: &Path, wav_path: &Path) -> Result<()> { + run_command( + Command::new(ffmpeg) + .arg("-y") + .arg("-i") + .arg(input_path) + .arg("-ar") + .arg("16000") + .arg("-ac") + .arg("1") + .arg("-c:a") + .arg("pcm_s16le") + .arg(wav_path), + "ffmpeg audio -> WAV", + )?; + set_private_file_permissions(wav_path) +} + +fn run_whisper( + whisper_bin: &Path, + model: &Path, + wav_path: &Path, + language: &str, +) -> Result { + let output = Command::new(whisper_bin) + .arg("-m") + .arg(model) + .arg("-f") + .arg(wav_path) + .arg("-l") + .arg(language) + .arg("-nt") + .arg("-np") + .output() + .with_context(|| format!("启动 whisper-cli 失败:{}", whisper_bin.display()))?; + if !output.status.success() { + return Err(anyhow!( + "whisper-cli 失败:{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr).trim() + )); + } + Ok(CommandOutput { + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }) +} + +fn run_command(cmd: &mut Command, stage: &str) -> Result<()> { + let output = cmd + .output() + .with_context(|| format!("启动 {} 失败", stage))?; + if output.status.success() { + Ok(()) + } else { + Err(anyhow!( + "{} 失败:{}\n{}", + stage, + output.status, + String::from_utf8_lossy(&output.stderr).trim() + )) + } +} + +fn write_private_file(path: &Path, bytes: &[u8]) -> Result<()> { + let mut options = std::fs::OpenOptions::new(); + options.write(true).create_new(true); + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + options.mode(0o600); + } + let mut file = options + .open(path) + .with_context(|| format!("创建私有文件失败:{}", path.display()))?; + file.write_all(bytes) + .with_context(|| format!("写入私有文件失败:{}", path.display()))?; + set_private_file_permissions(path) +} + +fn set_private_file_permissions(path: &Path) -> Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600)) + .with_context(|| format!("设置文件权限失败:{}", path.display()))?; + } + Ok(()) +} + +fn clean_whisper_stdout(stdout: &str) -> String { + stdout + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect::>() + .join("\n") +} + +struct CommandOutput { + stdout: String, + stderr: String, +} + +struct WorkDir { + path: PathBuf, + keep: bool, +} + +impl WorkDir { + fn new(keep: bool) -> Result { + for attempt in 0..128u32 { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + let path = std::env::temp_dir().join(format!( + "wx-transcribe-{}-{}-{}", + std::process::id(), + nanos, + attempt + )); + match create_private_dir(&path) { + Ok(()) => { + return Ok(Self { path, keep }); + } + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => continue, + Err(e) => { + return Err(e).with_context(|| format!("创建临时目录失败:{}", path.display())); + } + } + } + Err(anyhow!("创建临时目录失败:连续 128 次命名冲突")) + } +} + +fn create_private_dir(path: &Path) -> std::io::Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::DirBuilderExt; + std::fs::DirBuilder::new().mode(0o700).create(path) + } + #[cfg(not(unix))] + { + std::fs::create_dir(path) + } +} + +impl Drop for WorkDir { + fn drop(&mut self) { + if !self.keep { + let _ = std::fs::remove_dir_all(&self.path); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn find_silk_header_after_wechat_prefix() { + assert_eq!( + find_subslice_prefix(b"\x02#!SILK_V3", b"#!SILK", 8), + Some(1) + ); + assert_eq!(find_subslice_prefix(b"#!SILK_V3", b"#!SILK", 8), Some(0)); + } + + #[test] + fn clean_whisper_stdout_keeps_non_empty_lines() { + assert_eq!(clean_whisper_stdout("\n 你好 \n\n世界\n"), "你好\n世界"); + } + + #[cfg(unix)] + #[test] + fn workdir_is_private() { + use std::os::unix::fs::PermissionsExt; + let work = WorkDir::new(true).unwrap(); + let mode = std::fs::metadata(&work.path).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o700); + std::fs::remove_dir_all(&work.path).unwrap(); + } +} diff --git a/src/daemon/query.rs b/src/daemon/query.rs index ac9ec0d..3868271 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -925,7 +925,8 @@ fn query_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender_username = sender_username(real_sender_id, &content, is_group, chat_username, &id2u); + let sender_username = + sender_username(real_sender_id, &content, is_group, chat_username, &id2u); let sender = sender_label( real_sender_id, &content, @@ -946,7 +947,13 @@ fn query_messages( "type": fmt_type(local_type), "local_id": local_id, }); - add_sender_identity(&mut msg, is_group, &sender_username, names_map, group_nicknames); + add_sender_identity( + &mut msg, + is_group, + &sender_username, + names_map, + group_nicknames, + ); if let Some(u) = url { msg["url"] = serde_json::Value::String(u); } @@ -1032,7 +1039,8 @@ fn search_in_table( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender_username = sender_username(real_sender_id, &content, is_group, chat_username, &id2u); + let sender_username = + sender_username(real_sender_id, &content, is_group, chat_username, &id2u); let sender = sender_label( real_sender_id, &content, @@ -1057,7 +1065,13 @@ fn search_in_table( "content": text, "type": fmt_type(local_type), }); - add_sender_identity(&mut msg, is_group, &sender_username, names_map, group_nicknames); + add_sender_identity( + &mut msg, + is_group, + &sender_username, + names_map, + group_nicknames, + ); if let Some(u) = url { msg["url"] = serde_json::Value::String(u); } @@ -1558,11 +1572,13 @@ fn add_sender_identity( } row["sender_username"] = Value::String(username.to_string()); row["sender_contact_display"] = Value::String( - names.get(username).cloned().unwrap_or_else(|| username.to_string()) - ); - row["sender_group_nickname"] = Value::String( - group_nicknames.get(username).cloned().unwrap_or_default() + names + .get(username) + .cloned() + .unwrap_or_else(|| username.to_string()), ); + row["sender_group_nickname"] = + Value::String(group_nicknames.get(username).cloned().unwrap_or_default()); } fn sender_label( @@ -2193,14 +2209,7 @@ mod appmsg_tests { .expect("create message table"); conn.execute( "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - rusqlite::params![ - 1_i64, - 1_i64, - 1775146911_i64, - 42_i64, - "hello", - 0_i64 - ], + rusqlite::params![1_i64, 1_i64, 1775146911_i64, 42_i64, "hello", 0_i64], ) .expect("insert text message"); } @@ -2227,7 +2236,10 @@ mod appmsg_tests { assert_eq!(rows.len(), 1); assert_eq!(rows[0]["sender"].as_str(), Some("同名")); assert_eq!(rows[0]["sender_username"].as_str(), Some("wxid_alice")); - assert_eq!(rows[0]["sender_contact_display"].as_str(), Some("Alice Contact")); + assert_eq!( + rows[0]["sender_contact_display"].as_str(), + Some("Alice Contact") + ); assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名")); } @@ -2284,7 +2296,10 @@ mod appmsg_tests { assert_eq!(rows.len(), 1); assert_eq!(rows[0]["sender"].as_str(), Some("同名")); assert_eq!(rows[0]["sender_username"].as_str(), Some("wxid_alice")); - assert_eq!(rows[0]["sender_contact_display"].as_str(), Some("Alice Contact")); + assert_eq!( + rows[0]["sender_contact_display"].as_str(), + Some("Alice Contact") + ); assert_eq!(rows[0]["sender_group_nickname"].as_str(), Some("同名")); } @@ -2314,7 +2329,10 @@ mod appmsg_tests { add_sender_identity(&mut alice_row, true, "wxid_alice", &names, &group_nicknames); assert_eq!(alice_row["sender"].as_str(), Some("同名")); assert_eq!(alice_row["sender_username"].as_str(), Some("wxid_alice")); - assert_eq!(alice_row["sender_contact_display"].as_str(), Some("Alice Contact")); + assert_eq!( + alice_row["sender_contact_display"].as_str(), + Some("Alice Contact") + ); assert_eq!(alice_row["sender_group_nickname"].as_str(), Some("同名")); let mut bob_row = json!({ @@ -2336,7 +2354,13 @@ mod appmsg_tests { // 非群 chat 不该追加 identity 字段(行为对齐 history/search/new-messages) let mut private_row = json!({"attachment_id": "ghi", "sender": ""}); - add_sender_identity(&mut private_row, false, "wxid_alice", &names, &group_nicknames); + add_sender_identity( + &mut private_row, + false, + "wxid_alice", + &names, + &group_nicknames, + ); assert!(private_row.get("sender_username").is_none()); assert!(private_row.get("sender_contact_display").is_none()); assert!(private_row.get("sender_group_nickname").is_none()); @@ -2992,7 +3016,8 @@ pub async fn q_new_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender_username = sender_username(real_sender_id, &content, is_group, &uname2, &id2u); + let sender_username = + sender_username(real_sender_id, &content, is_group, &uname2, &id2u); let sender = sender_label( real_sender_id, &content, @@ -3015,7 +3040,13 @@ pub async fn q_new_messages( "content": text, "type": fmt_type(local_type), }); - add_sender_identity(&mut msg, is_group, &sender_username, &names_map, &group_nicknames2); + add_sender_identity( + &mut msg, + is_group, + &sender_username, + &names_map, + &group_nicknames2, + ); if let Some(u) = url { msg["url"] = serde_json::Value::String(u); } @@ -4393,18 +4424,21 @@ pub async fn q_attachments( &names_map, &group_nicknames2, ), - sender_username( - real_sender_id, - &content, - true, - &uname, - &id2u, - ), + sender_username(real_sender_id, &content, true, &uname, &id2u), ) } else { (String::new(), String::new()) }; - Ok((local_id, lo32, ts, real_sender_id, sender, sender_uname, ts, db_idx2)) + Ok(( + local_id, + lo32, + ts, + real_sender_id, + sender, + sender_uname, + ts, + db_idx2, + )) })? .filter_map(|r| r.ok()) .collect(); @@ -4449,7 +4483,13 @@ pub async fn q_attachments( if is_group && !sender.is_empty() { row["sender"] = Value::String(sender); } - add_sender_identity(&mut row, is_group, &sender_uname, &names.map, &group_nicknames); + add_sender_identity( + &mut row, + is_group, + &sender_uname, + &names.map, + &group_nicknames, + ); results.push(row); } let unknown_shards = current_unknown_shards(db, names); @@ -4476,7 +4516,9 @@ pub async fn q_attachments( })) } -/// 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 解密 → 写盘。 +/// 解码 attachment_id → 写出附件资源。 +/// image: message_resource.db → 本地 .dat → 解码。 +/// voice POC: 优先 media_0.db::VoiceInfo → 原样写出 SILK/音频 bytes;未命中再走资源文件 fallback。 pub async fn q_extract( db: &DbCache, _names: &Names, @@ -4487,7 +4529,7 @@ pub async fn q_extract( use crate::attachment::{ attachment_id::AttachmentId, decoder::{self, V2KeyMaterial}, - image_key, resolver, + image_key, resolver, AttachmentKind, }; let id = AttachmentId::decode(attachment_id) @@ -4508,6 +4550,44 @@ pub async fn q_extract( } } + if id.kind == AttachmentKind::Voice { + if let Some(media_path) = db.get("message/media_0.db").await? { + let id_for_task = id.clone(); + let output_path2 = output_path.clone(); + let report = tokio::task::spawn_blocking(move || -> Result> { + let Some(voice) = resolver::lookup_voice_media_blocking( + &media_path, + &id_for_task.chat, + id_for_task.local_id, + id_for_task.create_time, + )? + else { + return Ok(None); + }; + + std::fs::write(&output_path2, &voice.data) + .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + Ok(Some(json!({ + "kind": id_for_task.kind.as_str(), + "source": "message/media_0.db", + "local_id": id_for_task.local_id, + "create_time": id_for_task.create_time, + "chunks": voice.chunks, + "svr_id": voice.svr_id, + "output": output_path2.display().to_string(), + "output_size": voice.data.len(), + "format": raw_media_format(&output_path2, &voice.data), + "decoder": "media_0_voice_data", + "poc": true, + }))) + }) + .await??; + if let Some(report) = report { + return Ok(report); + } + } + } + // 1) 拿 message_resource.db let resource_path = db .get("message/message_resource.db") @@ -4535,6 +4615,22 @@ pub async fn q_extract( let dat_bytes = std::fs::read(&resolved.dat_path) .with_context(|| format!("读取 .dat 失败:{}", resolved.dat_path.display()))?; + if id_for_task.kind != AttachmentKind::Image { + std::fs::write(&output_path2, &dat_bytes) + .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + return Ok(json!({ + "kind": id_for_task.kind.as_str(), + "md5": resolved.md5, + "dat_path": resolved.dat_path.display().to_string(), + "dat_size": resolved.size, + "output": output_path2.display().to_string(), + "output_size": dat_bytes.len(), + "format": raw_media_format(&resolved.dat_path, &dat_bytes), + "decoder": "raw_copy", + "poc": true, + })); + } + // V2 image key — 平台相关。`ImageKeyMaterial` 同时给 aes_key + xor_key。 // xor_key 不能硬编码 0x88:实测 macOS 真实账号上是 `uin & 0xff` 派生的(0xa2 等), // 所以这里桥接时必须把 provider 的 xor_key 透传给 V2KeyMaterial。 @@ -4599,7 +4695,7 @@ pub async fn q_extract( } /// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。 -/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +/// 默认 image;voice/audio 是 POC:可以枚举并 raw-copy 本地语音文件,但不做转码/转写。 fn parse_attachment_kinds( kinds: Option<&[String]>, ) -> Result> { @@ -4613,12 +4709,13 @@ fn parse_attachment_kinds( for k in raw { let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { "image" | "img" => (AttachmentKind::Image, 3), - "voice" | "audio" | "video" | "file" => { + "voice" | "audio" => (AttachmentKind::Voice, 34), + "video" | "file" => { anyhow::bail!( - "当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通" + "当前只支持 image 和 voice POC;video/file 的资源路径与 decoder 还没接通" ) } - other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), + other => anyhow::bail!("未知附件类型:{}(当前支持 image / voice POC)", other), }; if seen.insert(kind.as_str()) { out.push((kind, t)); @@ -4627,10 +4724,75 @@ fn parse_attachment_kinds( Ok(out) } +fn raw_media_format(path: &std::path::Path, bytes: &[u8]) -> &'static str { + if bytes.starts_with(b"#!SILK") + || bytes + .windows(b"#!SILK".len()) + .take(8) + .any(|chunk| chunk == b"#!SILK") + { + return "silk"; + } + if bytes.starts_with(b"#!AMR") { + return "amr"; + } + if bytes.len() >= 12 && &bytes[..4] == b"RIFF" && &bytes[8..12] == b"WAVE" { + return "wav"; + } + if bytes.starts_with(b"ID3") || bytes.starts_with(&[0xFF, 0xFB]) { + return "mp3"; + } + if bytes.len() >= 12 && &bytes[4..8] == b"ftyp" { + return "m4a"; + } + match path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or_default() + { + "aud" => "aud", + "amr" => "amr", + "silk" => "silk", + "wav" => "wav", + "m4a" => "m4a", + "mp3" => "mp3", + "dat" => "dat", + _ => "bin", + } +} + #[cfg(test)] mod biz_tests { use super::*; + #[test] + fn parse_attachment_kinds_accepts_voice_aliases() { + let kinds = vec!["voice".to_string(), "audio".to_string()]; + let parsed = parse_attachment_kinds(Some(&kinds)).unwrap(); + assert_eq!(parsed.len(), 1); + assert_eq!(parsed[0].0.as_str(), "voice"); + assert_eq!(parsed[0].1, 34); + } + + #[test] + fn raw_media_format_detects_common_audio_headers() { + assert_eq!( + raw_media_format(std::path::Path::new("x.bin"), b"#!SILK_V3"), + "silk" + ); + assert_eq!( + raw_media_format(std::path::Path::new("x.aud"), b"\x02#!SILK_V3"), + "silk" + ); + assert_eq!( + raw_media_format(std::path::Path::new("x.bin"), b"#!AMR\n"), + "amr" + ); + let mut wav = b"RIFF0000WAVE".to_vec(); + wav.extend_from_slice(&[0; 8]); + assert_eq!(raw_media_format(std::path::Path::new("x.bin"), &wav), "wav"); + } + #[test] fn extract_cdata_normal() { let xml = "<![CDATA[TencentResearch]]>"; @@ -4837,12 +4999,18 @@ mod group_nickname_tests { assert_eq!(top.len(), 2); assert_eq!(top[0]["sender"].as_str(), Some("同名")); assert_eq!(top[0]["sender_username"].as_str(), Some("wxid_alice")); - assert_eq!(top[0]["sender_contact_display"].as_str(), Some("Alice Contact")); + assert_eq!( + top[0]["sender_contact_display"].as_str(), + Some("Alice Contact") + ); assert_eq!(top[0]["sender_group_nickname"].as_str(), Some("同名")); assert_eq!(top[0]["count"].as_i64(), Some(7)); assert_eq!(top[1]["sender"].as_str(), Some("同名")); assert_eq!(top[1]["sender_username"].as_str(), Some("wxid_bob")); - assert_eq!(top[1]["sender_contact_display"].as_str(), Some("Bob Contact")); + assert_eq!( + top[1]["sender_contact_display"].as_str(), + Some("Bob Contact") + ); assert_eq!(top[1]["sender_group_nickname"].as_str(), Some("同名")); assert_eq!(top[1]["count"].as_i64(), Some(3)); } diff --git a/src/ipc.rs b/src/ipc.rs index 93306fb..d382b46 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -155,11 +155,11 @@ pub enum Request { }, /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) ReloadConfig, - /// 列出某个会话里的图片附件 + /// 列出某个会话里的附件 /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 Attachments { chat: String, - /// 类型过滤:当前仅支持 image + /// 类型过滤:默认 image;POC 支持 voice/audio #[serde(default, skip_serializing_if = "Option::is_none")] kinds: Option>, #[serde(default = "default_limit_50")] @@ -175,7 +175,7 @@ pub enum Request { #[serde(default, skip_serializing_if = "is_false")] debug_source: bool, }, - /// 提取(解密)单个附件的本体到指定路径 + /// 提取单个附件的本体到指定路径;图片解码,语音 POC 原样复制 Extract { /// `Attachments` 返回的不透明 ID attachment_id: String,