diff --git a/Cargo.lock b/Cargo.lock index 36b1c72..56821e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.1" @@ -1307,10 +1313,11 @@ checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "wx-cli" -version = "0.1.10" +version = "0.2.0" dependencies = [ "aes", "anyhow", + "base64", "cbc", "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 0c5ef05..527b5e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wx-cli" -version = "0.1.10" +version = "0.2.0" edition = "2021" description = "WeChat 4.x (macOS/Linux) local data CLI — decrypt SQLCipher DBs, query chat history, watch new messages" license = "Apache-2.0" @@ -50,6 +50,9 @@ dirs = "5" # MD5 (联系人表名 Msg_) md5 = "0.7" +# 附件 ID 编码(base64url) +base64 = "0.22" + # 正则表达式 regex = "1" roxmltree = "0.20" diff --git a/README.md b/README.md index e0f06da..29c8736 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Platform](https://img.shields.io/badge/platform-macOS%20%7C%20Linux%20%7C%20Windows-lightgrey.svg)](#安装) [![Rust](https://img.shields.io/badge/built%20with-Rust-orange.svg)](https://www.rust-lang.org) -会话 · 聊天记录 · 搜索 · 联系人 · 群成员 · 收藏 · 统计 · 导出 +会话 · 聊天记录 · 搜索 · 联系人 · 群成员 · 群昵称 · 收藏 · 统计 · 导出 @@ -100,10 +100,16 @@ cargo build --release # 1. 签名(只需做一次,WeChat 更新后重做) codesign --force --deep --sign - /Applications/WeChat.app -# 2. 重启微信,等待完全登录 +# 2. 
清理旧 TCC 授权记录(重签名后必做,否则微信截图/通话权限可能 silent 失效) +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done + +# 3. 重启微信,等待完全登录 killall WeChat && open /Applications/WeChat.app -# 3. 初始化 +# 4. 初始化 sudo wx init ``` @@ -112,6 +118,8 @@ sudo wx init > codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_plugins/librtp_mpeg4_plugin.dylib" > codesign --force --deep --sign - /Applications/WeChat.app > ``` +> +> 重签名后 macOS 的 TCC 隐私授权按新 code signature 重新校验,旧记录会失效。如果跳过 `tccutil reset`,微信截图/视频通话/麦克风等权限可能"看起来已开启但实际拒绝"。详见 [macOS 权限与签名指南](docs/macos-permission-guide.md#五重签名后微信权限-silent-失效)。 **Linux** @@ -156,6 +164,17 @@ wx search "会议" --in "工作群" --since 2026-01-01 会话/消息输出里都带 `chat_type` 字段,取值为 `private` / `group` / `official_account` / `folded`。`official_account` 涵盖公众号、订阅号、服务号及 `mphelper` / `qqsafe` 等系统通知;`folded` 对应微信里的"订阅号折叠"和"折叠群聊"两个聚合入口。 +群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 + +引用消息会在 `history` / `search` / `new-messages` 输出中显示当前回复和被引用原文: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会包含微信 appmsg 里的链接、文件、合并聊天记录和引用消息等变体;搜索时也会匹配解压后可见的引用原文。 + ### 朋友圈(SNS) 三个独立命令,区分"通知"和"帖子": @@ -177,6 +196,50 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 朋友圈数据只覆盖你本地刷到过的帖子(微信 app 按需下载)。 +### 公众号文章 + +公众号文章推送存在独立的 `biz_message_0.db`,用 `biz-articles` 单独查: + +```bash +wx biz-articles # 最近 50 篇 +wx biz-articles -n 200 # 更多 +wx biz-articles --account "返朴" # 限定公众号(名称模糊匹配) +wx biz-articles --since 2026-05-01 --until 2026-05-10 +wx biz-articles --unread # 仅有未读的公众号,每号取最新 1 篇 +wx biz-articles --json | jq '.[].url' # 下游消费 URL +``` + +每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。 + +### 附件提取(图片) + +聊天里的附件本体存在 `xwechat_files//msg/attach/...` 下的 `.dat` 
文件,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。 + +```bash +# 1) 列出会话里的图片附件,先拿到不透明的 attachment_id +wx attachments "张三" +wx attachments "AI群" --kind image -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等) +wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 + +`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取: +- **macOS**:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24);xor_key = `uin & 0xff`,**不是硬编码 0x88** +- **Windows**:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 +- **Linux**:上游空白,遇到 V2 .dat 会报 unsupported + ### 联系人 & 群组 ```bash @@ -185,6 +248,14 @@ wx contacts --query "李" # 按名字搜索 wx members "AI交流群" # 群成员列表 ``` +`wx members --json` 返回的成员字段包括: + +- `username`:微信内部 username +- `display`:用于展示的名称,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;本地没有记录时为空字符串 +- `is_owner`:是否群主 + ### 收藏 & 统计 ```bash @@ -252,7 +323,7 @@ daemon 首次解密后将数据库和 mtime 持久化到 `~/.wx-cli/cache/`。 微信 4.x 使用 SQLCipher 4 加密本地数据库(AES-256-CBC + HMAC-SHA512,PBKDF2 256,000 次迭代)。WCDB 在进程内存中缓存派生后的 raw key,格式为 `x'<64hex_key><32hex_salt>'`。 -wx-cli 通过 macOS Mach VM API(`mach_vm_region` + `mach_vm_read`)或 Linux `/proc//mem` 扫描微信进程内存,匹配该模式提取密钥,daemon 按需解密并缓存。 +wx-cli 通过 macOS Mach VM API(`mach_vm_region` + 
`mach_vm_read`)、Linux `/proc//mem` 或 Windows `VirtualQueryEx` + `ReadProcessMemory`(需要 `PROCESS_VM_READ | PROCESS_QUERY_INFORMATION` 权限)扫描微信进程内存,匹配该模式提取密钥,daemon 按需解密并缓存。 --- diff --git a/SKILL.md b/SKILL.md index 4ce28c3..6b79e0d 100644 --- a/SKILL.md +++ b/SKILL.md @@ -11,6 +11,7 @@ description: "wx-cli — 从本地微信数据库查询聊天记录、联系人 - 微信消息历史 - 微信联系人 - 微信群成员 +- 微信群昵称 / 群名片 - 微信收藏 - wechat history / messages / contacts - wx-cli @@ -65,14 +66,33 @@ codesign --remove-signature "/Applications/WeChat.app/Contents/Frameworks/vlc_pl codesign --force --deep --sign - /Applications/WeChat.app ``` -**第二步:重启 WeChat** +**第二步:清理 WeChat 在 macOS TCC 隐私数据库里的旧授权记录**(重签名后必做) + +macOS TCC 按 `bundle id + csreq` 联合校验权限;csreq 编码自代码签名。重签名后旧 csreq 和新签名不再匹配,旧授权记录会 silent 失效(System Settings 仍把开关画成"已允许",运行时实际拒绝)。把 WeChat 在 TCC 里的旧记录抹掉,让 macOS 在下次微信请求权限时按新签名重新生成 csreq: + +```bash +tccutil reset ScreenCapture com.tencent.xinWeChat # 截图 / 屏幕共享 +tccutil reset Camera com.tencent.xinWeChat # 视频通话 / 扫码 +tccutil reset Microphone com.tencent.xinWeChat # 语音消息 / 通话 +tccutil reset AppleEvents com.tencent.xinWeChat # 自动化 / 输入法 +tccutil reset AddressBook com.tencent.xinWeChat # 通讯录 +tccutil reset SystemPolicyDocumentsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDownloadsFolder com.tencent.xinWeChat +tccutil reset SystemPolicyDesktopFolder com.tencent.xinWeChat +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",是 no-op,不影响其他 service 的 reset。 + +**第三步:重启 WeChat** ```bash killall WeChat && open /Applications/WeChat.app # 等待微信完全登录后再继续 ``` -**第三步:初始化** +之后微信触发权限请求时按 GUI 提示重新允许即可。在 macOS 26 上,把 WeChat 加进 **隐私与安全 → 录屏与系统录音** 的上半区,**不要**只勾下半区的"仅系统录音"——后者不能授予截图权限。 + +**第四步:初始化** ```bash sudo wx init @@ -137,6 +157,17 @@ wx search "会议" --in "工作群" --since 2026-01-01 `wx unread --filter` 支持 `private` / `group` / `official` / `folded` / `all`,逗号分隔多选。默认 `all`。 +群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 + +引用消息(appmsg `type=57`)在 `history` / 
`search` / `new-messages` 输出里会展开为两行:第一行是当前回复,第二行以 `↳` 开头显示被引用原文,例如: + +```text +[引用] 当前回复 + ↳ 发送者: 被引用内容 +``` + +`--type link` / `--type file` 会覆盖微信 appmsg 的链接、文件、合并聊天记录和引用消息等变体;`search --type link` 也会匹配解压并格式化后的引用原文。 + ### 联系人与群组 ```bash @@ -148,6 +179,16 @@ wx contacts --query "李" wx members "AI交流群" ``` +`wx members --json` 每个成员包含: + +- `username`:微信内部 username +- `display`:推荐展示名,优先使用群昵称 +- `contact_display`:联系人备注或微信昵称 +- `group_nickname`:群昵称;没有记录时为空字符串 +- `is_owner`:是否群主 + +Agent 展示群成员时优先用 `display`。需要区分群昵称和联系人名时,再读取 `group_nickname` 与 `contact_display`。 + ### 朋友圈(SNS) 三个命令,作用各不同: @@ -174,6 +215,61 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 -n 50 > 只保存你本地刷到过的朋友圈(微信 app 按需下载)。没刷到过的帖子不在本地,任何命令都拿不到。 +### 公众号文章 + +公众号的文章推送存在独立的 `biz_message_0.db`,与普通 `message_0.db` 分开: + +```bash +# 最近 50 篇(默认) +wx biz-articles + +# 更多 +wx biz-articles -n 200 + +# 限定公众号(名称模糊匹配 display name / username) +wx biz-articles --account "返朴" + +# 时间范围(YYYY-MM-DD,发布时间,非接收时间) +wx biz-articles --since 2026-05-01 --until 2026-05-10 + +# 仅有未读消息的公众号,每号取最新 1 篇(适合"今天有什么新推送"扫描) +wx biz-articles --unread +wx biz-articles --unread --account "Datawhale" # 与 --account 取交集 + +# 下游消费:拿 URL 做内容抓取 +wx biz-articles --since 2026-05-10 --json | jq '.[].url' +``` + +每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。 + +### 附件提取(图片) + +聊天里的图片本体在 `xwechat_files//msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走: + +```bash +# 1) 先列出图片附件,拿到不透明的 attachment_id +wx attachments "张三" +wx attachments "AI群" --kind image -n 100 +wx attachments "AI群" --since 2026-04-01 --until 2026-04-15 + +# 2) 用 attachment_id 把单个资源解密写到指定路径 +wx extract -o ~/Desktop/photo.jpg +wx extract -o /tmp/x.jpg --overwrite +``` + +`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 
`attachments` 是为了后续扩到其他附件类型时不 break CLI。 + +`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。 + +支持的解码档位: +- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推) +- **V1 fixed-AES**(`07 08 V1 08 07`):AES-128-ECB + 固定 key `cfcd208495d565ef` +- **V2 AES + XOR**(`07 08 V2 08 07`):AES-128-ECB + raw + XOR;AES key 平台派生 + +V2 image key 提取(macOS / Windows 自动;Linux 暂不支持): +- macOS:`kvcomm` cache(`key__*.statistic` 文件名取 uin → `md5(str(uin) + wxid)[:16]`)+ brute-force fallback;`xor_key = uin & 0xff` +- Windows:扫 `Weixin.exe` 内存匹配 `[A-Za-z0-9]{32|16}` 候选,按 V2 template ciphertext-block 反验 + ### 收藏与统计 ```bash diff --git a/docs/macos-permission-guide.md b/docs/macos-permission-guide.md index e5ee463..322cb90 100644 --- a/docs/macos-permission-guide.md +++ b/docs/macos-permission-guide.md @@ -196,3 +196,79 @@ open /Applications/WeChat.app | "SIP 阻止了调试微信" | ❌ SIP 只保护系统进程,微信不受 SIP 保护 | | "加了 sshd 到 FDA 就行" | ❌ 还需要加 `sshd-keygen-wrapper`,且要重连 SSH | | "微信开着也能重签名" | ❌ 运行中的 binary/dylib 被占用,codesign 会失败 | + +--- + +## 五、重签名后微信权限 silent 失效 + +### 现象 + +完成 ad-hoc 重签名后,微信任意以下功能都可能"看起来已授权但实际被拒绝": + +- 截图 / 屏幕共享(`ScreenCapture`) +- 视频通话 / 扫码(`Camera`) +- 语音消息 / 通话(`Microphone`) +- 自动化、第三方输入法(`AppleEvents`) +- 同步通讯录(`AddressBook`) +- 文件发送 / 接收(`SystemPolicyDocumentsFolder` / `Downloads` / `Desktop`) + +System Settings 里通常仍看到"微信.app"开关是 ON,但运行时权限校验失败。微信会反复弹"需要开启 X 权限"。 + +### 根因(第一性原理) + +macOS TCC(Transparency, Consent, and Control)按 **bundle id + csreq** 联合校验权限。`csreq`(code requirement)是从 app 的 code signature 推导出的二进制 blob,存在 `/Library/Application Support/com.apple.TCC/TCC.db` 的 `access` 表里,每条 ~160 字节。 + +`codesign --force --deep --sign -` 把 WeChat 从官方签名换成 ad-hoc 签名(甚至 ad-hoc → ad-hoc 重签也会变),新进程的 csreq 跟旧记录里那条对不上 —— tccd 拒绝。 + +System Settings UI 只按 client 显示开关、不重算 csreq,所以视觉上是"已授权",运行时实际拒绝。这是 silent drift。 + +### 修复步骤 + +把 WeChat 在 TCC 里的旧记录全部抹掉,让 macOS 
在下次微信请求权限时按新签名重新生成 csreq: + +```bash +for s in ScreenCapture Camera Microphone AppleEvents AddressBook \ + SystemPolicyDocumentsFolder SystemPolicyDownloadsFolder SystemPolicyDesktopFolder; do + tccutil reset "$s" com.tencent.xinWeChat +done +``` + +`tccutil` 对没有授权过的 service 会报 "No such bundle identifier",这是 no-op,不影响其他 service 的 reset。 + +之后退出并重新打开微信,按 GUI 提示重新允许: + +```bash +killall WeChat +open /Applications/WeChat.app +``` + +> 这一步**应当由用户/agent 手动执行**,不在 `wx init` 里自动跑——TCC 重置会让用户的现有授权失效,需要由人决定时机。 + +#### macOS 26 的 UI 拆分 + +在 macOS 26 上,**隐私与安全 → 录屏与系统录音** 显示为两块,容易踩坑: + +| 区域 | 作用 | +|------|------| +| **录屏与系统录音**(上半区) | 录制屏幕内容 + 系统音频;微信截图、屏幕共享需要这一项 | +| **仅系统录音**(下半区) | 只录系统音频;只打开这一项**不能**修复微信截图 | + +把 WeChat 加进上半区;只勾下半区的"仅系统录音"无效。 + +### 验证 + +确认 WeChat 当前是 ad-hoc 签名(这是修复前提): + +```bash +codesign -dv --verbose=4 /Applications/WeChat.app 2>&1 | grep -E "Signature|flags|TeamIdentifier" +``` + +期望看到: + +```text +flags=0x2(adhoc) +Signature=adhoc +TeamIdentifier=not set +``` + +最直接的功能验证:在微信里使用截图、视频通话、麦克风等功能,按 GUI 弹窗的"允许"重新授权一次,之后正常工作。 diff --git a/npm/platforms/darwin-arm64/package.json b/npm/platforms/darwin-arm64/package.json index 05b851e..3566a92 100644 --- a/npm/platforms/darwin-arm64/package.json +++ b/npm/platforms/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-arm64", - "version": "0.1.10", + "version": "0.2.0", "description": "wx-cli binary for macOS arm64", "os": ["darwin"], "cpu": ["arm64"], diff --git a/npm/platforms/darwin-x64/package.json b/npm/platforms/darwin-x64/package.json index 6fce3b4..d8a9068 100644 --- a/npm/platforms/darwin-x64/package.json +++ b/npm/platforms/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-darwin-x64", - "version": "0.1.10", + "version": "0.2.0", "description": "wx-cli binary for macOS x64", "os": ["darwin"], "cpu": ["x64"], diff --git a/npm/platforms/linux-arm64/package.json b/npm/platforms/linux-arm64/package.json index d44e7b3..19655cb 100644 --- 
a/npm/platforms/linux-arm64/package.json +++ b/npm/platforms/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-arm64", - "version": "0.1.10", + "version": "0.2.0", "description": "wx-cli binary for Linux arm64", "os": ["linux"], "cpu": ["arm64"], diff --git a/npm/platforms/linux-x64/package.json b/npm/platforms/linux-x64/package.json index 0be0893..862c610 100644 --- a/npm/platforms/linux-x64/package.json +++ b/npm/platforms/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-linux-x64", - "version": "0.1.10", + "version": "0.2.0", "description": "wx-cli binary for Linux x64", "os": ["linux"], "cpu": ["x64"], diff --git a/npm/platforms/win32-x64/package.json b/npm/platforms/win32-x64/package.json index 32d2eb0..1b6f9f8 100644 --- a/npm/platforms/win32-x64/package.json +++ b/npm/platforms/win32-x64/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli-win32-x64", - "version": "0.1.10", + "version": "0.2.0", "description": "wx-cli binary for Windows x64", "os": ["win32"], "cpu": ["x64"], diff --git a/npm/wx-cli/package.json b/npm/wx-cli/package.json index 121770f..dc76619 100644 --- a/npm/wx-cli/package.json +++ b/npm/wx-cli/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/wx-cli", - "version": "0.1.10", + "version": "0.2.0", "description": "Query your local WeChat data from the command line. 
Designed for LLM agent tool calls.", "bin": { "wx": "bin/wx.js" @@ -13,11 +13,11 @@ "install.js" ], "optionalDependencies": { - "@jackwener/wx-cli-darwin-arm64": "0.1.10", - "@jackwener/wx-cli-darwin-x64": "0.1.10", - "@jackwener/wx-cli-linux-x64": "0.1.10", - "@jackwener/wx-cli-linux-arm64": "0.1.10", - "@jackwener/wx-cli-win32-x64": "0.1.10" + "@jackwener/wx-cli-darwin-arm64": "0.2.0", + "@jackwener/wx-cli-darwin-x64": "0.2.0", + "@jackwener/wx-cli-linux-x64": "0.2.0", + "@jackwener/wx-cli-linux-arm64": "0.2.0", + "@jackwener/wx-cli-win32-x64": "0.2.0" }, "engines": { "node": ">=14" }, "keywords": ["wechat", "cli", "wx", "llm", "ai", "sqlite", "sqlcipher"], diff --git a/src/attachment/attachment_id.rs b/src/attachment/attachment_id.rs new file mode 100644 index 0000000..8af569e --- /dev/null +++ b/src/attachment/attachment_id.rs @@ -0,0 +1,153 @@ +//! 不透明附件 ID — 跨 CLI / IPC 的 round-trip 句柄。 +//! +//! 编码:`base64url_no_pad(serde_json(payload))`。 +//! 选择 base64url(json) 而不是紧凑 bit-pack: +//! - phase 1 求稳,不发明二进制协议 +//! - 后面加字段(`resource_md5` / `decoder_hint` 之类)老 CLI 不 break +//! - debug 直接 base64 -d | jq 看字段 +//! +//! ⚠️ `local_id` 在同一 chat 内会被 WeChat 复用(实测同 chat 最多 7 条同 local_id), +//! 
所以 `(chat, local_id, create_time)` 三元组才是定位资源行的最小集。 + +use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AttachmentKind { + Image, + Video, + File, + Voice, +} + +impl AttachmentKind { + /// 从 message.local_type 推 attachment kind(只覆盖 phase 1 关心的几种)。 + /// 高 32 bit 是版本/会话 flag,要先 mask 到低 32 bit。 + pub fn from_local_type(local_type: i64) -> Option { + let lo = (local_type as u64) & 0xFFFF_FFFF; + match lo { + 3 => Some(AttachmentKind::Image), + 34 => Some(AttachmentKind::Voice), + 43 => Some(AttachmentKind::Video), + // type=49 是 appmsg,里面 subtype=6 才是文件;这里偏宽松返回 File, + // 由 resolver 进一步根据 appmsg subtype 决定是否真的能 extract + 49 => Some(AttachmentKind::File), + _ => None, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + AttachmentKind::Image => "image", + AttachmentKind::Video => "video", + AttachmentKind::File => "file", + AttachmentKind::Voice => "voice", + } + } +} + +/// 附件 ID payload(序列化后 base64url 编码)。 +/// +/// `v` 是版本字段,将来 schema 变了可以走分支兼容。当前 v=1。 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AttachmentId { + /// payload schema version + pub v: u32, + /// 会话 username(同时用于 ChatName2Id 查 chat_id 和拼 attach 路径) + pub chat: String, + /// 消息行的 local_id + pub local_id: i64, + /// 消息行的 create_time(unix 秒)— 用于 disambiguate 同 chat 内 local_id 复用 + pub create_time: i64, + /// 附件类别 + pub kind: AttachmentKind, + /// 可选 hint:消息所在 message_N.db 的 N。给定时 resolver 可跳过 shard 扫描; + /// 缺省时 resolver 会按 `find_msg_tables` 逻辑全量扫 + #[serde(default, skip_serializing_if = "Option::is_none")] + pub db: Option, +} + +impl AttachmentId { + pub fn encode(&self) -> Result { + let json = serde_json::to_vec(self).context("序列化 AttachmentId")?; + Ok(URL_SAFE_NO_PAD.encode(json)) + } + + pub fn decode(s: &str) -> Result { + let bytes = URL_SAFE_NO_PAD + 
.decode(s.trim()) + .map_err(|e| anyhow!("attachment_id 不是合法 base64url: {}", e))?; + let id: AttachmentId = + serde_json::from_slice(&bytes).context("attachment_id payload 非合法 JSON")?; + if id.v != 1 { + return Err(anyhow!("不支持的 attachment_id 版本 v={}", id.v)); + } + Ok(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_minimal() { + let id = AttachmentId { + v: 1, + chat: "wxid_abc".to_string(), + local_id: 12345, + create_time: 1_715_678_901, + kind: AttachmentKind::Image, + db: None, + }; + let s = id.encode().unwrap(); + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.chat, id.chat); + assert_eq!(back.local_id, id.local_id); + assert_eq!(back.create_time, id.create_time); + assert_eq!(back.kind, id.kind); + assert_eq!(back.db, id.db); + } + + #[test] + fn round_trip_with_db_hint() { + let id = AttachmentId { + v: 1, + chat: "1234@chatroom".to_string(), + local_id: 42, + create_time: 1, + kind: AttachmentKind::Image, + db: Some(2), + }; + let s = id.encode().unwrap(); + assert!(!s.contains('=')); // base64url no-pad + let back = AttachmentId::decode(&s).unwrap(); + assert_eq!(back.db, Some(2)); + } + + #[test] + fn local_type_mask_high_bits() { + // monitor_web.py 里 image push 路径:高位带 flag,低 32 bit 是 3 + let high_flag = (0xDEAD_BEEFu64 << 32) as i64 | 3; + assert_eq!( + AttachmentKind::from_local_type(high_flag), + Some(AttachmentKind::Image) + ); + } + + #[test] + fn rejects_unknown_version() { + let id = AttachmentId { + v: 99, + chat: "x".to_string(), + local_id: 0, + create_time: 0, + kind: AttachmentKind::Image, + db: None, + }; + let s = id.encode().unwrap(); + assert!(AttachmentId::decode(&s).is_err()); + } +} diff --git a/src/attachment/decoder/mod.rs b/src/attachment/decoder/mod.rs new file mode 100644 index 0000000..a5723c5 --- /dev/null +++ b/src/attachment/decoder/mod.rs @@ -0,0 +1,122 @@ +//! `.dat` 文件解码:根据 6B header magic 分发到具体 decoder。 +//! +//! 三档: +//! | header[0..6] | decoder | 备注 | +//! 
|-------------------------|-------------------|-----------------------------------------| +//! | `07 08 V2 08 07` | `v2` | AES-128-ECB + XOR 混合,需要 image AES key | +//! | `07 08 V1 08 07` | `v1_aes` | 固定 AES key `cfcd208495d565ef` | +//! | (其他, 通常无 magic) | `v1_xor` | legacy single-byte XOR,magic 自动探测 | +//! +//! 决策点放在 `dispatch`,让上层(`resolver` / CLI extract 命令)只跟一个入口打交道。 + +use anyhow::{anyhow, Result}; + +pub mod v1_xor; +pub mod v2; + +/// 完整 V2 magic:`\x07\x08V2\x08\x07` +pub const V2_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'2', 0x08, 0x07]; +/// 完整 V1 magic:`\x07\x08V1\x08\x07` +pub const V1_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'1', 0x08, 0x07]; + +/// 解码后的产物 + 探测出的图片格式 +#[derive(Debug)] +pub struct DecodedImage { + pub data: Vec, + /// 推断出的图片扩展名(不带点),由 magic 决定。例如 "jpg" / "png" / "gif" / "webp" / + /// "tif" / "bmp" / "hevc"(wxgf 容器)/ "bin"(未识别) + pub format: &'static str, + /// 解码器名称("legacy_xor" / "v1_aes" / "v2"),用于 CLI 调试输出 + pub decoder: &'static str, +} + +/// 由 caller 提供的 V2 image AES key(codex 的 `image_key` 模块负责拿到)。 +/// 缺省时遇到 V2 文件会返回 `Err`,caller 可以拿到具体错误信息再处理。 +#[derive(Debug, Clone, Copy, Default)] +pub struct V2KeyMaterial<'a> { + pub aes_key: Option<&'a [u8; 16]>, + /// XOR key — WeChat 4.x 默认 0x88,可 override + pub xor_key: u8, +} + +impl<'a> V2KeyMaterial<'a> { + pub fn with_aes(key: &'a [u8; 16]) -> Self { + Self { aes_key: Some(key), xor_key: 0x88 } + } +} + +/// 根据 `dat_bytes` 头部 magic 自动分发到对应 decoder。 +/// +/// `v2_key` 仅在文件是 V2 magic 时被消费。 +pub fn dispatch(dat_bytes: &[u8], v2_key: V2KeyMaterial<'_>) -> Result { + if dat_bytes.len() >= 6 { + let head: &[u8; 6] = dat_bytes[..6].try_into().unwrap(); + if head == &V2_MAGIC { + return v2::decode(dat_bytes, v2_key); + } + if head == &V1_MAGIC { + // V1 fixed-AES: 固定 key = md5("0")[:16] = "cfcd208495d565ef" + let fixed_key: [u8; 16] = *b"cfcd208495d565ef"; + return v2::decode( + dat_bytes, + V2KeyMaterial { aes_key: Some(&fixed_key), xor_key: v2_key.xor_key }, + ) + .map(|mut d| { + d.decoder = 
"v1_aes"; + d + }); + } + } + if dat_bytes.is_empty() { + return Err(anyhow!("空 .dat 文件")); + } + v1_xor::decode(dat_bytes) +} + +/// 从解密后的字节流头部探测图片格式扩展名。 +/// +/// 与上游 `decode_image.py::detect_image_format` 一致;新增 wxgf (HEVC 裸流) 的探测, +/// 因为 V2 解码后产物可能直接是 wxgf 容器。 +pub fn detect_image_format(bytes: &[u8]) -> &'static str { + if bytes.len() >= 4 && &bytes[..4] == b"wxgf" { + return "hevc"; + } + if bytes.len() >= 3 && bytes[..3] == [0xFF, 0xD8, 0xFF] { + return "jpg"; + } + if bytes.len() >= 4 && bytes[..4] == [0x89, 0x50, 0x4E, 0x47] { + return "png"; + } + if bytes.len() >= 3 && &bytes[..3] == b"GIF" { + return "gif"; + } + if bytes.len() >= 12 && &bytes[..4] == b"RIFF" && &bytes[8..12] == b"WEBP" { + return "webp"; + } + if bytes.len() >= 4 && bytes[..4] == [0x49, 0x49, 0x2A, 0x00] { + return "tif"; + } + if bytes.len() >= 2 && &bytes[..2] == b"BM" { + return "bmp"; + } + "bin" +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detect_basic_formats() { + assert_eq!(detect_image_format(&[0xFF, 0xD8, 0xFF, 0xE0]), "jpg"); + assert_eq!(detect_image_format(&[0x89, 0x50, 0x4E, 0x47]), "png"); + assert_eq!(detect_image_format(b"GIF89a"), "gif"); + assert_eq!(detect_image_format(b"BM\0\0\0\0\0\0\0\0\0\0\0\0"), "bmp"); + let mut webp = b"RIFF\0\0\0\0WEBP".to_vec(); + webp.extend_from_slice(&[0; 4]); + assert_eq!(detect_image_format(&webp), "webp"); + assert_eq!(detect_image_format(&[0x49, 0x49, 0x2A, 0x00]), "tif"); + assert_eq!(detect_image_format(b"wxgfXXXX"), "hevc"); + assert_eq!(detect_image_format(&[0, 0, 0, 0]), "bin"); + } +} diff --git a/src/attachment/decoder/v1_xor.rs b/src/attachment/decoder/v1_xor.rs new file mode 100644 index 0000000..788383e --- /dev/null +++ b/src/attachment/decoder/v1_xor.rs @@ -0,0 +1,166 @@ +//! Legacy single-byte XOR decoder(无 magic 头的旧 .dat) +//! +//! 算法:用已知图片 magic 反推 XOR key —— `key = file[0] ^ magic[0]`。 +//! 然后用同一个 key 校验 `file[i] ^ key == magic[i]`,全部命中才接受这个 key。 +//! +//! 优先级(按 magic 长度降序,避免短 magic 假阳性): +//! 
PNG (4) > GIF (4) > TIF (4) > WEBP (4, RIFF) > JPG (3) > BMP (2, 需额外校验) +//! +//! BMP 只有 2 字节 magic,假阳性高;额外用 BMP file header 里的 +//! `bf_size`(offset 2, u32 LE)和 `bf_offset`(offset 10, u32 LE)做合理性校验: +//! - `|bf_size - file_size| < 1024`(允许微小 padding 差) +//! - `14 <= bf_offset <= 1078`(最大调色板 256*4 + header 14 = 1038,留点余量) + +use anyhow::{anyhow, Result}; + +use super::{detect_image_format, DecodedImage}; + +const PNG: &[u8] = &[0x89, 0x50, 0x4E, 0x47]; +const GIF: &[u8] = &[0x47, 0x49, 0x46, 0x38]; +const TIF: &[u8] = &[0x49, 0x49, 0x2A, 0x00]; +const WEBP_RIFF: &[u8] = &[0x52, 0x49, 0x46, 0x46]; +const JPG: &[u8] = &[0xFF, 0xD8, 0xFF]; +const BMP: &[u8] = &[0x42, 0x4D]; + +/// 在 `header` 上尝试一个固定 magic:返回 `Some(key)` 当且仅当所有字节都对得上。 +fn try_magic(header: &[u8], magic: &[u8]) -> Option { + if header.len() < magic.len() { + return None; + } + let key = header[0] ^ magic[0]; + for i in 1..magic.len() { + if header[i] ^ key != magic[i] { + return None; + } + } + Some(key) +} + +/// 探测 XOR key。失败返回 `None`(caller 决定是不是错)。 +pub fn detect_key(file_bytes: &[u8]) -> Option { + if file_bytes.len() < 4 { + return None; + } + let header = &file_bytes[..file_bytes.len().min(16)]; + + // 先试 3+ 字节 magic + for magic in [PNG, GIF, TIF, WEBP_RIFF, JPG] { + if let Some(k) = try_magic(header, magic) { + return Some(k); + } + } + + // 最后试 BMP(只有 2B magic,需额外校验) + if let Some(k) = try_magic(header, BMP) { + if header.len() >= 14 { + // 解 BMP file header 14 字节 + let mut dec = [0u8; 14]; + for i in 0..14 { + dec[i] = header[i] ^ k; + } + let bmp_size = u32::from_le_bytes([dec[2], dec[3], dec[4], dec[5]]); + let bmp_offset = u32::from_le_bytes([dec[10], dec[11], dec[12], dec[13]]); + let file_size = file_bytes.len() as u32; + // 允许 1024 字节 padding 差;offset 在合理范围 + if file_size.abs_diff(bmp_size) < 1024 && (14..=1078).contains(&bmp_offset) { + return Some(k); + } + } + } + + None +} + +/// XOR 解码整个 `.dat` 内容。 +pub fn decode(file_bytes: &[u8]) -> Result { + let key = + 
detect_key(file_bytes).ok_or_else(|| anyhow!("legacy XOR: 无法识别图片 magic(key 探测失败)"))?; + let data: Vec = file_bytes.iter().map(|b| b ^ key).collect(); + let format = detect_image_format(&data); + if format == "bin" { + return Err(anyhow!("legacy XOR: 解出 key=0x{:02x} 但产物 magic 不识别", key)); + } + Ok(DecodedImage { data, format, decoder: "legacy_xor" }) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// 把一段 plaintext 用单字节 key XOR 加密,模拟 .dat 文件 + fn xor_encrypt(plain: &[u8], key: u8) -> Vec { + plain.iter().map(|b| b ^ key).collect() + } + + #[test] + fn detect_jpg_key() { + let plain = vec![0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46]; + let enc = xor_encrypt(&plain, 0x3C); + assert_eq!(detect_key(&enc), Some(0x3C)); + } + + #[test] + fn detect_png_key() { + let mut plain = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xA5); + assert_eq!(detect_key(&enc), Some(0xA5)); + } + + #[test] + fn detect_gif_key() { + let mut plain = b"GIF89a".to_vec(); + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0x77); + assert_eq!(detect_key(&enc), Some(0x77)); + } + + #[test] + fn detect_webp_riff_key() { + let mut plain = b"RIFF\x00\x00\x00\x00WEBP".to_vec(); + plain.extend_from_slice(&[0; 8]); + let enc = xor_encrypt(&plain, 0x12); + assert_eq!(detect_key(&enc), Some(0x12)); + } + + #[test] + fn detect_tif_key() { + let mut plain = vec![0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00]; + plain.extend_from_slice(&[0; 16]); + let enc = xor_encrypt(&plain, 0xC3); + assert_eq!(detect_key(&enc), Some(0xC3)); + } + + #[test] + fn detect_bmp_with_valid_header() { + // BMP 14B header: 'BM' + size(u32 LE) + reserved(2*u16) + offset(u32 LE) + let mut plain = Vec::new(); + plain.extend_from_slice(b"BM"); + plain.extend_from_slice(&100u32.to_le_bytes()); // file_size = 100 + plain.extend_from_slice(&[0; 4]); // reserved + plain.extend_from_slice(&54u32.to_le_bytes()); // pixel data 
offset = 54 + plain.resize(100, 0); // 整个文件 100 字节,匹配 file_size + let enc = xor_encrypt(&plain, 0x55); + assert_eq!(detect_key(&enc), Some(0x55)); + } + + #[test] + fn reject_random_bytes() { + // 全 0 文件:BMP 检测会算出 key = 0x42 ^ 0 = 0x42, + // 但解密出的 BMP file_size = 0 vs file_size = 100,差距 > 1024 → + // 应该 reject + let bytes = vec![0u8; 100]; + assert_eq!(detect_key(&bytes), None); + } + + #[test] + fn decode_round_trip_jpg() { + let mut plain = vec![0xFF, 0xD8, 0xFF, 0xE0]; + plain.extend_from_slice(b"JFIF padding here"); + let enc = xor_encrypt(&plain, 0xAB); + let out = decode(&enc).unwrap(); + assert_eq!(out.format, "jpg"); + assert_eq!(out.decoder, "legacy_xor"); + assert_eq!(out.data, plain); + } +} diff --git a/src/attachment/decoder/v2.rs b/src/attachment/decoder/v2.rs new file mode 100644 index 0000000..1c90f29 --- /dev/null +++ b/src/attachment/decoder/v2.rs @@ -0,0 +1,130 @@ +//! V2 .dat 解码:`AES-128-ECB(PKCS7) + raw + XOR` 三段拼接。 +//! +//! 文件结构(来自上游 `decode_image.py::v2_decrypt_file`): +//! `[6B magic V2/V1] [4B aes_size LE] [4B xor_size LE] [1B padding]` +//! `[aligned_aes_size bytes AES-ECB ciphertext]` +//! `[len - aligned_aes_size - xor_size bytes raw_data (不加密)]` +//! `[xor_size bytes XOR (单字节 key)]` +//! +//! `aligned_aes_size`:把 `aes_size` 向上对齐到 16 的倍数;当 `aes_size` 本身是 +//! 16 的倍数时,PKCS7 还会再加一整块 padding,所以再 +16。等价于 +//! `aes_size + (16 - aes_size % 16)`。 +//! +//! ⚠️ 此模块由 codex 落地完整 V2 实现 + image key 模块。当前只提供一个 +//! `decode` 入口骨架,方便 v1_aes 路径(固定 key)和 dispatch 一起编译过。 +//! 
`aes_key=None` 时返回带具体诊断信息的错误。 + +use anyhow::{anyhow, bail, Result}; + +use super::{detect_image_format, DecodedImage, V2KeyMaterial, V1_MAGIC, V2_MAGIC}; + +const HEADER_SIZE: usize = 15; + +pub fn decode(file_bytes: &[u8], key: V2KeyMaterial<'_>) -> Result { + if file_bytes.len() < HEADER_SIZE { + bail!("V2 .dat: 文件过短({} < {} 字节)", file_bytes.len(), HEADER_SIZE); + } + let magic: &[u8; 6] = file_bytes[..6].try_into().unwrap(); + if magic != &V2_MAGIC && magic != &V1_MAGIC { + bail!("V2 .dat: header magic 不匹配 V1/V2"); + } + + let aes_key = key.aes_key.ok_or_else(|| { + anyhow!("V2 .dat: 需要 image AES key(codex 的 image_key 模块尚未填充)") + })?; + + let aes_size = u32::from_le_bytes(file_bytes[6..10].try_into().unwrap()) as usize; + let xor_size = u32::from_le_bytes(file_bytes[10..14].try_into().unwrap()) as usize; + + // PKCS7 对齐:aes_size 不是 16 的倍数 → 向上对齐;是 16 的倍数 → 再加一整块 + let aligned_aes_size = aes_size + (16 - (aes_size % 16)); + + let aes_end = HEADER_SIZE.checked_add(aligned_aes_size).ok_or_else(|| anyhow!("aes 段长度溢出"))?; + if aes_end > file_bytes.len() { + bail!( + "V2 .dat: 头部宣称 aes_size={} (aligned={}) 超过文件长度 {}", + aes_size, + aligned_aes_size, + file_bytes.len() + ); + } + let raw_end = file_bytes.len().checked_sub(xor_size).ok_or_else(|| { + anyhow!("V2 .dat: 头部宣称 xor_size={} 超过文件长度 {}", xor_size, file_bytes.len()) + })?; + if aes_end > raw_end { + bail!( + "V2 .dat: aes_end={} > raw_end={}(aes/xor 段重叠)", + aes_end, + raw_end + ); + } + + // === AES-128-ECB 解密 + PKCS7 unpad === + let aes_data = &file_bytes[HEADER_SIZE..aes_end]; + let dec_aes = aes_ecb_decrypt_pkcs7(aes_key, aes_data)?; + + // === Raw 段(未加密) === + let raw_data = &file_bytes[aes_end..raw_end]; + + // === XOR 段 === + let xor_data: Vec = file_bytes[raw_end..].iter().map(|b| b ^ key.xor_key).collect(); + + let mut out = Vec::with_capacity(dec_aes.len() + raw_data.len() + xor_data.len()); + out.extend_from_slice(&dec_aes); + out.extend_from_slice(raw_data); + out.extend_from_slice(&xor_data); + + 
let format = detect_image_format(&out); + if format == "bin" { + bail!("V2 .dat: AES 解密成功但产物 magic 不识别(key 可能错)"); + } + Ok(DecodedImage { data: out, format, decoder: "v2" }) +} + +/// AES-128-ECB 解密 + PKCS7 unpad。失败时返回 `Err`,不返回半结果。 +/// +/// 不引第三方 ECB 包;ECB 本身就是 block-by-block,手工跑就行。 +/// PKCS7 padding 由本函数最后一段做 strict 校验:长度 1..=16,且尾部全是同值字节。 +fn aes_ecb_decrypt_pkcs7(key: &[u8; 16], cipher: &[u8]) -> Result> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + if cipher.is_empty() || cipher.len() % 16 != 0 { + bail!("AES 输入长度 {} 不是 16 的倍数", cipher.len()); + } + let aes = aes::Aes128::new(key.into()); + let mut out = Vec::with_capacity(cipher.len()); + for chunk in cipher.chunks_exact(16) { + let mut block = GenericArray::clone_from_slice(chunk); + aes.decrypt_block(&mut block); + out.extend_from_slice(&block); + } + let pad = *out.last().ok_or_else(|| anyhow!("AES PKCS7: 空输出"))? as usize; + if pad == 0 || pad > 16 || pad > out.len() { + bail!("AES PKCS7: 非法 padding 长度 {}", pad); + } + let tail = &out[out.len() - pad..]; + if !tail.iter().all(|&b| b as usize == pad) { + bail!("AES PKCS7: padding 字节不一致"); + } + out.truncate(out.len() - pad); + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rejects_short_file() { + let r = decode(&[0u8; 4], V2KeyMaterial::default()); + assert!(r.is_err()); + } + + #[test] + fn rejects_v2_without_key() { + let mut buf = V2_MAGIC.to_vec(); + buf.extend_from_slice(&[0u8; HEADER_SIZE - 6]); + let r = decode(&buf, V2KeyMaterial::default()); + let err = r.unwrap_err().to_string(); + assert!(err.contains("AES key"), "{}", err); + } +} diff --git a/src/attachment/image_key/linux.rs b/src/attachment/image_key/linux.rs new file mode 100644 index 0000000..4100ab2 --- /dev/null +++ b/src/attachment/image_key/linux.rs @@ -0,0 +1,11 @@ +use anyhow::{bail, Result}; + +use super::{ImageKeyMaterial, ImageKeyProvider}; + +pub struct LinuxImageKeyProvider; + +impl ImageKeyProvider for 
LinuxImageKeyProvider { + fn get_key(&self, _wxid: &str) -> Result { + bail!("Linux V2 图片 key 当前未实现;请先用 legacy/V1 图片或在 README 中标注 unsupported") + } +} diff --git a/src/attachment/image_key/macos.rs b/src/attachment/image_key/macos.rs new file mode 100644 index 0000000..127d81c --- /dev/null +++ b/src/attachment/image_key/macos.rs @@ -0,0 +1,423 @@ +//! macOS V2 image AES key 提取。 +//! +//! 主路径:从 `key__*.statistic` 文件名拿 uin,然后 +//! `md5(str(uin) + normalize(wxid)).hex()[:16]` 派生 AES key。 +//! +//! fallback:通过 `md5(str(uin))[:4] == wxid_suffix` + `uin & 0xff == xor_key` +//! 把搜索空间压到 2^24,再用 V2 模板反验 AES key。 + +use anyhow::{bail, Context, Result}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; + +use crate::config; + +use super::{ + attach_root_for_db_dir, configured_db_dir_for_wxid, derive_xor_key_from_v2_dat, + find_v2_template_ciphertexts, join_components, normalize_wxid, verify_aes_key, wxid_from_db_dir, + ImageKeyMaterial, ImageKeyProvider, +}; + +pub struct MacosImageKeyProvider { + configured_db_dir: Result, + cache: Mutex>, +} + +impl MacosImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for MacosImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = normalize_wxid(wxid); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = derive_key_for_paths(&db_dir, &attach_dir)?; + 
self.cache.lock().unwrap().insert(cache_key, key); + Ok(key) + } +} + +fn derive_key_for_paths(db_dir: &Path, attach_dir: &Path) -> Result { + let templates = find_v2_template_ciphertexts(attach_dir, 3, 64)?; + if templates.is_empty() { + bail!("在 {} 下找不到 V2 模板文件", attach_dir.display()); + } + + if let Some(found) = find_via_kvcomm(db_dir, &templates)? { + return Ok(found); + } + + let (wxid_full, wxid_norm, suffix) = + extract_wxid_parts(db_dir).context("db_dir 不含可用于 fallback 的 wxid 4 位后缀")?; + let (xor_key, _votes, _total) = derive_xor_key_from_v2_dat(attach_dir, 10, 3)? + .context("V2 .dat 样本不足,无法投票反推 xor_key")?; + + for wxid in preferred_wxid_candidates(&wxid_full, &wxid_norm) { + if let Some(aes_key) = bruteforce_aes_key(xor_key, &suffix, wxid, &templates)? { + return Ok(ImageKeyMaterial { aes_key, xor_key }); + } + } + + bail!("macOS V2 图片 key 派生失败") +} + +fn find_via_kvcomm(db_dir: &Path, templates: &[[u8; 16]]) -> Result> { + let Some(kvcomm_dir) = find_existing_kvcomm_dir(db_dir) else { + return Ok(None); + }; + + let codes = collect_kvcomm_codes(&kvcomm_dir)?; + if codes.is_empty() { + return Ok(None); + } + let wxids = collect_wxid_candidates(db_dir); + if wxids.is_empty() { + return Ok(None); + } + + for wxid in wxids { + for code in &codes { + let candidate = derive_image_key_material(*code, &wxid); + if verify_aes_key(&candidate.aes_key, templates) { + return Ok(Some(candidate)); + } + } + } + Ok(None) +} + +fn derive_image_key_material(code: u32, wxid: &str) -> ImageKeyMaterial { + let xor_key = (code & 0xFF) as u8; + let digest = format!("{:x}", md5::compute(format!("{}{}", code, wxid))); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + ImageKeyMaterial { aes_key, xor_key } +} + +fn collect_wxid_candidates(db_dir: &Path) -> Vec { + let Some(raw) = wxid_from_db_dir(db_dir) else { + return Vec::new(); + }; + let mut out = vec![raw.clone()]; + let normalized = normalize_wxid(&raw); + if normalized != raw { + 
/// Orders the wxid spellings to try during brute force: the normalized form
/// first (when it differs from the raw one), then the raw form.
fn preferred_wxid_candidates<'a>(raw: &'a str, normalized: &'a str) -> Vec<&'a str> {
    let mut ordered = Vec::with_capacity(2);
    if normalized != raw {
        ordered.push(normalized);
    }
    ordered.push(raw);
    ordered
}
{ + let entry = entry?; + let Some(name) = entry.file_name().to_str().map(|value| value.to_string()) else { + continue; + }; + let Some(rest) = name.strip_prefix("key_") else { + continue; + }; + let Some((code, _)) = rest.split_once('_') else { + continue; + }; + if let Ok(code) = code.parse::() { + codes.insert(code); + } + } + Ok(codes.into_iter().collect()) +} + +fn bruteforce_aes_key( + xor_key: u8, + suffix_hex: &str, + wxid: &str, + templates: &[[u8; 16]], +) -> Result> { + let suffix = hex_prefix_to_bytes(suffix_hex)?; + let workers = std::thread::available_parallelism() + .map(|count| count.get()) + .unwrap_or(1) + .max(1); + let total = 1u32 << 24; + let chunk = total / workers as u32; + let stop = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel(); + let wxid = Arc::new(wxid.as_bytes().to_vec()); + let templates = Arc::new(templates.to_vec()); + + std::thread::scope(|scope| { + for idx in 0..workers { + let start = idx as u32 * chunk; + let end = if idx + 1 == workers { + total + } else { + (idx as u32 + 1) * chunk + }; + let stop = Arc::clone(&stop); + let tx = tx.clone(); + let wxid = Arc::clone(&wxid); + let templates = Arc::clone(&templates); + scope.spawn(move || { + for upper in start..end { + if stop.load(Ordering::Relaxed) { + break; + } + let uin = (upper << 8) | xor_key as u32; + let uin_ascii = uin.to_string(); + let digest = md5::compute(uin_ascii.as_bytes()); + if digest.0[0] != suffix[0] || digest.0[1] != suffix[1] { + continue; + } + + let mut input = Vec::with_capacity(uin_ascii.len() + wxid.len()); + input.extend_from_slice(uin_ascii.as_bytes()); + input.extend_from_slice(&wxid); + let aes_hex = format!("{:x}", md5::compute(input)); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&aes_hex.as_bytes()[..16]); + if verify_aes_key(&aes_key, &templates) { + stop.store(true, Ordering::Relaxed); + let _ = tx.send(aes_key); + break; + } + } + }); + } + }); + drop(tx); + Ok(rx.try_iter().next()) +} + +fn 
/// Creates a fresh, empty scratch directory under the system temp dir.
///
/// The directory name combines `label` with the current thread id; any
/// leftover directory of the same name is removed first.
fn temp_dir(label: &str) -> std::path::PathBuf {
    let leaf = format!(
        "wx-cli-image-key-macos-{}-{:?}",
        label,
        std::thread::current().id()
    );
    let dir = std::env::temp_dir().join(leaf);
    // Best effort: the directory usually does not exist yet.
    let _ = fs::remove_dir_all(&dir);
    fs::create_dir_all(&dir).unwrap();
    dir
}
fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + assert_eq!(find_existing_kvcomm_dir(&db_dir), Some(kvcomm)); + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_kvcomm() { + let dir = temp_dir("via-kvcomm"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + let attach = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/msg/attach/chat/2026-05/Img", + ); + let kvcomm = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/app_data/net/kvcomm", + ); + fs::create_dir_all(&db_dir).unwrap(); + fs::create_dir_all(&kvcomm).unwrap(); + fs::write(kvcomm.join("key_42_x.statistic"), b"").unwrap(); + + let digest = format!("{:x}", md5::compute("42your_wxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + write_v2_template( + &attach.join("sample_t.dat"), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn derives_key_via_bruteforce_fallback() { + let dir = temp_dir("via-fallback"); + let suffix = format!("{:x}", md5::compute("42")) + .chars() + .take(4) + .collect::(); + let raw_wxid = format!("mywxid_{}", suffix); + let db_dir = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/db_storage", + raw_wxid + )); + let attach = dir.join(format!( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/{}/msg/attach/chat/2026-05/Img", + raw_wxid + )); + fs::create_dir_all(&db_dir).unwrap(); + + let digest = format!("{:x}", md5::compute("42mywxid")); + let mut aes_key = [0u8; 16]; + aes_key.copy_from_slice(&digest.as_bytes()[..16]); + for idx 
in 0..3 { + write_v2_template( + &attach.join(format!("sample{}_t.dat", idx)), + &aes_key, + 42, + b"\xFF\xD8\xFFtemplate-001!", + ); + } + + let derived = derive_key_for_paths(&db_dir, db_dir.parent().unwrap().join("msg/attach").as_path()) + .unwrap(); + assert_eq!(derived.aes_key, aes_key); + assert_eq!(derived.xor_key, 42); + + let _ = fs::remove_dir_all(dir); + } + + #[test] + fn collects_raw_and_normalized_wxid() { + let dir = temp_dir("wxid"); + let db_dir = dir.join( + "Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/your_wxid_a1b2/db_storage", + ); + fs::create_dir_all(&db_dir).unwrap(); + let wxids = collect_wxid_candidates(&db_dir); + assert_eq!(wxids, vec!["your_wxid_a1b2".to_string(), "your_wxid".to_string()]); + let _ = fs::remove_dir_all(dir); + } +} diff --git a/src/attachment/image_key/mod.rs b/src/attachment/image_key/mod.rs new file mode 100644 index 0000000..74eee30 --- /dev/null +++ b/src/attachment/image_key/mod.rs @@ -0,0 +1,342 @@ +//! V2 image AES key 提取 — 平台相关。 +//! +//! 路径: +//! - macOS:磁盘派生(`key__*.statistic` 文件名拿 uin → `md5(str(uin) + wxid)[:16]`) +//! + brute-force fallback(`md5(str(uin))[:4] == wxid_suffix` 枚举 2^24) +//! - Windows:扫 `Weixin.exe` 内存,匹配 `[a-zA-Z0-9]{32}` 候选,按已知 AES ciphertext-block +//! 反验(`find_image_key.py` / `find_image_key.c` 已写实) +//! 
- Linux:上游空白;当前不实现,遇到 V2 .dat 返回 unsupported 错误 + +#[cfg(target_os = "linux")] +pub mod linux; +#[cfg(target_os = "macos")] +pub mod macos; +#[cfg(target_os = "windows")] +pub mod windows; + +use anyhow::Result; +use regex::bytes::Regex; +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use crate::attachment::decoder::{detect_image_format, V2_MAGIC}; + +/// V2 图片真正需要的是两份材料: +/// - 16 字节 ASCII AES key +/// - XOR key(macOS 上来自 uin & 0xff,不是总能硬编码成 0x88) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ImageKeyMaterial { + pub aes_key: [u8; 16], + pub xor_key: u8, +} + +/// 单个 wxid 的 V2 image key 提取接口。 +/// +/// 实现者负责跨调用缓存(一台机器上同一 wxid 的 image key 在微信不重启时通常稳定)。 +pub trait ImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result; + + fn get_aes_key(&self, wxid: &str) -> Result<[u8; 16]> { + Ok(self.get_key(wxid)?.aes_key) + } + + fn get_xor_key(&self, wxid: &str) -> Result { + Ok(self.get_key(wxid)?.xor_key) + } +} + +/// 平台默认实现。 +pub fn default_provider() -> Option> { + #[cfg(target_os = "macos")] + { + return Some(Box::new(macos::MacosImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "windows")] + { + return Some(Box::new(windows::WindowsImageKeyProvider::from_current_config())); + } + #[cfg(target_os = "linux")] + { + return Some(Box::new(linux::LinuxImageKeyProvider)); + } + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] + { + None + } +} + +pub(crate) fn configured_db_dir_for_wxid(configured_db_dir: &Path, requested_wxid: &str) -> PathBuf { + if requested_wxid.trim().is_empty() { + return configured_db_dir.to_path_buf(); + } + + let configured_leaf = wxid_from_db_dir(configured_db_dir); + if let Some(leaf) = configured_leaf.as_deref() { + if same_wxid(leaf, requested_wxid) { + return configured_db_dir.to_path_buf(); + } + } + + xwechat_files_root(configured_db_dir) + .map(|root| root.join(requested_wxid).join("db_storage")) + .unwrap_or_else(|| 
/// Reduces an account directory name to its base wxid.
///
/// * Surrounding whitespace is trimmed.
/// * `wxid_<head>_<anything>` becomes `wxid_<head>`.
/// * Otherwise a trailing `_<4 hex digits>` suffix is dropped.
/// * Anything else is returned unchanged (an empty input stays empty).
pub(crate) fn normalize_wxid(raw: &str) -> String {
    let trimmed = raw.trim();

    if let Some(rest) = trimmed.strip_prefix("wxid_") {
        // Keep only the segment up to the first underscore after the prefix.
        let head = match rest.find('_') {
            Some(cut) => &rest[..cut],
            None => rest,
        };
        return ["wxid_", head].concat();
    }

    match trimmed.rsplit_once('_') {
        Some((base, tail))
            if tail.len() == 4 && tail.bytes().all(|byte| byte.is_ascii_hexdigit()) =>
        {
            base.to_owned()
        }
        _ => trimmed.to_owned(),
    }
}
+ if out.is_empty() { + out = collect_templates_with_suffix(attach_dir, ".dat", max_templates, max_files)?; + } + Ok(out) +} + +pub(crate) fn derive_xor_key_from_v2_dat( + attach_dir: &Path, + sample: usize, + min_samples: usize, +) -> Result> { + if !attach_dir.is_dir() { + return Ok(None); + } + let mut votes = Vec::new(); + visit_files(attach_dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(".dat") { + return Ok(false); + } + + let meta = fs::metadata(path)?; + if meta.len() < 0x20 { + return Ok(false); + } + + let bytes = fs::read(path)?; + if bytes.starts_with(&V2_MAGIC) { + let last = *bytes.last().unwrap(); + votes.push(last ^ 0xD9); + if votes.len() >= sample { + return Ok(true); + } + } + Ok(false) + })?; + + if votes.len() < min_samples { + return Ok(None); + } + + let mut counts = [0usize; 256]; + for vote in &votes { + counts[*vote as usize] += 1; + } + let (xor_key, top_votes) = counts + .iter() + .enumerate() + .max_by_key(|(_, count)| *count) + .map(|(idx, count)| (idx as u8, *count)) + .expect("votes 非空"); + Ok(Some((xor_key, top_votes, votes.len()))) +} + +pub(crate) fn verify_aes_key(aes_key: &[u8; 16], templates: &[[u8; 16]]) -> bool { + !templates.is_empty() + && templates + .iter() + .all(|template| decrypt_template_block(aes_key, template).is_some()) +} + +pub(crate) fn ascii_alnum_candidates<'a>(buf: &'a [u8], len: usize) -> Vec<&'a [u8]> { + let re = match len { + 16 => regex16(), + 32 => regex32(), + _ => return Vec::new(), + }; + + re.find_iter(buf) + .filter_map(|matched| { + let start = matched.start(); + let end = matched.end(); + let left_ok = start == 0 || !buf[start - 1].is_ascii_alphanumeric(); + let right_ok = end == buf.len() || !buf[end].is_ascii_alphanumeric(); + (left_ok && right_ok).then_some(&buf[start..end]) + }) + .collect() +} + +fn collect_templates_with_suffix( + dir: &Path, + suffix: &str, + max_templates: usize, + 
max_files: usize, +) -> Result> { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + let mut examined = 0usize; + visit_files(dir, &mut |path| -> Result { + let Some(name) = path.file_name().and_then(|value| value.to_str()) else { + return Ok(false); + }; + if !name.ends_with(suffix) { + return Ok(false); + } + examined += 1; + let bytes = fs::read(path)?; + if bytes.len() >= 0x1F && bytes.starts_with(&V2_MAGIC) { + let template: [u8; 16] = bytes[0x0F..0x1F].try_into().unwrap(); + if seen.insert(template) { + out.push(template); + if out.len() >= max_templates { + return Ok(true); + } + } + } + Ok(examined >= max_files && !out.is_empty()) + })?; + Ok(out) +} + +fn visit_files(dir: &Path, f: &mut F) -> Result +where + F: FnMut(&Path) -> Result, +{ + let mut entries: Vec = fs::read_dir(dir)? + .flatten() + .map(|entry| entry.path()) + .collect(); + entries.sort(); + + for path in entries { + if path.is_dir() { + if visit_files(&path, f)? { + return Ok(true); + } + continue; + } + if f(&path)? 
{ + return Ok(true); + } + } + Ok(false) +} + +fn decrypt_template_block(aes_key: &[u8; 16], ciphertext: &[u8; 16]) -> Option<&'static str> { + use aes::cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}; + + let cipher = aes::Aes128::new(aes_key.into()); + let mut block = GenericArray::clone_from_slice(ciphertext); + cipher.decrypt_block(&mut block); + let block: [u8; 16] = block.as_slice().try_into().ok()?; + let format = detect_image_format(&block); + (format != "bin").then_some(format) +} + +fn regex16() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{16}").unwrap()) +} + +fn regex32() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"[A-Za-z0-9]{32}").unwrap()) +} + +#[cfg(test)] +mod tests { + use super::{ascii_alnum_candidates, normalize_wxid, same_wxid}; + + #[test] + fn regex_candidates_respect_boundaries() { + let buf = b"xx 0123456789ABCDef yy"; + let hits = ascii_alnum_candidates(buf, 16); + assert_eq!(hits, vec![&buf[3..19]]); + } + + #[test] + fn regex_candidates_ignore_embedded_runs() { + let buf = b"x0123456789ABCDefz"; + assert!(ascii_alnum_candidates(buf, 16).is_empty()); + } + + #[test] + fn wxid_normalization_matches_expected_forms() { + assert_eq!(normalize_wxid("wxid_abc_def"), "wxid_abc"); + assert_eq!(normalize_wxid("your_wxid_a1b2"), "your_wxid"); + assert!(same_wxid("your_wxid_a1b2", "your_wxid")); + } +} diff --git a/src/attachment/image_key/windows.rs b/src/attachment/image_key/windows.rs new file mode 100644 index 0000000..0b7acd8 --- /dev/null +++ b/src/attachment/image_key/windows.rs @@ -0,0 +1,238 @@ +//! Windows V2 image AES key 提取。 +//! +//! 扫 `Weixin.exe` 进程内存,匹配模式 `[A-Za-z0-9]{32}` / `[A-Za-z0-9]{16}`, +//! 
然后用 V2 模板 AES block 反验,控制 false positive。 + +use anyhow::{bail, Context, Result}; +use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; +use std::sync::Mutex; + +use windows::Win32::Foundation::{CloseHandle, HANDLE}; +use windows::Win32::System::Diagnostics::Debug::ReadProcessMemory; +use windows::Win32::System::Diagnostics::ToolHelp::{ + CreateToolhelp32Snapshot, Process32First, Process32Next, PROCESSENTRY32, TH32CS_SNAPPROCESS, +}; +use windows::Win32::System::Memory::{ + VirtualQueryEx, MEMORY_BASIC_INFORMATION, MEM_COMMIT, PAGE_EXECUTE_READWRITE, + PAGE_EXECUTE_WRITECOPY, PAGE_GUARD, PAGE_NOCACHE, PAGE_NOACCESS, PAGE_READWRITE, + PAGE_WRITECOMBINE, PAGE_WRITECOPY, +}; +use windows::Win32::System::Threading::{OpenProcess, PROCESS_QUERY_INFORMATION, PROCESS_VM_READ}; + +use crate::config; + +use super::{ + ascii_alnum_candidates, attach_root_for_db_dir, configured_db_dir_for_wxid, + derive_xor_key_from_v2_dat, find_v2_template_ciphertexts, verify_aes_key, ImageKeyMaterial, + ImageKeyProvider, +}; + +const CHUNK_SIZE: usize = 2 * 1024 * 1024; +const MAX_REGION_SIZE: usize = 50 * 1024 * 1024; + +pub struct WindowsImageKeyProvider { + configured_db_dir: Result, + cache: Mutex>, +} + +impl WindowsImageKeyProvider { + pub fn from_current_config() -> Self { + let configured_db_dir = config::load_config() + .map(|cfg| cfg.db_dir) + .map_err(|err| err.to_string()); + Self { + configured_db_dir, + cache: Mutex::new(HashMap::new()), + } + } +} + +impl ImageKeyProvider for WindowsImageKeyProvider { + fn get_key(&self, wxid: &str) -> Result { + let cache_key = wxid.trim().to_string(); + if let Some(found) = self.cache.lock().unwrap().get(&cache_key).copied() { + return Ok(found); + } + + let configured_db_dir = self + .configured_db_dir + .as_ref() + .map_err(|err| anyhow::anyhow!("读取 config.db_dir 失败: {}", err))?; + let db_dir = configured_db_dir_for_wxid(configured_db_dir, wxid); + let attach_dir = attach_root_for_db_dir(&db_dir); + let key = 
/// Looks up the process id of the first process named `Weixin.exe`
/// (ASCII case-insensitive) in a ToolHelp process snapshot.
///
/// Returns `None` when the snapshot cannot be created, when enumeration
/// cannot start, or when no process matches. The snapshot handle is closed
/// on every path that created it.
fn find_wechat_pid() -> Option<u32> {
    // SAFETY (review): plain FFI call with constant arguments; failure is
    // mapped to `None` before the handle is used.
    let snapshot = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? };
    // `dwSize` is set to the structure size before the first enumeration call.
    let mut entry = PROCESSENTRY32 {
        dwSize: std::mem::size_of::<PROCESSENTRY32>() as u32,
        ..Default::default()
    };

    // SAFETY (review): `snapshot` is the handle obtained above and `entry` is
    // a live, initialised structure. Reading `szExeFile` as a C string assumes
    // the API NUL-terminates it — TODO confirm against the Win32 docs.
    unsafe {
        if Process32First(snapshot, &mut entry).is_err() {
            let _ = CloseHandle(snapshot);
            return None;
        }
        loop {
            let name =
                std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8).to_string_lossy();
            if name.eq_ignore_ascii_case("Weixin.exe") {
                let pid = entry.th32ProcessID;
                let _ = CloseHandle(snapshot);
                return Some(pid);
            }
            // An error from `Process32Next` is treated as end of enumeration.
            if Process32Next(snapshot, &mut entry).is_err() {
                break;
            }
        }
        let _ = CloseHandle(snapshot);
    }
    None
}
MEM_COMMIT && is_candidate_page(mbi.Protect.0) && size <= MAX_REGION_SIZE { + if let Some(aes_key) = scan_region(process, base, size, templates, &mut seen)? { + return Ok(aes_key); + } + } + + address = base.saturating_add(size); + if address == 0 { + break; + } + } + + bail!("Windows 进程内存里没有找到可验证的 V2 AES key") +} + +fn scan_region( + process: HANDLE, + base: usize, + size: usize, + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Result> { + let overlap = 31usize; + let mut offset = 0usize; + + while offset < size { + let chunk_size = std::cmp::min(CHUNK_SIZE, size - offset); + let addr = base + offset; + let mut buf = vec![0u8; chunk_size]; + let mut bytes_read = 0usize; + + let ok = unsafe { + ReadProcessMemory( + process, + addr as *const _, + buf.as_mut_ptr() as *mut _, + chunk_size, + Some(&mut bytes_read), + ) + .is_ok() + }; + + if ok && bytes_read > 0 { + buf.truncate(bytes_read); + if let Some(key) = scan_candidate_buffer(&buf, templates, seen) { + return Ok(Some(key)); + } + } + + offset += if chunk_size > overlap { + chunk_size - overlap + } else { + chunk_size + }; + } + + Ok(None) +} + +fn scan_candidate_buffer( + buf: &[u8], + templates: &[[u8; 16]], + seen: &mut HashSet<[u8; 16]>, +) -> Option<[u8; 16]> { + for candidate in ascii_alnum_candidates(buf, 32) { + let mut key = [0u8; 16]; + key.copy_from_slice(&candidate[..16]); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + for candidate in ascii_alnum_candidates(buf, 16) { + let mut key = [0u8; 16]; + key.copy_from_slice(candidate); + if seen.insert(key) && verify_aes_key(&key, templates) { + return Some(key); + } + } + None +} + +fn is_candidate_page(protect: u32) -> bool { + if protect == PAGE_NOACCESS.0 || (protect & PAGE_GUARD.0) != 0 { + return false; + } + let base = protect & !(PAGE_GUARD.0 | PAGE_NOCACHE.0 | PAGE_WRITECOMBINE.0); + matches!( + base, + value if value == PAGE_READWRITE.0 + || value == PAGE_WRITECOPY.0 + || value == 
PAGE_EXECUTE_READWRITE.0 + || value == PAGE_EXECUTE_WRITECOPY.0 + ) +} diff --git a/src/attachment/mod.rs b/src/attachment/mod.rs new file mode 100644 index 0000000..43dd14e --- /dev/null +++ b/src/attachment/mod.rs @@ -0,0 +1,28 @@ +//! 聊天附件提取链路(图片 / 视频 / 语音 / 文件本体的本地解码) +//! +//! 整条链: +//! message_N.db (Msg_) → message_resource.db (ChatName2Id + MessageResourceInfo) +//! → packed_info protobuf md5 提取 → xwechat_files//msg/attach/.../Img/[_t|_h].dat +//! → magic 分发 (legacy XOR / V1 fixed-AES / V2 AES+XOR) → 写出实际图片 +//! +//! 模块切分: +//! - `attachment_id`:跨 IPC / CLI 的不透明 ID(base64url(json)) +//! - `resolver`:从 `attachment_id` 反查 message_resource.db,定位本地 .dat +//! - `decoder`:根据文件 magic 分发到具体解码器(V1 / V2 等) +//! - `image_key`:V2 image AES key 提取(macOS / Windows) +//! +//! V2 / image_key 模块由 codex 落地,先放空 stub 以便 V1 / resolver / CLI 不被 block。 + +// 此模块由分多个 PR/commit 增量启用: +// 1) 先落 attachment_id / decoder / resolver / image_key 骨架(本 commit) +// 2) IPC + CLI + daemon route 把它们串起来(后续 commit) +// 3) image_key 平台实现(codex 后续 commit) +// 在 step 1 完成、step 2 未到时,大量公开 API 仍未被引用,#[allow(dead_code)] 抑制噪音 +#![allow(dead_code)] + +pub mod attachment_id; +pub mod decoder; +pub mod resolver; +pub mod image_key; + +pub use attachment_id::{AttachmentId, AttachmentKind}; diff --git a/src/attachment/resolver.rs b/src/attachment/resolver.rs new file mode 100644 index 0000000..8db4f41 --- /dev/null +++ b/src/attachment/resolver.rs @@ -0,0 +1,439 @@ +//! 把 `AttachmentId` 翻译成本地 `.dat` 路径。 +//! +//! 流程: +//! 1. `chat` username → `ChatName2Id.rowid`(资源库) +//! 2. `(chat_id, local_id)` + `ORDER BY message_create_time DESC LIMIT 1` → +//! `MessageResourceInfo.packed_info` +//! 3. 从 `packed_info` (protobuf) 提取 32 字节 ASCII hex MD5 +//! 4. 在 `/msg/attach///Img/[_t|_h].dat` +//! 下找对应文件,按 full > _h > _t 优先级选一个 +//! +//! `` 由 daemon 已知(同 `db_dir` 的父目录),路径 layout 平台差异: +//! - Linux: `~/Documents/xwechat_files/` +//! - macOS: `~/Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files/` +//! 
/// Resolution result for a single attachment: what the resource database
/// says about it plus the matching file in the local attach tree.
#[derive(Debug, Clone)]
pub struct ResolvedAttachment {
    /// The attachment id this result was resolved from.
    pub id: AttachmentId,
    /// Resource MD5 extracted from `packed_info` (lowercase hex).
    pub md5: String,
    /// Path of the matched local `.dat` file (one is picked by the priority
    /// full > `_h` > `_t`).
    pub dat_path: PathBuf,
    /// File size in bytes.
    pub size: u64,
}
/// Extracts the 32-character ASCII hex MD5 from a `MessageResourceInfo.packed_info`
/// protobuf blob, normalized to lowercase.
///
/// Main path: look for the 4-byte marker `12 22 0a 20` (per the original notes:
/// field 2 LEN with length 34, then sub-field 1 LEN with length 32) followed by
/// 32 ASCII hex bytes. Every occurrence of the marker is tried, so an accidental
/// marker byte sequence followed by non-hex data does not hide a genuine one
/// later in the blob.
///
/// Fallback: if no marker-anchored match exists, return the first run of 32
/// consecutive ASCII hex bytes anywhere in the blob.
///
/// Returns `None` when neither strategy finds a candidate.
pub fn extract_md5_from_packed_info(blob: &[u8]) -> Option<String> {
    const MARKER: &[u8; 4] = &[0x12, 0x22, 0x0A, 0x20];
    const MD5_HEX_LEN: usize = 32;

    // Main path: walk over every marker occurrence, not just the first one.
    let mut search_from = 0;
    while let Some(rel) = find_subslice(&blob[search_from..], MARKER) {
        let start = search_from + rel + MARKER.len();
        // `get` returns None when fewer than 32 bytes follow the marker.
        if let Some(md5) = blob
            .get(start..start + MD5_HEX_LEN)
            .and_then(ascii_hex_to_lower)
        {
            return Some(md5);
        }
        // Resume one byte past this marker's start so overlapping hits are not skipped.
        search_from += rel + 1;
    }

    // Fallback: first window of 32 consecutive ASCII hex bytes.
    blob.windows(MD5_HEX_LEN).find_map(ascii_hex_to_lower)
}

/// Returns the lowercase string form of `chunk` if every byte is an ASCII hex digit.
fn ascii_hex_to_lower(chunk: &[u8]) -> Option<String> {
    chunk.iter().all(u8::is_ascii_hexdigit).then(|| {
        chunk
            .iter()
            .map(|b| char::from(b.to_ascii_lowercase()))
            .collect()
    })
}

/// Naive sub-slice search returning the index of the first match.
///
/// Kept dependency-free on purpose (no memchr/memmem); the original note says
/// the blobs are usually under 1 KB. An empty needle yields `None`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() || needle.len() > haystack.len() {
        return None;
    }
    haystack
        .windows(needle.len())
        .position(|w| w == needle)
}
+/// 找不到返回 None。 +/// +/// `attach_root` = `/msg/attach`。 +/// `create_time` 用于先定位 `` 子目录;找不到时再 fallback 全月份扫描, +/// 因为 WeChat 的 `YYYY-MM` 目录有时跟消息时间差 1 个月(按收到时间归档)。 +pub fn find_dat_file( + attach_root: &Path, + chat: &str, + file_md5: &str, + create_time: i64, +) -> Option { + let chat_hash = format!("{:x}", md5::compute(chat.as_bytes())); + let chat_dir = attach_root.join(&chat_hash); + if !chat_dir.is_dir() { + return None; + } + + // 第一步:试 create_time 当月 + 前后各一个月(共 3 个候选目录) + let candidates_ym: Vec = three_month_candidates(create_time); + for ym in &candidates_ym { + let img_dir = chat_dir.join(ym).join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + + // 第二步 fallback:扫整个 chat_dir 的所有月份子目录 + let entries = std::fs::read_dir(&chat_dir).ok()?; + let mut all_months: Vec = entries + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.is_dir()) + .collect(); + // 已经试过的 3 个候选可以跳过,但成本极小;保留全量扫 + all_months.sort(); + for month_dir in all_months { + let img_dir = month_dir.join("Img"); + if let Some(p) = pick_best_in_img_dir(&img_dir, file_md5) { + return Some(p); + } + } + None +} + +fn pick_best_in_img_dir(img_dir: &Path, file_md5: &str) -> Option { + if !img_dir.is_dir() { + return None; + } + let full = img_dir.join(format!("{}.dat", file_md5)); + if full.is_file() { + return Some(full); + } + let hd = img_dir.join(format!("{}_h.dat", file_md5)); + if hd.is_file() { + return Some(hd); + } + let thumb = img_dir.join(format!("{}_t.dat", file_md5)); + if thumb.is_file() { + return Some(thumb); + } + None +} + +fn three_month_candidates(unix_ts: i64) -> Vec { + use chrono::{Datelike, Duration}; + let dt = match chrono::Local.timestamp_opt(unix_ts, 0).single() { + Some(d) => d, + None => return Vec::new(), + }; + let prev = dt - Duration::days(31); + let next = dt + Duration::days(31); + [prev, dt, next] + .iter() + .map(|d| format!("{:04}-{:02}", d.year(), d.month())) + .collect() +} + +/// 把 `` (即 `db_storage` 
父目录)拼成 `/msg/attach`。 +pub fn attach_root_for(wxchat_base: &Path) -> PathBuf { + wxchat_base.join("msg").join("attach") +} + +/// 完整流程:用 `attachment_id` 拿 md5 + 找 .dat。失败返回带具体诊断信息的 `Err`。 +/// +/// `resource_db_path` 由 daemon 提供(DBCache 已经解密好); +/// `attach_root` 由 caller 拼好(`attach_root_for(wxchat_base)`)。 +/// 同步函数 — caller 在 `spawn_blocking` 里跑。 +pub fn resolve_blocking( + id: &AttachmentId, + resource_db_path: &Path, + attach_root: &Path, +) -> Result { + let lo32_type: i64 = match id.kind { + super::AttachmentKind::Image => 3, + super::AttachmentKind::Voice => 34, + super::AttachmentKind::Video => 43, + super::AttachmentKind::File => 49, + }; + + let meta = lookup_md5_blocking( + resource_db_path, + &id.chat, + id.local_id, + id.create_time, + lo32_type, + )? + .ok_or_else(|| { + anyhow!( + "message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)", + id.chat, + id.local_id, + lo32_type + ) + })?; + + let dat_path = find_dat_file(attach_root, &id.chat, &meta.md5, id.create_time).ok_or_else( + || { + anyhow!( + "找不到本地 .dat(md5={} chat={} create_time={})— 微信可能尚未下载该附件,或附件已被清理", + meta.md5, + id.chat, + id.create_time + ) + }, + )?; + let size = std::fs::metadata(&dat_path).map(|m| m.len()).unwrap_or(0); + + Ok(ResolvedAttachment { id: id.clone(), md5: meta.md5, dat_path, size }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_md5_main_path() { + // 构造一段含 12 22 0a 20 marker 的 blob + let mut blob = vec![0xAA, 0xBB, 0xCC]; + blob.extend_from_slice(&[0x12, 0x22, 0x0A, 0x20]); + blob.extend_from_slice(b"deadbeefcafebabe1234567890abcdef"); + blob.extend_from_slice(&[0xFF, 0xFF]); + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_fallback_no_marker() { + // 没有 marker,但 blob 里有合法 32 字节 hex + let mut blob = vec![0xFF, 0x00]; + blob.extend_from_slice(b"00112233445566778899aabbccddeeff"); + blob.extend_from_slice(&[0x01]); + assert_eq!( 
+ extract_md5_from_packed_info(&blob), + Some("00112233445566778899aabbccddeeff".to_string()) + ); + } + + #[test] + fn extract_md5_uppercase_normalized_to_lower() { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"DEADBEEFCAFEBABE1234567890ABCDEF"); + // 上游/CI/本地 file md5 都是 lowercase;强制小写化避免大小写不一致导致命中失败 + assert_eq!( + extract_md5_from_packed_info(&blob), + Some("deadbeefcafebabe1234567890abcdef".to_string()) + ); + } + + #[test] + fn extract_md5_returns_none_on_garbage() { + let blob = vec![0; 16]; + assert!(extract_md5_from_packed_info(&blob).is_none()); + } + + #[test] + fn lookup_md5_prefers_exact_create_time_over_latest_reuse() { + let dir = tempdir_for_test(); + let db_path = dir.join("message_resource.db"); + let conn = Connection::open(&db_path).unwrap(); + conn.execute( + "CREATE TABLE ChatName2Id (user_name TEXT)", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')", + [], + ) + .unwrap(); + conn.execute( + "CREATE TABLE MessageResourceInfo ( + chat_id INTEGER, + message_local_id INTEGER, + message_local_type INTEGER, + message_create_time INTEGER, + packed_info BLOB + )", + [], + ) + .unwrap(); + + let old_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"11111111111111111111111111111111"); + blob + }; + let new_blob = { + let mut blob = vec![0x12, 0x22, 0x0A, 0x20]; + blob.extend_from_slice(b"22222222222222222222222222222222"); + blob + }; + + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 1000i64, old_blob], + ) + .unwrap(); + conn.execute( + "INSERT INTO MessageResourceInfo + (chat_id, message_local_id, message_local_type, message_create_time, packed_info) + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![1i64, 7i64, 3i64, 2000i64, new_blob], + ) + .unwrap(); + + let old = 
lookup_md5_blocking(&db_path, "room@chatroom", 7, 1000, 3) + .unwrap() + .unwrap(); + let new = lookup_md5_blocking(&db_path, "room@chatroom", 7, 2000, 3) + .unwrap() + .unwrap(); + assert_eq!(old.md5, "11111111111111111111111111111111"); + assert_eq!(new.md5, "22222222222222222222222222222222"); + } + + #[test] + fn three_month_candidates_includes_prev_curr_next() { + // 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09 + let ts = chrono::Local + .with_ymd_and_hms(2025, 8, 15, 12, 0, 0) + .unwrap() + .timestamp(); + let v = three_month_candidates(ts); + assert!(v.contains(&"2025-07".to_string())); + assert!(v.contains(&"2025-08".to_string())); + assert!(v.contains(&"2025-09".to_string())); + } + + #[test] + fn pick_best_prefers_full_then_h_then_t() { + let tmp = tempdir_for_test(); + let img = tmp.join("Img"); + std::fs::create_dir_all(&img).unwrap(); + let md5 = "abcd1234"; + std::fs::write(img.join(format!("{}_t.dat", md5)), b"thumb").unwrap(); + std::fs::write(img.join(format!("{}_h.dat", md5)), b"hd").unwrap(); + // 只有 _t / _h 时取 _h + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}_h.dat", md5).as_str() + ); + // 加 full 后取 full + std::fs::write(img.join(format!("{}.dat", md5)), b"full").unwrap(); + assert_eq!( + pick_best_in_img_dir(&img, md5).unwrap().file_name().unwrap(), + format!("{}.dat", md5).as_str() + ); + } + + fn tempdir_for_test() -> PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-attach-test-{}-{}", pid, nanos)); + std::fs::create_dir_all(&p).unwrap(); + p + } +} diff --git a/src/cli/attachments.rs b/src/cli/attachments.rs new file mode 100644 index 0000000..662c256 --- /dev/null +++ b/src/cli/attachments.rs @@ -0,0 +1,42 @@ +use anyhow::Result; + +use crate::ipc::Request; +use super::history::{parse_time, parse_time_end}; +use 
super::output::{print_value, resolve}; +use super::transport; + +/// `wx attachments` — 列出指定会话的附件消息(默认 image,可多选)。 +/// +/// 输出每条 `attachment_id`,再传给 `wx extract` 才真正读 message_resource.db +/// 与本地 .dat 解码。这一步只查 `Msg_` 表,几千条群聊也能秒返。 +pub fn cmd_attachments( + chat: String, + kinds: Vec, + limit: usize, + offset: usize, + since: Option, + until: Option, + json: bool, +) -> Result<()> { + let since_ts = since.as_deref().map(parse_time).transpose()?; + let until_ts = until.as_deref().map(parse_time_end).transpose()?; + + // CLI 收上来的 Vec 为空时按默认(image)走,让 daemon 决定 fallback。 + let kinds_param = if kinds.is_empty() { None } else { Some(kinds) }; + + let req = Request::Attachments { + chat, + kinds: kinds_param, + limit, + offset, + since: since_ts, + until: until_ts, + }; + let resp = transport::send(req)?; + let data = resp + .data + .get("attachments") + .cloned() + .unwrap_or(serde_json::Value::Array(vec![])); + print_value(&data, &resolve(json)) +} diff --git a/src/cli/biz_articles.rs b/src/cli/biz_articles.rs new file mode 100644 index 0000000..0c74874 --- /dev/null +++ b/src/cli/biz_articles.rs @@ -0,0 +1,30 @@ +use anyhow::Result; +use crate::ipc::Request; +use super::history::{parse_time, parse_time_end}; +use super::transport; +use super::output::{resolve, print_value}; + +pub fn cmd_biz_articles( + limit: usize, + account: Option, + since: Option, + until: Option, + unread: bool, + json: bool, +) -> Result<()> { + let since_ts = since.as_deref().map(parse_time).transpose()?; + let until_ts = until.as_deref().map(parse_time_end).transpose()?; + + let req = Request::BizArticles { + limit, + account, + since: since_ts, + until: until_ts, + unread, + }; + let resp = transport::send(req)?; + let data = resp.data.get("articles") + .cloned() + .unwrap_or(serde_json::Value::Array(vec![])); + print_value(&data, &resolve(json)) +} diff --git a/src/cli/daemon_cmd.rs b/src/cli/daemon_cmd.rs index 31b0792..ded6827 100644 --- a/src/cli/daemon_cmd.rs +++ b/src/cli/daemon_cmd.rs 
@@ -1,7 +1,7 @@ -use anyhow::Result; -use crate::config; -use crate::cli::DaemonCommands; use crate::cli::transport; +use crate::cli::DaemonCommands; +use crate::config; +use anyhow::Result; pub fn cmd_daemon(cmd: DaemonCommands) -> Result<()> { match cmd { @@ -15,7 +15,13 @@ fn cmd_status() -> Result<()> { if transport::is_alive() { let pid_path = config::pid_path(); let pid = std::fs::read_to_string(&pid_path) - .map(|s| s.trim().to_string()) + .map(|s| { + serde_json::from_str::(&s) + .ok() + .and_then(|v| v.get("pid").and_then(|p| p.as_u64())) + .map(|pid| pid.to_string()) + .unwrap_or_else(|| s.trim().to_string()) + }) .unwrap_or_else(|_| "?".into()); println!("wx-daemon 运行中 (PID {})", pid); } else { @@ -25,42 +31,13 @@ fn cmd_status() -> Result<()> { } fn cmd_stop() -> Result<()> { - let pid_path = config::pid_path(); - if !pid_path.exists() { + if !transport::is_alive() { println!("daemon 未运行"); return Ok(()); } - let pid_str = std::fs::read_to_string(&pid_path)?; - let pid: u32 = pid_str.trim().parse() - .map_err(|_| anyhow::anyhow!("PID 文件格式错误"))?; - - #[cfg(unix)] - { - let ret = unsafe { libc::kill(pid as libc::pid_t, libc::SIGTERM) }; - if ret != 0 { - let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0); - if errno == libc::ESRCH { - println!("wx-daemon (PID {}) 已不在运行,清理残留文件", pid); - } else { - anyhow::bail!("发送 SIGTERM 失败 (errno {})", errno); - } - } else { - println!("已停止 wx-daemon (PID {})", pid); - } - } - - #[cfg(windows)] - { - std::process::Command::new("taskkill") - .args(["/PID", &pid.to_string(), "/F"]) - .output()?; - println!("已停止 wx-daemon (PID {})", pid); - } - - let _ = std::fs::remove_file(config::sock_path()); - let _ = std::fs::remove_file(&pid_path); - + transport::stop_daemon()?; + println!("已停止 wx-daemon"); Ok(()) } @@ -89,19 +66,25 @@ fn cmd_logs(follow: bool, lines: usize) -> Result<()> { file.read_to_string(&mut content)?; let all_lines: Vec<&str> = content.lines().collect(); let show = 
&all_lines[all_lines.len().saturating_sub(lines)..]; - for line in show { println!("{}", line); } + for line in show { + println!("{}", line); + } loop { std::thread::sleep(std::time::Duration::from_millis(500)); let mut buf = String::new(); file.read_to_string(&mut buf)?; - if !buf.is_empty() { print!("{}", buf); } + if !buf.is_empty() { + print!("{}", buf); + } } } } else { let content = std::fs::read_to_string(&log_path)?; let all_lines: Vec<&str> = content.lines().collect(); let show = &all_lines[all_lines.len().saturating_sub(lines)..]; - for line in show { println!("{}", line); } + for line in show { + println!("{}", line); + } } Ok(()) diff --git a/src/cli/extract.rs b/src/cli/extract.rs new file mode 100644 index 0000000..a0eba0d --- /dev/null +++ b/src/cli/extract.rs @@ -0,0 +1,25 @@ +use anyhow::Result; + +use crate::ipc::Request; +use super::output::{print_value, resolve}; +use super::transport; + +/// `wx extract` — 把单个 `attachment_id` 对应的资源解密写到指定路径。 +/// +/// daemon 端:解析 `attachment_id` → 查 `message_resource.db` 拿 file md5 → +/// 在 `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 解码器 → +/// 写出真实图片/文件。 +pub fn cmd_extract( + attachment_id: String, + output: String, + overwrite: bool, + json: bool, +) -> Result<()> { + let req = Request::Extract { + attachment_id, + output, + overwrite, + }; + let resp = transport::send(req)?; + print_value(&resp.data, &resolve(json)) +} diff --git a/src/cli/init.rs b/src/cli/init.rs index ece6af0..d7553b7 100644 --- a/src/cli/init.rs +++ b/src/cli/init.rs @@ -91,6 +91,10 @@ pub fn cmd_init(force: bool) -> Result<()> { std::fs::write(&config_path, serde_json::to_string_pretty(&cfg)?) 
.context("写入 config.json 失败")?; println!("配置已保存: {}", config_path.display()); + + // init 之后必须停掉旧 daemon(它用的是旧 config),下次调用会自动重启 + let _ = crate::cli::transport::stop_daemon(); + println!("初始化完成,可以使用 wx sessions / wx history 等命令了"); Ok(()) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 3a28060..2ec2476 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,4 +1,7 @@ mod init; +pub mod attachments; +pub mod biz_articles; +pub mod extract; pub mod sessions; pub mod history; pub mod search; @@ -220,6 +223,27 @@ enum Commands { #[arg(long)] json: bool, }, + /// 查询公众号文章推送(本地缓存) + BizArticles { + /// 显示数量 + #[arg(short = 'n', long, default_value = "50")] + limit: usize, + /// 限定公众号(名称模糊匹配) + #[arg(long)] + account: Option, + /// 起始时间 YYYY-MM-DD + #[arg(long)] + since: Option, + /// 结束时间 YYYY-MM-DD + #[arg(long)] + until: Option, + /// 只看有未读的公众号,每个公众号取最新 1 篇 + #[arg(long)] + unread: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 朋友圈全文搜索:匹配正文关键词 SnsSearch { /// 关键词 @@ -240,6 +264,44 @@ enum Commands { #[arg(long)] json: bool, }, + /// 列出某会话的图片附件,返回不透明 attachment_id + Attachments { + /// 会话名称(联系人显示名 / wxid / @chatroom username 都可以) + chat: String, + /// 类型(当前仅支持 image) + #[arg(long = "kind", value_name = "KIND", + value_parser = ["image", "img"])] + kinds: Vec, + /// 显示数量 + #[arg(short = 'n', long, default_value = "50")] + limit: usize, + /// 分页偏移 + #[arg(long, default_value = "0")] + offset: usize, + /// 起始时间 YYYY-MM-DD + #[arg(long)] + since: Option, + /// 结束时间 YYYY-MM-DD + #[arg(long)] + until: Option, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, + /// 把单个 attachment_id 对应的资源解密写到指定文件路径 + Extract { + /// 由 `wx attachments` 输出的不透明 ID(base64url 字符串) + attachment_id: String, + /// 输出文件路径(绝对或相对当前工作目录均可;扩展名建议保留为 .jpg 等) + #[arg(short = 'o', long)] + output: String, + /// 目标已存在时覆盖 + #[arg(long)] + overwrite: bool, + /// 输出 JSON(默认 YAML) + #[arg(long)] + json: bool, + }, /// 管理 wx-daemon Daemon { #[command(subcommand)] @@ -304,6 +366,15 @@ fn 
dispatch(cli: Cli) -> Result<()> { Commands::SnsSearch { keyword, limit, since, until, user, json } => { sns_search::cmd_sns_search(keyword, limit, since, until, user, json) } + Commands::BizArticles { limit, account, since, until, unread, json } => { + biz_articles::cmd_biz_articles(limit, account, since, until, unread, json) + } + Commands::Attachments { chat, kinds, limit, offset, since, until, json } => { + attachments::cmd_attachments(chat, kinds, limit, offset, since, until, json) + } + Commands::Extract { attachment_id, output, overwrite, json } => { + extract::cmd_extract(attachment_id, output, overwrite, json) + } Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd), } } diff --git a/src/cli/transport.rs b/src/cli/transport.rs index ab62da5..23c3e18 100644 --- a/src/cli/transport.rs +++ b/src/cli/transport.rs @@ -1,50 +1,32 @@ use anyhow::{bail, Context, Result}; +use serde::{Deserialize, Serialize}; use std::io::{BufRead, BufReader, Write}; +use std::path::{Path, PathBuf}; use std::time::Duration; use crate::config; use crate::ipc::{Request, Response}; const STARTUP_TIMEOUT_SECS: u64 = 15; +#[cfg(unix)] +const STOP_TIMEOUT_MS: u64 = 2_000; + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct PidFile { + pid: u32, + #[serde(default)] + exe: Option, +} /// 检查 daemon 是否存活 pub fn is_alive() -> bool { #[cfg(unix)] { - use std::os::unix::net::UnixStream; - let sock_path = config::sock_path(); - if !sock_path.exists() { - return false; - } - let mut stream = match UnixStream::connect(&sock_path) { - Ok(s) => s, - Err(_) => return false, - }; - stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); - stream.set_write_timeout(Some(Duration::from_secs(2))).ok(); - - let req = serde_json::json!({"cmd": "ping"}); - if write!(stream, "{}\n", req).is_err() { - return false; - } - let mut line = String::new(); - let mut reader = BufReader::new(&stream); - if reader.read_line(&mut line).is_err() { - return false; - } - serde_json::from_str::(&line) - .ok() 
- .and_then(|v| v.get("pong").and_then(|p| p.as_bool())) - .unwrap_or(false) + ping_unix().unwrap_or(false) } #[cfg(windows)] { - use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; - // 必须用 interprocess 自己的连接 API,和 server 保持一致 - match "wx-cli-daemon".to_ns_name::() { - Ok(name) => Stream::connect(name).is_ok(), - Err(_) => false, - } + ping_windows().unwrap_or(false) } #[cfg(not(any(unix, windows)))] { @@ -62,6 +44,39 @@ pub fn ensure_daemon() -> Result<()> { Ok(()) } +/// 停止 daemon(如果正在运行) +pub fn stop_daemon() -> Result<()> { + let pid_path = config::pid_path(); + let pid_file = read_pid_file(&pid_path)?; + let daemon_alive = is_alive(); + + match pid_file { + Some(pid_file) => { + let belongs = pid_belongs_to_daemon(&pid_file)?; + if daemon_alive && !belongs { + bail!( + "daemon 正在运行,但 {} 指向的 PID {} 无法确认属于当前 wx-daemon", + pid_path.display(), + pid_file.pid + ); + } + if belongs { + terminate_pid(pid_file.pid)?; + } + } + None if daemon_alive => { + bail!( + "daemon 正在运行,但 {} 缺失或损坏,无法安全停止", + pid_path.display() + ); + } + None => {} + } + + cleanup_ipc_files(); + Ok(()) +} + /// 启动 daemon 前检查 `~/.wx-cli/` 可写,给出比"超时"更明确的错误。 /// /// 典型坑:旧版本 `sudo wx init` 把目录留成 root 属主,非 root 的 daemon @@ -98,6 +113,7 @@ fn preflight_cli_dir_writable() -> Result<()> { /// 启动 daemon 进程(自身二进制,设置 WX_DAEMON_MODE=1) fn start_daemon() -> Result<()> { let exe = std::env::current_exe().context("无法获取当前可执行文件路径")?; + let child_pid: u32; // 预检:当前用户是否能写 ~/.wx-cli/。如果不能,给出可操作的错误信息, // 而不是 spawn 一个注定失败的 daemon 然后超时 15s。 @@ -113,7 +129,8 @@ fn start_daemon() -> Result<()> { let _ = std::fs::create_dir_all(parent); } let (stdout_stdio, stderr_stdio) = std::fs::OpenOptions::new() - .create(true).append(true) + .create(true) + .append(true) .open(&log_path) .and_then(|f| f.try_clone().map(|g| (f, g))) .map(|(f, g)| (std::process::Stdio::from(f), std::process::Stdio::from(g))) @@ -124,8 +141,14 @@ fn start_daemon() -> Result<()> { .stdout(stdout_stdio) .stderr(stderr_stdio); // 
SAFETY: setsid() 在 fork 后的子进程中调用,使 daemon 脱离控制终端 - unsafe { cmd.pre_exec(|| { libc::setsid(); Ok(()) }); } - let _ = cmd.spawn().context("无法启动 daemon 进程")?; + unsafe { + cmd.pre_exec(|| { + libc::setsid(); + Ok(()) + }); + } + let child = cmd.spawn().context("无法启动 daemon 进程")?; + child_pid = child.id(); } #[cfg(windows)] @@ -136,12 +159,13 @@ fn start_daemon() -> Result<()> { let _ = std::fs::create_dir_all(parent); } let (stdout_stdio, stderr_stdio) = std::fs::OpenOptions::new() - .create(true).append(true) + .create(true) + .append(true) .open(&log_path) .and_then(|f| f.try_clone().map(|g| (f, g))) .map(|(f, g)| (std::process::Stdio::from(f), std::process::Stdio::from(g))) .unwrap_or_else(|_| (std::process::Stdio::null(), std::process::Stdio::null())); - let _ = std::process::Command::new(&exe) + let child = std::process::Command::new(&exe) .env("WX_DAEMON_MODE", "1") .stdin(std::process::Stdio::null()) .stdout(stdout_stdio) @@ -149,6 +173,7 @@ fn start_daemon() -> Result<()> { .creation_flags(0x00000008) // DETACHED_PROCESS .spawn() .context("无法启动 daemon 进程")?; + child_pid = child.id(); } // 等待 daemon 就绪(最多 STARTUP_TIMEOUT_SECS 秒) @@ -156,6 +181,7 @@ fn start_daemon() -> Result<()> { while std::time::Instant::now() < deadline { std::thread::sleep(Duration::from_millis(300)); if is_alive() { + write_pid_file(child_pid, &exe)?; return Ok(()); } } @@ -167,6 +193,233 @@ fn start_daemon() -> Result<()> { ) } +fn write_pid_file(pid: u32, exe: &Path) -> Result<()> { + if let Some(parent) = config::pid_path().parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("创建 {} 失败", parent.display()))?; + } + let pid_file = PidFile { + pid, + exe: Some(exe.to_path_buf()), + }; + let content = serde_json::to_string(&pid_file)?; + std::fs::write(config::pid_path(), content) + .with_context(|| format!("写入 {} 失败", config::pid_path().display()))?; + Ok(()) +} + +fn read_pid_file(path: &Path) -> Result> { + let content = match std::fs::read_to_string(path) { + 
Ok(content) => content, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err).with_context(|| format!("读取 {} 失败", path.display())), + }; + if let Ok(pid_file) = serde_json::from_str::(&content) { + return Ok(Some(pid_file)); + } + if let Ok(pid) = content.trim().parse::() { + return Ok(Some(PidFile { + pid, + exe: std::env::current_exe().ok(), + })); + } + bail!("{} 不是合法的 PID 文件", path.display()) +} + +fn cleanup_ipc_files() { + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(config::pid_path()); +} + +#[cfg(unix)] +fn ping_unix() -> Result { + use std::os::unix::net::UnixStream; + let sock_path = config::sock_path(); + if !sock_path.exists() { + return Ok(false); + } + let mut stream = UnixStream::connect(&sock_path)?; + stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); + stream.set_write_timeout(Some(Duration::from_secs(2))).ok(); + + let req = serde_json::to_string(&Request::Ping)? + "\n"; + stream.write_all(req.as_bytes())?; + + let mut line = String::new(); + let mut reader = BufReader::new(&stream); + reader.read_line(&mut line)?; + + let resp: Response = serde_json::from_str(&line)?; + Ok(resp.ok && resp.data.get("pong").and_then(|p| p.as_bool()) == Some(true)) +} + +#[cfg(windows)] +fn ping_windows() -> Result { + use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; + + let name = "wx-cli-daemon".to_ns_name::()?; + let stream = Stream::connect(name)?; + let mut reader = BufReader::new(stream); + + let req = serde_json::to_string(&Request::Ping)? 
+ "\n"; + reader.get_mut().write_all(req.as_bytes())?; + + let mut line = String::new(); + reader.read_line(&mut line)?; + + let resp: Response = serde_json::from_str(&line)?; + Ok(resp.ok && resp.data.get("pong").and_then(|p| p.as_bool()) == Some(true)) +} + +fn pid_belongs_to_daemon(pid_file: &PidFile) -> Result { + let expected_exe = pid_file + .exe + .clone() + .or_else(|| std::env::current_exe().ok()); + #[cfg(unix)] + { + unix_pid_matches_daemon(pid_file.pid, expected_exe.as_deref()) + } + #[cfg(windows)] + { + windows_pid_matches_daemon(pid_file.pid, expected_exe.as_deref()) + } + #[cfg(not(any(unix, windows)))] + { + let _ = expected_exe; + Ok(true) + } +} + +#[cfg(unix)] +fn unix_pid_matches_daemon(pid: u32, expected_exe: Option<&Path>) -> Result { + let Some(expected_exe) = expected_exe else { + return Ok(false); + }; + let output = std::process::Command::new("ps") + .args(["-o", "command=", "-p", &pid.to_string()]) + .output() + .with_context(|| format!("读取 PID {} 的 command 失败", pid))?; + if !output.status.success() { + return Ok(false); + } + let command = String::from_utf8_lossy(&output.stdout); + let expected = expected_exe.to_string_lossy(); + if command.contains(expected.as_ref()) { + return Ok(true); + } + let Some(exe_name) = expected_exe.file_name().and_then(|name| name.to_str()) else { + return Ok(false); + }; + Ok(command + .split_whitespace() + .any(|part| part == exe_name || part.ends_with(&format!("/{}", exe_name)))) +} + +#[cfg(windows)] +fn windows_pid_matches_daemon(pid: u32, expected_exe: Option<&Path>) -> Result { + use windows::core::PWSTR; + use windows::Win32::Foundation::CloseHandle; + use windows::Win32::System::Threading::{ + OpenProcess, QueryFullProcessImageNameW, PROCESS_NAME_FORMAT, + PROCESS_QUERY_LIMITED_INFORMATION, + }; + + let Some(expected_exe) = expected_exe else { + return Ok(false); + }; + let handle = match unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, false, pid) } { + Ok(handle) => handle, + Err(_) => 
return Ok(false), + }; + + let mut buf = vec![0u16; 260]; + let mut len = buf.len() as u32; + let actual = unsafe { + let result = QueryFullProcessImageNameW( + handle, + PROCESS_NAME_FORMAT(0), + PWSTR(buf.as_mut_ptr()), + &mut len, + ); + let _ = CloseHandle(handle); + result + }; + if actual.is_err() { + return Ok(false); + } + + let actual_path = PathBuf::from(String::from_utf16_lossy(&buf[..len as usize])); + Ok(normalize_exe_path(&actual_path) == normalize_exe_path(expected_exe)) +} + +#[cfg(windows)] +fn normalize_exe_path(path: &Path) -> String { + path.to_string_lossy() + .replace('\\', "/") + .to_ascii_lowercase() +} + +fn terminate_pid(pid: u32) -> Result<()> { + #[cfg(unix)] + { + terminate_pid_unix(pid) + } + #[cfg(windows)] + { + terminate_pid_windows(pid) + } + #[cfg(not(any(unix, windows)))] + { + let _ = pid; + Ok(()) + } +} + +#[cfg(unix)] +fn terminate_pid_unix(pid: u32) -> Result<()> { + let rc = unsafe { libc::kill(pid as i32, libc::SIGTERM) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::ESRCH) { + return Ok(()); + } + bail!("停止 PID {} 失败: {}", pid, err); + } + + let deadline = std::time::Instant::now() + Duration::from_millis(STOP_TIMEOUT_MS); + while std::time::Instant::now() < deadline { + if !unix_process_exists(pid) { + return Ok(()); + } + std::thread::sleep(Duration::from_millis(50)); + } + + bail!("等待 PID {} 退出超时", pid) +} + +#[cfg(unix)] +fn unix_process_exists(pid: u32) -> bool { + let rc = unsafe { libc::kill(pid as i32, 0) }; + if rc == 0 { + return true; + } + let err = std::io::Error::last_os_error(); + err.raw_os_error() == Some(libc::EPERM) +} + +#[cfg(windows)] +fn terminate_pid_windows(pid: u32) -> Result<()> { + let status = std::process::Command::new("taskkill") + .args(["/F", "/PID", &pid.to_string()]) + .status() + .with_context(|| format!("执行 taskkill /PID {} 失败", pid))?; + if !status.success() { + bail!("停止 PID {} 失败: taskkill exit {:?}", pid, status.code()); + } + 
Ok(()) +} + /// 向 daemon 发送请求并返回响应 pub fn send(req: Request) -> Result { ensure_daemon()?; @@ -189,10 +442,11 @@ pub fn send(req: Request) -> Result { fn send_unix(req: Request) -> Result { use std::os::unix::net::UnixStream; let sock_path = config::sock_path(); - let mut stream = UnixStream::connect(&sock_path) - .context("连接 daemon socket 失败")?; + let mut stream = UnixStream::connect(&sock_path).context("连接 daemon socket 失败")?; stream.set_read_timeout(Some(Duration::from_secs(120))).ok(); - stream.set_write_timeout(Some(Duration::from_secs(120))).ok(); + stream + .set_write_timeout(Some(Duration::from_secs(120))) + .ok(); let req_str = serde_json::to_string(&req)? + "\n"; stream.write_all(req_str.as_bytes())?; @@ -201,8 +455,7 @@ fn send_unix(req: Request) -> Result { let mut reader = BufReader::new(&stream); reader.read_line(&mut line)?; - let resp: Response = serde_json::from_str(&line) - .context("解析 daemon 响应失败")?; + let resp: Response = serde_json::from_str(&line).context("解析 daemon 响应失败")?; if !resp.ok { bail!("{}", resp.error.as_deref().unwrap_or("未知错误")); @@ -215,10 +468,10 @@ fn send_unix(req: Request) -> Result { fn send_windows(req: Request) -> Result { use interprocess::local_socket::{prelude::*, GenericNamespaced, Stream}; - let name = "wx-cli-daemon".to_ns_name::() + let name = "wx-cli-daemon" + .to_ns_name::() .context("构造 pipe name 失败")?; - let stream = Stream::connect(name) - .context("连接 daemon named pipe 失败")?; + let stream = Stream::connect(name).context("连接 daemon named pipe 失败")?; // interprocess::Stream 同时实现 Read + Write,但需要拆分读写端 let mut reader = BufReader::new(stream); @@ -229,8 +482,7 @@ fn send_windows(req: Request) -> Result { let mut line = String::new(); reader.read_line(&mut line)?; - let resp: Response = serde_json::from_str(&line) - .context("解析 daemon 响应失败")?; + let resp: Response = serde_json::from_str(&line).context("解析 daemon 响应失败")?; if !resp.ok { bail!("{}", resp.error.as_deref().unwrap_or("未知错误")); diff --git a/src/config.rs 
b/src/config.rs index 55a03ca..f74fda3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -11,38 +11,50 @@ pub struct Config { pub wechat_process: String, } -/// 从 /config.json 或 $HOME/.wx-cli/config.json 加载配置 +/// 从当前工作目录 / / $HOME/.wx-cli 加载配置 pub fn load_config() -> Result { let config_path = find_config_file()?; let content = std::fs::read_to_string(&config_path) .with_context(|| format!("读取 config.json 失败: {}", config_path.display()))?; - let raw: serde_json::Value = serde_json::from_str(&content) - .with_context(|| "config.json 格式错误")?; + let raw: serde_json::Value = + serde_json::from_str(&content).with_context(|| "config.json 格式错误")?; - let db_dir = raw.get("db_dir") + let db_dir = raw + .get("db_dir") .and_then(|v| v.as_str()) .map(PathBuf::from) .unwrap_or_else(default_db_dir); let base_dir = config_path.parent().unwrap_or(Path::new(".")); - let keys_file = raw.get("keys_file") + let keys_file = raw + .get("keys_file") .and_then(|v| v.as_str()) .map(|s| { let p = PathBuf::from(s); - if p.is_absolute() { p } else { base_dir.join(p) } + if p.is_absolute() { + p + } else { + base_dir.join(p) + } }) .unwrap_or_else(|| base_dir.join("all_keys.json")); - let decrypted_dir = raw.get("decrypted_dir") + let decrypted_dir = raw + .get("decrypted_dir") .and_then(|v| v.as_str()) .map(|s| { let p = PathBuf::from(s); - if p.is_absolute() { p } else { base_dir.join(p) } + if p.is_absolute() { + p + } else { + base_dir.join(p) + } }) .unwrap_or_else(|| base_dir.join("decrypted")); - let wechat_process = raw.get("wechat_process") + let wechat_process = raw + .get("wechat_process") .and_then(|v| v.as_str()) .unwrap_or(default_wechat_process()) .to_string(); @@ -56,40 +68,97 @@ pub fn load_config() -> Result { } fn find_config_file() -> Result { - // 1. 
优先查找可执行文件同目录 - if let Ok(exe) = std::env::current_exe() { - if let Some(dir) = exe.parent() { - let p = dir.join("config.json"); - if p.exists() { - return Ok(p); - } - } + let cwd_dir = std::env::current_dir().ok(); + let exe_dir = std::env::current_exe() + .ok() + .and_then(|exe| exe.parent().map(PathBuf::from)); + let cli_home = cli_home_dir(); + let home_dir = (cli_home != PathBuf::from("/tmp")).then_some(cli_home.as_path()); + + if let Some(path) = find_existing_config_path(cwd_dir.as_deref(), exe_dir.as_deref(), home_dir) + { + return Ok(path); } - // 2. 当前工作目录 - let cwd = std::env::current_dir().unwrap_or_default().join("config.json"); - if cwd.exists() { - return Ok(cwd); - } - // 3. ~/.wx-cli/config.json - if let Some(home) = dirs::home_dir() { - let p = home.join(".wx-cli").join("config.json"); - if p.exists() { - return Ok(p); - } - } - // 返回默认路径(可能不存在,调用方负责处理) - if let Ok(exe) = std::env::current_exe() { - if let Some(dir) = exe.parent() { - return Ok(dir.join("config.json")); - } - } - Ok(PathBuf::from("config.json")) + + Ok(default_config_path( + cwd_dir.as_deref(), + exe_dir.as_deref(), + home_dir, + )) +} + +fn find_existing_config_path( + cwd_dir: Option<&Path>, + exe_dir: Option<&Path>, + home_dir: Option<&Path>, +) -> Option { + let candidates = [ + cwd_dir.map(config_path_in_dir), + exe_dir.map(config_path_in_dir), + home_dir.map(home_config_path), + ]; + candidates.into_iter().flatten().find(|path| path.exists()) +} + +fn default_config_path( + cwd_dir: Option<&Path>, + exe_dir: Option<&Path>, + home_dir: Option<&Path>, +) -> PathBuf { + cwd_dir + .map(config_path_in_dir) + .or_else(|| exe_dir.map(config_path_in_dir)) + .or_else(|| home_dir.map(home_config_path)) + .unwrap_or_else(|| PathBuf::from("config.json")) +} + +fn config_path_in_dir(dir: &Path) -> PathBuf { + dir.join("config.json") +} + +fn home_config_path(home_dir: &Path) -> PathBuf { + home_dir.join(".wx-cli").join("config.json") } pub fn cli_dir() -> PathBuf { - dirs::home_dir() - 
.unwrap_or_else(|| PathBuf::from("/tmp")) - .join(".wx-cli") + cli_home_dir().join(".wx-cli") +} + +fn cli_home_dir() -> PathBuf { + resolve_cli_home( + dirs::home_dir().unwrap_or_else(|| PathBuf::from("/tmp")), + sudo_user_home_dir(), + ) +} + +fn resolve_cli_home(default_home: PathBuf, sudo_home: Option) -> PathBuf { + sudo_home.unwrap_or(default_home) +} + +#[cfg(unix)] +fn sudo_user_home_dir() -> Option { + use std::ffi::{CStr, CString}; + + let sudo_user = std::env::var("SUDO_USER").ok()?; + let sudo_user = sudo_user.trim(); + if sudo_user.is_empty() { + return None; + } + + let c_user = CString::new(sudo_user).ok()?; + unsafe { + let pwd = libc::getpwnam(c_user.as_ptr()); + if pwd.is_null() || (*pwd).pw_dir.is_null() { + return None; + } + let dir = CStr::from_ptr((*pwd).pw_dir).to_str().ok()?; + Some(PathBuf::from(dir)) + } +} + +#[cfg(not(unix))] +fn sudo_user_home_dir() -> Option { + None } pub fn sock_path() -> PathBuf { @@ -127,8 +196,7 @@ fn default_db_dir() -> PathBuf { } #[cfg(target_os = "windows")] { - PathBuf::from(std::env::var("APPDATA").unwrap_or_default()) - .join("Tencent/xwechat") + PathBuf::from(std::env::var("APPDATA").unwrap_or_default()).join("Tencent/xwechat") } #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] { @@ -138,13 +206,21 @@ fn default_db_dir() -> PathBuf { fn default_wechat_process() -> &'static str { #[cfg(target_os = "macos")] - { "WeChat" } + { + "WeChat" + } #[cfg(target_os = "linux")] - { "wechat" } + { + "wechat" + } #[cfg(target_os = "windows")] - { "Weixin.exe" } + { + "Weixin.exe" + } #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] - { "WeChat" } + { + "WeChat" + } } /// 自动检测微信 db_storage 目录 @@ -154,17 +230,7 @@ pub fn auto_detect_db_dir() -> Option { #[cfg(target_os = "macos")] fn detect_db_dir_impl() -> Option { - let home = dirs::home_dir()?; - // 支持 sudo 环境 - let home = if let Ok(sudo_user) = std::env::var("SUDO_USER") { - if !sudo_user.is_empty() { 
- PathBuf::from("/Users").join(&sudo_user) - } else { - home - } - } else { - home - }; + let home = sudo_user_home_dir().or_else(dirs::home_dir)?; let base = home.join("Library/Containers/com.tencent.xinWeChat/Data/Documents/xwechat_files"); if !base.exists() { @@ -190,9 +256,7 @@ fn detect_db_dir_impl() -> Option { #[cfg(target_os = "linux")] fn detect_db_dir_impl() -> Option { let home = dirs::home_dir()?; - let sudo_home = std::env::var("SUDO_USER").ok() - .filter(|s| !s.is_empty()) - .map(|u| PathBuf::from("/home").join(u)); + let sudo_home = sudo_user_home_dir(); let mut candidates: Vec = Vec::new(); for base_home in [Some(home.clone()), sudo_home].into_iter().flatten() { @@ -213,13 +277,36 @@ fn detect_db_dir_impl() -> Option { } } candidates.sort_by_key(|p| { - std::fs::metadata(p) - .and_then(|m| m.modified()) - .unwrap_or(std::time::SystemTime::UNIX_EPOCH) + // 排序:取 db_storage 目录下所有 .db 文件的最新 mtime,而非目录自身的 mtime + // 这样当收到新消息时(只有 .db 文件被更新),能正确识别最新目录 + latest_db_mtime(p).unwrap_or(std::time::SystemTime::UNIX_EPOCH) }); candidates.into_iter().next_back() } +#[cfg(any(target_os = "linux", target_os = "windows"))] +/// 递归查找 db_storage 目录下所有 .db 文件的最新 mtime +fn latest_db_mtime(dir: &Path) -> Option { + let mut latest = None; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + let mtime = if path.is_dir() { + latest_db_mtime(&path).unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else if path.extension().and_then(|s| s.to_str()) == Some("db") { + entry + .metadata() + .and_then(|m| m.modified()) + .unwrap_or(std::time::SystemTime::UNIX_EPOCH) + } else { + continue; + }; + latest = Some(latest.map_or(mtime, |cur| if mtime > cur { mtime } else { cur })); + } + } + latest +} + #[cfg(target_os = "windows")] fn detect_db_dir_impl() -> Option { let appdata = std::env::var("APPDATA").ok()?; @@ -235,8 +322,7 @@ fn detect_db_dir_impl() -> Option { if let Ok(content) = std::fs::read_to_string(&path) { let 
data_root = content.trim().to_string(); if PathBuf::from(&data_root).is_dir() { - let pattern = PathBuf::from(&data_root) - .join("xwechat_files"); + let pattern = PathBuf::from(&data_root).join("xwechat_files"); if let Ok(entries2) = std::fs::read_dir(&pattern) { for entry2 in entries2.flatten() { let storage = entry2.path().join("db_storage"); @@ -250,10 +336,77 @@ fn detect_db_dir_impl() -> Option { } } } - candidates.into_iter().next() + candidates.sort_by_key(|p| latest_db_mtime(p).unwrap_or(std::time::SystemTime::UNIX_EPOCH)); + candidates.into_iter().next_back() } #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] fn detect_db_dir_impl() -> Option { None } + +#[cfg(test)] +mod tests { + use super::{ + config_path_in_dir, default_config_path, find_existing_config_path, home_config_path, + resolve_cli_home, + }; + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(name: &str) -> PathBuf { + let unique = format!( + "wx-cli-config-test-{}-{}-{}", + name, + std::process::id(), + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + ); + let dir = std::env::temp_dir().join(unique); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn resolve_cli_home_prefers_sudo_home_when_present() { + let home = resolve_cli_home(PathBuf::from("/root"), Some(PathBuf::from("/Users/alice"))); + assert_eq!(home, PathBuf::from("/Users/alice")); + } + + #[test] + fn resolve_cli_home_falls_back_to_default_home() { + let home = resolve_cli_home(PathBuf::from("/root"), None); + assert_eq!(home, PathBuf::from("/root")); + } + + #[test] + fn config_path_prefers_cwd_over_exe_and_home() { + let cwd = temp_dir("cwd"); + let exe = temp_dir("exe"); + let home = temp_dir("home"); + fs::write(config_path_in_dir(&cwd), "{}").unwrap(); + fs::write(config_path_in_dir(&exe), "{}").unwrap(); + fs::create_dir_all(home.join(".wx-cli")).unwrap(); + fs::write(home_config_path(&home), 
"{}").unwrap(); + + let path = find_existing_config_path(Some(&cwd), Some(&exe), Some(&home)).unwrap(); + assert_eq!(path, config_path_in_dir(&cwd)); + + fs::remove_dir_all(cwd).unwrap(); + fs::remove_dir_all(exe).unwrap(); + fs::remove_dir_all(home).unwrap(); + } + + #[test] + fn default_config_path_matches_init_write_order() { + let cwd = PathBuf::from("/tmp/cwd"); + let exe = PathBuf::from("/tmp/exe"); + let home = PathBuf::from("/tmp/home"); + + let path = default_config_path(Some(&cwd), Some(&exe), Some(&home)); + assert_eq!(path, cwd.join("config.json")); + } +} diff --git a/src/crypto/mod.rs b/src/crypto/mod.rs index e5407b5..da074e7 100644 --- a/src/crypto/mod.rs +++ b/src/crypto/mod.rs @@ -1,9 +1,9 @@ pub mod wal; -use anyhow::{bail, Result}; use aes::Aes256; -use cbc::Decryptor; +use anyhow::{bail, Result}; use cbc::cipher::{BlockDecryptMut, KeyIvInit}; +use cbc::Decryptor; use std::io::{Read, Write}; use std::path::Path; @@ -65,11 +65,8 @@ fn aes_cbc_decrypt(key: &[u8; 32], iv: &[u8; 16], data: &[u8]) -> Result bail!("密文长度不是 AES 块大小的倍数: {}", data.len()); } // 将 &[u8] 复制为 Block 数组,避免 unsafe from_raw_parts_mut - let mut blocks: Vec = data.chunks_exact(16) - .map(Block::clone_from_slice) - .collect(); - Aes256CbcDec::new(key.into(), iv.into()) - .decrypt_blocks_mut(&mut blocks); + let mut blocks: Vec = data.chunks_exact(16).map(Block::clone_from_slice).collect(); + Aes256CbcDec::new(key.into(), iv.into()).decrypt_blocks_mut(&mut blocks); Ok(blocks.iter().flat_map(|b| b.iter().copied()).collect()) } @@ -92,15 +89,101 @@ pub fn full_decrypt(db_path: &Path, out_path: &Path, enc_key: &[u8; 32]) -> Resu let mut page_buf = vec![0u8; PAGE_SZ]; for pgno in 1..=total_pages { - let n = input.read(&mut page_buf)?; - if n == 0 { break; } - // 不足一页则补零 - if n < PAGE_SZ { - page_buf[n..].fill(0); - } + let page_start = (pgno - 1) * PAGE_SZ; + let bytes_remaining = file_size.saturating_sub(page_start); + read_page(&mut input, &mut page_buf, bytes_remaining)?; let dec = 
decrypt_page(enc_key, &page_buf, pgno as u32)?; output.write_all(&dec)?; } Ok(()) } + +fn read_page( + input: &mut impl Read, + page_buf: &mut [u8], + bytes_remaining: usize, +) -> std::io::Result { + let expected = bytes_remaining.min(PAGE_SZ); + input.read_exact(&mut page_buf[..expected])?; + if expected < PAGE_SZ { + page_buf[expected..].fill(0); + } + Ok(expected) +} + +#[cfg(test)] +mod tests { + use super::{read_page, PAGE_SZ}; + use std::io::{self, Read}; + + struct ChunkedReader { + chunks: Vec>, + chunk_idx: usize, + offset: usize, + } + + impl ChunkedReader { + fn new(chunks: Vec>) -> Self { + Self { + chunks, + chunk_idx: 0, + offset: 0, + } + } + } + + impl Read for ChunkedReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.chunk_idx >= self.chunks.len() { + return Ok(0); + } + let chunk = &self.chunks[self.chunk_idx]; + let remaining = &chunk[self.offset..]; + let n = remaining.len().min(buf.len()); + buf[..n].copy_from_slice(&remaining[..n]); + self.offset += n; + if self.offset == chunk.len() { + self.chunk_idx += 1; + self.offset = 0; + } + Ok(n) + } + } + + #[test] + fn read_page_reads_across_short_chunks() { + let mut reader = ChunkedReader::new(vec![vec![1; 32], vec![2; PAGE_SZ - 32]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let n = read_page(&mut reader, &mut page_buf, PAGE_SZ).unwrap(); + + assert_eq!(n, PAGE_SZ); + assert_eq!(page_buf[0], 1); + assert_eq!(page_buf[31], 1); + assert_eq!(page_buf[32], 2); + assert_eq!(page_buf[PAGE_SZ - 1], 2); + } + + #[test] + fn read_page_zero_pads_last_partial_page() { + let mut reader = ChunkedReader::new(vec![vec![7; 8], vec![9; 4]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let n = read_page(&mut reader, &mut page_buf, 12).unwrap(); + + assert_eq!(n, 12); + assert_eq!(&page_buf[..8], &[7; 8]); + assert_eq!(&page_buf[8..12], &[9; 4]); + assert!(page_buf[12..].iter().all(|&b| b == 0)); + } + + #[test] + fn read_page_errors_on_early_eof() { + let mut reader = 
ChunkedReader::new(vec![vec![1; 8]]); + let mut page_buf = vec![0u8; PAGE_SZ]; + + let err = read_page(&mut reader, &mut page_buf, 16).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof); + } +} diff --git a/src/daemon/cache.rs b/src/daemon/cache.rs index 5ad8a14..813f717 100644 --- a/src/daemon/cache.rs +++ b/src/daemon/cache.rs @@ -30,6 +30,7 @@ struct CacheEntry { pub struct DbCache { db_dir: PathBuf, cache_dir: PathBuf, + mtime_file: PathBuf, all_keys: HashMap, // rel_key -> enc_key(hex) inner: Arc>>, } @@ -39,21 +40,36 @@ impl DbCache { db_dir: PathBuf, all_keys: HashMap, ) -> Result { - let cache_dir = config::cache_dir(); + Self::with_dirs(db_dir, config::cache_dir(), config::mtime_file(), all_keys).await + } + + /// 注入 `cache_dir` / `mtime_file`(测试用 + 生产 `new()` 复用) + pub(crate) async fn with_dirs( + db_dir: PathBuf, + cache_dir: PathBuf, + mtime_file: PathBuf, + all_keys: HashMap, + ) -> Result { tokio::fs::create_dir_all(&cache_dir).await?; - let inner: HashMap = HashMap::new(); let cache = DbCache { db_dir, cache_dir, + mtime_file, all_keys, - inner: Arc::new(Mutex::new(inner)), + inner: Arc::new(Mutex::new(HashMap::new())), }; cache.load_persistent().await; Ok(cache) } + /// 数据库根目录(即 `/db_storage`)。 + /// 上层(attachment resolver)需要 `db_dir.parent()` 来定位 `msg/attach/...` 解密图片。 + pub fn db_dir(&self) -> &Path { + &self.db_dir + } + fn cache_file_path(&self, rel_key: &str) -> PathBuf { let hash = format!("{:x}", md5::compute(rel_key.as_bytes())); self.cache_dir.join(format!("{}.db", hash)) @@ -106,7 +122,7 @@ impl DbCache { /// 从持久化文件加载 mtime 记录,复用未过期的解密文件 async fn load_persistent(&self) { - let mtime_file = config::mtime_file(); + let mtime_file = &self.mtime_file; let content = match tokio::fs::read_to_string(&mtime_file).await { Ok(c) => c, Err(_) => return, @@ -127,12 +143,17 @@ impl DbCache { let wal_path = wal_path_for(&db_path); let db_mt = mtime_nanos(&db_path); - let wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 
}; + let _wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; - if db_mt == entry.db_mt && wal_mt == entry.wal_mt { + // 只要主 .db 没变,就把 cached 产物载回来。 + // 如果 WAL mtime 变了,后续 `get()` 会自动走 Path 2:在已有 cached DB 上增量 apply_wal, + // 而不是 daemon 重启后第一条请求又退回全量解密。 + if db_mt == entry.db_mt { inner.insert(rel_key.clone(), CacheEntry { db_mtime: db_mt, - wal_mtime: wal_mt, + // 保留"cached 产物构建时看到的 wal_mtime",让 `get()` 去比较当前 WAL + // 是否发生了变化,从而决定 exact-hit 还是 WAL 增量。 + wal_mtime: entry.wal_mt, decrypted_path: dec_path, }); reused += 1; @@ -145,7 +166,7 @@ impl DbCache { /// 持久化 mtime 记录 async fn save_persistent(&self) { - let mtime_file = config::mtime_file(); + let mtime_file = &self.mtime_file; let inner = self.inner.lock().await; let data: HashMap = inner.iter().map(|(k, v)| { (k.clone(), MtimeEntry { @@ -163,7 +184,14 @@ impl DbCache { /// 获取解密后的数据库路径 /// - /// 如果 mtime 未变,直接返回缓存路径;否则重新解密 + /// 三种命中路径: + /// 1. 主 `.db` 和 WAL mtime 都未变 → 直接返回缓存路径 + /// 2. 主 `.db` 未变、WAL mtime 变了 → 在已有 cached 产物上**增量** `apply_wal` + /// (apply_wal 是幂等的:旧帧 redo 同样的 page 写入,新帧追加生效;不重新 full_decrypt) + /// 3. 
主 `.db` mtime 变了 → 重新 `full_decrypt` + `apply_wal` + /// + /// WeChat 在写消息时只 append WAL(除非触发 checkpoint),因此 path 2 是常态; + /// 这条路径把"每次请求都全量解密 ~1.8GB DB(~120s)"压到"只解 WAL 帧(典型 < 10s)"。 pub async fn get(&self, rel_key: &str) -> Result> { let enc_key_hex = match self.all_keys.get(rel_key) { Some(k) => k.clone(), @@ -179,28 +207,53 @@ impl DbCache { } let wal_path = wal_path_for(&db_path); - let db_mt = mtime_nanos(&db_path); let wal_mt = if wal_path.exists() { mtime_nanos(&wal_path) } else { 0 }; - // 检查缓存 - { + let cached = { let inner = self.inner.lock().await; - if let Some(entry) = inner.get(rel_key) { - if entry.db_mtime == db_mt - && entry.wal_mtime == wal_mt - && entry.decrypted_path.exists() - { - return Ok(Some(entry.decrypted_path.clone())); - } - } - } + inner.get(rel_key).cloned() + }; - // 需要重新解密 - let out_path = self.cache_file_path(rel_key); let enc_key_bytes = hex_to_32bytes(&enc_key_hex) .with_context(|| format!("密钥格式错误: {}", rel_key))?; + // Path 1 / Path 2:主 .db mtime 未变且 cached 产物仍在 + if let Some(entry) = cached.as_ref() { + if entry.db_mtime == db_mt && entry.decrypted_path.exists() { + if entry.wal_mtime == wal_mt { + return Ok(Some(entry.decrypted_path.clone())); + } + + // Path 2: WAL-only 变化 → 在 cached 产物上重新 apply_wal + // 不存在的 WAL 也要更新 wal_mtime=0(虽然 SQLite 不会自发"主库不变 + WAL 清空") + let out_path = entry.decrypted_path.clone(); + let t0 = std::time::Instant::now(); + if wal_path.exists() { + let out_path2 = out_path.clone(); + let wal_path2 = wal_path.clone(); + let key_copy = enc_key_bytes; + tokio::task::spawn_blocking(move || { + wal::apply_wal(&wal_path2, &out_path2, &key_copy) + }).await??; + } + eprintln!("[cache] WAL 增量 {} ({}ms)", rel_key, t0.elapsed().as_millis()); + + { + let mut inner = self.inner.lock().await; + inner.insert(rel_key.to_string(), CacheEntry { + db_mtime: db_mt, + wal_mtime: wal_mt, + decrypted_path: out_path.clone(), + }); + } + self.save_persistent().await; + return Ok(Some(out_path)); + } + } + + // Path 3: 主 .db 变了 / 
缓存 miss → 全量解密 + let out_path = self.cache_file_path(rel_key); let t0 = std::time::Instant::now(); let db_path2 = db_path.clone(); let out_path2 = out_path.clone(); @@ -209,7 +262,6 @@ impl DbCache { crypto::full_decrypt(&db_path2, &out_path2, &key_copy) }).await??; - // 应用 WAL if wal_path.exists() { let out_path3 = out_path.clone(); let wal_path3 = wal_path.clone(); @@ -219,10 +271,8 @@ impl DbCache { }).await??; } - let elapsed_ms = t0.elapsed().as_millis(); - eprintln!("[cache] 解密 {} ({}ms)", rel_key, elapsed_ms); + eprintln!("[cache] 全量解密 {} ({}ms)", rel_key, t0.elapsed().as_millis()); - // 更新内存缓存 { let mut inner = self.inner.lock().await; inner.insert(rel_key.to_string(), CacheEntry { @@ -262,3 +312,231 @@ fn hex_to_32bytes(s: &str) -> Result<[u8; 32]> { } Ok(out) } + +#[cfg(test)] +mod tests { + use super::*; + + /// 64 字符 hex(不需要是真 SQLCipher key — 仅用来证明"是否触发了 full_decrypt") + const FAKE_KEY_HEX: &str = + "0000000000000000000000000000000000000000000000000000000000000000"; + + /// 路径区分约定: + /// - 完全 hit / WAL 增量 → `decrypted_path` **内容不变** + /// - 全量解密 → `crypto::full_decrypt` 把 cached file **重写为 PAGE_SZ 倍数** + /// (fake key 解出 4096 字节垃圾,但仍写入 — 不验证内容合法性) + /// 因此用 cached file 的"size 是否被改"来判断走了哪条路径。 + const ORIGINAL_CACHED_BYTES: &[u8] = b"original cached contents"; + + fn unique_tmpdir(tag: &str) -> PathBuf { + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let p = std::env::temp_dir().join(format!("wx-cli-cache-test-{}-{}-{}", tag, pid, nanos)); + std::fs::create_dir_all(&p).unwrap(); + p + } + + /// 准备一份 "DbCache 已经 reuse 了 cached 解密产物" 的初始状态。 + /// 返回 (cache, db_path, decrypted_path, mtime_file, rel_key)。 + async fn setup_seeded_cache(tag: &str) -> (DbCache, PathBuf, PathBuf, PathBuf, String) { + let root = unique_tmpdir(tag); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + 
std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": 0u64, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file.clone(), all_keys) + .await + .unwrap(); + + (cache, db_path, decrypted_path, mtime_file, rel_key) + } + + #[tokio::test] + async fn exact_mtime_hit_skips_decrypt() { + let (cache, _db_path, decrypted_path, _mtime_file, rel_key) = + setup_seeded_cache("exact").await; + + let p = cache.get(&rel_key).await.unwrap().expect("cache should hit"); + assert_eq!(p, decrypted_path); + + // 完全 hit → cached file 内容不应被改 + let body = std::fs::read(&decrypted_path).unwrap(); + assert_eq!(body, ORIGINAL_CACHED_BYTES); + } + + #[tokio::test] + async fn wal_only_change_uses_incremental_path() { + // 自己构造(不走 setup_seeded_cache)以便初始 mtime.json 同时写 db_mt 和 wal_mt + let root = unique_tmpdir("walonly"); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let wal_path = wal_path_for(&db_path); + std::fs::write(&wal_path, [0u8; 31]).unwrap(); // ≤ WAL_HDR_SZ=32 
→ apply_wal noop + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let wal_mt0 = mtime_nanos(&wal_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": wal_mt0, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + let mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file, all_keys) + .await + .unwrap(); + + // 第一次:完全 hit + let p1 = cache.get(&rel_key).await.unwrap().expect("first get hits"); + assert_eq!(p1, decrypted_path); + assert_eq!(std::fs::read(&decrypted_path).unwrap(), ORIGINAL_CACHED_BYTES); + + // bump WAL mtime(重写仍 31 bytes,apply_wal 仍 noop) + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&wal_path, [0xffu8; 31]).unwrap(); + let wal_mt1 = mtime_nanos(&wal_path); + assert_ne!(wal_mt0, wal_mt1, "rewriting WAL should bump mtime"); + + // 第二次:WAL 增量路径 + // 如果错误地走 full_decrypt → cached file 大小会被重写为 ≥ PAGE_SZ + let p2 = cache + .get(&rel_key) + .await + .unwrap() + .expect("WAL-incremental path should produce path"); + assert_eq!(p2, decrypted_path); + + let body = std::fs::read(&decrypted_path).unwrap(); + assert_eq!( + body, ORIGINAL_CACHED_BYTES, + "WAL-incremental should NOT rewrite cached file" + ); + } + + #[tokio::test] + async fn db_mtime_change_triggers_full_decrypt() { + let (cache, db_path, decrypted_path, _mtime_file, rel_key) = + setup_seeded_cache("dbchange").await; + + // bump 主 .db 的 mtime(重写一份不同 bytes) + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&db_path, b"different fake encrypted bytes").unwrap(); + 
assert_ne!( + mtime_nanos(&db_path), + cache.inner.lock().await.get(&rel_key).unwrap().db_mtime, + "rewriting db file should bump mtime" + ); + + // 走 full_decrypt 路径 → fake key 不会让 full_decrypt 失败(它不验证内容), + // 但会把 cached file 重写为 PAGE_SZ 倍数。原始内容是 24 bytes,重写后应该 ≥ 4096 bytes。 + let p = cache + .get(&rel_key) + .await + .unwrap() + .expect("cache should produce path"); + assert_eq!(p, decrypted_path); + + let new_size = std::fs::metadata(&decrypted_path).unwrap().len() as usize; + assert!( + new_size >= crate::crypto::PAGE_SZ, + "expected full_decrypt to rewrite cached file to PAGE_SZ multiple, got size={}", + new_size, + ); + } + + #[tokio::test] + async fn restart_with_wal_change_still_reuses_cached_db_then_applies_wal() { + let root = unique_tmpdir("restart-wal"); + let db_dir = root.join("db_storage"); + let cache_dir = root.join("cache"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let rel_key = "message_0.db".to_string(); + let db_path = db_dir.join(&rel_key); + std::fs::write(&db_path, b"fake encrypted db").unwrap(); + + let wal_path = wal_path_for(&db_path); + std::fs::write(&wal_path, [0u8; 31]).unwrap(); // WAL 增量仍是 noop + + let cached_hash = format!("{:x}", md5::compute(rel_key.as_bytes())); + let decrypted_path = cache_dir.join(format!("{}.db", cached_hash)); + std::fs::write(&decrypted_path, ORIGINAL_CACHED_BYTES).unwrap(); + + let db_mt = mtime_nanos(&db_path); + let wal_mt0 = mtime_nanos(&wal_path); + let mtime_file = cache_dir.join("_mtimes.json"); + let payload = serde_json::to_string(&serde_json::json!({ + &rel_key: { + "db_mt": db_mt, + "wal_mt": wal_mt0, + "path": decrypted_path.display().to_string(), + } + })) + .unwrap(); + std::fs::write(&mtime_file, payload).unwrap(); + + // 模拟 daemon 重启前又有新消息写入 WAL + std::thread::sleep(std::time::Duration::from_millis(20)); + std::fs::write(&wal_path, [0xffu8; 31]).unwrap(); + let wal_mt1 = mtime_nanos(&wal_path); + assert_ne!(wal_mt0, wal_mt1); + + let 
mut all_keys = HashMap::new(); + all_keys.insert(rel_key.clone(), FAKE_KEY_HEX.to_string()); + let cache = DbCache::with_dirs(db_dir, cache_dir, mtime_file, all_keys) + .await + .unwrap(); + + let p = cache.get(&rel_key).await.unwrap().expect("cache should reuse persisted DB"); + assert_eq!(p, decrypted_path); + let body = std::fs::read(&decrypted_path).unwrap(); + assert_eq!( + body, ORIGINAL_CACHED_BYTES, + "restart + WAL-only change should still reuse cached DB and avoid full_decrypt" + ); + } +} diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index bec6359..315fcd6 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -25,9 +25,7 @@ async fn async_run() -> Result<()> { tokio::fs::create_dir_all(&cli_dir).await?; tokio::fs::create_dir_all(config::cache_dir()).await?; - // 写 PID 文件 let pid = std::process::id(); - tokio::fs::write(config::pid_path(), pid.to_string()).await?; // 注册 SIGTERM / SIGINT 处理 setup_signal_handler().await; @@ -39,7 +37,8 @@ async fn async_run() -> Result<()> { eprintln!("[daemon] DB_DIR: {}", cfg.db_dir.display()); // 加载密钥 - let keys_content = tokio::fs::read_to_string(&cfg.keys_file).await + let keys_content = tokio::fs::read_to_string(&cfg.keys_file) + .await .map_err(|e| anyhow::anyhow!("读取密钥文件 {:?} 失败: {}", cfg.keys_file, e))?; let keys_raw: serde_json::Value = serde_json::from_str(&keys_content)?; let all_keys = extract_keys(&keys_raw); @@ -71,7 +70,9 @@ async fn async_run() -> Result<()> { let names_arc = Arc::new(tokio::sync::RwLock::new(Arc::new(names))); // 启动 IPC server(阻塞) - server::serve(Arc::clone(&db), Arc::clone(&names_arc)).await?; + let serve_result = server::serve(Arc::clone(&db), Arc::clone(&names_arc)).await; + cleanup_ipc_files(); + serve_result?; Ok(()) } @@ -85,7 +86,9 @@ fn extract_keys(json: &serde_json::Value) -> HashMap { let mut result = HashMap::new(); if let Some(obj) = json.as_object() { for (k, v) in obj { - if k.starts_with('_') { continue; } + if k.starts_with('_') { + continue; + } let enc_key = if 
let Some(s) = v.as_str() { s.to_string() } else if let Some(obj2) = v.as_object() { @@ -121,8 +124,13 @@ async fn setup_signal_handler() { }); } +#[cfg(unix)] fn cleanup_and_exit() { - let _ = std::fs::remove_file(config::sock_path()); - let _ = std::fs::remove_file(config::pid_path()); + cleanup_ipc_files(); std::process::exit(0); } + +fn cleanup_ipc_files() { + let _ = std::fs::remove_file(config::sock_path()); + let _ = std::fs::remove_file(config::pid_path()); +} diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 5daabfe..eba476c 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -4,8 +4,8 @@ use regex::Regex; use roxmltree::{Document, Node}; use rusqlite::Connection; use serde_json::{json, Value}; -use std::collections::HashMap; -use std::sync::OnceLock; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, OnceLock}; use super::cache::DbCache; @@ -141,6 +141,7 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let display = names.display(&username); let chat_type = chat_type_of(&username, names); @@ -151,9 +152,13 @@ pub async fn q_sessions(db: &DbCache, names: &Names, limit: usize) -> Result = Vec::new(); + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let since2 = since; let until2 = until; let limit2 = limit; @@ -211,7 +222,7 @@ pub async fn q_history( let msgs: Vec = tokio::task::spawn_blocking(move || { // per-DB 软上限:offset + limit 已足够全局分页,避免大群全量加载 let per_db_cap = offset2 + limit2; - query_messages(&path, &tname, &uname, is_group2, &names_map, since2, until2, msg_type, 
per_db_cap, 0) + query_messages(&path, &tname, &uname, is_group2, &names_map, &group_nicknames2, since2, until2, msg_type, per_db_cap, 0) }).await??; all_msgs.extend(msgs); @@ -311,22 +322,45 @@ pub async fn q_search( by_path.entry(p).or_default().push((t, d, u)); } - let mut results: Vec = Vec::new(); + let mut group_usernames = HashSet::new(); + for table_list in by_path.values() { + for (_, _, uname) in table_list { + if uname.contains("@chatroom") { + group_usernames.insert(uname.clone()); + } + } + } + let group_nicknames_by_chat = load_group_nickname_maps(db, group_usernames) + .await + .unwrap_or_default(); + let group_nicknames_by_chat = Arc::new(group_nicknames_by_chat); + + // 多个 message_*.db 之间没有数据依赖,并发解密 + 查询。每个 DB 内部仍按 + // table 串行(共享同一 sqlite Connection 不能跨线程移动)。原版本是 N 个 DB + // 串行 await,活跃账号上 N 个分片要轮 N 次磁盘 IO;现在 JoinSet 把它们一次 + // 全部 dispatch 到 blocking pool,整体 latency 退化为单 DB 慢路径。 let kw = keyword.to_string(); + let mut join_set: tokio::task::JoinSet>> = tokio::task::JoinSet::new(); for (db_path, table_list) in by_path { let kw2 = kw.clone(); let since2 = since; let until2 = until; let limit2 = limit * 3; - let names_map2 = names.map.clone(); - let found: Vec = match tokio::task::spawn_blocking(move || { + let group_nicknames_by_chat2 = Arc::clone(&group_nicknames_by_chat); + let db_path_for_log = db_path.clone(); + + join_set.spawn_blocking(move || { let conn = Connection::open(&db_path)?; let mut all = Vec::new(); + let empty_group_nicknames = HashMap::new(); for (tname, display, uname) in &table_list { let is_group = uname.contains("@chatroom"); + let group_nicknames = group_nicknames_by_chat2 + .get(uname) + .unwrap_or(&empty_group_nicknames); match search_in_table(&conn, tname, &uname, is_group, - &names_map2, &kw2, since2, until2, msg_type, limit2) + &names_map2, group_nicknames, &kw2, since2, until2, msg_type, limit2) { Ok(rows) => { for mut row in rows { @@ -340,17 +374,20 @@ pub async fn q_search( all.push(row); } } - Err(e) => 
eprintln!("[search] skip table {}: {}", tname, e), + Err(e) => eprintln!("[search] skip table {} (db={}): {}", tname, db_path_for_log, e), } } - Ok::<_, anyhow::Error>(all) - }).await { - Ok(Ok(v)) => v, - Ok(Err(e)) => { eprintln!("[search] skip DB: {}", e); continue; } - Err(e) => { eprintln!("[search] task error: {}", e); continue; } - }; + Ok(all) + }); + } - results.extend(found); + let mut results: Vec = Vec::new(); + while let Some(joined) = join_set.join_next().await { + match joined { + Ok(Ok(rows)) => results.extend(rows), + Ok(Err(e)) => eprintln!("[search] skip DB: {}", e), + Err(e) => eprintln!("[search] task error: {}", e), + } } results.sort_by_key(|r| std::cmp::Reverse(r["timestamp"].as_i64().unwrap_or(0))); @@ -359,9 +396,14 @@ pub async fn q_search( } /// 查询联系人 +/// +/// 只返回真实联系人(`chat_type_of == "private"`)。`names.map` 是从 `contact` 表 +/// 全量加载的,里面同时包含群(`@chatroom`)、公众号(`gh_*` / `biz_*` / verify_flag != 0)、 +/// 折叠入口(`brandsessionholder` / `@placeholder_foldgroup`)以及微信内部 `@xxx` 系统账号。 +/// 这些都不应该出现在 `wx contacts` 输出里,统一走 `chat_type_of` 这条同样的真相判定。 pub async fn q_contacts(names: &Names, query: Option<&str>, limit: usize) -> Result { let mut contacts: Vec = names.map.iter() - .filter(|(u, _)| !u.starts_with("gh_") && !u.starts_with("biz_")) + .filter(|(u, _)| chat_type_of(u, names) == "private") .map(|(u, d)| json!({ "username": u, "display": d })) .collect(); @@ -461,6 +503,7 @@ fn query_messages( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, since: Option, until: Option, msg_type: Option, @@ -470,19 +513,18 @@ fn query_messages( let conn = Connection::open(db_path)?; let id2u = load_id2u(&conn); - let mut clauses = Vec::new(); + let mut clauses: Vec = Vec::new(); let mut params: Vec> = Vec::new(); if let Some(s) = since { - clauses.push("create_time >= ?"); + clauses.push("create_time >= ?".into()); params.push(Box::new(s)); } if let Some(u) = until { - clauses.push("create_time <= ?"); + 
clauses.push("create_time <= ?".into()); params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?"); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } let where_clause = if clauses.is_empty() { String::new() @@ -518,17 +560,22 @@ fn query_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -539,6 +586,7 @@ fn search_in_table( chat_username: &str, is_group: bool, names_map: &HashMap, + group_nicknames: &HashMap, keyword: &str, since: Option, until: Option, @@ -548,8 +596,14 @@ fn search_in_table( let id2u = load_id2u(conn); // 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符 let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); - let mut clauses = vec!["message_content LIKE ? ESCAPE '\\'".to_string()]; - let mut params: Vec> = vec![Box::new(format!("%{}%", escaped_kw))]; + let search_decoded_content = msg_type == Some(49); + let keyword_lower = keyword.to_lowercase(); + let mut clauses: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + if !search_decoded_content { + clauses.push("message_content LIKE ? 
ESCAPE '\\'".to_string()); + params.push(Box::new(format!("%{}%", escaped_kw))); + } if let Some(s) = since { clauses.push("create_time >= ?".into()); params.push(Box::new(s)); @@ -559,17 +613,23 @@ fn search_in_table( params.push(Box::new(u)); } if let Some(t) = msg_type { - clauses.push("local_type = ?".into()); - params.push(Box::new(t)); + push_msg_type_filter(&mut clauses, &mut params, t); } - let where_clause = format!("WHERE {}", clauses.join(" AND ")); + let where_clause = if clauses.is_empty() { + String::new() + } else { + format!("WHERE {}", clauses.join(" AND ")) + }; + let limit_clause = if search_decoded_content { "" } else { " LIMIT ?" }; let sql = format!( "SELECT local_id, local_type, create_time, real_sender_id, message_content, WCDB_CT_message_content - FROM [{}] {} ORDER BY create_time DESC LIMIT ?", - table, where_clause + FROM [{}] {} ORDER BY create_time DESC{}", + table, where_clause, limit_clause ); - params.push(Box::new(limit as i64)); + if !search_decoded_content { + params.push(Box::new(limit as i64)); + } let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; @@ -589,21 +649,51 @@ fn search_in_table( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map); + let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { + continue; + } + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": 
fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); + if search_decoded_content && result.len() >= limit { + break; + } } Ok(result) } +fn push_msg_type_filter( + clauses: &mut Vec, + params: &mut Vec>, + msg_type: i64, +) { + clauses.push("(local_type & 4294967295) = ?".into()); + params.push(Box::new(msg_type)); +} + +fn matches_search_text(raw: &str, formatted: &str, keyword: &str, keyword_lower: &str) -> bool { + contains_search_text(raw, keyword, keyword_lower) + || contains_search_text(formatted, keyword, keyword_lower) +} + +fn contains_search_text(haystack: &str, keyword: &str, keyword_lower: &str) -> bool { + haystack.contains(keyword) + || (!keyword_lower.is_empty() && haystack.to_lowercase().contains(keyword_lower)) +} + fn load_id2u(conn: &Connection) -> HashMap { let mut map = HashMap::new(); if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") { @@ -618,6 +708,368 @@ fn load_id2u(conn: &Connection) -> HashMap { map } +async fn load_group_nicknames( + db: &DbCache, + chat_username: &str, +) -> Result> { + if !chat_username.contains("@chatroom") { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? else { + return Ok(HashMap::new()); + }; + let chat = chat_username.to_string(); + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + Ok::<_, anyhow::Error>(load_group_nickname_map_from_conn(&conn, &chat, None)) + }).await? +} + +async fn load_group_nickname_maps( + db: &DbCache, + chat_usernames: HashSet, +) -> Result>> { + if chat_usernames.is_empty() { + return Ok(HashMap::new()); + } + let Some(contact_p) = db.get("contact/contact.db").await? 
else { + return Ok(HashMap::new()); + }; + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&contact_p)?; + let mut out = HashMap::new(); + for chat in chat_usernames { + let nicknames = load_group_nickname_map_from_conn(&conn, &chat, None); + if !nicknames.is_empty() { + out.insert(chat, nicknames); + } + } + Ok::<_, anyhow::Error>(out) + }).await? +} + +fn load_group_nickname_map_from_conn( + conn: &Connection, + chat_username: &str, + targets: Option<&HashSet>, +) -> HashMap { + if !chat_username.contains("@chatroom") { + return HashMap::new(); + } + let ext = load_group_ext_buffer(conn, chat_username); + + let owned_targets = if targets.is_none() { + load_group_member_username_set(conn, chat_username) + } else { + None + }; + let targets = targets.or(owned_targets.as_ref()); + + ext.as_deref() + .map(|buf| parse_group_nickname_map(buf, targets)) + .unwrap_or_default() +} + +fn load_group_ext_buffer( + conn: &Connection, + chat_username: &str, +) -> Option> { + [ + "SELECT ext_buffer FROM chat_room WHERE username = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE chat_room_name = ? LIMIT 1", + "SELECT ext_buffer FROM chat_room WHERE name = ? LIMIT 1", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, Option>>(0)) + .ok() + .flatten() + }) +} + +fn load_group_member_username_set( + conn: &Connection, + chat_username: &str, +) -> Option> { + let room_id: i64 = [ + "SELECT id FROM chat_room WHERE username = ?", + "SELECT id FROM chat_room WHERE chat_room_name = ?", + "SELECT id FROM chat_room WHERE name = ?", + ].iter().find_map(|sql| { + conn.query_row(sql, [chat_username], |row| row.get::<_, i64>(0)).ok() + }).unwrap_or(0); + + if room_id == 0 { + return None; + } + + let mut stmt = conn.prepare( + "SELECT c.username + FROM chatroom_member cm + LEFT JOIN contact c ON c.id = cm.member_id + WHERE cm.room_id = ?" 
+ ).ok()?; + let usernames: HashSet = stmt.query_map([room_id], |row| { + row.get::<_, String>(0) + }).ok()? + .filter_map(|r| r.ok()) + .filter(|uid| !uid.is_empty()) + .collect(); + + if usernames.is_empty() { None } else { Some(usernames) } +} + +fn decode_proto_varint(raw: &[u8], offset: usize) -> Option<(u64, usize)> { + let mut value = 0u64; + let mut shift = 0u32; + let mut pos = offset; + while pos < raw.len() { + let byte = raw[pos]; + pos += 1; + value |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some((value, pos)); + } + shift += 7; + if shift > 63 { + return None; + } + } + None +} + +fn proto_len_fields<'a>(raw: &'a [u8]) -> Vec<(u64, &'a [u8])> { + let mut fields = Vec::new(); + let mut idx = 0usize; + while idx < raw.len() { + let Some((tag, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + let field_no = tag >> 3; + let wire_type = tag & 0x07; + match wire_type { + 0 => { + let Some((_, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + } + 1 => { + let Some(next) = idx.checked_add(8) else { break; }; + if next > raw.len() { break; } + idx = next; + } + 2 => { + let Some((size, next)) = decode_proto_varint(raw, idx) else { break; }; + if next <= idx { break; } + idx = next; + let Ok(size) = usize::try_from(size) else { break; }; + let Some(end) = idx.checked_add(size) else { break; }; + if end > raw.len() { break; } + fields.push((field_no, &raw[idx..end])); + idx = end; + } + 5 => { + let Some(next) = idx.checked_add(4) else { break; }; + if next > raw.len() { break; } + idx = next; + } + _ => break, + } + } + fields +} + +fn proto_string_fields(raw: &[u8]) -> Vec<(u64, String)> { + proto_len_fields(raw) + .into_iter() + .filter_map(|(field_no, value)| { + if value.is_empty() || value.len() > 256 { + return None; + } + let text = std::str::from_utf8(value).ok()?.trim().to_string(); + if text.is_empty() || 
text.chars().any(char::is_control) { + return None; + } + Some((field_no, text)) + }) + .collect() +} + +fn is_strong_username_hint(value: &str) -> bool { + value.starts_with("wxid_") + || value.ends_with("@chatroom") + || value.starts_with("gh_") + || value.contains('@') +} + +fn looks_like_username(value: &str) -> bool { + let value = value.trim(); + if value.is_empty() { + return false; + } + if is_strong_username_hint(value) { + return true; + } + if value.len() < 6 || value.len() > 32 || value.chars().any(char::is_whitespace) { + return false; + } + let mut chars = value.chars(); + let Some(first) = chars.next() else { return false; }; + first.is_ascii_alphabetic() + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') +} + +fn pick_member_username( + strings: &[(u64, String)], + targets: Option<&HashSet>, +) -> Option { + if let Some(targets) = targets { + return strings + .iter() + .find(|(_, value)| targets.contains(value)) + .map(|(_, value)| value.clone()); + } + + for field_no in [1u64, 4u64] { + if let Some((_, value)) = strings + .iter() + .find(|(f, value)| *f == field_no && looks_like_username(value)) + { + return Some(value.clone()); + } + } + + strings + .iter() + .find(|(_, value)| is_strong_username_hint(value)) + .or_else(|| strings.iter().find(|(_, value)| looks_like_username(value))) + .map(|(_, value)| value.clone()) +} + +fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option { + let mut best_score = i64::MIN; + let mut best = String::new(); + + for (idx, (field_no, value)) in strings.iter().enumerate() { + let value = value.trim(); + if value.is_empty() + || value == username + || is_strong_username_hint(value) + || value.contains('\n') + || value.contains('\r') + || value.len() > 64 + { + continue; + } + + let mut score = 0i64; + if *field_no == 2 { + score += 100; + } + if !looks_like_username(value) { + score += 20; + } + score += (32usize.saturating_sub(value.len())) as i64; + score = score * 1000 - 
idx as i64; + + if score > best_score { + best_score = score; + best = value.to_string(); + } + } + + if best.is_empty() { None } else { Some(best) } +} + +fn parse_group_nickname_map( + ext_buffer: &[u8], + targets: Option<&HashSet>, +) -> HashMap { + let mut out = HashMap::new(); + if ext_buffer.is_empty() { + return out; + } + + for (_, chunk) in proto_len_fields(ext_buffer) { + let strings = proto_string_fields(chunk); + if strings.is_empty() { + continue; + } + let Some(username) = pick_member_username(&strings, targets) else { + continue; + }; + if out.contains_key(&username) { + continue; + } + if let Some(nickname) = pick_group_nickname(&strings, &username) { + out.insert(username, nickname); + } + } + + out +} + +fn contact_display( + uid: &str, + nick: &str, + remark: &str, + names_map: &HashMap, +) -> String { + if !remark.is_empty() { + remark.to_string() + } else if !nick.is_empty() { + nick.to_string() + } else { + names_map.get(uid).cloned().unwrap_or_else(|| uid.to_string()) + } +} + +fn sender_display( + username: &str, + fallback_sender_name: &str, + names: &HashMap, + group_nicknames: &HashMap, +) -> String { + if username.is_empty() { + return String::new(); + } + group_nicknames + .get(username) + .filter(|s| !s.is_empty()) + .cloned() + .or_else(|| names.get(username).cloned()) + .or_else(|| { + if fallback_sender_name.is_empty() { + None + } else { + Some(fallback_sender_name.to_string()) + } + }) + .unwrap_or_else(|| username.to_string()) +} + +fn group_top_senders( + sender_counts: &HashMap, + names: &HashMap, + group_nicknames: &HashMap, + limit: usize, +) -> Vec { + let mut top_senders: Vec = sender_counts.iter() + .map(|(username, count)| json!({ + "sender": sender_display(username, "", names, group_nicknames), + "count": count, + })) + .collect(); + top_senders.sort_by(|a, b| { + b["count"].as_i64().unwrap_or(0) + .cmp(&a["count"].as_i64().unwrap_or(0)) + .then_with(|| { + a["sender"].as_str().unwrap_or("") + 
.cmp(b["sender"].as_str().unwrap_or("")) + }) + }); + top_senders.truncate(limit); + top_senders +} + fn sender_label( real_sender_id: i64, content: &str, @@ -625,15 +1077,16 @@ fn sender_label( chat_username: &str, id2u: &HashMap, names: &HashMap, + group_nicknames: &HashMap, ) -> String { let sender_uname = id2u.get(&real_sender_id).cloned().unwrap_or_default(); if is_group { if !sender_uname.is_empty() && sender_uname != chat_username { - return names.get(&sender_uname).cloned().unwrap_or(sender_uname); + return sender_display(&sender_uname, "", names, group_nicknames); } if content.contains(":\n") { let raw = content.splitn(2, ":\n").next().unwrap_or(""); - return names.get(raw).cloned().unwrap_or_else(|| raw.to_string()); + return sender_display(raw, "", names, group_nicknames); } return String::new(); } @@ -762,28 +1215,32 @@ fn parse_sysmsg(xml: &str) -> Option { } fn parse_appmsg(text: &str) -> Option { - // 简单 XML 解析,避免引入重量级 XML 库(或直接用 minidom) - // 这里用基本字符串搜索实现 + if let Some(parsed) = parse_appmsg_dom(text) { + return Some(parsed); + } + parse_appmsg_legacy(text) +} + +fn parse_appmsg_dom(text: &str) -> Option { + let doc = Document::parse(text).ok()?; + let appmsg = doc.descendants().find(|node| node.has_tag_name("appmsg"))?; + let title = xml_text(xml_child(appmsg, "title")).unwrap_or_default(); + let atype = xml_text(xml_child(appmsg, "type")).unwrap_or_default(); + match atype.as_str() { + "6" => Some(format_file_appmsg(appmsg, &title)), + "19" => Some(format_record_appmsg(appmsg, &title)), + _ => None, + } +} + +fn parse_appmsg_legacy(text: &str) -> Option { let title = extract_xml_text(text, "title")?; let atype = extract_xml_text(text, "type").unwrap_or_default(); match atype.as_str() { "6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }), "57" => { - let ref_content = extract_xml_text(text, "content") - .map(|s| { - // content 可能是 HTML 转义的 XML(被引用的消息是 appmsg 时) - let unescaped = unescape_html(&s); - // 如果解转义后是 
XML,尝试递归解析 - if unescaped.contains(">().join(" "); - if s.chars().count() > 40 { - format!("{}...", s.chars().take(40).collect::()) - } else { s } - }) + let ref_content = quote_refermsg_content(text) + .or_else(|| extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40))) .unwrap_or_default(); let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() }; if !ref_content.is_empty() { @@ -797,6 +1254,193 @@ fn parse_appmsg(text: &str) -> Option { } } +fn format_file_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let mut meta = Vec::new(); + if let Some(size) = xml_child(appmsg, "appattach") + .and_then(|attach| xml_text(xml_child(attach, "totallen"))) + .and_then(|value| value.parse::().ok()) + .filter(|size| *size > 0) + { + meta.push(format_byte_size(size)); + } + if let Some(ext) = xml_child(appmsg, "appattach") + .and_then(|attach| xml_text(xml_child(attach, "fileext"))) + .filter(|ext| !ext.is_empty()) + { + meta.push(ext); + } + + let base = if !title.is_empty() { + format!("[文件] {}", title) + } else { + "[文件]".into() + }; + if meta.is_empty() { + base + } else { + format!("{} ({})", base, meta.join(", ")) + } +} + +fn format_record_appmsg<'a, 'input>(appmsg: Node<'a, 'input>, title: &str) -> String { + let items = record_item_lines(appmsg); + let mut header = if !title.is_empty() { + format!("[合并聊天记录] {}", title) + } else { + "[合并聊天记录]".into() + }; + if !items.is_empty() { + header.push_str(&format!(" ({}条)", items.len())); + } + + let mut lines = vec![header]; + if items.is_empty() { + if let Some(desc) = xml_text(xml_child(appmsg, "des")).filter(|desc| !desc.is_empty()) { + lines.push(format!(" {}", collapse_text(&desc, 120))); + } + } else { + for item in items.iter().take(10) { + lines.push(format!(" - {}", item)); + } + if items.len() > 10 { + lines.push(format!(" - ... 
还有{}条", items.len() - 10)); + } + } + lines.join("\n") +} + +fn record_item_lines<'a, 'input>(appmsg: Node<'a, 'input>) -> Vec { + let mut lines = record_item_lines_from_node(appmsg); + if !lines.is_empty() { + return lines; + } + + let Some(record_xml) = xml_text(xml_child(appmsg, "recorditem")).filter(|value| !value.is_empty()) else { + return Vec::new(); + }; + let unescaped = unescape_html(&record_xml); + for candidate in [&record_xml, &unescaped] { + if let Ok(doc) = Document::parse(candidate) { + lines = record_item_lines_from_node(doc.root_element()); + if !lines.is_empty() { + break; + } + } + } + lines +} + +fn record_item_lines_from_node<'a, 'input>(node: Node<'a, 'input>) -> Vec { + node.descendants() + .filter(|child| child.has_tag_name("dataitem")) + .filter_map(format_record_item) + .collect() +} + +fn format_record_item<'a, 'input>(item: Node<'a, 'input>) -> Option { + let name = first_child_text(item, &["sourcename", "datasrcname", "sourceusername"]); + let desc = first_child_text(item, &["datadesc", "datatitle", "datafmt"]) + .or_else(|| item.attribute("datatype").and_then(record_datatype_label).map(str::to_string))?; + let desc = collapse_text(&desc, 100); + if let Some(name) = name.filter(|value| !value.is_empty()) { + Some(format!("{}: {}", name, desc)) + } else { + Some(desc) + } +} + +fn first_child_text<'a, 'input>(node: Node<'a, 'input>, tags: &[&str]) -> Option { + tags.iter() + .find_map(|tag| xml_text(xml_child(node, tag))) + .filter(|value| !value.is_empty()) +} + +fn record_datatype_label(datatype: &str) -> Option<&'static str> { + match datatype { + "1" => Some("[文本]"), + "2" => Some("[图片]"), + "3" => Some("[语音]"), + "4" => Some("[视频]"), + "6" => Some("[文件]"), + "17" => Some("[链接]"), + _ => None, + } +} + +fn quote_refermsg_content(text: &str) -> Option { + let refer = extract_xml_text(text, "refermsg")?; + let content = extract_xml_text(&refer, "content") + .and_then(|s| quote_content_text(&s, 80)) + .or_else(|| { + 
extract_xml_text(&refer, "type") + .and_then(|t| quote_refermsg_type_label(&t).map(str::to_string)) + })?; + match extract_xml_text(&refer, "displayname") { + Some(name) if !name.is_empty() => Some(format!("{}: {}", name, content)), + _ => Some(content), + } +} + +fn quote_content_text(raw: &str, max_chars: usize) -> Option { + let unescaped = unescape_html(raw); + if unescaped.contains(" Option<&'static str> { + match t { + "1" => None, + "3" => Some("[图片]"), + "34" => Some("[语音]"), + "43" => Some("[视频]"), + "47" => Some("[表情]"), + "49" => Some("[链接/文件]"), + _ => None, + } +} + +fn collapse_text(text: &str, max_chars: usize) -> String { + let collapsed = text.split_whitespace().collect::>().join(" "); + if collapsed.chars().count() > max_chars { + format!("{}...", collapsed.chars().take(max_chars).collect::()) + } else { + collapsed + } +} + +fn format_byte_size(bytes: u64) -> String { + const KB: f64 = 1024.0; + const MB: f64 = KB * 1024.0; + const GB: f64 = MB * 1024.0; + let bytes_f = bytes as f64; + if bytes_f >= GB { + format_decimal_unit(bytes_f / GB, "GB") + } else if bytes_f >= MB { + format_decimal_unit(bytes_f / MB, "MB") + } else if bytes_f >= KB { + format_decimal_unit(bytes_f / KB, "KB") + } else { + format!("{} B", bytes) + } +} + +fn format_decimal_unit(value: f64, unit: &str) -> String { + let mut s = format!("{:.1}", value); + if s.ends_with(".0") { + s.truncate(s.len() - 2); + } + format!("{} {}", s, unit) +} + fn extract_xml_text(xml: &str, tag: &str) -> Option { let open = format!("<{}>", tag); let close = format!("", tag); @@ -806,6 +1450,46 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn extract_favorite_url(content: &str) -> Option { + let url = extract_xml_text(content, "link") + .map(|s| 
unescape_html(strip_xml_cdata(&s)))?; + if url.is_empty() || !(url.starts_with("http://") || url.starts_with("https://")) { + return None; + } + Some(url) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + +/// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + let xml = strip_group_prefix(text); + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; @@ -829,6 +1513,246 @@ fn unescape_html(s: &str) -> String { .replace("'", "'") } +#[cfg(test)] +mod appmsg_tests { + use super::*; + + #[test] + fn parse_forwarded_chat_record_expands_record_items() { + let xml = r#" + + + 群聊的聊天记录 + 张三: 早上好 +李四: 收到 + 19 + <recordinfo><datalist count="2"><dataitem datatype="1"><sourcename>张三</sourcename><sourcetime>1710000000</sourcetime><datadesc>早上好 &amp; coffee</datadesc></dataitem><dataitem datatype="2"><sourcename>李四</sourcename><sourcetime>1710000060</sourcetime><datafmt>图片</datafmt><datadesc>[图片]</datadesc></dataitem></datalist></recordinfo> + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[合并聊天记录] 群聊的聊天记录 (2条)\n - 张三: 早上好 & coffee\n - 李四: [图片]") + ); + } + + #[test] + fn parse_file_appmsg_includes_attachment_metadata() { + let xml = r#" + + + report.pdf + 6 + + 1536 + pdf + + abcdef123456 + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[文件] report.pdf (1.5 KB, pdf)") + ); + } + + #[test] + fn parse_quote_appmsg_reads_refermsg_content() { + let xml = r#" + + + 我也没有用ai啊 + 57 + + + 1 + 不再熬夜 + 昨天用 claude 爬小红书数据来着 + + + + "#; + + assert_eq!( + parse_appmsg(xml).as_deref(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn query_messages_filters_appmsg_by_base_type() { + let path = temp_db_path("query_messages_filters_appmsg_by_base_type"); + { + let conn = Connection::open(&path).expect("open temp db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time 
INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + } + + let rows = query_messages( + &path, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + None, + None, + Some(49), + 10, + 0, + ) + .expect("query messages"); + + let _ = std::fs::remove_file(&path); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_filters_appmsg_by_base_type() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content TEXT, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#, + 0_i64 + ], + ) + .expect("insert quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, + ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + #[test] + fn search_in_table_matches_decompressed_formatted_appmsg_content() { + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute( + "CREATE TABLE Msg_test ( + local_id INTEGER, + 
local_type INTEGER, + create_time INTEGER, + real_sender_id INTEGER, + message_content BLOB, + WCDB_CT_message_content INTEGER + )", + [], + ) + .expect("create message table"); + let xml = r#"我也没有用ai啊57不再熬夜昨天用 claude 爬小红书数据来着"#; + let compressed = zstd::encode_all(xml.as_bytes(), 0).expect("compress appmsg xml"); + conn.execute( + "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + 1_i64, + ((57_i64) << 32) | 49_i64, + 1775146911_i64, + 0_i64, + compressed, + 4_i64 + ], + ) + .expect("insert compressed quote message"); + + let rows = search_in_table( + &conn, + "Msg_test", + "wxid_r605h38n08mv22", + false, + &HashMap::new(), + &HashMap::new(), + "claude", + None, + None, + Some(49), + 10, + ) + .expect("search messages"); + + assert_eq!(rows.len(), 1); + assert_eq!( + rows[0]["content"].as_str(), + Some("[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着") + ); + } + + fn temp_db_path(name: &str) -> std::path::PathBuf { + let unique = format!( + "wx-cli-{}-{}-{}.db", + name, + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock before unix epoch") + .as_nanos() + ); + std::env::temp_dir().join(unique) + } +} + fn fmt_time(ts: i64, fmt: &str) -> String { Local.timestamp_opt(ts, 0) .single() @@ -904,6 +1828,7 @@ pub async fn q_unread( }).await??; let mut results = Vec::new(); + let mut group_nickname_cache: HashMap> = HashMap::new(); for (username, unread, summary_bytes, ts, msg_type, sender, sender_name) in rows { let chat_type = chat_type_of(&username, names); if let Some(ref set) = filter_set { @@ -916,9 +1841,13 @@ pub async fn q_unread( let summary = decompress_or_str(&summary_bytes); let summary = strip_group_prefix(&summary); let sender_display = if is_group && !sender.is_empty() { - names.map.get(&sender).cloned().unwrap_or_else(|| { - if !sender_name.is_empty() { sender_name.clone() } else { sender.clone() } - }) + if !group_nickname_cache.contains_key(&username) { + let 
nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); + group_nickname_cache.insert(username.clone(), nicknames); + } + let empty = HashMap::new(); + let group_nicknames = group_nickname_cache.get(&username).unwrap_or(&empty); + sender_display(&sender, &sender_name, &names.map, group_nicknames) } else { String::new() }; @@ -955,7 +1884,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result // 优先路径:contact.db → chatroom_member + chat_room(完整成员列表) if let Some(contact_p) = db.get("contact/contact.db").await? { let uname2 = username.clone(); - let display2 = display.clone(); let names_map2 = names_map.clone(); let members_opt: Option> = tokio::task::spawn_blocking(move || { @@ -1008,12 +1936,31 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result return Ok(None); } + let target_usernames: HashSet = raw.iter() + .map(|(uid, _, _)| uid.clone()) + .collect(); + let group_nicknames = load_group_nickname_map_from_conn( + &conn, + &uname2, + Some(&target_usernames), + ); + let mut members: Vec = raw.iter().map(|(uid, nick, remark)| { - let disp = if !remark.is_empty() { remark.clone() } - else if !nick.is_empty() { nick.clone() } - else { names_map2.get(uid).cloned().unwrap_or_else(|| uid.clone()) }; + let contact_display = contact_display(uid, nick, remark, &names_map2); + let group_nickname = group_nicknames.get(uid).cloned().unwrap_or_default(); + let disp = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; let is_owner = uid == &owner && !owner.is_empty(); - json!({ "username": uid, "display": disp, "is_owner": is_owner }) + json!({ + "username": uid, + "display": disp, + "contact_display": contact_display, + "group_nickname": group_nickname, + "is_owner": is_owner, + }) }).collect(); // 群主排首位,其余按 display 字典序 @@ -1024,7 +1971,6 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result 
a["display"].as_str().unwrap_or("").cmp(b["display"].as_str().unwrap_or("")) }); - let _ = display2; // 不在此 closure 内使用 Ok(Some(members)) }).await??; @@ -1075,10 +2021,20 @@ pub async fn q_members(db: &DbCache, names: &Names, chat: &str) -> Result sender_set.extend(senders); } + let group_nicknames = load_group_nicknames(db, &username).await.unwrap_or_default(); let mut members: Vec = sender_set.iter().map(|u| { + let contact_display = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); + let group_nickname = group_nicknames.get(u).cloned().unwrap_or_default(); + let display = if group_nickname.is_empty() { + contact_display.clone() + } else { + group_nickname.clone() + }; json!({ "username": u, - "display": names_map.get(u).cloned().unwrap_or_else(|| u.clone()), + "display": display, + "contact_display": contact_display, + "group_nickname": group_nickname, "is_owner": false, }) }).collect(); @@ -1163,6 +2119,11 @@ pub async fn q_new_messages( let display = names.display(uname); let chat_type = chat_type_of(uname, names); let is_group = chat_type == "group"; + let group_nicknames = if is_group { + load_group_nicknames(db, uname).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); @@ -1170,6 +2131,7 @@ pub async fn q_new_messages( let uname2 = uname.clone(); let display2 = display.clone(); let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); let tname_for_log = tname.clone(); let msgs: Vec = match tokio::task::spawn_blocking(move || { @@ -1201,9 +2163,10 @@ pub async fn q_new_messages( let mut result = Vec::new(); for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows { let content = decompress_message(&content_bytes, ct); - let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map); + let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = 
fmt_content(local_id, local_type, &content, is_group); - result.push(json!({ + let url = appmsg_url_for_message(local_type, &content); + let mut msg = json!({ "chat": display2, "username": uname2, "is_group": is_group, @@ -1213,7 +2176,11 @@ pub async fn q_new_messages( "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok::<_, anyhow::Error>(result) }).await { @@ -1230,24 +2197,40 @@ pub async fn q_new_messages( all_msgs.truncate(limit); // 5. 重建 new_state,防止全局 limit 截断导致消息永久丢失: - // - 未变化的会话:沿用 session.db 的 last_timestamp - // - 变化但全被截断(无消息在最终结果中):保留旧 since_ts,下次重试 - // - 变化且有消息返回:推进到该会话在结果中的最大 timestamp - let mut new_state = session_ts_map; - // 先把 changed 会话重置回旧 since_ts - for (uname, _) in &changed { - let old_ts = state.as_ref() - .and_then(|m| m.get(uname)) - .copied() - .unwrap_or(fallback_ts); - new_state.insert(uname.clone(), old_ts); - } - // 再根据实际返回的消息向前推进 - for m in &all_msgs { - if let (Some(uname), Some(ts)) = (m["username"].as_str(), m["timestamp"].as_i64()) { - let e = new_state.entry(uname.to_string()).or_insert(0); - if ts > *e { *e = ts; } + // - 未变化的会话:沿用 session.db 的 last_timestamp(即 session_ts_map) + // - 变化但全被截断(无消息在最终结果中): + // * 后续调用 (state=Some):保留旧 since_ts,下次重试拿这部分消息 + // * 首次调用 (state=None):advance 到 session_ts,避免 since_ts 锁死在 + // fallback_ts 导致后续每次都回扫 24h。窗口会随调用次数 + 时间累积扩大, + // 性能持续衰退。代价:首次 + 被截断会话的老消息看不到,需走 `wx history`。 + // - 变化且有消息返回:advance 到该会话在结果中的最大 timestamp(增量 fetch 标准语义) + let returned_max_ts: HashMap = { + let mut m: HashMap = HashMap::new(); + for msg in &all_msgs { + if let (Some(u), Some(ts)) = (msg["username"].as_str(), msg["timestamp"].as_i64()) { + let e = m.entry(u.to_string()).or_insert(0); + if ts > *e { *e = ts; } + } } + m + }; + let mut new_state = session_ts_map; + for (uname, _) in &changed { + let in_results = returned_max_ts.contains_key(uname); + let prev = 
state.as_ref().and_then(|m| m.get(uname)).copied(); + let next_ts = match (in_results, prev) { + (true, _) => { + // 有消息返回:advance 到 returned_max;返回的最大 ts 通常 ≤ session_ts, + // 这样下次查 `since > returned_max` 仍能拿到 returned_max..session_ts 的截断尾巴。 + returned_max_ts[uname] + } + (false, Some(prev)) => prev, // 后续 + 截断:保持旧 since + (false, None) => { + // 首次 + 截断:advance 到 session_ts 兜底,避免 since_ts 锁死。 + new_state.get(uname).copied().unwrap_or(fallback_ts) + } + }; + new_state.insert(uname.clone(), next_ts); } Ok(json!({ @@ -1330,7 +2313,7 @@ pub async fn q_favorites( }; // WeChat 部分版本的 update_time 为毫秒,10位以上判定为毫秒后转秒 let ts_secs = if ts > 9_999_999_999 { ts / 1000 } else { ts }; - json!({ + let mut item = json!({ "id": local_id, "type": type_str, "type_num": ftype, @@ -1339,7 +2322,13 @@ pub async fn q_favorites( "preview": preview, "from": fromusr, "chat": chatname, - }) + }); + if ftype == 5 { + if let Some(url) = extract_favorite_url(&content) { + item["url"] = Value::String(url); + } + } + item }) .collect(); @@ -1376,13 +2365,17 @@ pub async fn q_stats( let mut type_counts: HashMap = HashMap::new(); let mut sender_counts: HashMap = HashMap::new(); let mut hour_counts = [0i64; 24]; + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; for (db_path, table_name) in &tables { let path = db_path.clone(); let tname = table_name.clone(); let uname = username.clone(); let is_group2 = is_group; - let names_map = names.map.clone(); // 用 SQL GROUP BY 在数据库侧聚合,避免把全量消息内容加载进内存 let result: (i64, HashMap, HashMap, [i64; 24]) = @@ -1469,8 +2462,7 @@ pub async fn q_stats( for (id, cnt) in rows.flatten() { if let Some(u) = id2u.get(&id) { if u != &uname { - let name = names_map.get(u).cloned().unwrap_or_else(|| u.clone()); - *sender_c.entry(name).or_insert(0) += cnt; + *sender_c.entry(u.clone()).or_insert(0) += cnt; } } } @@ -1495,11 +2487,7 @@ pub async fn q_stats( by_type.sort_by_key(|v| 
std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); // 发言排行,Top 10 - let mut top_senders: Vec = sender_counts.iter() - .map(|(s, c)| json!({ "sender": s, "count": c })) - .collect(); - top_senders.sort_by_key(|v| std::cmp::Reverse(v["count"].as_i64().unwrap_or(0))); - top_senders.truncate(10); + let top_senders = group_top_senders(&sender_counts, &names.map, &group_nicknames, 10); // 24小时分布 let by_hour: Vec = hour_counts.iter().enumerate() @@ -2001,6 +2989,805 @@ pub async fn q_sns_search( Ok(json!({ "keyword": keyword, "posts": posts, "total": total })) } +// ─── 公众号文章查询 ─────────────────────────────────────────────────────────── + +/// 一条公众号文章的解析产物 +#[derive(Debug)] +struct BizArticle { + /// 接收该推送的时间戳(即消息的 create_time) + recv_time: i64, + /// 公众号 username + account_username: String, + /// 文章标题 + title: String, + /// 文章链接 + url: String, + /// 摘要 + digest: String, + /// 封面图 + cover: String, + /// 文章发布时间(pub_time,单位秒) + pub_time: i64, +} + +/// 从 biz_message 表的单条 XML 解析出全部 article items +fn parse_biz_xml_items(recv_time: i64, account_username: &str, xml: &str) -> Vec { + let mut items = Vec::new(); + let mut search_from = 0; + loop { + let Some(item_start) = xml[search_from..].find("") else { break; }; + let abs_start = search_from + item_start; + let Some(item_end) = xml[abs_start..].find("") else { break; }; + let abs_end = abs_start + item_end + 7; + let item_xml = &xml[abs_start..abs_end]; + + let title = extract_cdata(item_xml, "title").unwrap_or_default(); + let url = extract_cdata(item_xml, "url").unwrap_or_default(); + // Skip items with no URL or empty title (e.g. 
payment entries) + if url.is_empty() || title.is_empty() { + search_from = abs_end; + continue; + } + let digest = extract_cdata(item_xml, "digest").unwrap_or_default(); + let cover = extract_cdata(item_xml, "cover").unwrap_or_default(); + let pub_time = extract_xml_text(item_xml, "pub_time") + .and_then(|s| s.parse::().ok()) + .unwrap_or(recv_time); + + items.push(BizArticle { + recv_time, + account_username: account_username.to_string(), + title, + url, + digest, + cover, + pub_time, + }); + search_from = abs_end; + } + items +} + +/// 提取 CDATA 或普通文本内容: `` 或 `...` +/// +/// 注意: 内容匹配到 `` 之前的内容。CDATA 块中的 "]]"已在 "]]\x3e" 之前, +/// 所以 inner 为 `` 或 `" 被 close tag 吸掉) +fn extract_cdata(xml: &str, tag: &str) -> Option { + let open = format!("<{}>", tag); + let close = format!("", tag); + let start = xml.find(&open)? + open.len(); + let end = xml[start..].find(&close)?; + let inner = xml[start..start + end].trim(); + if inner.starts_with("` → strip 9-char `` suffix + let body = &inner[9..]; + // Strip `]]>` (normal) or `]]` (edge case) + let cdata_end = b"]]>"; + let cdata_end2 = b"]]"; + let content: &str = if body.as_bytes().ends_with(cdata_end) { + &body[..body.len() - 3] + } else if body.as_bytes().ends_with(cdata_end2) { + &body[..body.len() - 2] + } else { + body + }; + let content = content.trim(); + if content.is_empty() { None } else { Some(content.to_string()) } + } else if inner.is_empty() { + None + } else { + Some(unescape_html(inner)) + } +} + +/// 查询公众号文章推送(biz_message_0.db) +/// +/// 每条消息可能包含多篇文章(多图文推送)。返回所有文章展开就的平底列表。 +pub async fn q_biz_articles( + db: &DbCache, + names: &Names, + limit: usize, + account: Option, + since: Option, + until: Option, + unread: bool, +) -> Result { + let biz_path = db.get("message/biz_message_0.db").await? + .context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")? 
+; + + // 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集, + // 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。 + let unread_usernames: Option> = if unread { + let session_path = db.get("session/session.db").await? + .context("无法解密 session.db")?; + let session_path2 = session_path.clone(); + let unread_rows: Vec = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&session_path2)?; + let mut stmt = conn.prepare( + "SELECT username FROM SessionTable WHERE unread_count > 0" + )?; + let rows: Vec = stmt.query_map([], |row| row.get::<_, String>(0))? + .filter_map(|r| r.ok()) + .collect(); + Ok::<_, anyhow::Error>(rows) + }).await??; + // 仅保留公众号类型的未读会话 + let set: std::collections::HashSet = unread_rows.into_iter() + .filter(|u| chat_type_of(u, names) == "official_account") + .collect(); + if set.is_empty() { + // 没有未读公众号 → 直接空返回,避免打 biz 表扫描 + return Ok(json!({ "count": 0, "articles": [] })); + } + Some(set) + } else { + None + }; + + // 1. 从 Name2Id 表获取 rowid -> username 映射,再推导 md5 -> username + let biz_path2 = biz_path.clone(); + let id2username: HashMap = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&biz_path2)?; + let mut stmt = conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")? + ; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) + })? + .collect::>>()?; + Ok::<_, anyhow::Error>(rows.into_iter().collect()) + }).await??; + + // 构建 md5(username) -> username 映射 + let md5_to_uname: HashMap = id2username.values() + .map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone())) + .collect(); + + // 2. 
如果 指定了 --account,找到匹配的 username 列表 + let account_low = account.as_deref().map(|s| s.to_lowercase()); + let mut target_usernames: Option> = account_low.as_ref().map(|low| { + id2username.values() + .filter(|u| { + let display = names.display(u); + display.to_lowercase().contains(low.as_str()) + || u.to_lowercase().contains(low.as_str()) + }) + .cloned() + .collect() + }); + + // --unread 与 --account 取交集(进一步缩小范围) + if let Some(ref unread_set) = unread_usernames { + target_usernames = Some(match target_usernames.take() { + Some(acc_list) => acc_list.into_iter() + .filter(|u| unread_set.contains(u)) + .collect(), + None => unread_set.iter().cloned().collect(), + }); + // 交集为空 → 提前返回 + if target_usernames.as_ref().map(|v| v.is_empty()).unwrap_or(false) { + return Ok(json!({ "count": 0, "articles": [] })); + } + } + + // 3. 进行数据库查询 + let biz_path3 = biz_path.clone(); + let since2 = since; + let until2 = until; + let target_hashes: Option> = target_usernames.as_ref().map(|unames| { + unames.iter() + .map(|u| format!("{:x}", md5::compute(u.as_bytes()))) + .collect() + }); + + let rows: Vec<(String, i64, i64, Vec, i64)> = tokio::task::spawn_blocking(move || { + let conn = Connection::open(&biz_path3)?; + + // 列出所有 Msg_ 表 + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'" + )?; + let table_names: Vec = stmt.query_map([], |row| row.get(0))? 
+ .filter_map(|r| r.ok()) + .collect(); + + let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap(); + let mut all_rows: Vec<(String, i64, i64, Vec, i64)> = Vec::new(); + + for tname in &table_names { + if !re.is_match(tname) { continue; } + let hash = &tname[4..]; + + // account 过滤 + if let Some(ref hashes) = target_hashes { + if !hashes.iter().any(|h| h == hash) { continue; } + } + + let username = md5_to_uname.get(hash).cloned().unwrap_or_default(); + + // 构建过滤条件 + let mut clauses: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + // local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章) + clauses.push("(local_type & 4294967295) = 49".to_string()); + if let Some(s) = since2 { + clauses.push("create_time >= ?".to_string()); + params.push(Box::new(s)); + } + if let Some(u) = until2 { + clauses.push("create_time <= ?".to_string()); + params.push(Box::new(u)); + } + let where_clause = format!("WHERE {}", clauses.join(" AND ")); + + let sql = format!( + "SELECT create_time, WCDB_CT_message_content, message_content \ + FROM [{}] {} ORDER BY create_time DESC", + tname, where_clause + ); + + let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + if let Ok(mut inner_stmt) = conn.prepare(&sql) { + let msg_rows: Vec<_> = inner_stmt + .query_map(params_ref.as_slice(), |row| { + Ok(( + username.clone(), + row.get::<_, i64>(0)?, + row.get::<_, i64>(1).unwrap_or(0), + get_content_bytes(row, 2), + 0i64, + )) + }) + .map(|it| it.filter_map(|r| r.ok()).collect()) + .unwrap_or_default(); + all_rows.extend(msg_rows); + } + } + Ok::<_, anyhow::Error>(all_rows) + }).await??; + + // 4. 解压并解析 XML + let mut articles: Vec = Vec::new(); + for (username, recv_time, ct, content_bytes, _) in rows { + let content = decompress_message(&content_bytes, ct); + if content.is_empty() { continue; } + let items = parse_biz_xml_items(recv_time, &username, &content); + articles.extend(items); + } + + // 5. 
按 pub_time DESC 排序 + articles.sort_by_key(|a| std::cmp::Reverse(a.pub_time)); + + // --unread 语义 A:每个公众号只保留最新 1 篇(已按 pub_time 排序,取首条即可) + if unread { + let mut seen = std::collections::HashSet::::new(); + articles.retain(|a| seen.insert(a.account_username.clone())); + } + + articles.truncate(limit); + + let results: Vec = articles.into_iter().map(|a| { + let account_display = names.display(&a.account_username); + json!({ + "time": fmt_time(a.pub_time, "%Y-%m-%d %H:%M"), + "timestamp": a.pub_time, + "recv_time": a.recv_time, + "recv_time_str": fmt_time(a.recv_time, "%Y-%m-%d %H:%M"), + "account": account_display, + "account_username": a.account_username, + "title": a.title, + "url": a.url, + "digest": a.digest, + "cover_url": a.cover, + }) + }).collect(); + + Ok(json!({ "count": results.len(), "articles": results })) +} + +// ─── 附件(当前先支持图片)查询与提取 ───────────────────────────────── +// +// 设计要点: +// - `q_attachments` 只走 `Msg_` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤 +// 出附件消息行,再编出 `attachment_id`。**不**去翻 `message_resource.db`,因为列出动作 +// 要可枚举几千条;resource lookup 留到 `q_extract` 才做。 +// - `q_extract` 走完整链:`AttachmentId` → `message_resource.db` 查 md5 → +// `/msg/attach/...` 找 .dat → 按 magic 分发到 v1/v2 decoder → 写盘。 +// - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。 +// 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。 + +/// 列出某会话内的附件消息(当前仅 image)。返回每条的 `attachment_id`, +/// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。 +pub async fn q_attachments( + db: &DbCache, + names: &Names, + chat: &str, + kinds: Option>, + limit: usize, + offset: usize, + since: Option, + until: Option, +) -> Result { + use crate::attachment::{AttachmentId, AttachmentKind}; + + let username = resolve_username(chat, names) + .with_context(|| format!("找不到联系人: {}", chat))?; + let display = names.display(&username); + let chat_type = chat_type_of(&username, names); + let is_group = chat_type == "group"; + + // 解析 kinds → 低 32 bit local_type 集合 + let kind_filters: Vec<(AttachmentKind, i64)> 
= parse_attachment_kinds(kinds.as_deref())?; + if kind_filters.is_empty() { + anyhow::bail!("kinds 为空 — 当前至少传一种 image"); + } + let lo32_types: Vec = kind_filters.iter().map(|(_, t)| *t).collect(); + // local_type → AttachmentKind 反查(mask 完后定 kind) + let type_to_kind: HashMap = kind_filters.iter() + .map(|(k, t)| (*t, *k)) + .collect(); + + let tables = find_msg_tables(db, names, &username).await?; + if tables.is_empty() { + anyhow::bail!("找不到 {} 的消息记录", display); + } + + // 群聊需要 sender 显示名 + let group_nicknames = if is_group { + load_group_nicknames(db, &username).await.unwrap_or_default() + } else { + HashMap::new() + }; + + let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new(); + // 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx) + for (db_idx, (db_path, table_name)) in tables.iter().enumerate() { + let path = db_path.clone(); + let tname = table_name.clone(); + let uname = username.clone(); + let is_group2 = is_group; + let names_map = names.map.clone(); + let group_nicknames2 = group_nicknames.clone(); + let lo32_types2 = lo32_types.clone(); + let since2 = since; + let until2 = until; + // per-DB 软上限避免巨群全量加载 + let per_db_cap = (offset + limit).max(limit) * 2; + let db_idx2 = db_idx as i64; + + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = + tokio::task::spawn_blocking(move || { + let conn = Connection::open(&path)?; + let id2u = load_id2u(&conn); + + // local_type 在 DB 里可能带高位 flag,过滤要 mask 低 32 bit + let placeholders = lo32_types2.iter().map(|_| "?").collect::>().join(","); + let mut clauses: Vec = vec![ + format!("(local_type & 4294967295) IN ({})", placeholders), + ]; + let mut params: Vec> = lo32_types2.iter() + .map(|t| Box::new(*t) as Box) + .collect(); + if let Some(s) = since2 { + clauses.push("create_time >= ?".into()); + params.push(Box::new(s)); + } + if let Some(u) = until2 { + clauses.push("create_time <= ?".into()); + params.push(Box::new(u)); + } + let where_clause = 
format!("WHERE {}", clauses.join(" AND ")); + + let sql = format!( + "SELECT local_id, local_type, create_time, real_sender_id, + message_content, WCDB_CT_message_content + FROM [{}] {} ORDER BY create_time DESC LIMIT ?", + tname, where_clause + ); + params.push(Box::new(per_db_cap as i64)); + + let params_ref: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); + let mut stmt = conn.prepare(&sql)?; + let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt + .query_map(params_ref.as_slice(), |row| { + let local_id: i64 = row.get(0)?; + let raw_type: i64 = row.get(1)?; + let lo32 = (raw_type as u64 & 0xFFFFFFFF) as i64; + let ts: i64 = row.get(2)?; + let real_sender_id: i64 = row.get(3)?; + let content_bytes = get_content_bytes(row, 4); + let ct: i64 = row.get::<_, i64>(5).unwrap_or(0); + let content = decompress_message(&content_bytes, ct); + let sender = if is_group2 { + sender_label(real_sender_id, &content, true, &uname, + &id2u, &names_map, &group_nicknames2) + } else { + String::new() + }; + Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2)) + })? 
+ .filter_map(|r| r.ok()) + .collect(); + Ok::<_, anyhow::Error>(rows) + }) + .await??; + all_rows.extend(rows); + } + + // 全局按 ts DESC 排序后分页 + all_rows.sort_by_key(|r| std::cmp::Reverse(r.5)); + let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect(); + + // 翻成 JSON + let mut results: Vec = Vec::with_capacity(paged.len()); + for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged { + let kind = type_to_kind.get(&lo32).copied() + .unwrap_or(AttachmentKind::Image); // 理论不会 fallthrough + let id = AttachmentId { + v: 1, + chat: username.clone(), + local_id, + create_time: ts, + kind, + db: None, + }; + let id_str = id.encode()?; + + let mut row = json!({ + "attachment_id": id_str, + "kind": kind.as_str(), + "type": fmt_type(lo32), + "local_id": local_id, + "timestamp": ts, + "time": fmt_time(ts, "%Y-%m-%d %H:%M"), + }); + if is_group && !sender.is_empty() { + row["sender"] = Value::String(sender); + } + results.push(row); + } + + Ok(json!({ + "chat": display, + "username": username, + "is_group": is_group, + "chat_type": chat_type, + "count": results.len(), + "attachments": results, + })) +} + +/// 解码 attachment_id → 查 message_resource.db → 找本地 .dat → 解密 → 写盘。 +pub async fn q_extract( + db: &DbCache, + _names: &Names, + attachment_id: &str, + output: &str, + overwrite: bool, +) -> Result { + use crate::attachment::{ + attachment_id::AttachmentId, + decoder::{self, V2KeyMaterial}, + image_key, + resolver, + }; + + let id = AttachmentId::decode(attachment_id) + .context("解析 attachment_id 失败(不是合法 base64url(json)?)")?; + + let output_path = std::path::PathBuf::from(output); + if output_path.exists() && !overwrite { + anyhow::bail!( + "目标已存在:{}(加 --overwrite 覆盖)", + output_path.display() + ); + } + if let Some(parent) = output_path.parent() { + if !parent.as_os_str().is_empty() { + tokio::fs::create_dir_all(parent).await + .with_context(|| format!("创建输出目录失败:{}", parent.display()))?; + } + } + + // 1) 拿 message_resource.db + let 
resource_path = db.get("message/message_resource.db").await? + .context("无法解密 message_resource.db(请确认 all_keys.json 包含该 DB 的密钥)")?; + + // 2) 推 wxchat_base = db_dir.parent(),再拼 attach_root + let wxchat_base = db.db_dir().parent() + .ok_or_else(|| anyhow::anyhow!("db_dir 没有 parent,无法推断 xwechat_files 根目录"))? + .to_path_buf(); + let attach_root = resolver::attach_root_for(&wxchat_base); + + // 3) blocking pool 跑 resolver + 读盘 + 解码 + let id_for_task = id.clone(); + let resource_path2 = resource_path.clone(); + let attach_root2 = attach_root.clone(); + let wxchat_base2 = wxchat_base.clone(); + let output_path2 = output_path.clone(); + + let report: Value = tokio::task::spawn_blocking(move || -> Result { + let resolved = resolver::resolve_blocking(&id_for_task, &resource_path2, &attach_root2)?; + + let dat_bytes = std::fs::read(&resolved.dat_path) + .with_context(|| format!("读取 .dat 失败:{}", resolved.dat_path.display()))?; + + // V2 image key — 平台相关。`ImageKeyMaterial` 同时给 aes_key + xor_key。 + // xor_key 不能硬编码 0x88:实测 macOS 真实账号上是 `uin & 0xff` 派生的(0xa2 等), + // 所以这里桥接时必须把 provider 的 xor_key 透传给 V2KeyMaterial。 + // 缺 key 时让 decoder 自己抛带诊断的错。 + let provider = image_key::default_provider(); + let key_material = if let Some(p) = provider.as_ref() { + // 从 wxchat_base 末段拿 wxid + let wxid = wxchat_base2.file_name() + .and_then(|s| s.to_str()) + .unwrap_or_default() + .to_string(); + if wxid.is_empty() { + None + } else { + match p.get_key(&wxid) { + Ok(km) => Some(km), + Err(e) => { + eprintln!("[extract] image key 提取失败 (wxid={}): {} — V2 文件将无法解码", wxid, e); + None + } + } + } + } else { + None + }; + let v2_key = match key_material.as_ref() { + Some(km) => V2KeyMaterial { aes_key: Some(&km.aes_key), xor_key: km.xor_key }, + None => V2KeyMaterial::default(), + }; + + let decoded = decoder::dispatch(&dat_bytes, v2_key)?; + + // 写盘 + std::fs::write(&output_path2, &decoded.data) + .with_context(|| format!("写出文件失败:{}", output_path2.display()))?; + + // 注意:不要在这里塞 `ok: true`。dispatch 
会用 Response::ok(v) 包一层, + // Response 的 `data: Value` 字段是 #[serde(flatten)] 写出的,本 payload + // 的 `ok` 会和 Response 自带的 `ok` 在线上拼成两个同名 key,CLI 反序列化时 + // serde_json 直接报 "duplicate field",业务请求看上去像 daemon 解析失败。 + Ok(json!({ + "kind": id_for_task.kind.as_str(), + "md5": resolved.md5, + "dat_path": resolved.dat_path.display().to_string(), + "dat_size": resolved.size, + "output": output_path2.display().to_string(), + "output_size": decoded.data.len(), + "format": decoded.format, + "decoder": decoded.decoder, + })) + }).await??; + + Ok(report) +} + +/// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。 +/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。 +fn parse_attachment_kinds( + kinds: Option<&[String]>, +) -> Result> { + use crate::attachment::AttachmentKind; + let raw = kinds.unwrap_or(&[]); + if raw.is_empty() { + return Ok(vec![(AttachmentKind::Image, 3)]); + } + let mut out: Vec<(AttachmentKind, i64)> = Vec::with_capacity(raw.len()); + let mut seen = HashSet::<&'static str>::new(); + for k in raw { + let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() { + "image" | "img" => (AttachmentKind::Image, 3), + "voice" | "audio" | "video" | "file" => { + anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通") + } + other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other), + }; + if seen.insert(kind.as_str()) { + out.push((kind, t)); + } + } + Ok(out) +} + +#[cfg(test)] +mod biz_tests { + use super::*; + + #[test] + fn extract_cdata_normal() { + let xml = "<![CDATA[TencentResearch]]>"; + assert_eq!(extract_cdata(xml, "title"), Some("TencentResearch".into())); + } + + #[test] + fn extract_cdata_empty() { + let xml = ""; + assert_eq!(extract_cdata(xml, "cover"), None); + } + + #[test] + fn extract_cdata_url() { + let xml = ""; + let result = extract_cdata(xml, "url"); + assert!(result.is_some()); + let url = result.unwrap(); + assert!(url.starts_with("http://mp.weixin.qq.com")); + assert!(!url.contains("CDATA")); + } + + 
#[test] + fn extract_cdata_no_cdata_wrapper() { + let xml = "1700000000"; + assert_eq!(extract_cdata(xml, "pub_time"), Some("1700000000".into())); + } + + #[test] + fn parse_biz_xml_items_single_article() { + let xml = r#" + <![CDATA[Test Article Title]]> + + + + 1700000000 + "#; + + let items = parse_biz_xml_items(1699999999, "gh_test123", xml); + assert_eq!(items.len(), 1); + assert_eq!(items[0].title, "Test Article Title"); + assert_eq!(items[0].url, "http://mp.weixin.qq.com/s?test=1"); + assert_eq!(items[0].digest, "Test Digest"); + assert_eq!(items[0].pub_time, 1700000000); + assert_eq!(items[0].account_username, "gh_test123"); + } + + #[test] + fn parse_biz_xml_items_skips_no_url() { + let xml = r#" + <![CDATA[Has Title No URL]]> + + 1700000001 + "#; + let items = parse_biz_xml_items(1700000001, "gh_test", xml); + assert_eq!(items.len(), 0); + } + + #[test] + fn parse_biz_xml_items_multi_article() { + let xml = r#" + + <![CDATA[Article 1]]> + + 1700000010 + + + <![CDATA[Article 2]]> + + 1700000020 + + "#; + let items = parse_biz_xml_items(1700000000, "gh_multi", xml); + assert_eq!(items.len(), 2); + assert_eq!(items[0].title, "Article 1"); + assert_eq!(items[1].title, "Article 2"); + } + + #[test] + fn parse_biz_xml_items_pub_time_fallback() { + // When pub_time is missing, should fall back to recv_time + let xml = r#" + <![CDATA[No PubTime]]> + + "#; + let items = parse_biz_xml_items(1700000099, "gh_fallback", xml); + assert_eq!(items.len(), 1); + assert_eq!(items[0].pub_time, 1700000099); // falls back to recv_time + } +} + +#[cfg(test)] +mod group_nickname_tests { + use super::*; + + fn varint(mut value: u64) -> Vec { + let mut out = Vec::new(); + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + out.push(byte); + if value == 0 { + return out; + } + } + } + + fn len_field(field_no: u64, bytes: &[u8]) -> Vec { + let mut out = varint((field_no << 3) | 2); + out.extend(varint(bytes.len() as u64)); + 
out.extend(bytes); + out + } + + fn string_field(field_no: u64, value: &str) -> Vec { + len_field(field_no, value.as_bytes()) + } + + fn member_chunk(username: &str, group_nickname: &str) -> Vec { + let mut member = Vec::new(); + member.extend(string_field(1, username)); + member.extend(string_field(2, group_nickname)); + len_field(1, &member) + } + + #[test] + fn parses_group_nickname_member_chunks() { + let mut ext_buffer = Vec::new(); + ext_buffer.extend(member_chunk("wxid_alice", "Alice In Group")); + ext_buffer.extend(member_chunk("bob_123456", "Bob Card")); + + let nicknames = parse_group_nickname_map(&ext_buffer, None); + + assert_eq!( + nicknames.get("wxid_alice").map(String::as_str), + Some("Alice In Group") + ); + assert_eq!( + nicknames.get("bob_123456").map(String::as_str), + Some("Bob Card") + ); + } + + #[test] + fn target_filter_anchors_member_username_choice() { + let mut member = Vec::new(); + member.extend(string_field(3, "candidate_name")); + member.extend(string_field(4, "wxid_target")); + member.extend(string_field(2, "Target Card")); + let ext_buffer = len_field(1, &member); + let targets = HashSet::from(["wxid_target".to_string()]); + + let nicknames = parse_group_nickname_map(&ext_buffer, Some(&targets)); + + assert_eq!( + nicknames.get("wxid_target").map(String::as_str), + Some("Target Card") + ); + assert!(!nicknames.contains_key("candidate_name")); + } + + #[test] + fn group_top_senders_keeps_duplicate_display_names_separate() { + let sender_counts = HashMap::from([ + ("wxid_alice".to_string(), 7), + ("wxid_bob".to_string(), 3), + ]); + let names = HashMap::from([ + ("wxid_alice".to_string(), "Alice Contact".to_string()), + ("wxid_bob".to_string(), "Bob Contact".to_string()), + ]); + let group_nicknames = HashMap::from([ + ("wxid_alice".to_string(), "同名".to_string()), + ("wxid_bob".to_string(), "同名".to_string()), + ]); + + let top = group_top_senders(&sender_counts, &names, &group_nicknames, 10); + + assert_eq!(top.len(), 2); + 
assert_eq!(top[0]["sender"].as_str(), Some("同名")); + assert_eq!(top[0]["count"].as_i64(), Some(7)); + assert_eq!(top[1]["sender"].as_str(), Some("同名")); + assert_eq!(top[1]["count"].as_i64(), Some(3)); + } +} + #[cfg(test)] mod sns_tests { use super::*; @@ -2126,6 +3913,96 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_favorite_url_reads_link_tag() { + let xml = concat!( + "", + "5", + "", + "" + ); + assert_eq!( + extract_favorite_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=foo&mid=1") + ); + } + + #[test] + fn extract_favorite_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://favorites/item/1", + "" + ); + assert_eq!(extract_favorite_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry 
should be an object") } diff --git a/src/daemon/server.rs b/src/daemon/server.rs index 52472b7..e640fb1 100644 --- a/src/daemon/server.rs +++ b/src/daemon/server.rs @@ -224,6 +224,27 @@ async fn dispatch( Err(e) => Response::err(e.to_string()), } } + ReloadConfig => { + Response::ok(serde_json::json!({ "reloading": true })) + } + BizArticles { limit, account, since, until, unread } => { + match query::q_biz_articles(db, &names_arc, limit, account, since, until, unread).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } + Attachments { chat, kinds, limit, offset, since, until } => { + match query::q_attachments(db, &names_arc, &chat, kinds, limit, offset, since, until).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } + Extract { attachment_id, output, overwrite } => { + match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await { + Ok(v) => Response::ok(v), + Err(e) => Response::err(e.to_string()), + } + } } } diff --git a/src/ipc.rs b/src/ipc.rs index 873e2d4..a4615eb 100644 --- a/src/ipc.rs +++ b/src/ipc.rs @@ -102,6 +102,21 @@ pub enum Request { #[serde(skip_serializing_if = "Option::is_none")] user: Option, }, + /// 查询公众号文章推送(biz_message_0.db) + BizArticles { + #[serde(default = "default_limit_50")] + limit: usize, + /// 公众号名称过滤(模糊匹配 display name,None = 全部) + #[serde(skip_serializing_if = "Option::is_none")] + account: Option, + #[serde(skip_serializing_if = "Option::is_none")] + since: Option, + #[serde(skip_serializing_if = "Option::is_none")] + until: Option, + /// 只看有未读消息的公众号,每个公众号取最新 1 篇 + #[serde(default)] + unread: bool, + }, /// 朋友圈全文搜索(匹配 contentDesc) SnsSearch { keyword: String, @@ -114,6 +129,34 @@ pub enum Request { #[serde(skip_serializing_if = "Option::is_none")] user: Option, }, + /// 重新加载配置和密钥(init --force 后 daemon 不会自动重读) + ReloadConfig, + /// 列出某个会话里的图片附件 + /// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体 + Attachments { + chat: String, + 
/// 类型过滤:当前仅支持 image + #[serde(default, skip_serializing_if = "Option::is_none")] + kinds: Option>, + #[serde(default = "default_limit_50")] + limit: usize, + #[serde(default)] + offset: usize, + #[serde(skip_serializing_if = "Option::is_none")] + since: Option, + #[serde(skip_serializing_if = "Option::is_none")] + until: Option, + }, + /// 提取(解密)单个附件的本体到指定路径 + Extract { + /// `Attachments` 返回的不透明 ID + attachment_id: String, + /// 写入的绝对路径(daemon 直接写盘,不经 socket 传 binary) + output: String, + /// 已存在时是否覆盖 + #[serde(default)] + overwrite: bool, + }, } diff --git a/src/main.rs b/src/main.rs index 6c3f9a2..e6385fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod crypto; mod scanner; mod daemon; mod cli; +mod attachment; fn main() { if std::env::var("WX_DAEMON_MODE").is_ok() { diff --git a/src/scanner/linux.rs b/src/scanner/linux.rs index ba6f97b..d6f4ee9 100644 --- a/src/scanner/linux.rs +++ b/src/scanner/linux.rs @@ -3,7 +3,7 @@ /// 通过 /proc//maps 枚举内存区域, /// 通过 /proc//mem 读取内存内容, /// 搜索 x'<64hex><32hex>' 格式的 SQLCipher 密钥 -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use std::io::{Read, Seek, SeekFrom}; use std::path::Path; diff --git a/src/scanner/windows.rs b/src/scanner/windows.rs index a6660cb..391ba33 100644 --- a/src/scanner/windows.rs +++ b/src/scanner/windows.rs @@ -5,19 +5,19 @@ /// - OpenProcess: 获取进程句柄(需要 PROCESS_VM_READ | PROCESS_QUERY_INFORMATION) /// - VirtualQueryEx: 枚举内存区域 /// - ReadProcessMemory: 读取内存内容 -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use std::path::Path; use windows::Win32::Foundation::{CloseHandle, HANDLE}; +use windows::Win32::System::Diagnostics::Debug::ReadProcessMemory; use windows::Win32::System::Diagnostics::ToolHelp::{ CreateToolhelp32Snapshot, Process32First, Process32Next, PROCESSENTRY32, TH32CS_SNAPPROCESS, }; use windows::Win32::System::Memory::{ - VirtualQueryEx, MEMORY_BASIC_INFORMATION, MEM_COMMIT, PAGE_READWRITE, + VirtualQueryEx, MEMORY_BASIC_INFORMATION, 
MEM_COMMIT, PAGE_EXECUTE_READWRITE, + PAGE_EXECUTE_WRITECOPY, PAGE_GUARD, PAGE_NOCACHE, PAGE_READWRITE, PAGE_WRITECOMBINE, + PAGE_WRITECOPY, }; -use windows::Win32::System::Threading::{ - OpenProcess, PROCESS_QUERY_INFORMATION, PROCESS_VM_READ, -}; -use windows::Win32::System::Diagnostics::Debug::ReadProcessMemory; +use windows::Win32::System::Threading::{OpenProcess, PROCESS_QUERY_INFORMATION, PROCESS_VM_READ}; use super::{collect_db_salts, KeyEntry}; @@ -27,9 +27,7 @@ const CHUNK_SIZE: usize = 2 * 1024 * 1024; /// 查找 Weixin.exe 进程 PID fn find_wechat_pid() -> Option { // SAFETY: CreateToolhelp32Snapshot 标准 Windows API - let snap = unsafe { - CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? - }; + let snap = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0).ok()? }; let mut entry = PROCESSENTRY32 { dwSize: std::mem::size_of::() as u32, @@ -43,8 +41,8 @@ fn find_wechat_pid() -> Option { return None; } loop { - let name = std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8) - .to_string_lossy(); + let name = + std::ffi::CStr::from_ptr(entry.szExeFile.as_ptr() as *const i8).to_string_lossy(); if name.eq_ignore_ascii_case("Weixin.exe") { let pid = entry.th32ProcessID; let _ = CloseHandle(snap); @@ -60,8 +58,7 @@ fn find_wechat_pid() -> Option { } pub fn scan_keys(db_dir: &Path) -> Result> { - let pid = find_wechat_pid() - .context("找不到 Weixin.exe 进程,请确认微信正在运行")?; + let pid = find_wechat_pid().context("找不到 Weixin.exe 进程,请确认微信正在运行")?; eprintln!("WeChat PID: {}", pid); // SAFETY: OpenProcess 请求读取权限 @@ -78,7 +75,9 @@ pub fn scan_keys(db_dir: &Path) -> Result> { eprintln!("找到 {} 个候选密钥", raw_keys.len()); // SAFETY: 关闭进程句柄 - unsafe { let _ = CloseHandle(process); } + unsafe { + let _ = CloseHandle(process); + } let mut entries = Vec::new(); for (key_hex, salt_hex) in &raw_keys { @@ -119,8 +118,9 @@ fn scan_memory(process: HANDLE) -> Result> { let region_size = mbi.RegionSize; let base = mbi.BaseAddress as usize; - // 只扫描已提交的可读写页面 - if mbi.State == 
MEM_COMMIT && mbi.Protect == PAGE_READWRITE { + // 只扫描已提交的可读可写页面。Windows 的保护位可能带 modifier bits, + // 也可能是 WRITECOPY / EXECUTE_READWRITE 这种同样可读可写的保护类型。 + if mbi.State == MEM_COMMIT && is_writable_readable_page(mbi.Protect.0) { scan_region(process, base, region_size, &mut results); } @@ -133,12 +133,18 @@ fn scan_memory(process: HANDLE) -> Result> { Ok(results) } -fn scan_region( - process: HANDLE, - base: usize, - size: usize, - results: &mut Vec<(String, String)>, -) { +fn is_writable_readable_page(protect: u32) -> bool { + let base = protect & !(PAGE_GUARD.0 | PAGE_NOCACHE.0 | PAGE_WRITECOMBINE.0); + matches!( + base, + x if x == PAGE_READWRITE.0 + || x == PAGE_WRITECOPY.0 + || x == PAGE_EXECUTE_READWRITE.0 + || x == PAGE_EXECUTE_WRITECOPY.0 + ) +} + +fn scan_region(process: HANDLE, base: usize, size: usize, results: &mut Vec<(String, String)>) { let overlap = HEX_PATTERN_LEN + 3; let mut offset = 0usize; @@ -159,7 +165,8 @@ fn scan_region( buf.as_mut_ptr() as *mut _, chunk_size, Some(&mut bytes_read), - ).is_ok() + ) + .is_ok() }; if ok && bytes_read > 0 { @@ -203,10 +210,8 @@ fn search_pattern(buf: &[u8], results: &mut Vec<(String, String)>) { i += 1; continue; } - let key_hex = String::from_utf8_lossy(&buf[hex_start..hex_start + 64]) - .to_lowercase(); - let salt_hex = String::from_utf8_lossy(&buf[hex_start + 64..hex_start + 96]) - .to_lowercase(); + let key_hex = String::from_utf8_lossy(&buf[hex_start..hex_start + 64]).to_lowercase(); + let salt_hex = String::from_utf8_lossy(&buf[hex_start + 64..hex_start + 96]).to_lowercase(); let is_dup = results.iter().any(|(k, s)| k == &key_hex && s == &salt_hex); if !is_dup { results.push((key_hex, salt_hex));