mirror of https://github.com/jackwener/wx-cli.git
review: tighten attachment extraction scope
parent
7feacc6371
commit
b63589b368
|
|
@ -211,14 +211,14 @@ wx biz-articles --json | jq '.[].url' # 下游消费 URL
|
||||||
|
|
||||||
每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。
|
每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp` / `recv_time_str`。多图文推送会展开成多行。
|
||||||
|
|
||||||
### 附件提取(图片 / 视频 / 文件 / 语音)
|
### 附件提取(图片)
|
||||||
|
|
||||||
聊天里的附件本体以 `.dat` 文件的形式存放在 `xwechat_files/<wxid>/msg/attach/...` 下,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。
|
聊天里的附件本体以 `.dat` 文件的形式存放在 `xwechat_files/<wxid>/msg/attach/...` 下,需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 解码才能拿到原图。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1) 列出会话里的附件,先拿到不透明的 attachment_id(默认 image,可多选)
|
# 1) 列出会话里的图片附件,先拿到不透明的 attachment_id
|
||||||
wx attachments "张三"
|
wx attachments "张三"
|
||||||
wx attachments "AI群" --kind image --kind video -n 100
|
wx attachments "AI群" --kind image -n 100
|
||||||
wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
||||||
|
|
||||||
# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等)
|
# 2) 把单个 attachment_id 解密写出去(当前仅支持图片,扩展名建议保留 .jpg / .png 等)
|
||||||
|
|
@ -226,7 +226,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。
|
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留为 `attachments`,是为了后续扩展到其他附件类型时不破坏 CLI 兼容性。
|
||||||
|
|
||||||
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
10
SKILL.md
10
SKILL.md
|
|
@ -242,14 +242,14 @@ wx biz-articles --since 2026-05-10 --json | jq '.[].url'
|
||||||
|
|
||||||
每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。
|
每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。
|
||||||
|
|
||||||
### 附件提取(图片 / 视频 / 文件 / 语音)
|
### 附件提取(图片)
|
||||||
|
|
||||||
聊天里的图片/视频/文件本体在 `xwechat_files/<wxid>/msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走:
|
聊天里的图片本体在 `xwechat_files/<wxid>/msg/attach/...` 下加密存储(`.dat`),需要按消息所在 `message_resource.db` 的 md5 + 平台相关 image key 才能解码。两步走:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1) 先列出附件,拿到不透明的 attachment_id(默认 image,可多选)
|
# 1) 先列出图片附件,拿到不透明的 attachment_id
|
||||||
wx attachments "张三"
|
wx attachments "张三"
|
||||||
wx attachments "AI群" --kind image --kind video -n 100
|
wx attachments "AI群" --kind image -n 100
|
||||||
wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
|
||||||
|
|
||||||
# 2) 用 attachment_id 把单个资源解密写到指定路径
|
# 2) 用 attachment_id 把单个资源解密写到指定路径
|
||||||
|
|
@ -257,7 +257,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind`(image/voice/video/file)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。
|
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留为 `attachments`,是为了后续扩展到其他附件类型时不破坏 CLI 兼容性。
|
||||||
|
|
||||||
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ pub fn lookup_md5_blocking(
|
||||||
resource_db_path: &Path,
|
resource_db_path: &Path,
|
||||||
chat: &str,
|
chat: &str,
|
||||||
local_id: i64,
|
local_id: i64,
|
||||||
|
create_time: i64,
|
||||||
msg_local_type_lo32: i64,
|
msg_local_type_lo32: i64,
|
||||||
) -> Result<Option<AttachmentMetadata>> {
|
) -> Result<Option<AttachmentMetadata>> {
|
||||||
let conn = Connection::open_with_flags(
|
let conn = Connection::open_with_flags(
|
||||||
|
|
@ -68,9 +69,25 @@ pub fn lookup_md5_blocking(
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
|
||||||
// 2) MessageResourceInfo: 同 chat 内 local_id 也会复用,按 create_time DESC 取最新
|
// 2) MessageResourceInfo:
|
||||||
|
// 同 chat 内 local_id 会复用,所以先用 create_time 精确命中;
|
||||||
|
// 若资源库里的时间戳跟 message_N.db 不完全对齐,再 fallback 到“同 local_id/type 取最新”
|
||||||
// message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型
|
// message_local_type 高 32 bit 是版本/会话 flag,低 32 bit 才是真实类型
|
||||||
let packed: Option<Vec<u8>> = conn
|
let packed_exact: Option<Vec<u8>> = conn
|
||||||
|
.query_row(
|
||||||
|
"SELECT packed_info FROM MessageResourceInfo
|
||||||
|
WHERE chat_id = ?1
|
||||||
|
AND message_local_id = ?2
|
||||||
|
AND (message_local_type = ?3 OR message_local_type % 4294967296 = ?3)
|
||||||
|
AND message_create_time = ?4
|
||||||
|
ORDER BY rowid DESC
|
||||||
|
LIMIT 1",
|
||||||
|
rusqlite::params![chat_id, local_id, msg_local_type_lo32, create_time],
|
||||||
|
|row| row.get(0),
|
||||||
|
)
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
let packed: Option<Vec<u8>> = packed_exact.or_else(|| conn
|
||||||
.query_row(
|
.query_row(
|
||||||
"SELECT packed_info FROM MessageResourceInfo
|
"SELECT packed_info FROM MessageResourceInfo
|
||||||
WHERE chat_id = ?1
|
WHERE chat_id = ?1
|
||||||
|
|
@ -81,7 +98,7 @@ pub fn lookup_md5_blocking(
|
||||||
rusqlite::params![chat_id, local_id, msg_local_type_lo32],
|
rusqlite::params![chat_id, local_id, msg_local_type_lo32],
|
||||||
|row| row.get(0),
|
|row| row.get(0),
|
||||||
)
|
)
|
||||||
.ok();
|
.ok());
|
||||||
|
|
||||||
let Some(blob) = packed else {
|
let Some(blob) = packed else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
|
|
@ -235,7 +252,13 @@ pub fn resolve_blocking(
|
||||||
super::AttachmentKind::File => 49,
|
super::AttachmentKind::File => 49,
|
||||||
};
|
};
|
||||||
|
|
||||||
let meta = lookup_md5_blocking(resource_db_path, &id.chat, id.local_id, lo32_type)?
|
let meta = lookup_md5_blocking(
|
||||||
|
resource_db_path,
|
||||||
|
&id.chat,
|
||||||
|
id.local_id,
|
||||||
|
id.create_time,
|
||||||
|
lo32_type,
|
||||||
|
)?
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
anyhow!(
|
anyhow!(
|
||||||
"message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)",
|
"message_resource.db 中找不到 chat={} local_id={} type={} 的资源行(可能是非附件消息或资源库未同步)",
|
||||||
|
|
@ -306,6 +329,69 @@ mod tests {
|
||||||
assert!(extract_md5_from_packed_info(&blob).is_none());
|
assert!(extract_md5_from_packed_info(&blob).is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn lookup_md5_prefers_exact_create_time_over_latest_reuse() {
|
||||||
|
let dir = tempdir_for_test();
|
||||||
|
let db_path = dir.join("message_resource.db");
|
||||||
|
let conn = Connection::open(&db_path).unwrap();
|
||||||
|
conn.execute(
|
||||||
|
"CREATE TABLE ChatName2Id (user_name TEXT)",
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO ChatName2Id (rowid, user_name) VALUES (1, 'room@chatroom')",
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
conn.execute(
|
||||||
|
"CREATE TABLE MessageResourceInfo (
|
||||||
|
chat_id INTEGER,
|
||||||
|
message_local_id INTEGER,
|
||||||
|
message_local_type INTEGER,
|
||||||
|
message_create_time INTEGER,
|
||||||
|
packed_info BLOB
|
||||||
|
)",
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let old_blob = {
|
||||||
|
let mut blob = vec![0x12, 0x22, 0x0A, 0x20];
|
||||||
|
blob.extend_from_slice(b"11111111111111111111111111111111");
|
||||||
|
blob
|
||||||
|
};
|
||||||
|
let new_blob = {
|
||||||
|
let mut blob = vec![0x12, 0x22, 0x0A, 0x20];
|
||||||
|
blob.extend_from_slice(b"22222222222222222222222222222222");
|
||||||
|
blob
|
||||||
|
};
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO MessageResourceInfo
|
||||||
|
(chat_id, message_local_id, message_local_type, message_create_time, packed_info)
|
||||||
|
VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||||
|
rusqlite::params![1i64, 7i64, 3i64, 1000i64, old_blob],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO MessageResourceInfo
|
||||||
|
(chat_id, message_local_id, message_local_type, message_create_time, packed_info)
|
||||||
|
VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||||
|
rusqlite::params![1i64, 7i64, 3i64, 2000i64, new_blob],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let old = lookup_md5_blocking(&db_path, "room@chatroom", 7, 1000, 3)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
let new = lookup_md5_blocking(&db_path, "room@chatroom", 7, 2000, 3)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(old.md5, "11111111111111111111111111111111");
|
||||||
|
assert_eq!(new.md5, "22222222222222222222222222222222");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn three_month_candidates_includes_prev_curr_next() {
|
fn three_month_candidates_includes_prev_curr_next() {
|
||||||
// 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09
|
// 2025-08-15 (mid-month) → 2025-07, 2025-08, 2025-09
|
||||||
|
|
|
||||||
|
|
@ -264,13 +264,13 @@ enum Commands {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
json: bool,
|
json: bool,
|
||||||
},
|
},
|
||||||
/// 列出某会话的附件(图片 / 视频 / 文件 / 语音),返回不透明 attachment_id
|
/// 列出某会话的图片附件,返回不透明 attachment_id
|
||||||
Attachments {
|
Attachments {
|
||||||
/// 会话名称(联系人显示名 / wxid / @chatroom username 都可以)
|
/// 会话名称(联系人显示名 / wxid / @chatroom username 都可以)
|
||||||
chat: String,
|
chat: String,
|
||||||
/// 类型(多选,默认 image)。可选:image / voice / video / file
|
/// 类型(当前仅支持 image)
|
||||||
#[arg(long = "kind", value_name = "KIND",
|
#[arg(long = "kind", value_name = "KIND",
|
||||||
value_parser = ["image", "voice", "video", "file", "audio", "img"])]
|
value_parser = ["image", "img"])]
|
||||||
kinds: Vec<String>,
|
kinds: Vec<String>,
|
||||||
/// 显示数量
|
/// 显示数量
|
||||||
#[arg(short = 'n', long, default_value = "50")]
|
#[arg(short = 'n', long, default_value = "50")]
|
||||||
|
|
|
||||||
|
|
@ -3285,7 +3285,7 @@ pub async fn q_biz_articles(
|
||||||
Ok(json!({ "count": results.len(), "articles": results }))
|
Ok(json!({ "count": results.len(), "articles": results }))
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── 附件(图片 / 视频 / 文件 / 语音)查询与提取 ─────────────────────────────────
|
// ─── 附件(当前先支持图片)查询与提取 ─────────────────────────────────
|
||||||
//
|
//
|
||||||
// 设计要点:
|
// 设计要点:
|
||||||
// - `q_attachments` 只走 `Msg_<chat_md5>` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤
|
// - `q_attachments` 只走 `Msg_<chat_md5>` 表,按 `local_type & 0xFFFFFFFF IN (...)` 过滤
|
||||||
|
|
@ -3296,7 +3296,7 @@ pub async fn q_biz_articles(
|
||||||
// - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。
|
// - V2 image AES key 通过 `image_key::default_provider()` 拿(codex 后续填实现)。
|
||||||
// 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。
|
// 缺 key 时 V2 解码会返回明确错误,CLI 直接抛给用户。
|
||||||
|
|
||||||
/// 列出某会话内的附件消息(默认 image,可多选)。返回每条的 `attachment_id`,
|
/// 列出某会话内的附件消息(当前仅 image)。返回每条的 `attachment_id`,
|
||||||
/// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。
|
/// 后续传给 `Extract` 才真正读 message_resource.db + 解密 .dat。
|
||||||
pub async fn q_attachments(
|
pub async fn q_attachments(
|
||||||
db: &DbCache,
|
db: &DbCache,
|
||||||
|
|
@ -3319,7 +3319,7 @@ pub async fn q_attachments(
|
||||||
// 解析 kinds → 低 32 bit local_type 集合
|
// 解析 kinds → 低 32 bit local_type 集合
|
||||||
let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?;
|
let kind_filters: Vec<(AttachmentKind, i64)> = parse_attachment_kinds(kinds.as_deref())?;
|
||||||
if kind_filters.is_empty() {
|
if kind_filters.is_empty() {
|
||||||
anyhow::bail!("kinds 为空 — 至少传一种 image/video/file/voice");
|
anyhow::bail!("kinds 为空 — 当前至少传一种 image");
|
||||||
}
|
}
|
||||||
let lo32_types: Vec<i64> = kind_filters.iter().map(|(_, t)| *t).collect();
|
let lo32_types: Vec<i64> = kind_filters.iter().map(|(_, t)| *t).collect();
|
||||||
// local_type → AttachmentKind 反查(mask 完后定 kind)
|
// local_type → AttachmentKind 反查(mask 完后定 kind)
|
||||||
|
|
@ -3569,7 +3569,7 @@ pub async fn q_extract(
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。
|
/// 解析 `kinds` 参数到 `(AttachmentKind, lo32_local_type)` 列表。
|
||||||
/// 缺省(None / 空)按 image 处理。
|
/// 当前只支持 image;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||||
fn parse_attachment_kinds(
|
fn parse_attachment_kinds(
|
||||||
kinds: Option<&[String]>,
|
kinds: Option<&[String]>,
|
||||||
) -> Result<Vec<(crate::attachment::AttachmentKind, i64)>> {
|
) -> Result<Vec<(crate::attachment::AttachmentKind, i64)>> {
|
||||||
|
|
@ -3583,10 +3583,10 @@ fn parse_attachment_kinds(
|
||||||
for k in raw {
|
for k in raw {
|
||||||
let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() {
|
let (kind, t): (AttachmentKind, i64) = match k.to_ascii_lowercase().as_str() {
|
||||||
"image" | "img" => (AttachmentKind::Image, 3),
|
"image" | "img" => (AttachmentKind::Image, 3),
|
||||||
"voice" | "audio" => (AttachmentKind::Voice, 34),
|
"voice" | "audio" | "video" | "file" => {
|
||||||
"video" => (AttachmentKind::Video, 43),
|
anyhow::bail!("当前只支持 image 提取;video/file/voice 的资源路径与 decoder 还没接通")
|
||||||
"file" => (AttachmentKind::File, 49),
|
}
|
||||||
other => anyhow::bail!("未知附件类型:{}(支持 image/voice/video/file)", other),
|
other => anyhow::bail!("未知附件类型:{}(当前仅支持 image)", other),
|
||||||
};
|
};
|
||||||
if seen.insert(kind.as_str()) {
|
if seen.insert(kind.as_str()) {
|
||||||
out.push((kind, t));
|
out.push((kind, t));
|
||||||
|
|
|
||||||
|
|
@ -131,11 +131,11 @@ pub enum Request {
|
||||||
},
|
},
|
||||||
/// 重新加载配置和密钥(init --force 后 daemon 不会自动重读)
|
/// 重新加载配置和密钥(init --force 后 daemon 不会自动重读)
|
||||||
ReloadConfig,
|
ReloadConfig,
|
||||||
/// 列出某个会话里的附件(图片 / 视频 / 文件 / 语音)
|
/// 列出某个会话里的图片附件
|
||||||
/// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体
|
/// 输出每条带 `attachment_id`(不透明 base64url 句柄),传给 `Extract` 时取回本体
|
||||||
Attachments {
|
Attachments {
|
||||||
chat: String,
|
chat: String,
|
||||||
/// 类型过滤:image / video / file / voice,多选;缺省返回 image
|
/// 类型过滤:当前仅支持 image
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
kinds: Option<Vec<String>>,
|
kinds: Option<Vec<String>>,
|
||||||
#[serde(default = "default_limit_50")]
|
#[serde(default = "default_limit_50")]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue