feat(appmsg): 支持引用消息原文解析 (#28)

* feat(appmsg): parse quoted message content

* docs(appmsg): document quote message output
pull/18/head
Haoqing Wang 2026-05-14 14:42:03 +08:00 committed by GitHub
parent 35a8f0e94b
commit b0431352ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 317 additions and 28 deletions

View File

@ -158,6 +158,15 @@ wx search "会议" --in "工作群" --since 2026-01-01
群聊里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。 群聊里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
引用消息会在 `history` / `search` / `new-messages` 输出中显示当前回复和被引用原文:
```text
[引用] 当前回复
↳ 发送者: 被引用内容
```
`--type link` / `--type file` 会包含微信 appmsg 里的链接、文件、合并聊天记录和引用消息等变体;搜索时也会匹配解压后可见的引用原文。
### 朋友圈（SNS） ### 朋友圈（SNS）
三个独立命令,区分"通知"和"帖子" 三个独立命令,区分"通知"和"帖子"

View File

@ -140,6 +140,15 @@ wx search "会议" --in "工作群" --since 2026-01-01
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。 群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
引用消息（appmsg `type=57`）在 `history` / `search` / `new-messages` 输出里会展开为两行：第一行是当前回复，第二行以 `↳` 开头显示被引用原文，例如:
```text
[引用] 当前回复
↳ 发送者: 被引用内容
```
`--type link` / `--type file` 会覆盖微信 appmsg 的链接、文件、合并聊天记录和引用消息等变体;`search --type link` 也会匹配解压并格式化后的引用原文。
### 联系人与群组 ### 联系人与群组
```bash ```bash

View File

@ -500,19 +500,18 @@ fn query_messages(
let conn = Connection::open(db_path)?; let conn = Connection::open(db_path)?;
let id2u = load_id2u(&conn); let id2u = load_id2u(&conn);
let mut clauses = Vec::new(); let mut clauses: Vec<String> = Vec::new();
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new(); let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
if let Some(s) = since { if let Some(s) = since {
clauses.push("create_time >= ?"); clauses.push("create_time >= ?".into());
params.push(Box::new(s)); params.push(Box::new(s));
} }
if let Some(u) = until { if let Some(u) = until {
clauses.push("create_time <= ?"); clauses.push("create_time <= ?".into());
params.push(Box::new(u)); params.push(Box::new(u));
} }
if let Some(t) = msg_type { if let Some(t) = msg_type {
clauses.push("local_type = ?"); push_msg_type_filter(&mut clauses, &mut params, t);
params.push(Box::new(t));
} }
let where_clause = if clauses.is_empty() { let where_clause = if clauses.is_empty() {
String::new() String::new()
@ -579,8 +578,14 @@ fn search_in_table(
let id2u = load_id2u(conn); let id2u = load_id2u(conn);
// 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符 // 转义 LIKE 通配符,使用 '\' 作为 ESCAPE 字符
let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"); let escaped_kw = keyword.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_");
let mut clauses = vec!["message_content LIKE ? ESCAPE '\\'".to_string()]; let search_decoded_content = msg_type == Some(49);
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(format!("%{}%", escaped_kw))]; let keyword_lower = keyword.to_lowercase();
let mut clauses: Vec<String> = Vec::new();
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
if !search_decoded_content {
clauses.push("message_content LIKE ? ESCAPE '\\'".to_string());
params.push(Box::new(format!("%{}%", escaped_kw)));
}
if let Some(s) = since { if let Some(s) = since {
clauses.push("create_time >= ?".into()); clauses.push("create_time >= ?".into());
params.push(Box::new(s)); params.push(Box::new(s));
@ -590,17 +595,23 @@ fn search_in_table(
params.push(Box::new(u)); params.push(Box::new(u));
} }
if let Some(t) = msg_type { if let Some(t) = msg_type {
clauses.push("local_type = ?".into()); push_msg_type_filter(&mut clauses, &mut params, t);
params.push(Box::new(t));
} }
let where_clause = format!("WHERE {}", clauses.join(" AND ")); let where_clause = if clauses.is_empty() {
String::new()
} else {
format!("WHERE {}", clauses.join(" AND "))
};
let limit_clause = if search_decoded_content { "" } else { " LIMIT ?" };
let sql = format!( let sql = format!(
"SELECT local_id, local_type, create_time, real_sender_id, "SELECT local_id, local_type, create_time, real_sender_id,
message_content, WCDB_CT_message_content message_content, WCDB_CT_message_content
FROM [{}] {} ORDER BY create_time DESC LIMIT ?", FROM [{}] {} ORDER BY create_time DESC{}",
table, where_clause table, where_clause, limit_clause
); );
params.push(Box::new(limit as i64)); if !search_decoded_content {
params.push(Box::new(limit as i64));
}
let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();
let mut stmt = conn.prepare(&sql)?; let mut stmt = conn.prepare(&sql)?;
@ -622,6 +633,9 @@ fn search_in_table(
let content = decompress_message(&content_bytes, ct); let content = decompress_message(&content_bytes, ct);
let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames);
let text = fmt_content(local_id, local_type, &content, is_group); let text = fmt_content(local_id, local_type, &content, is_group);
if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) {
continue;
}
result.push(json!({ result.push(json!({
"timestamp": ts, "timestamp": ts,
@ -631,10 +645,32 @@ fn search_in_table(
"content": text, "content": text,
"type": fmt_type(local_type), "type": fmt_type(local_type),
})); }));
if search_decoded_content && result.len() >= limit {
break;
}
} }
Ok(result) Ok(result)
} }
fn push_msg_type_filter(
clauses: &mut Vec<String>,
params: &mut Vec<Box<dyn rusqlite::types::ToSql>>,
msg_type: i64,
) {
clauses.push("(local_type & 4294967295) = ?".into());
params.push(Box::new(msg_type));
}
fn matches_search_text(raw: &str, formatted: &str, keyword: &str, keyword_lower: &str) -> bool {
contains_search_text(raw, keyword, keyword_lower)
|| contains_search_text(formatted, keyword, keyword_lower)
}
/// Tests whether `haystack` contains the search keyword.
///
/// An exact substring match on `keyword` wins immediately. Otherwise, when
/// `keyword_lower` is non-empty, the haystack is lowercased and searched for
/// `keyword_lower`. An empty `keyword_lower` never produces a match through
/// the lowercase path.
fn contains_search_text(haystack: &str, keyword: &str, keyword_lower: &str) -> bool {
    if haystack.contains(keyword) {
        return true;
    }
    if keyword_lower.is_empty() {
        return false;
    }
    haystack.to_lowercase().contains(keyword_lower)
}
fn load_id2u(conn: &Connection) -> HashMap<i64, String> { fn load_id2u(conn: &Connection) -> HashMap<i64, String> {
let mut map = HashMap::new(); let mut map = HashMap::new();
if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") { if let Ok(mut stmt) = conn.prepare("SELECT rowid, user_name FROM Name2Id") {
@ -1163,21 +1199,8 @@ fn parse_appmsg(text: &str) -> Option<String> {
match atype.as_str() { match atype.as_str() {
"6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }), "6" => Some(if !title.is_empty() { format!("[文件] {}", title) } else { "[文件]".into() }),
"57" => { "57" => {
let ref_content = extract_xml_text(text, "content") let ref_content = quote_refermsg_content(text)
.map(|s| { .or_else(|| extract_xml_text(text, "content").and_then(|s| quote_content_text(&s, 40)))
// content 可能是 HTML 转义的 XML被引用的消息是 appmsg 时)
let unescaped = unescape_html(&s);
// 如果解转义后是 XML尝试递归解析
if unescaped.contains("<appmsg") {
if let Some(parsed) = parse_appmsg(&unescaped) {
return parsed;
}
}
let s: String = unescaped.split_whitespace().collect::<Vec<_>>().join(" ");
if s.chars().count() > 40 {
format!("{}...", s.chars().take(40).collect::<String>())
} else { s }
})
.unwrap_or_default(); .unwrap_or_default();
let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() }; let quote = if !title.is_empty() { format!("[引用] {}", title) } else { "[引用]".into() };
if !ref_content.is_empty() { if !ref_content.is_empty() {
@ -1191,6 +1214,56 @@ fn parse_appmsg(text: &str) -> Option<String> {
} }
} }
/// Builds the one-line description of the quoted message found in the
/// `<refermsg>` element of `text`.
///
/// The body is the `<content>` of the `<refermsg>` rendered through
/// `quote_content_text` (limit 80). When that yields nothing, the `<type>`
/// element is mapped to a placeholder label via `quote_refermsg_type_label`.
/// If neither source produces text the function returns `None`.
///
/// A non-empty `<displayname>` is prepended as `"name: body"`; otherwise the
/// body is returned on its own.
fn quote_refermsg_content(text: &str) -> Option<String> {
    let refer = extract_xml_text(text, "refermsg")?;

    let quoted = extract_xml_text(&refer, "content").and_then(|raw| quote_content_text(&raw, 80));
    let body = match quoted {
        Some(rendered) => rendered,
        None => {
            // No usable text: fall back to a label derived from the message type.
            let kind = extract_xml_text(&refer, "type")?;
            quote_refermsg_type_label(&kind)?.to_string()
        }
    };

    let sender = extract_xml_text(&refer, "displayname").filter(|name| !name.is_empty());
    Some(match sender {
        Some(name) => format!("{}: {}", name, body),
        None => body,
    })
}
/// Renders the raw `<content>` of a quoted message as display text.
///
/// The input is first passed through `unescape_html`. If the result contains
/// an `<appmsg` marker and `parse_appmsg` can render it, that rendering is
/// returned as-is (it is not truncated here). Otherwise the unescaped text is
/// whitespace-collapsed and truncated by `collapse_text` with `max_chars`;
/// an empty result is reported as `None`.
fn quote_content_text(raw: &str, max_chars: usize) -> Option<String> {
    let decoded = unescape_html(raw);

    let nested = if decoded.contains("<appmsg") {
        parse_appmsg(&decoded)
    } else {
        None
    };

    nested.or_else(|| {
        let flat = collapse_text(&decoded, max_chars);
        Some(flat).filter(|s| !s.is_empty())
    })
}
/// Maps the `<type>` value of a quoted message to a bracketed placeholder
/// label.
///
/// Only the codes listed in the table below have a label; every other input,
/// including `"1"`, yields `None`. The comparison is an exact string match.
fn quote_refermsg_type_label(t: &str) -> Option<&'static str> {
    const LABELS: [(&str, &str); 5] = [
        ("3", "[图片]"),
        ("34", "[语音]"),
        ("43", "[视频]"),
        ("47", "[表情]"),
        ("49", "[链接/文件]"),
    ];

    LABELS
        .iter()
        .find(|(code, _)| *code == t)
        .map(|(_, label)| *label)
}
/// Collapses every run of whitespace in `text` to a single space, trims the
/// ends, and truncates the result to `max_chars` characters.
///
/// Truncation counts Unicode scalar values, not bytes. When the collapsed
/// text is longer than `max_chars`, the first `max_chars` characters are kept
/// and `"..."` is appended; otherwise the collapsed text is returned
/// unchanged.
fn collapse_text(text: &str, max_chars: usize) -> String {
    let mut flat = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !flat.is_empty() {
            flat.push(' ');
        }
        flat.push_str(word);
    }

    // Byte offset of the first character beyond the limit, if there is one.
    let cut = flat.char_indices().nth(max_chars).map(|(offset, _)| offset);
    match cut {
        Some(offset) => format!("{}...", &flat[..offset]),
        None => flat,
    }
}
fn extract_xml_text(xml: &str, tag: &str) -> Option<String> { fn extract_xml_text(xml: &str, tag: &str) -> Option<String> {
let open = format!("<{}>", tag); let open = format!("<{}>", tag);
let close = format!("</{}>", tag); let close = format!("</{}>", tag);
@ -1223,6 +1296,204 @@ fn unescape_html(s: &str) -> String {
.replace("&apos;", "'") .replace("&apos;", "'")
} }
#[cfg(test)]
mod appmsg_tests {
    //! Tests for quote-message (appmsg `type=57`) parsing and for the
    //! type-filtered query/search paths.

    use super::*;

    /// Compact quote-message payload used by the database-backed tests.
    const QUOTE_XML: &str = r#"<msg><appmsg><title>我也没有用ai啊</title><type>57</type><content /><refermsg><displayname>不再熬夜</displayname><content>昨天用 claude 爬小红书数据来着</content></refermsg></appmsg></msg>"#;

    /// Rendering every test expects for the quote payload.
    const QUOTE_TEXT: &str = "[引用] 我也没有用ai啊\n \u{21b3} 不再熬夜: 昨天用 claude 爬小红书数据来着";

    /// `local_type` of the fixture row: 49 in the low 32 bits, 57 above them.
    const QUOTE_LOCAL_TYPE: i64 = ((57_i64) << 32) | 49_i64;

    /// Creates the `Msg_test` table; `content_column_type` is the declared
    /// SQL type of the `message_content` column (`TEXT` or `BLOB`).
    fn create_msg_table(conn: &Connection, content_column_type: &str) {
        let ddl = format!(
            "CREATE TABLE Msg_test (
                local_id INTEGER,
                local_type INTEGER,
                create_time INTEGER,
                real_sender_id INTEGER,
                message_content {},
                WCDB_CT_message_content INTEGER
            )",
            content_column_type
        );
        conn.execute(&ddl, []).expect("create message table");
    }

    /// Inserts the single fixture row with the given content and
    /// `WCDB_CT_message_content` value.
    fn insert_quote_row(
        conn: &Connection,
        content: impl rusqlite::types::ToSql,
        compression: i64,
    ) -> rusqlite::Result<usize> {
        conn.execute(
            "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
            rusqlite::params![
                1_i64,
                QUOTE_LOCAL_TYPE,
                1775146911_i64,
                0_i64,
                content,
                compression
            ],
        )
    }

    /// Builds a per-process, per-call unique database path in the system
    /// temp directory.
    fn temp_db_path(name: &str) -> std::path::PathBuf {
        let unique = format!(
            "wx-cli-{}-{}-{}.db",
            name,
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("clock before unix epoch")
                .as_nanos()
        );
        std::env::temp_dir().join(unique)
    }

    #[test]
    fn parse_quote_appmsg_reads_refermsg_content() {
        // Pretty-printed variant of the payload: same fields as QUOTE_XML,
        // with attributes on <appmsg> and whitespace between elements.
        let xml = r#"
            <msg>
              <appmsg appid="" sdkver="0">
                <title>我也没有用ai啊</title>
                <type>57</type>
                <content />
                <refermsg>
                  <type>1</type>
                  <displayname>不再熬夜</displayname>
                  <content>昨天用 claude 爬小红书数据来着</content>
                </refermsg>
              </appmsg>
            </msg>
        "#;

        assert_eq!(parse_appmsg(xml).as_deref(), Some(QUOTE_TEXT));
    }

    #[test]
    fn query_messages_filters_appmsg_by_base_type() {
        let path = temp_db_path("query_messages_filters_appmsg_by_base_type");
        {
            // Scoped so the connection is closed before query_messages
            // opens the file itself.
            let conn = Connection::open(&path).expect("open temp db");
            create_msg_table(&conn, "TEXT");
            insert_quote_row(&conn, QUOTE_XML, 0_i64).expect("insert quote message");
        }

        let outcome = query_messages(
            &path,
            "Msg_test",
            "wxid_r605h38n08mv22",
            false,
            &HashMap::new(),
            &HashMap::new(),
            None,
            None,
            Some(49),
            10,
            0,
        );
        // Clean up before unwrapping so a failing query does not leak the file.
        let _ = std::fs::remove_file(&path);
        let rows = outcome.expect("query messages");

        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["content"].as_str(), Some(QUOTE_TEXT));
    }

    #[test]
    fn search_in_table_filters_appmsg_by_base_type() {
        let conn = Connection::open_in_memory().expect("open in-memory db");
        create_msg_table(&conn, "TEXT");
        insert_quote_row(&conn, QUOTE_XML, 0_i64).expect("insert quote message");

        let rows = search_in_table(
            &conn,
            "Msg_test",
            "wxid_r605h38n08mv22",
            false,
            &HashMap::new(),
            &HashMap::new(),
            "claude",
            None,
            None,
            Some(49),
            10,
        )
        .expect("search messages");

        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["content"].as_str(), Some(QUOTE_TEXT));
    }

    #[test]
    fn search_in_table_matches_decompressed_formatted_appmsg_content() {
        let conn = Connection::open_in_memory().expect("open in-memory db");
        create_msg_table(&conn, "BLOB");

        // Store the payload zstd-compressed so the keyword is not present in
        // the stored bytes and can only match after decompression.
        let compressed = zstd::encode_all(QUOTE_XML.as_bytes(), 0).expect("compress appmsg xml");
        insert_quote_row(&conn, compressed, 4_i64).expect("insert compressed quote message");

        let rows = search_in_table(
            &conn,
            "Msg_test",
            "wxid_r605h38n08mv22",
            false,
            &HashMap::new(),
            &HashMap::new(),
            "claude",
            None,
            None,
            Some(49),
            10,
        )
        .expect("search messages");

        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["content"].as_str(), Some(QUOTE_TEXT));
    }
}
fn fmt_time(ts: i64, fmt: &str) -> String { fn fmt_time(ts: i64, fmt: &str) -> String {
Local.timestamp_opt(ts, 0) Local.timestamp_opt(ts, 0)
.single() .single()