From 1b00d04598017392994fc09ea1009a7de4b5c990 Mon Sep 17 00:00:00 2001 From: Tsing Date: Thu, 14 May 2026 14:46:34 +0800 Subject: [PATCH] feat: expose url field for link/appmsg messages (#18) * feat: expose url field for link/appmsg messages Extract from appmsg XML in type-49 messages and append it as a 'url' field in history/search output. The field is omitted when the message has no valid URL (non-link types, empty, non-http). * fix: normalize appmsg urls across query outputs --------- Co-authored-by: tsinghu Co-authored-by: jackwener --- src/daemon/query.rs | 123 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 0c4b106..8aa14c1 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -549,15 +549,20 @@ fn query_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames); let text = fmt_content(local_id, local_type, &content, is_group); + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "sender": sender, "content": text, "type": fmt_type(local_type), "local_id": local_id, - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok(result) } @@ -636,15 +641,20 @@ fn search_in_table( if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) { continue; } + let url = appmsg_url_for_message(local_type, &content); - result.push(json!({ + let mut msg = json!({ "timestamp": ts, "time": fmt_time(ts, "%Y-%m-%d %H:%M"), "chat": "", "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); if search_decoded_content && result.len() >= limit { break; } @@ -1273,6 +1283,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + +/// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) +fn extract_appmsg_url(text: &str) -> Option { + let xml = strip_group_prefix(text); + if !xml.contains(" Option { let open = format!("<{}", tag); let start = xml.find(&open)?; @@ -1906,7 +1947,8 @@ pub async fn q_new_messages( let content = decompress_message(&content_bytes, ct); let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2); let text = fmt_content(local_id, local_type, &content, is_group); - result.push(json!({ + let url = appmsg_url_for_message(local_type, &content); + let mut msg = json!({ "chat": display2, "username": uname2, "is_group": is_group, @@ -1916,7 +1958,11 @@ pub async fn q_new_messages( "sender": sender, "content": text, "type": fmt_type(local_type), - })); + }); + if let Some(u) = url { + msg["url"] = serde_json::Value::String(u); + } + result.push(msg); } Ok::<_, anyhow::Error>(result) }).await { @@ -2926,6 +2972,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") }