From 88a5bad3bcd2687db315b562d5ef0931f09c57ac Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 21 Apr 2026 22:13:38 +0800 Subject: [PATCH] fix: normalize appmsg urls across query outputs --- src/daemon/query.rs | 104 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/src/daemon/query.rs b/src/daemon/query.rs index 393c77e..8aa14c1 100644 --- a/src/daemon/query.rs +++ b/src/daemon/query.rs @@ -1283,20 +1283,32 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option { Some(xml[content_start..content_start + end].trim().to_string()) } +fn appmsg_url_for_message(local_type: i64, content: &str) -> Option { + if (local_type as u64 & 0xFFFFFFFF) != 49 { + return None; + } + extract_appmsg_url(content) +} + +fn strip_xml_cdata(s: &str) -> &str { + s.strip_prefix("")) + .unwrap_or(s) +} + /// 从 appmsg XML 中提取链接 URL(优先取 ,fallback 到 ) fn extract_appmsg_url(text: &str) -> Option { - // 群消息前缀 "wxid_xxx:\n" 需先剥离 - let xml = if text.contains(":\n") { - text.splitn(2, ":\n").nth(1).unwrap_or(text) - } else { - text - }; + let xml = strip_group_prefix(text); if !xml.contains("(result) }).await { @@ -2955,6 +2972,71 @@ mod sns_tests { assert_eq!(escape_like_pattern(""), ""); } + #[test] + fn extract_appmsg_url_unescapes_html_entities() { + let xml = concat!( + "", + "5", + "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1") + ); + } + + #[test] + fn extract_appmsg_url_strips_group_prefix_and_cdata() { + let xml = concat!( + "wxid_sender:\n", + "", + "5", + "", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/x?a=1&b=2") + ); + } + + #[test] + fn extract_appmsg_url_falls_back_to_url1() { + let xml = concat!( + "", + "5", + "https://example.com/fallback", + "" + ); + assert_eq!( + extract_appmsg_url(xml).as_deref(), + Some("https://example.com/fallback") + ); + } + + #[test] + fn extract_appmsg_url_ignores_non_http_values() { + let xml = concat!( + "", + "5", + "weixin://bizmsgmenu?msgmenucontent=foo", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + + #[test] + fn extract_appmsg_url_ignores_refermsg() { + let xml = concat!( + "", + "57", + "https://example.com/nested", + "" + ); + assert_eq!(extract_appmsg_url(xml), None); + } + fn media_object(value: &Value) -> &serde_json::Map { value.as_object().expect("media entry should be an object") }