feat: expose url field for link/appmsg messages (#18)

* feat: expose url field for link/appmsg messages

Extract <url> from appmsg XML in type-49 messages and append it as
a 'url' field in history/search output. The field is omitted when
the message has no valid URL (non-link types, empty, non-http).

* fix: normalize appmsg urls across query outputs

---------

Co-authored-by: tsinghu <tsinghu@tencent.com>
Co-authored-by: jackwener <jakevingoo@gmail.com>
pull/24/head
Tsing 2026-05-14 14:46:34 +08:00 committed by GitHub
parent b0431352ce
commit 1b00d04598
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 117 additions and 6 deletions

View File

@ -549,15 +549,20 @@ fn query_messages(
let content = decompress_message(&content_bytes, ct);
let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames);
let text = fmt_content(local_id, local_type, &content, is_group);
let url = appmsg_url_for_message(local_type, &content);
result.push(json!({
let mut msg = json!({
"timestamp": ts,
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
"sender": sender,
"content": text,
"type": fmt_type(local_type),
"local_id": local_id,
}));
});
if let Some(u) = url {
msg["url"] = serde_json::Value::String(u);
}
result.push(msg);
}
Ok(result)
}
@ -636,15 +641,20 @@ fn search_in_table(
if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) {
continue;
}
let url = appmsg_url_for_message(local_type, &content);
result.push(json!({
let mut msg = json!({
"timestamp": ts,
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
"chat": "",
"sender": sender,
"content": text,
"type": fmt_type(local_type),
}));
});
if let Some(u) = url {
msg["url"] = serde_json::Value::String(u);
}
result.push(msg);
if search_decoded_content && result.len() >= limit {
break;
}
@ -1273,6 +1283,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option<String> {
Some(xml[content_start..content_start + end].trim().to_string())
}
/// Returns the link URL carried by a message, if the message is an appmsg.
///
/// Only the low 32 bits of `local_type` are compared against 49; any other
/// value yields `None` without inspecting `content`. For type 49 the result
/// is whatever `extract_appmsg_url` produces for `content`.
// NOTE(review): the upper 32 bits presumably carry sub-type/flag data — confirm.
fn appmsg_url_for_message(local_type: i64, content: &str) -> Option<String> {
    const APPMSG_TYPE: u64 = 49;

    let base_type = (local_type as u64) & 0xFFFF_FFFF;
    if base_type == APPMSG_TYPE {
        extract_appmsg_url(content)
    } else {
        None
    }
}
/// Removes a surrounding `<![CDATA[ ... ]]>` wrapper from `s`.
///
/// The wrapper is removed only when both the opening and the closing marker
/// are present; otherwise `s` is returned unchanged.
fn strip_xml_cdata(s: &str) -> &str {
    match s.strip_prefix("<![CDATA[") {
        Some(rest) => rest.strip_suffix("]]>").unwrap_or(s),
        None => s,
    }
}
/// Extracts the link URL from an appmsg XML payload.
///
/// `<url>` is preferred and `<url1>` is the fallback. The fallback is used
/// not only when `<url>` is missing but also when it is present without a
/// usable value (empty or not an `http://` / `https://` link), so a blank
/// `<url></url>` no longer hides a valid `<url1>`.
///
/// Returns `None` when:
/// - the text (after `strip_group_prefix`) contains no `<appmsg` element,
/// - the first `<type>` value is `57` (the case covered by the
///   `ignores_refermsg` test), or
/// - neither tag yields an http(s) URL.
///
/// Each candidate has a surrounding CDATA wrapper removed and is then passed
/// through `unescape_html` before the scheme check.
fn extract_appmsg_url(text: &str) -> Option<String> {
    let xml = strip_group_prefix(text);
    if !xml.contains("<appmsg") {
        return None;
    }
    if extract_xml_text(&xml, "type").as_deref() == Some("57") {
        return None;
    }

    // Try the tags in priority order and keep the first one that normalizes
    // to an http(s) URL. An empty string fails the scheme check, so it needs
    // no separate test.
    ["url", "url1"].into_iter().find_map(|tag| {
        let raw = extract_xml_text(&xml, tag)?;
        let url = unescape_html(strip_xml_cdata(&raw));
        if url.starts_with("http://") || url.starts_with("https://") {
            Some(url)
        } else {
            None
        }
    })
}
fn extract_xml_attr(xml: &str, tag: &str, attr: &str) -> Option<String> {
let open = format!("<{}", tag);
let start = xml.find(&open)?;
@ -1906,7 +1947,8 @@ pub async fn q_new_messages(
let content = decompress_message(&content_bytes, ct);
let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2);
let text = fmt_content(local_id, local_type, &content, is_group);
result.push(json!({
let url = appmsg_url_for_message(local_type, &content);
let mut msg = json!({
"chat": display2,
"username": uname2,
"is_group": is_group,
@ -1916,7 +1958,11 @@ pub async fn q_new_messages(
"sender": sender,
"content": text,
"type": fmt_type(local_type),
}));
});
if let Some(u) = url {
msg["url"] = serde_json::Value::String(u);
}
result.push(msg);
}
Ok::<_, anyhow::Error>(result)
}).await {
@ -2926,6 +2972,71 @@ mod sns_tests {
assert_eq!(escape_like_pattern(""), "");
}
/// `&amp;` entities inside `<url>` come back as plain `&` in the result.
#[test]
fn extract_appmsg_url_unescapes_html_entities() {
    let payload = concat!(
        "<appmsg><type>5</type>",
        "<url>https://mp.weixin.qq.com/s?__biz=MzI4&amp;mid=2247&amp;idx=1</url>",
        "</appmsg>"
    );
    let expected = "https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1";

    let extracted = extract_appmsg_url(payload);

    assert_eq!(extracted.as_deref(), Some(expected));
}
/// A leading `sender:\n` prefix and a CDATA wrapper around the URL are both
/// removed before the URL is returned.
#[test]
fn extract_appmsg_url_strips_group_prefix_and_cdata() {
    let payload = concat!(
        "wxid_sender:\n",
        "<appmsg><type>5</type>",
        "<url><![CDATA[https://example.com/x?a=1&b=2]]></url>",
        "</appmsg>"
    );
    let expected = "https://example.com/x?a=1&b=2";

    let extracted = extract_appmsg_url(payload);

    assert_eq!(extracted.as_deref(), Some(expected));
}
/// With no `<url>` tag present, the value of `<url1>` is returned instead.
#[test]
fn extract_appmsg_url_falls_back_to_url1() {
    let payload = concat!(
        "<appmsg><type>5</type>",
        "<url1>https://example.com/fallback</url1>",
        "</appmsg>"
    );
    let expected = "https://example.com/fallback";

    let extracted = extract_appmsg_url(payload);

    assert_eq!(extracted.as_deref(), Some(expected));
}
/// A `<url>` whose scheme is neither `http://` nor `https://` yields `None`.
#[test]
fn extract_appmsg_url_ignores_non_http_values() {
    let payload = concat!(
        "<appmsg><type>5</type>",
        "<url>weixin://bizmsgmenu?msgmenucontent=foo</url>",
        "</appmsg>"
    );

    let extracted = extract_appmsg_url(payload);

    assert_eq!(extracted, None);
}
/// An appmsg whose `<type>` is 57 yields `None` even though it carries an
/// http(s) `<url>`.
#[test]
fn extract_appmsg_url_ignores_refermsg() {
    let payload = concat!(
        "<appmsg><type>57</type>",
        "<url>https://example.com/nested</url>",
        "</appmsg>"
    );

    let extracted = extract_appmsg_url(payload);

    assert_eq!(extracted, None);
}
/// Test helper: borrows `value` as a JSON object map.
///
/// Panics with "media entry should be an object" when `value` is not a JSON
/// object.
fn media_object(value: &Value) -> &serde_json::Map<String, Value> {
    match value.as_object() {
        Some(fields) => fields,
        None => panic!("media entry should be an object"),
    }
}