mirror of https://github.com/jackwener/wx-cli.git
feat: expose url field for link/appmsg messages (#18)
* feat: expose url field for link/appmsg messages. Extract <url> from the appmsg XML in type-49 messages and append it as a 'url' field in history/search output. The field is omitted when the message has no valid URL (non-link types, empty, non-http). * fix: normalize appmsg urls across query outputs --------- Co-authored-by: tsinghu <tsinghu@tencent.com> Co-authored-by: jackwener <jakevingoo@gmail.com> pull/24/head
parent
b0431352ce
commit
1b00d04598
|
|
@ -549,15 +549,20 @@ fn query_messages(
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames);
|
let sender = sender_label(real_sender_id, &content, is_group, chat_username, &id2u, names_map, group_nicknames);
|
||||||
let text = fmt_content(local_id, local_type, &content, is_group);
|
let text = fmt_content(local_id, local_type, &content, is_group);
|
||||||
|
let url = appmsg_url_for_message(local_type, &content);
|
||||||
|
|
||||||
result.push(json!({
|
let mut msg = json!({
|
||||||
"timestamp": ts,
|
"timestamp": ts,
|
||||||
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
|
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
|
||||||
"sender": sender,
|
"sender": sender,
|
||||||
"content": text,
|
"content": text,
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
"local_id": local_id,
|
"local_id": local_id,
|
||||||
}));
|
});
|
||||||
|
if let Some(u) = url {
|
||||||
|
msg["url"] = serde_json::Value::String(u);
|
||||||
|
}
|
||||||
|
result.push(msg);
|
||||||
}
|
}
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
@ -636,15 +641,20 @@ fn search_in_table(
|
||||||
if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) {
|
if search_decoded_content && !matches_search_text(&content, &text, keyword, &keyword_lower) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
let url = appmsg_url_for_message(local_type, &content);
|
||||||
|
|
||||||
result.push(json!({
|
let mut msg = json!({
|
||||||
"timestamp": ts,
|
"timestamp": ts,
|
||||||
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
|
"time": fmt_time(ts, "%Y-%m-%d %H:%M"),
|
||||||
"chat": "",
|
"chat": "",
|
||||||
"sender": sender,
|
"sender": sender,
|
||||||
"content": text,
|
"content": text,
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
}));
|
});
|
||||||
|
if let Some(u) = url {
|
||||||
|
msg["url"] = serde_json::Value::String(u);
|
||||||
|
}
|
||||||
|
result.push(msg);
|
||||||
if search_decoded_content && result.len() >= limit {
|
if search_decoded_content && result.len() >= limit {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -1273,6 +1283,37 @@ fn extract_xml_text(xml: &str, tag: &str) -> Option<String> {
|
||||||
Some(xml[content_start..content_start + end].trim().to_string())
|
Some(xml[content_start..content_start + end].trim().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn appmsg_url_for_message(local_type: i64, content: &str) -> Option<String> {
|
||||||
|
if (local_type as u64 & 0xFFFFFFFF) != 49 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
extract_appmsg_url(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unwraps a `<![CDATA[...]]>` section and returns the text inside it.
///
/// The input is returned unchanged unless it both starts with `<![CDATA[` and
/// ends with `]]>`. No trimming is performed on either the input or the result.
fn strip_xml_cdata(s: &str) -> &str {
    const OPEN: &str = "<![CDATA[";
    const CLOSE: &str = "]]>";

    match s.strip_prefix(OPEN) {
        // Opening marker present: only unwrap when the closing marker is too.
        Some(rest) => rest.strip_suffix(CLOSE).unwrap_or(s),
        None => s,
    }
}
|
||||||
|
|
||||||
|
/// 从 appmsg XML 中提取链接 URL(优先取 <url>,fallback 到 <url1>)
|
||||||
|
fn extract_appmsg_url(text: &str) -> Option<String> {
|
||||||
|
let xml = strip_group_prefix(text);
|
||||||
|
if !xml.contains("<appmsg") {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if extract_xml_text(&xml, "type").as_deref() == Some("57") {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let url = extract_xml_text(&xml, "url")
|
||||||
|
.or_else(|| extract_xml_text(&xml, "url1"))
|
||||||
|
.map(|s| unescape_html(strip_xml_cdata(&s)))?;
|
||||||
|
if url.is_empty() || !(url.starts_with("http://") || url.starts_with("https://")) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(url)
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_xml_attr(xml: &str, tag: &str, attr: &str) -> Option<String> {
|
fn extract_xml_attr(xml: &str, tag: &str, attr: &str) -> Option<String> {
|
||||||
let open = format!("<{}", tag);
|
let open = format!("<{}", tag);
|
||||||
let start = xml.find(&open)?;
|
let start = xml.find(&open)?;
|
||||||
|
|
@ -1906,7 +1947,8 @@ pub async fn q_new_messages(
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2);
|
let sender = sender_label(real_sender_id, &content, is_group, &uname2, &id2u, &names_map, &group_nicknames2);
|
||||||
let text = fmt_content(local_id, local_type, &content, is_group);
|
let text = fmt_content(local_id, local_type, &content, is_group);
|
||||||
result.push(json!({
|
let url = appmsg_url_for_message(local_type, &content);
|
||||||
|
let mut msg = json!({
|
||||||
"chat": display2,
|
"chat": display2,
|
||||||
"username": uname2,
|
"username": uname2,
|
||||||
"is_group": is_group,
|
"is_group": is_group,
|
||||||
|
|
@ -1916,7 +1958,11 @@ pub async fn q_new_messages(
|
||||||
"sender": sender,
|
"sender": sender,
|
||||||
"content": text,
|
"content": text,
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
}));
|
});
|
||||||
|
if let Some(u) = url {
|
||||||
|
msg["url"] = serde_json::Value::String(u);
|
||||||
|
}
|
||||||
|
result.push(msg);
|
||||||
}
|
}
|
||||||
Ok::<_, anyhow::Error>(result)
|
Ok::<_, anyhow::Error>(result)
|
||||||
}).await {
|
}).await {
|
||||||
|
|
@ -2926,6 +2972,71 @@ mod sns_tests {
|
||||||
assert_eq!(escape_like_pattern(""), "");
|
assert_eq!(escape_like_pattern(""), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn extract_appmsg_url_unescapes_html_entities() {
    // The query-string separators are entity-escaped (`&amp;`) in the XML, so
    // this test only passes if the extractor actually unescapes them.
    let xml = concat!(
        "<appmsg>",
        "<type>5</type>",
        "<url>https://mp.weixin.qq.com/s?__biz=MzI4&amp;mid=2247&amp;idx=1</url>",
        "</appmsg>"
    );
    assert_eq!(
        extract_appmsg_url(xml).as_deref(),
        Some("https://mp.weixin.qq.com/s?__biz=MzI4&mid=2247&idx=1")
    );
}
|
||||||
|
|
||||||
|
#[test]
fn extract_appmsg_url_strips_group_prefix_and_cdata() {
    // The payload carries a `wxid_sender:\n` prefix ahead of the XML and wraps
    // the URL in a CDATA section.
    let payload = [
        "wxid_sender:\n",
        "<appmsg>",
        "<type>5</type>",
        "<url><![CDATA[https://example.com/x?a=1&b=2]]></url>",
        "</appmsg>",
    ]
    .concat();

    let extracted = extract_appmsg_url(&payload);

    assert_eq!(extracted.as_deref(), Some("https://example.com/x?a=1&b=2"));
}
|
||||||
|
|
||||||
|
#[test]
fn extract_appmsg_url_falls_back_to_url1() {
    // There is no `<url>` element at all; only `<url1>` carries a link.
    let payload = [
        "<appmsg>",
        "<type>5</type>",
        "<url1>https://example.com/fallback</url1>",
        "</appmsg>",
    ]
    .concat();

    let extracted = extract_appmsg_url(&payload);

    assert_eq!(extracted.as_deref(), Some("https://example.com/fallback"));
}
|
||||||
|
|
||||||
|
#[test]
fn extract_appmsg_url_ignores_non_http_values() {
    // A `weixin://` scheme is neither http nor https, so nothing is returned.
    let payload = [
        "<appmsg>",
        "<type>5</type>",
        "<url>weixin://bizmsgmenu?msgmenucontent=foo</url>",
        "</appmsg>",
    ]
    .concat();

    let extracted = extract_appmsg_url(&payload);

    assert_eq!(extracted, None);
}
|
||||||
|
|
||||||
|
#[test]
fn extract_appmsg_url_ignores_refermsg() {
    // An appmsg whose `<type>` is 57 yields no URL even though `<url>` holds a
    // valid https link.
    let payload = [
        "<appmsg>",
        "<type>57</type>",
        "<url>https://example.com/nested</url>",
        "</appmsg>",
    ]
    .concat();

    let extracted = extract_appmsg_url(&payload);

    assert_eq!(extracted, None);
}
|
||||||
|
|
||||||
/// Test helper: views a JSON value as an object map.
///
/// Panics with "media entry should be an object" when `value` is not a JSON
/// object.
fn media_object(value: &Value) -> &serde_json::Map<String, Value> {
    match value.as_object() {
        Some(fields) => fields,
        None => panic!("media entry should be an object"),
    }
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue