mirror of https://github.com/jackwener/wx-cli.git
feat(biz): add wx biz-articles command to query public account messages (#33)
* feat(biz): add biz-articles command to query public account messages
加载 biz_message_0.db 提取公众号推送(标题/url/作者/时间)。
- daemon 端通过 DbCache 按需解密 biz_message_0.db(密钥已在 all_keys.json 中)
- 新增 IPC 变体 BizArticles(limit/account/since/until 参数)
- 新增 query 处理器 q_biz_articles:
- 通过 Name2Id 反查 gh_* username → md5 → Msg_<hash> 表映射
- 过滤 local_type & 0xFFFFFFFF = 49(appmsg 公众号文章)
- zstd 解压 + extract_cdata 解析 <mmreader>/<item> XML
- 支持多文章推送(一条消息含多篇文章)
- 输出字段:time/timestamp/recv_time/account/account_username/title/url/digest/cover_url
- 新增 CLI 子命令 wx biz-articles,参数:-n / --account / --since / --until / --json
- 新增工具函数 extract_cdata(CDATA 块解析)和 parse_biz_xml_items
- 新增 8 个单测(biz_tests 模块)覆盖 CDATA 解析和多文章场景
支持工作流:
wx biz-articles --since today --json | jq ".[].url" | xargs opencli weixin download
Verified: 返朴 ADHD 文章、Datawhale Claude Code 文章、土猛员外知识引擎文章均已正确提取。
* feat(biz-articles): add --unread filter (one latest article per account)
只列「有未读的公众号」的最近 1 篇文章 — 与 'wx unread --filter official'
行为一致,便于扫描"哪些公众号还有未读,标题是啥"。
- ipc.rs: BizArticles 加 unread: bool 字段(serde default = false 向后兼容)
- cli/mod.rs: --unread flag
- cli/biz_articles.rs: 透传 unread
- daemon/server.rs: dispatch 加 unread 参数
- daemon/query.rs: q_biz_articles
- 开启 --unread 时先查 session.db 拿 unread_count>0 且
chat_type==official_account 的 username 集合
- 与 --account 取交集(两者都给时进一步缩小范围)
- 空交集提前 return,避免无意义全表扫
- 解析后按 pub_time DESC 排,每个 account_username 只保留首条
- 最后再 truncate(limit)
* docs: PR draft - update --unread + --until usage
* chore(biz-articles): drop PR draft, document command, fix typo
- 删除 PR_DRAFT.md(误入 repo 的 PR 描述草稿,不该进 main)
- README.md / SKILL.md 补 biz-articles 用法
- query.rs: 密鑰 → 密钥
Co-authored-by: wx-cli-coder <coder@example.com>
---------
Co-authored-by: jackwener <jakevingoo@gmail.com>
Co-authored-by: wx-cli-coder <coder@example.com>
fix/query-correctness
parent
c284b4ade6
commit
dab3217d3f
15
README.md
15
README.md
|
|
@ -196,6 +196,21 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01
|
||||||
|
|
||||||
朋友圈数据只覆盖你本地刷到过的帖子(微信 app 按需下载)。
|
朋友圈数据只覆盖你本地刷到过的帖子(微信 app 按需下载)。
|
||||||
|
|
||||||
|
### 公众号文章
|
||||||
|
|
||||||
|
公众号文章推送存在独立的 `biz_message_0.db`,用 `biz-articles` 单独查:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wx biz-articles # 最近 50 篇
|
||||||
|
wx biz-articles -n 200 # 更多
|
||||||
|
wx biz-articles --account "返朴" # 限定公众号(名称模糊匹配)
|
||||||
|
wx biz-articles --since 2026-05-01 --until 2026-05-10
|
||||||
|
wx biz-articles --unread # 仅有未读的公众号,每号取最新 1 篇
|
||||||
|
wx biz-articles --json | jq '.[].url' # 下游消费 URL
|
||||||
|
```
|
||||||
|
|
||||||
|
每条返回:`account` / `account_username` / `title` / `url` / `digest` / `cover_url` / `time` / `timestamp`(文章发布时间)/ `recv_time_str` / `recv_time`(微信接收推送的时间)。多图文推送会展开成多行。
|
||||||
|
|
||||||
### 联系人 & 群组
|
### 联系人 & 群组
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
27
SKILL.md
27
SKILL.md
|
|
@ -215,6 +215,33 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 -n 50
|
||||||
|
|
||||||
> 只保存你本地刷到过的朋友圈(微信 app 按需下载)。没刷到过的帖子不在本地,任何命令都拿不到。
|
> 只保存你本地刷到过的朋友圈(微信 app 按需下载)。没刷到过的帖子不在本地,任何命令都拿不到。
|
||||||
|
|
||||||
|
### 公众号文章
|
||||||
|
|
||||||
|
公众号的文章推送存在独立的 `biz_message_0.db`,与普通 `message_0.db` 分开:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 最近 50 篇(默认)
|
||||||
|
wx biz-articles
|
||||||
|
|
||||||
|
# 更多
|
||||||
|
wx biz-articles -n 200
|
||||||
|
|
||||||
|
# 限定公众号(名称模糊匹配 display name / username)
|
||||||
|
wx biz-articles --account "返朴"
|
||||||
|
|
||||||
|
# 时间范围(YYYY-MM-DD;按微信接收推送的时间 create_time 过滤,结果仍按文章发布时间倒序排列)
|
||||||
|
wx biz-articles --since 2026-05-01 --until 2026-05-10
|
||||||
|
|
||||||
|
# 仅有未读消息的公众号,每号取最新 1 篇(适合"今天有什么新推送"扫描)
|
||||||
|
wx biz-articles --unread
|
||||||
|
wx biz-articles --unread --account "Datawhale" # 与 --account 取交集
|
||||||
|
|
||||||
|
# 下游消费:拿 URL 做内容抓取
|
||||||
|
wx biz-articles --since 2026-05-10 --json | jq '.[].url'
|
||||||
|
```
|
||||||
|
|
||||||
|
每条返回的字段:`account` / `account_username`(`gh_*`)/ `title` / `url`(`mp.weixin.qq.com` 链接)/ `digest` / `cover_url` / `time` + `timestamp`(文章发布时间)/ `recv_time_str` + `recv_time`(微信接收推送的时间)。多图文推送会展开为多行。
|
||||||
|
|
||||||
### 收藏与统计
|
### 收藏与统计
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use crate::ipc::Request;
|
||||||
|
use super::history::{parse_time, parse_time_end};
|
||||||
|
use super::transport;
|
||||||
|
use super::output::{resolve, print_value};
|
||||||
|
|
||||||
|
pub fn cmd_biz_articles(
|
||||||
|
limit: usize,
|
||||||
|
account: Option<String>,
|
||||||
|
since: Option<String>,
|
||||||
|
until: Option<String>,
|
||||||
|
unread: bool,
|
||||||
|
json: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let since_ts = since.as_deref().map(parse_time).transpose()?;
|
||||||
|
let until_ts = until.as_deref().map(parse_time_end).transpose()?;
|
||||||
|
|
||||||
|
let req = Request::BizArticles {
|
||||||
|
limit,
|
||||||
|
account,
|
||||||
|
since: since_ts,
|
||||||
|
until: until_ts,
|
||||||
|
unread,
|
||||||
|
};
|
||||||
|
let resp = transport::send(req)?;
|
||||||
|
let data = resp.data.get("articles")
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or(serde_json::Value::Array(vec![]));
|
||||||
|
print_value(&data, &resolve(json))
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
mod init;
|
mod init;
|
||||||
|
pub mod biz_articles;
|
||||||
pub mod sessions;
|
pub mod sessions;
|
||||||
pub mod history;
|
pub mod history;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
|
|
@ -220,6 +221,27 @@ enum Commands {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
json: bool,
|
json: bool,
|
||||||
},
|
},
|
||||||
|
/// 查询公众号文章推送(本地缓存)
|
||||||
|
BizArticles {
|
||||||
|
/// 显示数量
|
||||||
|
#[arg(short = 'n', long, default_value = "50")]
|
||||||
|
limit: usize,
|
||||||
|
/// 限定公众号(名称模糊匹配)
|
||||||
|
#[arg(long)]
|
||||||
|
account: Option<String>,
|
||||||
|
/// 起始时间 YYYY-MM-DD
|
||||||
|
#[arg(long)]
|
||||||
|
since: Option<String>,
|
||||||
|
/// 结束时间 YYYY-MM-DD
|
||||||
|
#[arg(long)]
|
||||||
|
until: Option<String>,
|
||||||
|
/// 只看有未读的公众号,每个公众号取最新 1 篇
|
||||||
|
#[arg(long)]
|
||||||
|
unread: bool,
|
||||||
|
/// 输出 JSON(默认 YAML)
|
||||||
|
#[arg(long)]
|
||||||
|
json: bool,
|
||||||
|
},
|
||||||
/// 朋友圈全文搜索:匹配正文关键词
|
/// 朋友圈全文搜索:匹配正文关键词
|
||||||
SnsSearch {
|
SnsSearch {
|
||||||
/// 关键词
|
/// 关键词
|
||||||
|
|
@ -304,6 +326,9 @@ fn dispatch(cli: Cli) -> Result<()> {
|
||||||
Commands::SnsSearch { keyword, limit, since, until, user, json } => {
|
Commands::SnsSearch { keyword, limit, since, until, user, json } => {
|
||||||
sns_search::cmd_sns_search(keyword, limit, since, until, user, json)
|
sns_search::cmd_sns_search(keyword, limit, since, until, user, json)
|
||||||
}
|
}
|
||||||
|
Commands::BizArticles { limit, account, since, until, unread, json } => {
|
||||||
|
biz_articles::cmd_biz_articles(limit, account, since, until, unread, json)
|
||||||
|
}
|
||||||
Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd),
|
Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2945,6 +2945,397 @@ pub async fn q_sns_search(
|
||||||
Ok(json!({ "keyword": keyword, "posts": posts, "total": total }))
|
Ok(json!({ "keyword": keyword, "posts": posts, "total": total }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── 公众号文章查询 ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// One article parsed out of an official-account push message.
#[derive(Debug)]
struct BizArticle {
    /// Timestamp at which the push was received (the message's `create_time`).
    recv_time: i64,
    /// Username of the official account that sent the push.
    account_username: String,
    /// Article title.
    title: String,
    /// Article link.
    url: String,
    /// Article summary; empty when the push carries none.
    digest: String,
    /// Cover image URL; empty when the push carries none.
    cover: String,
    /// Article publish time (`pub_time`, in seconds).
    pub_time: i64,
}
|
||||||
|
|
||||||
|
/// 从 biz_message 表的单条 XML 解析出全部 article items
|
||||||
|
fn parse_biz_xml_items(recv_time: i64, account_username: &str, xml: &str) -> Vec<BizArticle> {
|
||||||
|
let mut items = Vec::new();
|
||||||
|
let mut search_from = 0;
|
||||||
|
loop {
|
||||||
|
let Some(item_start) = xml[search_from..].find("<item>") else { break; };
|
||||||
|
let abs_start = search_from + item_start;
|
||||||
|
let Some(item_end) = xml[abs_start..].find("</item>") else { break; };
|
||||||
|
let abs_end = abs_start + item_end + 7;
|
||||||
|
let item_xml = &xml[abs_start..abs_end];
|
||||||
|
|
||||||
|
let title = extract_cdata(item_xml, "title").unwrap_or_default();
|
||||||
|
let url = extract_cdata(item_xml, "url").unwrap_or_default();
|
||||||
|
// Skip items with no URL or empty title (e.g. payment entries)
|
||||||
|
if url.is_empty() || title.is_empty() {
|
||||||
|
search_from = abs_end;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let digest = extract_cdata(item_xml, "digest").unwrap_or_default();
|
||||||
|
let cover = extract_cdata(item_xml, "cover").unwrap_or_default();
|
||||||
|
let pub_time = extract_xml_text(item_xml, "pub_time")
|
||||||
|
.and_then(|s| s.parse::<i64>().ok())
|
||||||
|
.unwrap_or(recv_time);
|
||||||
|
|
||||||
|
items.push(BizArticle {
|
||||||
|
recv_time,
|
||||||
|
account_username: account_username.to_string(),
|
||||||
|
title,
|
||||||
|
url,
|
||||||
|
digest,
|
||||||
|
cover,
|
||||||
|
pub_time,
|
||||||
|
});
|
||||||
|
search_from = abs_end;
|
||||||
|
}
|
||||||
|
items
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 提取 CDATA 或普通文本内容: `<tag><![CDATA[...]]></tag>` 或 `<tag>...</tag>`
|
||||||
|
///
|
||||||
|
/// 注意: 内容匹配到 `</tag>` 之前的内容。CDATA 块中的 "]]"已在 "]]\x3e" 之前,
|
||||||
|
/// 所以 inner 为 `<![CDATA[content]]>` 或 `<![CDATA[content]]` (如果 ">" 被 close tag 吸掉)
|
||||||
|
fn extract_cdata(xml: &str, tag: &str) -> Option<String> {
|
||||||
|
let open = format!("<{}>", tag);
|
||||||
|
let close = format!("</{}>", tag);
|
||||||
|
let start = xml.find(&open)? + open.len();
|
||||||
|
let end = xml[start..].find(&close)?;
|
||||||
|
let inner = xml[start..start + end].trim();
|
||||||
|
if inner.starts_with("<![CDATA[") {
|
||||||
|
// inner = `<![CDATA[content]]>` → strip 9-char `<![CDATA[` prefix + 3-char `]]>` suffix
|
||||||
|
let body = &inner[9..];
|
||||||
|
// Strip `]]>` (normal) or `]]` (edge case)
|
||||||
|
let cdata_end = b"]]>";
|
||||||
|
let cdata_end2 = b"]]";
|
||||||
|
let content: &str = if body.as_bytes().ends_with(cdata_end) {
|
||||||
|
&body[..body.len() - 3]
|
||||||
|
} else if body.as_bytes().ends_with(cdata_end2) {
|
||||||
|
&body[..body.len() - 2]
|
||||||
|
} else {
|
||||||
|
body
|
||||||
|
};
|
||||||
|
let content = content.trim();
|
||||||
|
if content.is_empty() { None } else { Some(content.to_string()) }
|
||||||
|
} else if inner.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(unescape_html(inner))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 查询公众号文章推送(biz_message_0.db)
|
||||||
|
///
|
||||||
|
/// 每条消息可能包含多篇文章(多图文推送)。返回所有文章展开就的平底列表。
|
||||||
|
pub async fn q_biz_articles(
|
||||||
|
db: &DbCache,
|
||||||
|
names: &Names,
|
||||||
|
limit: usize,
|
||||||
|
account: Option<String>,
|
||||||
|
since: Option<i64>,
|
||||||
|
until: Option<i64>,
|
||||||
|
unread: bool,
|
||||||
|
) -> Result<Value> {
|
||||||
|
let biz_path = db.get("message/biz_message_0.db").await?
|
||||||
|
.context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")?
|
||||||
|
;
|
||||||
|
|
||||||
|
// 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集,
|
||||||
|
// 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。
|
||||||
|
let unread_usernames: Option<std::collections::HashSet<String>> = if unread {
|
||||||
|
let session_path = db.get("session/session.db").await?
|
||||||
|
.context("无法解密 session.db")?;
|
||||||
|
let session_path2 = session_path.clone();
|
||||||
|
let unread_rows: Vec<String> = tokio::task::spawn_blocking(move || {
|
||||||
|
let conn = Connection::open(&session_path2)?;
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"SELECT username FROM SessionTable WHERE unread_count > 0"
|
||||||
|
)?;
|
||||||
|
let rows: Vec<String> = stmt.query_map([], |row| row.get::<_, String>(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
Ok::<_, anyhow::Error>(rows)
|
||||||
|
}).await??;
|
||||||
|
// 仅保留公众号类型的未读会话
|
||||||
|
let set: std::collections::HashSet<String> = unread_rows.into_iter()
|
||||||
|
.filter(|u| chat_type_of(u, names) == "official_account")
|
||||||
|
.collect();
|
||||||
|
if set.is_empty() {
|
||||||
|
// 没有未读公众号 → 直接空返回,避免打 biz 表扫描
|
||||||
|
return Ok(json!({ "count": 0, "articles": [] }));
|
||||||
|
}
|
||||||
|
Some(set)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// 1. 从 Name2Id 表获取 rowid -> username 映射,再推导 md5 -> username
|
||||||
|
let biz_path2 = biz_path.clone();
|
||||||
|
let id2username: HashMap<i64, String> = tokio::task::spawn_blocking(move || {
|
||||||
|
let conn = Connection::open(&biz_path2)?;
|
||||||
|
let mut stmt = conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")?
|
||||||
|
;
|
||||||
|
let rows = stmt.query_map([], |row| {
|
||||||
|
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
|
||||||
|
})?
|
||||||
|
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||||
|
Ok::<_, anyhow::Error>(rows.into_iter().collect())
|
||||||
|
}).await??;
|
||||||
|
|
||||||
|
// 构建 md5(username) -> username 映射
|
||||||
|
let md5_to_uname: HashMap<String, String> = id2username.values()
|
||||||
|
.map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// 2. 如果 指定了 --account,找到匹配的 username 列表
|
||||||
|
let account_low = account.as_deref().map(|s| s.to_lowercase());
|
||||||
|
let mut target_usernames: Option<Vec<String>> = account_low.as_ref().map(|low| {
|
||||||
|
id2username.values()
|
||||||
|
.filter(|u| {
|
||||||
|
let display = names.display(u);
|
||||||
|
display.to_lowercase().contains(low.as_str())
|
||||||
|
|| u.to_lowercase().contains(low.as_str())
|
||||||
|
})
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
|
// --unread 与 --account 取交集(进一步缩小范围)
|
||||||
|
if let Some(ref unread_set) = unread_usernames {
|
||||||
|
target_usernames = Some(match target_usernames.take() {
|
||||||
|
Some(acc_list) => acc_list.into_iter()
|
||||||
|
.filter(|u| unread_set.contains(u))
|
||||||
|
.collect(),
|
||||||
|
None => unread_set.iter().cloned().collect(),
|
||||||
|
});
|
||||||
|
// 交集为空 → 提前返回
|
||||||
|
if target_usernames.as_ref().map(|v| v.is_empty()).unwrap_or(false) {
|
||||||
|
return Ok(json!({ "count": 0, "articles": [] }));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. 进行数据库查询
|
||||||
|
let biz_path3 = biz_path.clone();
|
||||||
|
let since2 = since;
|
||||||
|
let until2 = until;
|
||||||
|
let target_hashes: Option<Vec<String>> = target_usernames.as_ref().map(|unames| {
|
||||||
|
unames.iter()
|
||||||
|
.map(|u| format!("{:x}", md5::compute(u.as_bytes())))
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
|
let rows: Vec<(String, i64, i64, Vec<u8>, i64)> = tokio::task::spawn_blocking(move || {
|
||||||
|
let conn = Connection::open(&biz_path3)?;
|
||||||
|
|
||||||
|
// 列出所有 Msg_<hash> 表
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'"
|
||||||
|
)?;
|
||||||
|
let table_names: Vec<String> = stmt.query_map([], |row| row.get(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap();
|
||||||
|
let mut all_rows: Vec<(String, i64, i64, Vec<u8>, i64)> = Vec::new();
|
||||||
|
|
||||||
|
for tname in &table_names {
|
||||||
|
if !re.is_match(tname) { continue; }
|
||||||
|
let hash = &tname[4..];
|
||||||
|
|
||||||
|
// account 过滤
|
||||||
|
if let Some(ref hashes) = target_hashes {
|
||||||
|
if !hashes.iter().any(|h| h == hash) { continue; }
|
||||||
|
}
|
||||||
|
|
||||||
|
let username = md5_to_uname.get(hash).cloned().unwrap_or_default();
|
||||||
|
|
||||||
|
// 构建过滤条件
|
||||||
|
let mut clauses: Vec<String> = Vec::new();
|
||||||
|
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
||||||
|
// local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章)
|
||||||
|
clauses.push("(local_type & 4294967295) = 49".to_string());
|
||||||
|
if let Some(s) = since2 {
|
||||||
|
clauses.push("create_time >= ?".to_string());
|
||||||
|
params.push(Box::new(s));
|
||||||
|
}
|
||||||
|
if let Some(u) = until2 {
|
||||||
|
clauses.push("create_time <= ?".to_string());
|
||||||
|
params.push(Box::new(u));
|
||||||
|
}
|
||||||
|
let where_clause = format!("WHERE {}", clauses.join(" AND "));
|
||||||
|
|
||||||
|
let sql = format!(
|
||||||
|
"SELECT create_time, WCDB_CT_message_content, message_content \
|
||||||
|
FROM [{}] {} ORDER BY create_time DESC",
|
||||||
|
tname, where_clause
|
||||||
|
);
|
||||||
|
|
||||||
|
let params_ref: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();
|
||||||
|
if let Ok(mut inner_stmt) = conn.prepare(&sql) {
|
||||||
|
let msg_rows: Vec<_> = inner_stmt
|
||||||
|
.query_map(params_ref.as_slice(), |row| {
|
||||||
|
Ok((
|
||||||
|
username.clone(),
|
||||||
|
row.get::<_, i64>(0)?,
|
||||||
|
row.get::<_, i64>(1).unwrap_or(0),
|
||||||
|
get_content_bytes(row, 2),
|
||||||
|
0i64,
|
||||||
|
))
|
||||||
|
})
|
||||||
|
.map(|it| it.filter_map(|r| r.ok()).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
all_rows.extend(msg_rows);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok::<_, anyhow::Error>(all_rows)
|
||||||
|
}).await??;
|
||||||
|
|
||||||
|
// 4. 解压并解析 XML
|
||||||
|
let mut articles: Vec<BizArticle> = Vec::new();
|
||||||
|
for (username, recv_time, ct, content_bytes, _) in rows {
|
||||||
|
let content = decompress_message(&content_bytes, ct);
|
||||||
|
if content.is_empty() { continue; }
|
||||||
|
let items = parse_biz_xml_items(recv_time, &username, &content);
|
||||||
|
articles.extend(items);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. 按 pub_time DESC 排序
|
||||||
|
articles.sort_by_key(|a| std::cmp::Reverse(a.pub_time));
|
||||||
|
|
||||||
|
// --unread 语义 A:每个公众号只保留最新 1 篇(已按 pub_time 排序,取首条即可)
|
||||||
|
if unread {
|
||||||
|
let mut seen = std::collections::HashSet::<String>::new();
|
||||||
|
articles.retain(|a| seen.insert(a.account_username.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
articles.truncate(limit);
|
||||||
|
|
||||||
|
let results: Vec<Value> = articles.into_iter().map(|a| {
|
||||||
|
let account_display = names.display(&a.account_username);
|
||||||
|
json!({
|
||||||
|
"time": fmt_time(a.pub_time, "%Y-%m-%d %H:%M"),
|
||||||
|
"timestamp": a.pub_time,
|
||||||
|
"recv_time": a.recv_time,
|
||||||
|
"recv_time_str": fmt_time(a.recv_time, "%Y-%m-%d %H:%M"),
|
||||||
|
"account": account_display,
|
||||||
|
"account_username": a.account_username,
|
||||||
|
"title": a.title,
|
||||||
|
"url": a.url,
|
||||||
|
"digest": a.digest,
|
||||||
|
"cover_url": a.cover,
|
||||||
|
})
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
Ok(json!({ "count": results.len(), "articles": results }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unit tests for the official-account article helpers
/// (`extract_cdata` and `parse_biz_xml_items`).
#[cfg(test)]
mod biz_tests {
    use super::*;

    /// A CDATA-wrapped value is returned without the wrapper.
    #[test]
    fn extract_cdata_normal() {
        let parsed = extract_cdata("<title><![CDATA[TencentResearch]]></title>", "title");
        assert_eq!(parsed, Some("TencentResearch".into()));
    }

    /// An empty CDATA section yields `None`.
    #[test]
    fn extract_cdata_empty() {
        let parsed = extract_cdata("<cover><![CDATA[]]></cover>", "cover");
        assert_eq!(parsed, None);
    }

    /// URLs come back intact and free of CDATA markup.
    #[test]
    fn extract_cdata_url() {
        let xml = "<url><![CDATA[http://mp.weixin.qq.com/s?__biz=abc&mid=123]]></url>";
        let result = extract_cdata(xml, "url");
        assert!(result.is_some());
        let url = result.unwrap();
        assert!(url.starts_with("http://mp.weixin.qq.com"));
        assert!(!url.contains("CDATA"));
    }

    /// Plain element text (no CDATA wrapper) is returned as well.
    #[test]
    fn extract_cdata_no_cdata_wrapper() {
        let parsed = extract_cdata("<pub_time>1700000000</pub_time>", "pub_time");
        assert_eq!(parsed, Some("1700000000".into()));
    }

    /// A push containing one `<item>` produces one fully populated article.
    #[test]
    fn parse_biz_xml_items_single_article() {
        let xml = "<msg><appmsg><mmreader><category><item>\n\
                   <title><![CDATA[Test Article Title]]></title>\n\
                   <url><![CDATA[http://mp.weixin.qq.com/s?test=1]]></url>\n\
                   <digest><![CDATA[Test Digest]]></digest>\n\
                   <cover><![CDATA[https://example.com/cover.jpg]]></cover>\n\
                   <pub_time>1700000000</pub_time>\n\
                   </item></category></mmreader></appmsg></msg>";

        let items = parse_biz_xml_items(1699999999, "gh_test123", xml);
        assert_eq!(items.len(), 1);

        let article = &items[0];
        assert_eq!(article.title, "Test Article Title");
        assert_eq!(article.url, "http://mp.weixin.qq.com/s?test=1");
        assert_eq!(article.digest, "Test Digest");
        assert_eq!(article.pub_time, 1700000000);
        assert_eq!(article.account_username, "gh_test123");
    }

    /// Items whose URL is empty are dropped.
    #[test]
    fn parse_biz_xml_items_skips_no_url() {
        let xml = "<msg><mmreader><category><item>\n\
                   <title><![CDATA[Has Title No URL]]></title>\n\
                   <url><![CDATA[]]></url>\n\
                   <pub_time>1700000001</pub_time>\n\
                   </item></category></mmreader></msg>";

        let items = parse_biz_xml_items(1700000001, "gh_test", xml);
        assert_eq!(items.len(), 0);
    }

    /// Every `<item>` of a multi-article push is returned, in document order.
    #[test]
    fn parse_biz_xml_items_multi_article() {
        let xml = "<msg><mmreader><category>\n\
                   <item>\n\
                   <title><![CDATA[Article 1]]></title>\n\
                   <url><![CDATA[http://mp.weixin.qq.com/s?a=1]]></url>\n\
                   <pub_time>1700000010</pub_time>\n\
                   </item>\n\
                   <item>\n\
                   <title><![CDATA[Article 2]]></title>\n\
                   <url><![CDATA[http://mp.weixin.qq.com/s?a=2]]></url>\n\
                   <pub_time>1700000020</pub_time>\n\
                   </item>\n\
                   </category></mmreader></msg>";

        let items = parse_biz_xml_items(1700000000, "gh_multi", xml);
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].title, "Article 1");
        assert_eq!(items[1].title, "Article 2");
    }

    /// Without a `<pub_time>` element the receive time is used instead.
    #[test]
    fn parse_biz_xml_items_pub_time_fallback() {
        let xml = "<item>\n\
                   <title><![CDATA[No PubTime]]></title>\n\
                   <url><![CDATA[http://mp.weixin.qq.com/s?x=1]]></url>\n\
                   </item>";

        let items = parse_biz_xml_items(1700000099, "gh_fallback", xml);
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].pub_time, 1700000099); // falls back to recv_time
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod group_nickname_tests {
|
mod group_nickname_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
||||||
|
|
@ -234,5 +234,11 @@ async fn dispatch(
|
||||||
ReloadConfig => {
|
ReloadConfig => {
|
||||||
Response::ok(serde_json::json!({ "reloading": true }))
|
Response::ok(serde_json::json!({ "reloading": true }))
|
||||||
}
|
}
|
||||||
|
BizArticles { limit, account, since, until, unread } => {
|
||||||
|
match query::q_biz_articles(db, &names_arc, limit, account, since, until, unread).await {
|
||||||
|
Ok(v) => Response::ok(v),
|
||||||
|
Err(e) => Response::err(e.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
15
src/ipc.rs
15
src/ipc.rs
|
|
@ -102,6 +102,21 @@ pub enum Request {
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
user: Option<String>,
|
user: Option<String>,
|
||||||
},
|
},
|
||||||
|
/// 查询公众号文章推送(biz_message_0.db)
|
||||||
|
BizArticles {
|
||||||
|
#[serde(default = "default_limit_50")]
|
||||||
|
limit: usize,
|
||||||
|
/// 公众号名称过滤(模糊匹配 display name 或 username,None = 全部)
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
account: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
since: Option<i64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
until: Option<i64>,
|
||||||
|
/// 只看有未读消息的公众号,每个公众号取最新 1 篇
|
||||||
|
#[serde(default)]
|
||||||
|
unread: bool,
|
||||||
|
},
|
||||||
/// 朋友圈全文搜索(匹配 contentDesc)
|
/// 朋友圈全文搜索(匹配 contentDesc)
|
||||||
SnsSearch {
|
SnsSearch {
|
||||||
keyword: String,
|
keyword: String,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue