mirror of https://github.com/jackwener/wx-cli.git
fix(biz-articles): read all biz_message shards (#81)
parent
94fcc36ffe
commit
08af894594
|
|
@ -217,7 +217,7 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01
|
||||||
|
|
||||||
### 公众号文章
|
### 公众号文章
|
||||||
|
|
||||||
公众号文章推送存在独立的 `biz_message_0.db`,用 `biz-articles` 单独查:
|
公众号文章推送存在独立的 `biz_message_*.db` 分片,用 `biz-articles` 单独查:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
wx biz-articles # 最近 50 篇
|
wx biz-articles # 最近 50 篇
|
||||||
|
|
|
||||||
2
SKILL.md
2
SKILL.md
|
|
@ -242,7 +242,7 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 -n 50
|
||||||
|
|
||||||
### 公众号文章
|
### 公众号文章
|
||||||
|
|
||||||
公众号的文章推送存在独立的 `biz_message_0.db`,与普通 `message_0.db` 分开:
|
公众号的文章推送存在独立的 `biz_message_*.db` 分片,与普通 `message_0.db` 分开:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 最近 50 篇(默认)
|
# 最近 50 篇(默认)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,39 @@ use std::sync::Arc;
|
||||||
|
|
||||||
use crate::config;
|
use crate::config;
|
||||||
|
|
||||||
|
/// Normalizes a relative DB key so that it uses `/` as the path separator.
///
/// Every backslash in `rel_key` is replaced with a forward slash, which lets
/// the prefix checks below be written once for both separator styles.
fn normalized_rel_key(rel_key: &str) -> String {
    rel_key.replace('\\', "/")
}

/// Shared shard filter behind [`is_msg_db_key`] and [`is_biz_msg_db_key`].
///
/// Returns `true` when the normalized key
/// * starts with `prefix`,
/// * ends with `.db` (so siblings such as `*.db-wal` are rejected), and
/// * contains neither `_fts` nor `_resource` (auxiliary databases).
fn is_shard_db_key(rel_key: &str, prefix: &str) -> bool {
    let rel_key = normalized_rel_key(rel_key);
    rel_key.starts_with(prefix)
        && rel_key.ends_with(".db")
        && !rel_key.contains("_fts")
        && !rel_key.contains("_resource")
}

/// Returns `true` for regular message shards such as `message/message_0.db`.
///
/// `message/biz_message_*.db` keys do not match because the key must start
/// with `message/message_`.
fn is_msg_db_key(rel_key: &str) -> bool {
    is_shard_db_key(rel_key, "message/message_")
}

/// Returns `true` for official-account message shards such as
/// `message/biz_message_0.db`.
fn is_biz_msg_db_key(rel_key: &str) -> bool {
    is_shard_db_key(rel_key, "message/biz_message_")
}
|
||||||
|
|
||||||
|
/// Collects the relative DB keys accepted by `predicate`, in sorted order.
///
/// * `all_keys` - map whose keys are relative DB paths; the values are not
///   inspected here.
/// * `predicate` - filter applied to each key. Any `Fn(&str) -> bool` is
///   accepted, so both plain functions and capturing closures work.
///
/// The result is sorted lexicographically so the order does not depend on
/// `HashMap` iteration order. Note that this is a plain string sort, so
/// `message_10.db` sorts before `message_2.db`.
fn collect_db_keys(
    all_keys: &HashMap<String, String>,
    predicate: impl Fn(&str) -> bool,
) -> Vec<String> {
    let mut keys: Vec<String> = all_keys
        .keys()
        .filter(|key| predicate(key.as_str()))
        .cloned()
        .collect();
    // Map keys are unique, so an unstable sort yields the same order as a
    // stable one without the extra allocation.
    keys.sort_unstable();
    keys
}
|
||||||
|
|
||||||
/// daemon 入口
|
/// daemon 入口
|
||||||
///
|
///
|
||||||
/// 当 WX_DAEMON_MODE 环境变量设置时,main() 调用此函数
|
/// 当 WX_DAEMON_MODE 环境变量设置时,main() 调用此函数
|
||||||
|
|
@ -49,17 +82,8 @@ async fn async_run() -> Result<()> {
|
||||||
let db = Arc::new(cache::DbCache::new(cfg.db_dir.clone(), all_keys.clone()).await?);
|
let db = Arc::new(cache::DbCache::new(cfg.db_dir.clone(), all_keys.clone()).await?);
|
||||||
|
|
||||||
// 收集消息 DB 列表
|
// 收集消息 DB 列表
|
||||||
let msg_db_keys: Vec<String> = all_keys
|
let msg_db_keys = collect_db_keys(&all_keys, is_msg_db_key);
|
||||||
.keys()
|
let biz_msg_db_keys = collect_db_keys(&all_keys, is_biz_msg_db_key);
|
||||||
.filter(|k| {
|
|
||||||
let k = k.replace('\\', "/");
|
|
||||||
k.contains("message/message_")
|
|
||||||
&& k.ends_with(".db")
|
|
||||||
&& !k.contains("_fts")
|
|
||||||
&& !k.contains("_resource")
|
|
||||||
})
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// 预热:加载联系人 + 解密 session.db
|
// 预热:加载联系人 + 解密 session.db
|
||||||
eprintln!("[daemon] 预热...");
|
eprintln!("[daemon] 预热...");
|
||||||
|
|
@ -69,11 +93,13 @@ async fn async_run() -> Result<()> {
|
||||||
map: HashMap::new(),
|
map: HashMap::new(),
|
||||||
md5_to_uname: HashMap::new(),
|
md5_to_uname: HashMap::new(),
|
||||||
msg_db_keys: Vec::new(),
|
msg_db_keys: Vec::new(),
|
||||||
|
biz_msg_db_keys: Vec::new(),
|
||||||
verify_flags: HashMap::new(),
|
verify_flags: HashMap::new(),
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
let mut names = names_raw;
|
let mut names = names_raw;
|
||||||
names.msg_db_keys = msg_db_keys;
|
names.msg_db_keys = msg_db_keys;
|
||||||
|
names.biz_msg_db_keys = biz_msg_db_keys;
|
||||||
|
|
||||||
let _ = db.get("session/session.db").await;
|
let _ = db.get("session/session.db").await;
|
||||||
let _ = db.get("sns/sns.db").await;
|
let _ = db.get("sns/sns.db").await;
|
||||||
|
|
@ -149,3 +175,28 @@ fn cleanup_ipc_files() {
|
||||||
let _ = std::fs::remove_file(config::sock_path());
|
let _ = std::fs::remove_file(config::sock_path());
|
||||||
let _ = std::fs::remove_file(config::pid_path());
|
let _ = std::fs::remove_file(config::pid_path());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::{is_biz_msg_db_key, is_msg_db_key};

    /// The plain-message filter accepts `message/message_N.db` with either
    /// path separator and rejects biz shards, WAL siblings, FTS/resource
    /// databases and unrelated DB keys.
    #[test]
    fn message_db_key_filter_ignores_biz_and_auxiliary_files() {
        assert!(is_msg_db_key("message/message_0.db"));
        assert!(is_msg_db_key("message\\message_12.db"));
        assert!(!is_msg_db_key("message/biz_message_0.db"));
        assert!(!is_msg_db_key("message/message_0.db-wal"));
        assert!(!is_msg_db_key("message/message_0_fts.db"));
        assert!(!is_msg_db_key("message/message_0_resource.db"));
        // Auxiliary files must also be rejected when the key uses backslashes.
        assert!(!is_msg_db_key("message\\message_0_fts.db"));
        // Keys for other databases never match.
        assert!(!is_msg_db_key("session/session.db"));
    }

    /// The biz filter accepts only `message/biz_message_N.db` shards and
    /// rejects plain message shards, WAL siblings, FTS/resource databases
    /// and unrelated DB keys.
    #[test]
    fn biz_message_db_key_filter_matches_only_biz_shards() {
        assert!(is_biz_msg_db_key("message/biz_message_0.db"));
        assert!(is_biz_msg_db_key("message\\biz_message_3.db"));
        assert!(!is_biz_msg_db_key("message/message_0.db"));
        assert!(!is_biz_msg_db_key("message/biz_message_0.db-wal"));
        assert!(!is_biz_msg_db_key("message/biz_message_0_fts.db"));
        assert!(!is_biz_msg_db_key("message/biz_message_0_resource.db"));
        // Auxiliary files must also be rejected when the key uses backslashes.
        assert!(!is_biz_msg_db_key("message\\biz_message_0_resource.db"));
        // Keys for other databases never match.
        assert!(!is_biz_msg_db_key("sns/sns.db"));
    }
}
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,8 @@ pub struct Names {
|
||||||
pub md5_to_uname: HashMap<String, String>,
|
pub md5_to_uname: HashMap<String, String>,
|
||||||
/// 消息 DB 的相对路径列表(message/message_N.db)
|
/// 消息 DB 的相对路径列表(message/message_N.db)
|
||||||
pub msg_db_keys: Vec<String>,
|
pub msg_db_keys: Vec<String>,
|
||||||
|
/// 公众号推送 DB 的相对路径列表(message/biz_message_N.db)
|
||||||
|
pub biz_msg_db_keys: Vec<String>,
|
||||||
/// username -> contact.verify_flag(0=真人,非 0 通常为公众号/服务号/认证账号)
|
/// username -> contact.verify_flag(0=真人,非 0 通常为公众号/服务号/认证账号)
|
||||||
pub verify_flags: HashMap<String, i64>,
|
pub verify_flags: HashMap<String, i64>,
|
||||||
}
|
}
|
||||||
|
|
@ -269,6 +271,7 @@ pub async fn load_names(db: &DbCache) -> Result<Names> {
|
||||||
map,
|
map,
|
||||||
md5_to_uname,
|
md5_to_uname,
|
||||||
msg_db_keys: Vec::new(),
|
msg_db_keys: Vec::new(),
|
||||||
|
biz_msg_db_keys: Vec::new(),
|
||||||
verify_flags,
|
verify_flags,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -4010,7 +4013,7 @@ fn extract_cdata(xml: &str, tag: &str) -> Option<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 查询公众号文章推送(biz_message_0.db)
|
/// 查询公众号文章推送(biz_message_*.db 分片)
|
||||||
///
|
///
|
||||||
/// 每条消息可能包含多篇文章(多图文推送)。返回将所有文章展开后的扁平列表。
|
/// 每条消息可能包含多篇文章(多图文推送)。返回将所有文章展开后的扁平列表。
|
||||||
pub async fn q_biz_articles(
|
pub async fn q_biz_articles(
|
||||||
|
|
@ -4022,10 +4025,17 @@ pub async fn q_biz_articles(
|
||||||
until: Option<i64>,
|
until: Option<i64>,
|
||||||
unread: bool,
|
unread: bool,
|
||||||
) -> Result<Value> {
|
) -> Result<Value> {
|
||||||
let biz_path = db
|
let mut biz_paths = Vec::new();
|
||||||
.get("message/biz_message_0.db")
|
for rel_key in &names.biz_msg_db_keys {
|
||||||
.await?
|
if let Some(path) = db.get(rel_key).await? {
|
||||||
.context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")?;
|
biz_paths.push(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if biz_paths.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"无法解密任何 biz_message_*.db,请确认 all_keys.json 包含对应密钥"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
// 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集,
|
// 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集,
|
||||||
// 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。
|
// 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。
|
||||||
|
|
@ -4060,32 +4070,37 @@ pub async fn q_biz_articles(
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
// 1. 从 Name2Id 表获取 rowid -> username 映射,再推导 md5 -> username
|
// 1. 从全部 biz shard 的 Name2Id 表收集 username,再推导 md5 -> username
|
||||||
let biz_path2 = biz_path.clone();
|
let biz_paths2 = biz_paths.clone();
|
||||||
let id2username: HashMap<i64, String> = tokio::task::spawn_blocking(move || {
|
let biz_usernames: HashSet<String> = tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&biz_path2)?;
|
let mut usernames = HashSet::new();
|
||||||
let mut stmt =
|
for biz_path in biz_paths2 {
|
||||||
conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")?;
|
let conn = Connection::open(&biz_path)?;
|
||||||
let rows = stmt
|
let mut stmt = conn.prepare(
|
||||||
.query_map([], |row| {
|
"SELECT DISTINCT user_name FROM Name2Id \
|
||||||
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
|
WHERE user_name IS NOT NULL AND user_name != ''",
|
||||||
})?
|
)?;
|
||||||
.collect::<rusqlite::Result<Vec<_>>>()?;
|
let rows: Vec<String> = stmt
|
||||||
Ok::<_, anyhow::Error>(rows.into_iter().collect())
|
.query_map([], |row| row.get::<_, String>(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
usernames.extend(rows);
|
||||||
|
}
|
||||||
|
Ok::<_, anyhow::Error>(usernames)
|
||||||
})
|
})
|
||||||
.await??;
|
.await??;
|
||||||
|
|
||||||
// 构建 md5(username) -> username 映射
|
// 构建 md5(username) -> username 映射
|
||||||
let md5_to_uname: HashMap<String, String> = id2username
|
let md5_to_uname: HashMap<String, String> = biz_usernames
|
||||||
.values()
|
.iter()
|
||||||
.map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone()))
|
.map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone()))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// 2. 如果指定了 --account,找到匹配的 username 列表
|
// 2. 如果指定了 --account,找到匹配的 username 列表
|
||||||
let account_low = account.as_deref().map(|s| s.to_lowercase());
|
let account_low = account.as_deref().map(|s| s.to_lowercase());
|
||||||
let mut target_usernames: Option<Vec<String>> = account_low.as_ref().map(|low| {
|
let mut target_usernames: Option<Vec<String>> = account_low.as_ref().map(|low| {
|
||||||
id2username
|
biz_usernames
|
||||||
.values()
|
.iter()
|
||||||
.filter(|u| {
|
.filter(|u| {
|
||||||
let display = names.display(u);
|
let display = names.display(u);
|
||||||
display.to_lowercase().contains(low.as_str())
|
display.to_lowercase().contains(low.as_str())
|
||||||
|
|
@ -4115,7 +4130,7 @@ pub async fn q_biz_articles(
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. 进行数据库查询
|
// 3. 进行数据库查询
|
||||||
let biz_path3 = biz_path.clone();
|
let biz_paths3 = biz_paths;
|
||||||
let since2 = since;
|
let since2 = since;
|
||||||
let until2 = until;
|
let until2 = until;
|
||||||
let target_hashes: Option<Vec<String>> = target_usernames.as_ref().map(|unames| {
|
let target_hashes: Option<Vec<String>> = target_usernames.as_ref().map(|unames| {
|
||||||
|
|
@ -4126,71 +4141,72 @@ pub async fn q_biz_articles(
|
||||||
});
|
});
|
||||||
|
|
||||||
let rows: Vec<(String, i64, i64, Vec<u8>, i64)> = tokio::task::spawn_blocking(move || {
|
let rows: Vec<(String, i64, i64, Vec<u8>, i64)> = tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&biz_path3)?;
|
|
||||||
|
|
||||||
// 列出所有 Msg_<hash> 表
|
|
||||||
let mut stmt = conn
|
|
||||||
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'")?;
|
|
||||||
let table_names: Vec<String> = stmt
|
|
||||||
.query_map([], |row| row.get(0))?
|
|
||||||
.filter_map(|r| r.ok())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap();
|
let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap();
|
||||||
let mut all_rows: Vec<(String, i64, i64, Vec<u8>, i64)> = Vec::new();
|
let mut all_rows: Vec<(String, i64, i64, Vec<u8>, i64)> = Vec::new();
|
||||||
|
|
||||||
for tname in &table_names {
|
for biz_path in biz_paths3 {
|
||||||
if !re.is_match(tname) {
|
let conn = Connection::open(&biz_path)?;
|
||||||
continue;
|
let mut stmt = conn.prepare(
|
||||||
}
|
"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'",
|
||||||
let hash = &tname[4..];
|
)?;
|
||||||
|
let table_names: Vec<String> = stmt
|
||||||
|
.query_map([], |row| row.get(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
|
||||||
// account 过滤
|
for tname in &table_names {
|
||||||
if let Some(ref hashes) = target_hashes {
|
if !re.is_match(tname) {
|
||||||
if !hashes.iter().any(|h| h == hash) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
let hash = &tname[4..];
|
||||||
|
|
||||||
let username = md5_to_uname.get(hash).cloned().unwrap_or_default();
|
// account 过滤
|
||||||
|
if let Some(ref hashes) = target_hashes {
|
||||||
|
if !hashes.iter().any(|h| h == hash) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 构建过滤条件
|
let username = md5_to_uname.get(hash).cloned().unwrap_or_default();
|
||||||
let mut clauses: Vec<String> = Vec::new();
|
|
||||||
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
|
||||||
// local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章)
|
|
||||||
clauses.push("(local_type & 4294967295) = 49".to_string());
|
|
||||||
if let Some(s) = since2 {
|
|
||||||
clauses.push("create_time >= ?".to_string());
|
|
||||||
params.push(Box::new(s));
|
|
||||||
}
|
|
||||||
if let Some(u) = until2 {
|
|
||||||
clauses.push("create_time <= ?".to_string());
|
|
||||||
params.push(Box::new(u));
|
|
||||||
}
|
|
||||||
let where_clause = format!("WHERE {}", clauses.join(" AND "));
|
|
||||||
|
|
||||||
let sql = format!(
|
// 构建过滤条件
|
||||||
"SELECT create_time, WCDB_CT_message_content, message_content \
|
let mut clauses: Vec<String> = Vec::new();
|
||||||
FROM [{}] {} ORDER BY create_time DESC",
|
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
||||||
tname, where_clause
|
// local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章)
|
||||||
);
|
clauses.push("(local_type & 4294967295) = 49".to_string());
|
||||||
|
if let Some(s) = since2 {
|
||||||
|
clauses.push("create_time >= ?".to_string());
|
||||||
|
params.push(Box::new(s));
|
||||||
|
}
|
||||||
|
if let Some(u) = until2 {
|
||||||
|
clauses.push("create_time <= ?".to_string());
|
||||||
|
params.push(Box::new(u));
|
||||||
|
}
|
||||||
|
let where_clause = format!("WHERE {}", clauses.join(" AND "));
|
||||||
|
|
||||||
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
let sql = format!(
|
||||||
params.iter().map(|p| p.as_ref()).collect();
|
"SELECT create_time, WCDB_CT_message_content, message_content \
|
||||||
if let Ok(mut inner_stmt) = conn.prepare(&sql) {
|
FROM [{}] {} ORDER BY create_time DESC",
|
||||||
let msg_rows: Vec<_> = inner_stmt
|
tname, where_clause
|
||||||
.query_map(params_ref.as_slice(), |row| {
|
);
|
||||||
Ok((
|
|
||||||
username.clone(),
|
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
||||||
row.get::<_, i64>(0)?,
|
params.iter().map(|p| p.as_ref()).collect();
|
||||||
row.get::<_, i64>(1).unwrap_or(0),
|
if let Ok(mut inner_stmt) = conn.prepare(&sql) {
|
||||||
get_content_bytes(row, 2),
|
let msg_rows: Vec<_> = inner_stmt
|
||||||
0i64,
|
.query_map(params_ref.as_slice(), |row| {
|
||||||
))
|
Ok((
|
||||||
})
|
username.clone(),
|
||||||
.map(|it| it.filter_map(|r| r.ok()).collect())
|
row.get::<_, i64>(0)?,
|
||||||
.unwrap_or_default();
|
row.get::<_, i64>(1).unwrap_or(0),
|
||||||
all_rows.extend(msg_rows);
|
get_content_bytes(row, 2),
|
||||||
|
0i64,
|
||||||
|
))
|
||||||
|
})
|
||||||
|
.map(|it| it.filter_map(|r| r.ok()).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
all_rows.extend(msg_rows);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok::<_, anyhow::Error>(all_rows)
|
Ok::<_, anyhow::Error>(all_rows)
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ pub enum Request {
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
user: Option<String>,
|
user: Option<String>,
|
||||||
},
|
},
|
||||||
/// 查询公众号文章推送(biz_message_0.db)
|
/// 查询公众号文章推送(biz_message_*.db 分片)
|
||||||
BizArticles {
|
BizArticles {
|
||||||
#[serde(default = "default_limit_50")]
|
#[serde(default = "default_limit_50")]
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue