mirror of https://github.com/jackwener/wx-cli.git
Compare commits
10 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
08af894594 | |
|
|
94fcc36ffe | |
|
|
0612789d19 | |
|
|
f8550ae74d | |
|
|
5f87ce6348 | |
|
|
ed95812332 | |
|
|
be1a174226 | |
|
|
c34f5f8fe2 | |
|
|
b58ae5468d | |
|
|
7451ce5684 |
|
|
@ -71,6 +71,8 @@ windows = { version = "0.58", features = [
|
||||||
"Win32_System_Threading",
|
"Win32_System_Threading",
|
||||||
"Win32_Foundation",
|
"Win32_Foundation",
|
||||||
"Win32_System_Memory",
|
"Win32_System_Memory",
|
||||||
|
"Win32_System_Com",
|
||||||
|
"Win32_UI_Shell",
|
||||||
] }
|
] }
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
|
|
||||||
12
README.md
12
README.md
|
|
@ -168,6 +168,14 @@ wx search "会议" --in "工作群" --since 2026-01-01
|
||||||
|
|
||||||
群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
|
群聊里的 `last_sender`、`sender` 和 `stats` 的 `top_senders` 会优先使用群昵称(群名片)。如果本地数据库里没有对应群昵称,则回退到联系人备注、微信昵称或 username。
|
||||||
|
|
||||||
|
`history` / `search` / `new-messages` / `attachments` 以及 `stats.top_senders`,在群聊上下文里还会附带稳定身份三件套:
|
||||||
|
|
||||||
|
- `sender_username`:稳定 wxid,用来区分两个昵称同名的成员
|
||||||
|
- `sender_contact_display`:通讯录里的显示名(备注 > 昵称 > wxid 兜底)
|
||||||
|
- `sender_group_nickname`:群名片本身(同 `sender` 的来源,方便机器读取时不必再解析)
|
||||||
|
|
||||||
|
解析不到 wxid 时(id2u 没命中且老格式 `wxid_xxx:\n...` 前缀也不存在)这三字段不会输出,避免伪造空字段污染下游过滤。
|
||||||
|
|
||||||
`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`:
|
`history` / `search` / `sessions` / `unread` / `new-messages` / `stats` / `attachments` 现在都会附带 `meta`:
|
||||||
|
|
||||||
- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed`
|
- `status`: `ok` / `possibly_stale` / `possibly_stale_unknown_shards` / `windowed`
|
||||||
|
|
@ -209,7 +217,7 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01
|
||||||
|
|
||||||
### 公众号文章
|
### 公众号文章
|
||||||
|
|
||||||
公众号文章推送存在独立的 `biz_message_0.db`,用 `biz-articles` 单独查:
|
公众号文章推送存在独立的 `biz_message_*.db` 分片,用 `biz-articles` 单独查:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
wx biz-articles # 最近 50 篇
|
wx biz-articles # 最近 50 篇
|
||||||
|
|
@ -237,7 +245,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender`。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||||
|
|
||||||
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
6
SKILL.md
6
SKILL.md
|
|
@ -159,6 +159,8 @@ wx search "会议" --in "工作群" --since 2026-01-01
|
||||||
|
|
||||||
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
|
群聊消息里的 `last_sender`、`sender` 和 `stats.top_senders` 会优先显示群昵称(群名片)。如果本地数据库没有群昵称,再回退到联系人备注、微信昵称或 username。
|
||||||
|
|
||||||
|
`history` / `search` / `new-messages` / `attachments` 和 `stats.top_senders` 在群上下文里同时输出稳定身份三件套:`sender_username`(稳定 wxid,用来区分同名成员)/ `sender_contact_display`(备注 > 昵称 > wxid 兜底)/ `sender_group_nickname`(群名片,等价于 `sender` 的来源,免去再做字符串解析)。当 wxid 解析不到时,这三字段不会输出,避免空字符串污染下游过滤。
|
||||||
|
|
||||||
`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper:
|
`sessions` / `unread` / `history` / `search` / `new-messages` / `stats` / `attachments` 的 stdout 现在统一是 wrapper:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
|
@ -240,7 +242,7 @@ wx sns-search "婚礼" --user "李四" --since 2023-01-01 -n 50
|
||||||
|
|
||||||
### 公众号文章
|
### 公众号文章
|
||||||
|
|
||||||
公众号的文章推送存在独立的 `biz_message_0.db`,与普通 `message_0.db` 分开:
|
公众号的文章推送存在独立的 `biz_message_*.db` 分片,与普通 `message_0.db` 分开:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 最近 50 篇(默认)
|
# 最近 50 篇(默认)
|
||||||
|
|
@ -280,7 +282,7 @@ wx extract <attachment_id> -o ~/Desktop/photo.jpg
|
||||||
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
|
||||||
```
|
```
|
||||||
|
|
||||||
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender`。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。
|
||||||
|
|
||||||
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,8 +35,13 @@ pub fn cmd_init(force: bool) -> Result<()> {
|
||||||
|
|
||||||
// Step 1: 检测 db_dir
|
// Step 1: 检测 db_dir
|
||||||
println!("检测微信数据目录...");
|
println!("检测微信数据目录...");
|
||||||
let db_dir = config::auto_detect_db_dir()
|
let db_dir = config::auto_detect_db_dir().with_context(|| format!(
|
||||||
.context("未能自动检测到微信数据目录\n请手动编辑 config.json 中的 db_dir 字段")?;
|
"未能自动检测到微信数据目录\n\
|
||||||
|
请编辑配置文件并填写 db_dir 字段:\n \
|
||||||
|
{}\n\
|
||||||
|
(文件不存在则首次保存后自动创建;db_dir 示例: <data_root>\\xwechat_files\\<wxid>\\db_storage)",
|
||||||
|
config_path.display()
|
||||||
|
))?;
|
||||||
println!("找到数据目录: {}", db_dir.display());
|
println!("找到数据目录: {}", db_dir.display());
|
||||||
|
|
||||||
// Step 2: 扫描密钥(需要 root/sudo)
|
// Step 2: 扫描密钥(需要 root/sudo)
|
||||||
|
|
|
||||||
|
|
@ -320,9 +320,11 @@ fn detect_db_dir_impl() -> Option<PathBuf> {
|
||||||
let path = entry.path();
|
let path = entry.path();
|
||||||
if path.extension().map(|e| e == "ini").unwrap_or(false) {
|
if path.extension().map(|e| e == "ini").unwrap_or(false) {
|
||||||
if let Ok(content) = std::fs::read_to_string(&path) {
|
if let Ok(content) = std::fs::read_to_string(&path) {
|
||||||
let data_root = content.trim().to_string();
|
let Some(data_root) = resolve_windows_data_root(content.trim()) else {
|
||||||
if PathBuf::from(&data_root).is_dir() {
|
continue;
|
||||||
let pattern = PathBuf::from(&data_root).join("xwechat_files");
|
};
|
||||||
|
if data_root.is_dir() {
|
||||||
|
let pattern = data_root.join("xwechat_files");
|
||||||
if let Ok(entries2) = std::fs::read_dir(&pattern) {
|
if let Ok(entries2) = std::fs::read_dir(&pattern) {
|
||||||
for entry2 in entries2.flatten() {
|
for entry2 in entries2.flatten() {
|
||||||
let storage = entry2.path().join("db_storage");
|
let storage = entry2.path().join("db_storage");
|
||||||
|
|
@ -340,6 +342,72 @@ fn detect_db_dir_impl() -> Option<PathBuf> {
|
||||||
candidates.into_iter().next_back()
|
candidates.into_iter().next_back()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve the data-root path that Weixin writes to its `*.ini` file under
/// `%APPDATA%\Tencent\xwechat\config\`.
///
/// Observed forms in the wild:
/// - A plain absolute path, e.g. `D:\WeChatFiles`.
/// - The literal token `MyDocument:` (sometimes with a trailing slash),
///   which is not a real filesystem path. Empirically this denotes
///   "the current user's Documents folder"; users who relocated
///   Documents to e.g. `D:\Documents` saw auto-detect fail silently
///   because `PathBuf::from("MyDocument:").is_dir()` is false.
///
/// We accept either form. For the `MyDocument:` token we resolve via
/// `SHGetKnownFolderPath(FOLDERID_Documents)`, which respects the standard
/// shell-folder redirect at
/// `HKCU\Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders\Personal`.
///
/// # Parameters
/// - `content`: raw text of the ini file. Surrounding whitespace and a leading
///   UTF-8 byte-order mark are ignored.
///
/// # Returns
/// - `None` when the content is empty after trimming, or when the `MyDocument:`
///   token is present but the Documents folder cannot be resolved.
/// - `Some(path)` otherwise. The path is NOT checked for existence here; the
///   caller decides what to do with a non-directory.
#[cfg(target_os = "windows")]
fn resolve_windows_data_root(content: &str) -> Option<PathBuf> {
    // `str::trim` only removes Unicode whitespace, and U+FEFF is not whitespace.
    // A BOM left in place by `read_to_string` would otherwise defeat both the
    // keyword comparison and the later directory check.
    let trimmed = content.trim().trim_start_matches('\u{feff}').trim();
    if trimmed.is_empty() {
        // An empty ini carries no information; do not hand back an empty path.
        return None;
    }

    // Strip an optional trailing slash so `MyDocument:\` and `MyDocument:/` also match.
    let keyword = trimmed.strip_suffix(['\\', '/']).unwrap_or(trimmed);
    if keyword.eq_ignore_ascii_case("MyDocument:") {
        return known_documents_dir();
    }

    // Anything else is passed through verbatim (trailing slash included).
    Some(PathBuf::from(trimmed))
}
|
||||||
|
|
||||||
|
/// Ask the Windows shell for the calling user's Documents folder.
///
/// Returns `None` when `SHGetKnownFolderPath` reports an error, hands back a
/// null pointer, or yields an empty string; otherwise returns the folder path.
#[cfg(target_os = "windows")]
fn known_documents_dir() -> Option<PathBuf> {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;
    use windows::Win32::Foundation::HANDLE;
    use windows::Win32::System::Com::CoTaskMemFree;
    use windows::Win32::UI::Shell::{FOLDERID_Documents, SHGetKnownFolderPath, KF_FLAG_DEFAULT};

    // SAFETY: on success SHGetKnownFolderPath returns a NUL-terminated wide string
    // that the caller owns and must release with CoTaskMemFree. The pointer is
    // checked for null before it is read, the string is copied into an owned
    // `OsString` BEFORE the buffer is freed, and the raw pointer is never used
    // after the free. A null token (`HANDLE::default()`) selects the calling user.
    // NOTE(review): Microsoft's documentation says the buffer should be freed
    // whether or not the call succeeds; on the error path `.ok()?` returns before
    // any pointer is available, so confirm nothing is leaked there.
    let documents = unsafe {
        let raw = SHGetKnownFolderPath(&FOLDERID_Documents, KF_FLAG_DEFAULT, HANDLE::default())
            .ok()?
            .0;
        if raw.is_null() {
            return None;
        }

        // Count UTF-16 code units up to (not including) the terminating NUL.
        let mut units = 0usize;
        while *raw.add(units) != 0 {
            units += 1;
        }

        let owned = OsString::from_wide(std::slice::from_raw_parts(raw, units));
        CoTaskMemFree(Some(raw as *const _));
        owned
    };

    let path = PathBuf::from(documents);
    (!path.as_os_str().is_empty()).then_some(path)
}
|
||||||
|
|
||||||
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
|
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
|
||||||
fn detect_db_dir_impl() -> Option<PathBuf> {
|
fn detect_db_dir_impl() -> Option<PathBuf> {
|
||||||
None
|
None
|
||||||
|
|
@ -351,6 +419,8 @@ mod tests {
|
||||||
config_path_in_dir, default_config_path, find_existing_config_path, home_config_path,
|
config_path_in_dir, default_config_path, find_existing_config_path, home_config_path,
|
||||||
resolve_cli_home,
|
resolve_cli_home,
|
||||||
};
|
};
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
use super::{known_documents_dir, resolve_windows_data_root};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
@ -409,4 +479,24 @@ mod tests {
|
||||||
let path = default_config_path(Some(&cwd), Some(&exe), Some(&home));
|
let path = default_config_path(Some(&cwd), Some(&exe), Some(&home));
|
||||||
assert_eq!(path, cwd.join("config.json"));
|
assert_eq!(path, cwd.join("config.json"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A plain absolute path must come back unchanged.
#[cfg(target_os = "windows")]
#[test]
fn resolve_windows_data_root_passes_through_absolute_path() {
    let raw = "D:\\WeChatFiles";
    assert_eq!(resolve_windows_data_root(raw), Some(PathBuf::from(raw)));
}
|
||||||
|
|
||||||
|
/// Every accepted spelling of the `MyDocument:` token (any ASCII case, with or
/// without one trailing slash) must resolve to the same folder that
/// `known_documents_dir` reports.
#[cfg(target_os = "windows")]
#[test]
fn resolve_windows_data_root_recognises_mydocument_keyword() {
    let expected = known_documents_dir().expect("Documents known folder must resolve");
    let spellings = ["MyDocument:", "mydocument:", "MyDocument:\\", "MyDocument:/"];

    spellings.into_iter().for_each(|keyword| {
        let Some(resolved) = resolve_windows_data_root(keyword) else {
            panic!("keyword {keyword:?} should resolve");
        };
        assert_eq!(resolved, expected, "keyword {keyword:?}");
    });
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,39 @@ use std::sync::Arc;
|
||||||
|
|
||||||
use crate::config;
|
use crate::config;
|
||||||
|
|
||||||
|
/// Returns `rel_key` with every backslash replaced by a forward slash, so that
/// Windows-style and POSIX-style relative keys compare equal.
fn normalized_rel_key(rel_key: &str) -> String {
    rel_key.replace('\\', "/")
}

/// Shared matcher behind [`is_msg_db_key`] and [`is_biz_msg_db_key`].
///
/// After separator normalisation the key must start with `prefix`, end with
/// `.db`, and contain neither `_fts` nor `_resource`.
fn is_shard_db_key(rel_key: &str, prefix: &str) -> bool {
    // Only allocate a normalised copy when the key actually contains a backslash.
    let key = if rel_key.contains('\\') {
        std::borrow::Cow::Owned(normalized_rel_key(rel_key))
    } else {
        std::borrow::Cow::Borrowed(rel_key)
    };

    key.starts_with(prefix)
        && key.ends_with(".db")
        && !key.contains("_fts")
        && !key.contains("_resource")
}

/// True for ordinary message shards (`message/message_*.db`), excluding the
/// `_fts` / `_resource` companions and anything not ending in `.db`.
fn is_msg_db_key(rel_key: &str) -> bool {
    is_shard_db_key(rel_key, "message/message_")
}

/// True for official-account shards (`message/biz_message_*.db`), with the
/// same exclusions as [`is_msg_db_key`].
fn is_biz_msg_db_key(rel_key: &str) -> bool {
    is_shard_db_key(rel_key, "message/biz_message_")
}
|
||||||
|
|
||||||
|
/// Gather the keys of `all_keys` accepted by `predicate`, in ascending
/// lexicographic order.
///
/// Only the map's keys are inspected; the values are ignored. Sorting makes
/// the result independent of `HashMap` iteration order.
fn collect_db_keys(
    all_keys: &HashMap<String, String>,
    predicate: fn(&str) -> bool,
) -> Vec<String> {
    let mut selected = Vec::new();
    for rel_key in all_keys.keys() {
        if predicate(rel_key) {
            selected.push(rel_key.clone());
        }
    }
    selected.sort();
    selected
}
|
||||||
|
|
||||||
/// daemon 入口
|
/// daemon 入口
|
||||||
///
|
///
|
||||||
/// 当 WX_DAEMON_MODE 环境变量设置时,main() 调用此函数
|
/// 当 WX_DAEMON_MODE 环境变量设置时,main() 调用此函数
|
||||||
|
|
@ -49,17 +82,8 @@ async fn async_run() -> Result<()> {
|
||||||
let db = Arc::new(cache::DbCache::new(cfg.db_dir.clone(), all_keys.clone()).await?);
|
let db = Arc::new(cache::DbCache::new(cfg.db_dir.clone(), all_keys.clone()).await?);
|
||||||
|
|
||||||
// 收集消息 DB 列表
|
// 收集消息 DB 列表
|
||||||
let msg_db_keys: Vec<String> = all_keys
|
let msg_db_keys = collect_db_keys(&all_keys, is_msg_db_key);
|
||||||
.keys()
|
let biz_msg_db_keys = collect_db_keys(&all_keys, is_biz_msg_db_key);
|
||||||
.filter(|k| {
|
|
||||||
let k = k.replace('\\', "/");
|
|
||||||
k.contains("message/message_")
|
|
||||||
&& k.ends_with(".db")
|
|
||||||
&& !k.contains("_fts")
|
|
||||||
&& !k.contains("_resource")
|
|
||||||
})
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// 预热:加载联系人 + 解密 session.db
|
// 预热:加载联系人 + 解密 session.db
|
||||||
eprintln!("[daemon] 预热...");
|
eprintln!("[daemon] 预热...");
|
||||||
|
|
@ -69,11 +93,13 @@ async fn async_run() -> Result<()> {
|
||||||
map: HashMap::new(),
|
map: HashMap::new(),
|
||||||
md5_to_uname: HashMap::new(),
|
md5_to_uname: HashMap::new(),
|
||||||
msg_db_keys: Vec::new(),
|
msg_db_keys: Vec::new(),
|
||||||
|
biz_msg_db_keys: Vec::new(),
|
||||||
verify_flags: HashMap::new(),
|
verify_flags: HashMap::new(),
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
let mut names = names_raw;
|
let mut names = names_raw;
|
||||||
names.msg_db_keys = msg_db_keys;
|
names.msg_db_keys = msg_db_keys;
|
||||||
|
names.biz_msg_db_keys = biz_msg_db_keys;
|
||||||
|
|
||||||
let _ = db.get("session/session.db").await;
|
let _ = db.get("session/session.db").await;
|
||||||
let _ = db.get("sns/sns.db").await;
|
let _ = db.get("sns/sns.db").await;
|
||||||
|
|
@ -149,3 +175,28 @@ fn cleanup_ipc_files() {
|
||||||
let _ = std::fs::remove_file(config::sock_path());
|
let _ = std::fs::remove_file(config::sock_path());
|
||||||
let _ = std::fs::remove_file(config::pid_path());
|
let _ = std::fs::remove_file(config::pid_path());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::{is_biz_msg_db_key, is_msg_db_key};

    /// Ordinary message shards are accepted with either path separator; biz
    /// shards, WAL files and the `_fts` / `_resource` companions are rejected.
    #[test]
    fn message_db_key_filter_ignores_biz_and_auxiliary_files() {
        let accepted = ["message/message_0.db", "message\\message_12.db"];
        let rejected = [
            "message/biz_message_0.db",
            "message/message_0.db-wal",
            "message/message_0_fts.db",
            "message/message_0_resource.db",
        ];

        for key in accepted {
            assert!(is_msg_db_key(key), "{key}");
        }
        for key in rejected {
            assert!(!is_msg_db_key(key), "{key}");
        }
    }

    /// Biz shards are accepted with either path separator; ordinary shards,
    /// WAL files and the `_fts` / `_resource` companions are rejected.
    #[test]
    fn biz_message_db_key_filter_matches_only_biz_shards() {
        let accepted = ["message/biz_message_0.db", "message\\biz_message_3.db"];
        let rejected = [
            "message/message_0.db",
            "message/biz_message_0.db-wal",
            "message/biz_message_0_fts.db",
            "message/biz_message_0_resource.db",
        ];

        for key in accepted {
            assert!(is_biz_msg_db_key(key), "{key}");
        }
        for key in rejected {
            assert!(!is_biz_msg_db_key(key), "{key}");
        }
    }
}
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,8 @@ pub struct Names {
|
||||||
pub md5_to_uname: HashMap<String, String>,
|
pub md5_to_uname: HashMap<String, String>,
|
||||||
/// 消息 DB 的相对路径列表(message/message_N.db)
|
/// 消息 DB 的相对路径列表(message/message_N.db)
|
||||||
pub msg_db_keys: Vec<String>,
|
pub msg_db_keys: Vec<String>,
|
||||||
|
/// 公众号推送 DB 的相对路径列表(message/biz_message_N.db)
|
||||||
|
pub biz_msg_db_keys: Vec<String>,
|
||||||
/// username -> contact.verify_flag(0=真人,非 0 通常为公众号/服务号/认证账号)
|
/// username -> contact.verify_flag(0=真人,非 0 通常为公众号/服务号/认证账号)
|
||||||
pub verify_flags: HashMap<String, i64>,
|
pub verify_flags: HashMap<String, i64>,
|
||||||
}
|
}
|
||||||
|
|
@ -269,6 +271,7 @@ pub async fn load_names(db: &DbCache) -> Result<Names> {
|
||||||
map,
|
map,
|
||||||
md5_to_uname,
|
md5_to_uname,
|
||||||
msg_db_keys: Vec::new(),
|
msg_db_keys: Vec::new(),
|
||||||
|
biz_msg_db_keys: Vec::new(),
|
||||||
verify_flags,
|
verify_flags,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -922,6 +925,7 @@ fn query_messages(
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
|
let sender_username = sender_username(real_sender_id, &content, is_group, chat_username, &id2u);
|
||||||
let sender = sender_label(
|
let sender = sender_label(
|
||||||
real_sender_id,
|
real_sender_id,
|
||||||
&content,
|
&content,
|
||||||
|
|
@ -942,6 +946,7 @@ fn query_messages(
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
"local_id": local_id,
|
"local_id": local_id,
|
||||||
});
|
});
|
||||||
|
add_sender_identity(&mut msg, is_group, &sender_username, names_map, group_nicknames);
|
||||||
if let Some(u) = url {
|
if let Some(u) = url {
|
||||||
msg["url"] = serde_json::Value::String(u);
|
msg["url"] = serde_json::Value::String(u);
|
||||||
}
|
}
|
||||||
|
|
@ -1027,6 +1032,7 @@ fn search_in_table(
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
|
let sender_username = sender_username(real_sender_id, &content, is_group, chat_username, &id2u);
|
||||||
let sender = sender_label(
|
let sender = sender_label(
|
||||||
real_sender_id,
|
real_sender_id,
|
||||||
&content,
|
&content,
|
||||||
|
|
@ -1051,6 +1057,7 @@ fn search_in_table(
|
||||||
"content": text,
|
"content": text,
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
});
|
});
|
||||||
|
add_sender_identity(&mut msg, is_group, &sender_username, names_map, group_nicknames);
|
||||||
if let Some(u) = url {
|
if let Some(u) = url {
|
||||||
msg["url"] = serde_json::Value::String(u);
|
msg["url"] = serde_json::Value::String(u);
|
||||||
}
|
}
|
||||||
|
|
@ -1376,6 +1383,12 @@ fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option<Stri
|
||||||
let mut best = String::new();
|
let mut best = String::new();
|
||||||
|
|
||||||
for (idx, (field_no, value)) in strings.iter().enumerate() {
|
for (idx, (field_no, value)) in strings.iter().enumerate() {
|
||||||
|
// In current WeChat 4.x ext_buffer member chunks, field 2 is the group
|
||||||
|
// card/nickname. Field 4 is often another username-like value such as an
|
||||||
|
// inviter/owner and must not be promoted to a nickname.
|
||||||
|
if *field_no != 2 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let value = value.trim();
|
let value = value.trim();
|
||||||
if value.is_empty()
|
if value.is_empty()
|
||||||
|| value == username
|
|| value == username
|
||||||
|
|
@ -1388,9 +1401,6 @@ fn pick_group_nickname(strings: &[(u64, String)], username: &str) -> Option<Stri
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut score = 0i64;
|
let mut score = 0i64;
|
||||||
if *field_no == 2 {
|
|
||||||
score += 100;
|
|
||||||
}
|
|
||||||
if !looks_like_username(value) {
|
if !looks_like_username(value) {
|
||||||
score += 20;
|
score += 20;
|
||||||
}
|
}
|
||||||
|
|
@ -1489,10 +1499,12 @@ fn group_top_senders(
|
||||||
let mut top_senders: Vec<Value> = sender_counts
|
let mut top_senders: Vec<Value> = sender_counts
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(username, count)| {
|
.map(|(username, count)| {
|
||||||
json!({
|
let mut row = json!({
|
||||||
"sender": sender_display(username, "", names, group_nicknames),
|
"sender": sender_display(username, "", names, group_nicknames),
|
||||||
"count": count,
|
"count": count,
|
||||||
})
|
});
|
||||||
|
add_sender_identity(&mut row, true, username, names, group_nicknames);
|
||||||
|
row
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
top_senders.sort_by(|a, b| {
|
top_senders.sort_by(|a, b| {
|
||||||
|
|
@ -1511,6 +1523,48 @@ fn group_top_senders(
|
||||||
top_senders
|
top_senders
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve the stable username (wxid) of a message's sender.
///
/// Resolution order:
/// 1. `id2u[real_sender_id]`, when it is non-empty and differs from
///    `chat_username`.
/// 2. Group chats only: the legacy `"<username>:\n<body>"` content prefix, and
///    only when that prefix looks like a username (see
///    [`is_plausible_sender_prefix`]).
///
/// # Parameters
/// - `real_sender_id`: sender id stored on the message row.
/// - `content`: decompressed message content.
/// - `is_group`: whether the chat is a group chat.
/// - `chat_username`: username of the chat itself; a lookup hit equal to this
///   value is not treated as a sender.
/// - `id2u`: map from sender id to username.
///
/// # Returns
/// The resolved username, or an empty `String` when none can be determined.
fn sender_username(
    real_sender_id: i64,
    content: &str,
    is_group: bool,
    chat_username: &str,
    id2u: &HashMap<i64, String>,
) -> String {
    // The id lookup applies to private and group chats alike.
    if let Some(uname) = id2u.get(&real_sender_id) {
        if !uname.is_empty() && uname.as_str() != chat_username {
            return uname.clone();
        }
    }

    // Private chats carry no sender prefix in the content.
    if !is_group {
        return String::new();
    }

    // Ordinary text can contain ":\n" too ("see below:\n..."), so validate the
    // prefix before accepting it; a bogus identity is worse than none.
    match content.split_once(":\n") {
        Some((prefix, _)) if is_plausible_sender_prefix(prefix) => prefix.to_string(),
        _ => String::new(),
    }
}

/// A legacy content prefix is accepted as a username only when it is a single
/// non-empty token of printable ASCII (no whitespace, no control characters,
/// no non-ASCII text).
fn is_plausible_sender_prefix(prefix: &str) -> bool {
    !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_graphic())
}
|
||||||
|
|
||||||
|
fn add_sender_identity(
|
||||||
|
row: &mut Value,
|
||||||
|
is_group: bool,
|
||||||
|
username: &str,
|
||||||
|
names: &HashMap<String, String>,
|
||||||
|
group_nicknames: &HashMap<String, String>,
|
||||||
|
) {
|
||||||
|
if !is_group || username.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
row["sender_username"] = Value::String(username.to_string());
|
||||||
|
row["sender_contact_display"] = Value::String(
|
||||||
|
names.get(username).cloned().unwrap_or_else(|| username.to_string())
|
||||||
|
);
|
||||||
|
row["sender_group_nickname"] = Value::String(
|
||||||
|
group_nicknames.get(username).cloned().unwrap_or_default()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn sender_label(
|
fn sender_label(
|
||||||
real_sender_id: i64,
|
real_sender_id: i64,
|
||||||
content: &str,
|
content: &str,
|
||||||
|
|
@ -2108,6 +2162,194 @@ mod appmsg_tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `query_messages` on a group chat must emit the identity fields next to
/// `sender`: the wxid resolved through `Name2Id`, the contact display name and
/// the group nickname.
#[test]
fn query_messages_includes_stable_group_sender_identity() {
    const CREATE_NAME2ID: &str = "CREATE TABLE Name2Id (\n\
        user_name TEXT\n\
        )";
    const CREATE_MSG: &str = "CREATE TABLE Msg_test (\n\
        local_id INTEGER,\n\
        local_type INTEGER,\n\
        create_time INTEGER,\n\
        real_sender_id INTEGER,\n\
        message_content TEXT,\n\
        WCDB_CT_message_content INTEGER\n\
        )";

    let names = HashMap::from([("wxid_alice".to_string(), "Alice Contact".to_string())]);
    let group_nicknames = HashMap::from([("wxid_alice".to_string(), "同名".to_string())]);

    let path = temp_db_path("query_messages_includes_stable_group_sender_identity");
    {
        // Scoped so the connection is closed before the file is queried and removed.
        let setup = Connection::open(&path).expect("open temp db");
        setup
            .execute(CREATE_NAME2ID, [])
            .expect("create Name2Id table");
        setup
            .execute(
                "INSERT INTO Name2Id(rowid, user_name) VALUES (?1, ?2)",
                rusqlite::params![42_i64, "wxid_alice"],
            )
            .expect("insert Name2Id row");
        setup.execute(CREATE_MSG, []).expect("create message table");
        setup
            .execute(
                "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                rusqlite::params![1_i64, 1_i64, 1775146911_i64, 42_i64, "hello", 0_i64],
            )
            .expect("insert text message");
    }

    let rows = query_messages(
        &path,
        "Msg_test",
        "123@chatroom",
        true,
        &names,
        &group_nicknames,
        None,
        None,
        None,
        10,
        0,
    )
    .expect("query messages");

    // Remove the temp file before asserting so a failure does not leave it behind.
    let _ = std::fs::remove_file(&path);

    assert_eq!(rows.len(), 1);
    let row = &rows[0];
    let expected = [
        ("sender", "同名"),
        ("sender_username", "wxid_alice"),
        ("sender_contact_display", "Alice Contact"),
        ("sender_group_nickname", "同名"),
    ];
    for (field, want) in expected {
        assert_eq!(row[field].as_str(), Some(want), "field {field}");
    }
}
|
||||||
|
|
||||||
|
/// `search_in_table` on a group chat must emit the identity fields next to
/// `sender`: the wxid resolved through `Name2Id`, the contact display name and
/// the group nickname.
#[test]
fn search_in_table_includes_stable_group_sender_identity() {
    const CREATE_NAME2ID: &str = "CREATE TABLE Name2Id (\n\
        user_name TEXT\n\
        )";
    const CREATE_MSG: &str = "CREATE TABLE Msg_test (\n\
        local_id INTEGER,\n\
        local_type INTEGER,\n\
        create_time INTEGER,\n\
        real_sender_id INTEGER,\n\
        message_content TEXT,\n\
        WCDB_CT_message_content INTEGER\n\
        )";

    let names = HashMap::from([("wxid_alice".to_string(), "Alice Contact".to_string())]);
    let group_nicknames = HashMap::from([("wxid_alice".to_string(), "同名".to_string())]);

    let conn = Connection::open_in_memory().expect("open in-memory db");
    conn.execute(CREATE_NAME2ID, [])
        .expect("create Name2Id table");
    conn.execute(
        "INSERT INTO Name2Id(rowid, user_name) VALUES (?1, ?2)",
        rusqlite::params![42_i64, "wxid_alice"],
    )
    .expect("insert Name2Id row");
    conn.execute(CREATE_MSG, []).expect("create message table");
    conn.execute(
        "INSERT INTO Msg_test VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
        rusqlite::params![1_i64, 1_i64, 1775146911_i64, 42_i64, "needle", 0_i64],
    )
    .expect("insert text message");

    let rows = search_in_table(
        &conn,
        "Msg_test",
        "123@chatroom",
        true,
        &names,
        &group_nicknames,
        "needle",
        None,
        None,
        None,
        10,
    )
    .expect("search messages");

    assert_eq!(rows.len(), 1);
    let row = &rows[0];
    let expected = [
        ("sender", "同名"),
        ("sender_username", "wxid_alice"),
        ("sender_contact_display", "Alice Contact"),
        ("sender_group_nickname", "同名"),
    ];
    for (field, want) in expected {
        assert_eq!(row[field].as_str(), Some(want), "field {field}");
    }
}
|
||||||
|
|
||||||
|
/// `q_attachments` is async and depends on `DbCache`, so the whole pipeline
/// cannot be unit-tested directly. This test pins the final JSON shape of an
/// attachment row after `add_sender_identity` has been applied: two members
/// whose group nickname is both "同名" stay distinguishable by `sender_username`.
#[test]
fn attachment_row_gets_stable_group_sender_identity_via_helper() {
    const IDENTITY_FIELDS: [&str; 3] = [
        "sender_username",
        "sender_contact_display",
        "sender_group_nickname",
    ];

    let names: HashMap<String, String> = [
        ("wxid_alice", "Alice Contact"),
        ("wxid_bob", "Bob Contact"),
    ]
    .into_iter()
    .map(|(wxid, display)| (wxid.to_string(), display.to_string()))
    .collect();
    let group_nicknames: HashMap<String, String> = ["wxid_alice", "wxid_bob"]
        .into_iter()
        .map(|wxid| (wxid.to_string(), "同名".to_string()))
        .collect();

    let mut alice_row = json!({
        "attachment_id": "abc",
        "kind": "image",
        "type": "Image",
        "local_id": 1,
        "timestamp": 1775146911,
        "time": "2026-04-30 12:00",
        "sender": "同名",
    });
    add_sender_identity(&mut alice_row, true, "wxid_alice", &names, &group_nicknames);
    let alice_expected = [
        ("sender", "同名"),
        ("sender_username", "wxid_alice"),
        ("sender_contact_display", "Alice Contact"),
        ("sender_group_nickname", "同名"),
    ];
    for (field, want) in alice_expected {
        assert_eq!(alice_row[field].as_str(), Some(want));
    }

    let mut bob_row = json!({
        "attachment_id": "def",
        "kind": "image",
        "type": "Image",
        "local_id": 2,
        "timestamp": 1775146922,
        "time": "2026-04-30 12:00",
        "sender": "同名",
    });
    add_sender_identity(&mut bob_row, true, "wxid_bob", &names, &group_nicknames);
    assert_eq!(bob_row["sender_username"].as_str(), Some("wxid_bob"));
    // Both rows share the same group nickname, yet sender_username tells them apart.
    assert_ne!(
        alice_row["sender_username"], bob_row["sender_username"],
        "sender_username 必须区分两位同名成员"
    );

    // A non-group chat must not get identity fields (same behaviour as
    // history / search / new-messages).
    let mut private_row = json!({"attachment_id": "ghi", "sender": ""});
    add_sender_identity(&mut private_row, false, "wxid_alice", &names, &group_nicknames);
    for field in IDENTITY_FIELDS {
        assert!(private_row.get(field).is_none());
    }

    // Group chat with an unresolved sender (empty username): no empty
    // placeholder fields are fabricated; nothing is added at all.
    let mut unknown_row = json!({"attachment_id": "jkl", "sender": ""});
    add_sender_identity(&mut unknown_row, true, "", &names, &group_nicknames);
    for field in IDENTITY_FIELDS {
        assert!(unknown_row.get(field).is_none());
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn search_in_table_filters_appmsg_by_base_type() {
|
fn search_in_table_filters_appmsg_by_base_type() {
|
||||||
let conn = Connection::open_in_memory().expect("open in-memory db");
|
let conn = Connection::open_in_memory().expect("open in-memory db");
|
||||||
|
|
@ -2750,6 +2992,7 @@ pub async fn q_new_messages(
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
for (local_id, local_type, ts, real_sender_id, content_bytes, ct) in rows {
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
|
let sender_username = sender_username(real_sender_id, &content, is_group, &uname2, &id2u);
|
||||||
let sender = sender_label(
|
let sender = sender_label(
|
||||||
real_sender_id,
|
real_sender_id,
|
||||||
&content,
|
&content,
|
||||||
|
|
@ -2772,6 +3015,7 @@ pub async fn q_new_messages(
|
||||||
"content": text,
|
"content": text,
|
||||||
"type": fmt_type(local_type),
|
"type": fmt_type(local_type),
|
||||||
});
|
});
|
||||||
|
add_sender_identity(&mut msg, is_group, &sender_username, &names_map, &group_nicknames2);
|
||||||
if let Some(u) = url {
|
if let Some(u) = url {
|
||||||
msg["url"] = serde_json::Value::String(u);
|
msg["url"] = serde_json::Value::String(u);
|
||||||
}
|
}
|
||||||
|
|
@ -3769,7 +4013,7 @@ fn extract_cdata(xml: &str, tag: &str) -> Option<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 查询公众号文章推送(biz_message_0.db)
|
/// 查询公众号文章推送(biz_message_*.db 分片)
|
||||||
///
|
///
|
||||||
/// 每条消息可能包含多篇文章(多图文推送)。返回所有文章展开就的平底列表。
|
/// 每条消息可能包含多篇文章(多图文推送)。返回所有文章展开就的平底列表。
|
||||||
pub async fn q_biz_articles(
|
pub async fn q_biz_articles(
|
||||||
|
|
@ -3781,10 +4025,17 @@ pub async fn q_biz_articles(
|
||||||
until: Option<i64>,
|
until: Option<i64>,
|
||||||
unread: bool,
|
unread: bool,
|
||||||
) -> Result<Value> {
|
) -> Result<Value> {
|
||||||
let biz_path = db
|
let mut biz_paths = Vec::new();
|
||||||
.get("message/biz_message_0.db")
|
for rel_key in &names.biz_msg_db_keys {
|
||||||
.await?
|
if let Some(path) = db.get(rel_key).await? {
|
||||||
.context("无法解密 biz_message_0.db,请确认 all_keys.json 包含对应密钥")?;
|
biz_paths.push(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if biz_paths.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"无法解密任何 biz_message_*.db,请确认 all_keys.json 包含对应密钥"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
// 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集,
|
// 开启 --unread:从 session.db 拿“公众号 + unread_count>0”的 username 子集,
|
||||||
// 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。
|
// 作为合集过滤(与 --account 取交集),后续结果按 account_username 去重取顶 1 篇。
|
||||||
|
|
@ -3819,32 +4070,37 @@ pub async fn q_biz_articles(
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
// 1. 从 Name2Id 表获取 rowid -> username 映射,再推导 md5 -> username
|
// 1. 从全部 biz shard 的 Name2Id 表收集 username,再推导 md5 -> username
|
||||||
let biz_path2 = biz_path.clone();
|
let biz_paths2 = biz_paths.clone();
|
||||||
let id2username: HashMap<i64, String> = tokio::task::spawn_blocking(move || {
|
let biz_usernames: HashSet<String> = tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&biz_path2)?;
|
let mut usernames = HashSet::new();
|
||||||
let mut stmt =
|
for biz_path in biz_paths2 {
|
||||||
conn.prepare("SELECT rowid, user_name FROM Name2Id WHERE user_name LIKE 'gh_%'")?;
|
let conn = Connection::open(&biz_path)?;
|
||||||
let rows = stmt
|
let mut stmt = conn.prepare(
|
||||||
.query_map([], |row| {
|
"SELECT DISTINCT user_name FROM Name2Id \
|
||||||
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
|
WHERE user_name IS NOT NULL AND user_name != ''",
|
||||||
})?
|
)?;
|
||||||
.collect::<rusqlite::Result<Vec<_>>>()?;
|
let rows: Vec<String> = stmt
|
||||||
Ok::<_, anyhow::Error>(rows.into_iter().collect())
|
.query_map([], |row| row.get::<_, String>(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
usernames.extend(rows);
|
||||||
|
}
|
||||||
|
Ok::<_, anyhow::Error>(usernames)
|
||||||
})
|
})
|
||||||
.await??;
|
.await??;
|
||||||
|
|
||||||
// 构建 md5(username) -> username 映射
|
// 构建 md5(username) -> username 映射
|
||||||
let md5_to_uname: HashMap<String, String> = id2username
|
let md5_to_uname: HashMap<String, String> = biz_usernames
|
||||||
.values()
|
.iter()
|
||||||
.map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone()))
|
.map(|u| (format!("{:x}", md5::compute(u.as_bytes())), u.clone()))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// 2. 如果 指定了 --account,找到匹配的 username 列表
|
// 2. 如果 指定了 --account,找到匹配的 username 列表
|
||||||
let account_low = account.as_deref().map(|s| s.to_lowercase());
|
let account_low = account.as_deref().map(|s| s.to_lowercase());
|
||||||
let mut target_usernames: Option<Vec<String>> = account_low.as_ref().map(|low| {
|
let mut target_usernames: Option<Vec<String>> = account_low.as_ref().map(|low| {
|
||||||
id2username
|
biz_usernames
|
||||||
.values()
|
.iter()
|
||||||
.filter(|u| {
|
.filter(|u| {
|
||||||
let display = names.display(u);
|
let display = names.display(u);
|
||||||
display.to_lowercase().contains(low.as_str())
|
display.to_lowercase().contains(low.as_str())
|
||||||
|
|
@ -3874,7 +4130,7 @@ pub async fn q_biz_articles(
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. 进行数据库查询
|
// 3. 进行数据库查询
|
||||||
let biz_path3 = biz_path.clone();
|
let biz_paths3 = biz_paths;
|
||||||
let since2 = since;
|
let since2 = since;
|
||||||
let until2 = until;
|
let until2 = until;
|
||||||
let target_hashes: Option<Vec<String>> = target_usernames.as_ref().map(|unames| {
|
let target_hashes: Option<Vec<String>> = target_usernames.as_ref().map(|unames| {
|
||||||
|
|
@ -3885,71 +4141,72 @@ pub async fn q_biz_articles(
|
||||||
});
|
});
|
||||||
|
|
||||||
let rows: Vec<(String, i64, i64, Vec<u8>, i64)> = tokio::task::spawn_blocking(move || {
|
let rows: Vec<(String, i64, i64, Vec<u8>, i64)> = tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&biz_path3)?;
|
|
||||||
|
|
||||||
// 列出所有 Msg_<hash> 表
|
|
||||||
let mut stmt = conn
|
|
||||||
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'")?;
|
|
||||||
let table_names: Vec<String> = stmt
|
|
||||||
.query_map([], |row| row.get(0))?
|
|
||||||
.filter_map(|r| r.ok())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap();
|
let re = regex::Regex::new(r"^Msg_[0-9a-f]{32}$").unwrap();
|
||||||
let mut all_rows: Vec<(String, i64, i64, Vec<u8>, i64)> = Vec::new();
|
let mut all_rows: Vec<(String, i64, i64, Vec<u8>, i64)> = Vec::new();
|
||||||
|
|
||||||
for tname in &table_names {
|
for biz_path in biz_paths3 {
|
||||||
if !re.is_match(tname) {
|
let conn = Connection::open(&biz_path)?;
|
||||||
continue;
|
let mut stmt = conn.prepare(
|
||||||
}
|
"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'Msg_%'",
|
||||||
let hash = &tname[4..];
|
)?;
|
||||||
|
let table_names: Vec<String> = stmt
|
||||||
|
.query_map([], |row| row.get(0))?
|
||||||
|
.filter_map(|r| r.ok())
|
||||||
|
.collect();
|
||||||
|
|
||||||
// account 过滤
|
for tname in &table_names {
|
||||||
if let Some(ref hashes) = target_hashes {
|
if !re.is_match(tname) {
|
||||||
if !hashes.iter().any(|h| h == hash) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
let hash = &tname[4..];
|
||||||
|
|
||||||
let username = md5_to_uname.get(hash).cloned().unwrap_or_default();
|
// account 过滤
|
||||||
|
if let Some(ref hashes) = target_hashes {
|
||||||
|
if !hashes.iter().any(|h| h == hash) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 构建过滤条件
|
let username = md5_to_uname.get(hash).cloned().unwrap_or_default();
|
||||||
let mut clauses: Vec<String> = Vec::new();
|
|
||||||
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
|
||||||
// local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章)
|
|
||||||
clauses.push("(local_type & 4294967295) = 49".to_string());
|
|
||||||
if let Some(s) = since2 {
|
|
||||||
clauses.push("create_time >= ?".to_string());
|
|
||||||
params.push(Box::new(s));
|
|
||||||
}
|
|
||||||
if let Some(u) = until2 {
|
|
||||||
clauses.push("create_time <= ?".to_string());
|
|
||||||
params.push(Box::new(u));
|
|
||||||
}
|
|
||||||
let where_clause = format!("WHERE {}", clauses.join(" AND "));
|
|
||||||
|
|
||||||
let sql = format!(
|
// 构建过滤条件
|
||||||
"SELECT create_time, WCDB_CT_message_content, message_content \
|
let mut clauses: Vec<String> = Vec::new();
|
||||||
FROM [{}] {} ORDER BY create_time DESC",
|
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
||||||
tname, where_clause
|
// local_type & 0xFFFFFFFF = 49 是 appmsg(公众号文章)
|
||||||
);
|
clauses.push("(local_type & 4294967295) = 49".to_string());
|
||||||
|
if let Some(s) = since2 {
|
||||||
|
clauses.push("create_time >= ?".to_string());
|
||||||
|
params.push(Box::new(s));
|
||||||
|
}
|
||||||
|
if let Some(u) = until2 {
|
||||||
|
clauses.push("create_time <= ?".to_string());
|
||||||
|
params.push(Box::new(u));
|
||||||
|
}
|
||||||
|
let where_clause = format!("WHERE {}", clauses.join(" AND "));
|
||||||
|
|
||||||
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
let sql = format!(
|
||||||
params.iter().map(|p| p.as_ref()).collect();
|
"SELECT create_time, WCDB_CT_message_content, message_content \
|
||||||
if let Ok(mut inner_stmt) = conn.prepare(&sql) {
|
FROM [{}] {} ORDER BY create_time DESC",
|
||||||
let msg_rows: Vec<_> = inner_stmt
|
tname, where_clause
|
||||||
.query_map(params_ref.as_slice(), |row| {
|
);
|
||||||
Ok((
|
|
||||||
username.clone(),
|
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
||||||
row.get::<_, i64>(0)?,
|
params.iter().map(|p| p.as_ref()).collect();
|
||||||
row.get::<_, i64>(1).unwrap_or(0),
|
if let Ok(mut inner_stmt) = conn.prepare(&sql) {
|
||||||
get_content_bytes(row, 2),
|
let msg_rows: Vec<_> = inner_stmt
|
||||||
0i64,
|
.query_map(params_ref.as_slice(), |row| {
|
||||||
))
|
Ok((
|
||||||
})
|
username.clone(),
|
||||||
.map(|it| it.filter_map(|r| r.ok()).collect())
|
row.get::<_, i64>(0)?,
|
||||||
.unwrap_or_default();
|
row.get::<_, i64>(1).unwrap_or(0),
|
||||||
all_rows.extend(msg_rows);
|
get_content_bytes(row, 2),
|
||||||
|
0i64,
|
||||||
|
))
|
||||||
|
})
|
||||||
|
.map(|it| it.filter_map(|r| r.ok()).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
all_rows.extend(msg_rows);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok::<_, anyhow::Error>(all_rows)
|
Ok::<_, anyhow::Error>(all_rows)
|
||||||
|
|
@ -4057,9 +4314,12 @@ pub async fn q_attachments(
|
||||||
HashMap::new()
|
HashMap::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut all_rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = Vec::new();
|
let mut all_rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = Vec::new();
|
||||||
let mut shard_hits = 0usize;
|
let mut shard_hits = 0usize;
|
||||||
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label, ts_for_sort, db_idx)
|
// 元组:(local_id, local_type_lo32, create_time, real_sender_id, sender_label,
|
||||||
|
// sender_username, ts_for_sort, db_idx)
|
||||||
|
// sender_username 是稳定 wxid,用来让 sender_contact_display / sender_group_nickname
|
||||||
|
// 落在 attachment row 上(消除"两个同名成员的图分不清谁发的"歧义)。
|
||||||
for (db_idx, shard) in shards.iter().enumerate() {
|
for (db_idx, shard) in shards.iter().enumerate() {
|
||||||
let path = shard.path.clone();
|
let path = shard.path.clone();
|
||||||
let tname = shard.table.clone();
|
let tname = shard.table.clone();
|
||||||
|
|
@ -4074,7 +4334,7 @@ pub async fn q_attachments(
|
||||||
let per_db_cap = (offset + limit).max(limit) * 2;
|
let per_db_cap = (offset + limit).max(limit) * 2;
|
||||||
let db_idx2 = db_idx as i64;
|
let db_idx2 = db_idx as i64;
|
||||||
|
|
||||||
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> =
|
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> =
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
let conn = Connection::open(&path)?;
|
let conn = Connection::open(&path)?;
|
||||||
let id2u = load_id2u(&conn);
|
let id2u = load_id2u(&conn);
|
||||||
|
|
@ -4112,7 +4372,7 @@ pub async fn q_attachments(
|
||||||
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
let params_ref: Vec<&dyn rusqlite::types::ToSql> =
|
||||||
params.iter().map(|p| p.as_ref()).collect();
|
params.iter().map(|p| p.as_ref()).collect();
|
||||||
let mut stmt = conn.prepare(&sql)?;
|
let mut stmt = conn.prepare(&sql)?;
|
||||||
let rows: Vec<(i64, i64, i64, i64, String, i64, i64)> = stmt
|
let rows: Vec<(i64, i64, i64, i64, String, String, i64, i64)> = stmt
|
||||||
.query_map(params_ref.as_slice(), |row| {
|
.query_map(params_ref.as_slice(), |row| {
|
||||||
let local_id: i64 = row.get(0)?;
|
let local_id: i64 = row.get(0)?;
|
||||||
let raw_type: i64 = row.get(1)?;
|
let raw_type: i64 = row.get(1)?;
|
||||||
|
|
@ -4122,20 +4382,29 @@ pub async fn q_attachments(
|
||||||
let content_bytes = get_content_bytes(row, 4);
|
let content_bytes = get_content_bytes(row, 4);
|
||||||
let ct: i64 = row.get::<_, i64>(5).unwrap_or(0);
|
let ct: i64 = row.get::<_, i64>(5).unwrap_or(0);
|
||||||
let content = decompress_message(&content_bytes, ct);
|
let content = decompress_message(&content_bytes, ct);
|
||||||
let sender = if is_group2 {
|
let (sender, sender_uname) = if is_group2 {
|
||||||
sender_label(
|
(
|
||||||
real_sender_id,
|
sender_label(
|
||||||
&content,
|
real_sender_id,
|
||||||
true,
|
&content,
|
||||||
&uname,
|
true,
|
||||||
&id2u,
|
&uname,
|
||||||
&names_map,
|
&id2u,
|
||||||
&group_nicknames2,
|
&names_map,
|
||||||
|
&group_nicknames2,
|
||||||
|
),
|
||||||
|
sender_username(
|
||||||
|
real_sender_id,
|
||||||
|
&content,
|
||||||
|
true,
|
||||||
|
&uname,
|
||||||
|
&id2u,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
String::new()
|
(String::new(), String::new())
|
||||||
};
|
};
|
||||||
Ok((local_id, lo32, ts, real_sender_id, sender, ts, db_idx2))
|
Ok((local_id, lo32, ts, real_sender_id, sender, sender_uname, ts, db_idx2))
|
||||||
})?
|
})?
|
||||||
.filter_map(|r| r.ok())
|
.filter_map(|r| r.ok())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
@ -4148,13 +4417,13 @@ pub async fn q_attachments(
|
||||||
all_rows.extend(rows);
|
all_rows.extend(rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 全局按 ts DESC 排序后分页
|
// 全局按 ts DESC 排序后分页(ts_for_sort 在 tuple index 6)
|
||||||
all_rows.sort_by_key(|r| std::cmp::Reverse(r.5));
|
all_rows.sort_by_key(|r| std::cmp::Reverse(r.6));
|
||||||
let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect();
|
let paged: Vec<_> = all_rows.into_iter().skip(offset).take(limit).collect();
|
||||||
|
|
||||||
// 翻成 JSON
|
// 翻成 JSON
|
||||||
let mut results: Vec<Value> = Vec::with_capacity(paged.len());
|
let mut results: Vec<Value> = Vec::with_capacity(paged.len());
|
||||||
for (local_id, lo32, ts, _real_sender_id, sender, _ts2, _db_idx) in paged {
|
for (local_id, lo32, ts, _real_sender_id, sender, sender_uname, _ts2, _db_idx) in paged {
|
||||||
let kind = type_to_kind
|
let kind = type_to_kind
|
||||||
.get(&lo32)
|
.get(&lo32)
|
||||||
.copied()
|
.copied()
|
||||||
|
|
@ -4180,6 +4449,7 @@ pub async fn q_attachments(
|
||||||
if is_group && !sender.is_empty() {
|
if is_group && !sender.is_empty() {
|
||||||
row["sender"] = Value::String(sender);
|
row["sender"] = Value::String(sender);
|
||||||
}
|
}
|
||||||
|
add_sender_identity(&mut row, is_group, &sender_uname, &names.map, &group_nicknames);
|
||||||
results.push(row);
|
results.push(row);
|
||||||
}
|
}
|
||||||
let unknown_shards = current_unknown_shards(db, names);
|
let unknown_shards = current_unknown_shards(db, names);
|
||||||
|
|
@ -4525,6 +4795,30 @@ mod group_nickname_tests {
|
||||||
assert!(!nicknames.contains_key("candidate_name"));
|
assert!(!nicknames.contains_key("candidate_name"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ignores_non_card_string_fields_as_group_nicknames() {
|
||||||
|
let mut ext_buffer = Vec::new();
|
||||||
|
|
||||||
|
let mut member_without_card = Vec::new();
|
||||||
|
member_without_card.extend(string_field(1, "wxid_alice"));
|
||||||
|
member_without_card.extend(string_field(4, "owner_or_inviter"));
|
||||||
|
ext_buffer.extend(len_field(1, &member_without_card));
|
||||||
|
|
||||||
|
let mut member_with_card = Vec::new();
|
||||||
|
member_with_card.extend(string_field(1, "wxid_bob"));
|
||||||
|
member_with_card.extend(string_field(2, "Bob In Group"));
|
||||||
|
member_with_card.extend(string_field(4, "owner_or_inviter"));
|
||||||
|
ext_buffer.extend(len_field(1, &member_with_card));
|
||||||
|
|
||||||
|
let nicknames = parse_group_nickname_map(&ext_buffer, None);
|
||||||
|
|
||||||
|
assert!(!nicknames.contains_key("wxid_alice"));
|
||||||
|
assert_eq!(
|
||||||
|
nicknames.get("wxid_bob").map(String::as_str),
|
||||||
|
Some("Bob In Group")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn group_top_senders_keeps_duplicate_display_names_separate() {
|
fn group_top_senders_keeps_duplicate_display_names_separate() {
|
||||||
let sender_counts =
|
let sender_counts =
|
||||||
|
|
@ -4542,8 +4836,14 @@ mod group_nickname_tests {
|
||||||
|
|
||||||
assert_eq!(top.len(), 2);
|
assert_eq!(top.len(), 2);
|
||||||
assert_eq!(top[0]["sender"].as_str(), Some("同名"));
|
assert_eq!(top[0]["sender"].as_str(), Some("同名"));
|
||||||
|
assert_eq!(top[0]["sender_username"].as_str(), Some("wxid_alice"));
|
||||||
|
assert_eq!(top[0]["sender_contact_display"].as_str(), Some("Alice Contact"));
|
||||||
|
assert_eq!(top[0]["sender_group_nickname"].as_str(), Some("同名"));
|
||||||
assert_eq!(top[0]["count"].as_i64(), Some(7));
|
assert_eq!(top[0]["count"].as_i64(), Some(7));
|
||||||
assert_eq!(top[1]["sender"].as_str(), Some("同名"));
|
assert_eq!(top[1]["sender"].as_str(), Some("同名"));
|
||||||
|
assert_eq!(top[1]["sender_username"].as_str(), Some("wxid_bob"));
|
||||||
|
assert_eq!(top[1]["sender_contact_display"].as_str(), Some("Bob Contact"));
|
||||||
|
assert_eq!(top[1]["sender_group_nickname"].as_str(), Some("同名"));
|
||||||
assert_eq!(top[1]["count"].as_i64(), Some(3));
|
assert_eq!(top[1]["count"].as_i64(), Some(3));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ pub enum Request {
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
user: Option<String>,
|
user: Option<String>,
|
||||||
},
|
},
|
||||||
/// 查询公众号文章推送(biz_message_0.db)
|
/// 查询公众号文章推送(biz_message_*.db 分片)
|
||||||
BizArticles {
|
BizArticles {
|
||||||
#[serde(default = "default_limit_50")]
|
#[serde(default = "default_limit_50")]
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue