mirror of https://github.com/jackwener/wx-cli.git
Investigate get_chat_history limit
parent
2cd180c63a
commit
7c42ff5d38
|
|
@ -139,7 +139,7 @@ claude mcp add wechat -- python C:\Users\你的用户名\wechat-decrypt\mcp_serv
|
||||||
|
|
||||||
前置条件:需要先运行 `python main.py` 或 `python find_all_keys.py` 完成密钥提取。
|
前置条件:需要先运行 `python main.py` 或 `python find_all_keys.py` 完成密钥提取。
|
||||||
|
|
||||||
说明:`get_chat_history` 和 `search_messages` 的 `limit` 最大为 `500`。
|
说明:`search_messages` 的 `limit` 最大为 `500`;`get_chat_history` 的 `limit` 不再受 `500` 的上限限制,但消息很多时仍建议配合 `offset` 分页读取。
|
||||||
|
|
||||||
**[查看使用案例 →](USAGE.md)**
|
**[查看使用案例 →](USAGE.md)**
|
||||||
|
|
||||||
|
|
|
||||||
2
USAGE.md
2
USAGE.md
|
|
@ -191,7 +191,7 @@ Claude 可以获取大量消息后自动分析活跃度、话题分布、关键
|
||||||
> 帮我分析一下██群最近一周的情况
|
> 帮我分析一下██群最近一周的情况
|
||||||
```
|
```
|
||||||
|
|
||||||
Claude 会调用 `get_chat_history(chat_name="██群", limit=500)` 获取消息,然后输出:
|
Claude 会调用 `get_chat_history(chat_name="██群", limit=500)` 获取消息(消息很多时,也可以把 `limit` 设得更大,或配合 `offset` 分页读取),然后输出:
|
||||||
|
|
||||||
```
|
```
|
||||||
## ██群最近一周分析
|
## ██群最近一周分析
|
||||||
|
|
|
||||||
|
|
@ -224,6 +224,7 @@ _self_username = None
|
||||||
_XML_UNSAFE_RE = re.compile(r'<!DOCTYPE|<!ENTITY', re.IGNORECASE)
|
_XML_UNSAFE_RE = re.compile(r'<!DOCTYPE|<!ENTITY', re.IGNORECASE)
|
||||||
_XML_PARSE_MAX_LEN = 20000
|
_XML_PARSE_MAX_LEN = 20000
|
||||||
_QUERY_LIMIT_MAX = 500
|
_QUERY_LIMIT_MAX = 500
|
||||||
|
_HISTORY_QUERY_BATCH_SIZE = 500
|
||||||
|
|
||||||
|
|
||||||
def _load_contacts_from(db_path):
|
def _load_contacts_from(db_path):
|
||||||
|
|
@ -635,11 +636,11 @@ def _find_msg_tables_for_user(username):
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
|
||||||
def _validate_pagination(limit, offset=0):
|
def _validate_pagination(limit, offset=0, limit_max=_QUERY_LIMIT_MAX):
|
||||||
if limit <= 0:
|
if limit <= 0:
|
||||||
raise ValueError("limit 必须大于 0")
|
raise ValueError("limit 必须大于 0")
|
||||||
if limit > _QUERY_LIMIT_MAX:
|
if limit_max is not None and limit > limit_max:
|
||||||
raise ValueError(f"limit 不能大于 {_QUERY_LIMIT_MAX}")
|
raise ValueError(f"limit 不能大于 {limit_max}")
|
||||||
if offset < 0:
|
if offset < 0:
|
||||||
raise ValueError("offset 不能小于 0")
|
raise ValueError("offset 不能小于 0")
|
||||||
|
|
||||||
|
|
@ -841,8 +842,6 @@ def _build_history_line(row, ctx, names, id_to_username):
|
||||||
sender, text = _format_message_text(
|
sender, text = _format_message_text(
|
||||||
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names
|
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names
|
||||||
)
|
)
|
||||||
if text and len(text) > 500:
|
|
||||||
text = text[:500] + '...'
|
|
||||||
|
|
||||||
sender_label = _resolve_sender_label(
|
sender_label = _resolve_sender_label(
|
||||||
real_sender_id, sender, ctx['is_group'], ctx['username'], ctx['display_name'], names, id_to_username
|
real_sender_id, sender, ctx['is_group'], ctx['username'], ctx['display_name'], names, id_to_username
|
||||||
|
|
@ -880,6 +879,10 @@ def _message_query_batch_size(candidate_limit):
|
||||||
return candidate_limit
|
return candidate_limit
|
||||||
|
|
||||||
|
|
||||||
|
def _history_query_batch_size(candidate_limit):
|
||||||
|
return min(candidate_limit, _HISTORY_QUERY_BATCH_SIZE)
|
||||||
|
|
||||||
|
|
||||||
def _page_ranked_entries(entries, limit, offset):
|
def _page_ranked_entries(entries, limit, offset):
|
||||||
ordered = sorted(entries, key=lambda item: item[0], reverse=True)
|
ordered = sorted(entries, key=lambda item: item[0], reverse=True)
|
||||||
paged = ordered[offset:offset + limit]
|
paged = ordered[offset:offset + limit]
|
||||||
|
|
@ -891,22 +894,40 @@ def _collect_chat_history_lines(ctx, names, start_ts=None, end_ts=None, limit=20
|
||||||
collected = []
|
collected = []
|
||||||
failures = []
|
failures = []
|
||||||
candidate_limit = _candidate_page_size(limit, offset)
|
candidate_limit = _candidate_page_size(limit, offset)
|
||||||
|
batch_size = _history_query_batch_size(candidate_limit)
|
||||||
|
|
||||||
for table_ctx in _iter_table_contexts(ctx):
|
for table_ctx in _iter_table_contexts(ctx):
|
||||||
try:
|
try:
|
||||||
with closing(sqlite3.connect(table_ctx['db_path'])) as conn:
|
with closing(sqlite3.connect(table_ctx['db_path'])) as conn:
|
||||||
id_to_username = _load_name2id_maps(conn)
|
id_to_username = _load_name2id_maps(conn)
|
||||||
|
fetch_offset = 0
|
||||||
|
collected_before_table = len(collected)
|
||||||
# 当前页上的消息一定落在各分表最近的 offset+limit 条记录内。
|
# 当前页上的消息一定落在各分表最近的 offset+limit 条记录内。
|
||||||
rows = _query_messages(
|
while len(collected) - collected_before_table < candidate_limit:
|
||||||
conn,
|
rows = _query_messages(
|
||||||
table_ctx['table_name'],
|
conn,
|
||||||
start_ts=start_ts,
|
table_ctx['table_name'],
|
||||||
end_ts=end_ts,
|
start_ts=start_ts,
|
||||||
limit=candidate_limit,
|
end_ts=end_ts,
|
||||||
offset=0,
|
limit=batch_size,
|
||||||
)
|
offset=fetch_offset,
|
||||||
for row in rows:
|
)
|
||||||
collected.append(_build_history_line(row, table_ctx, names, id_to_username))
|
if not rows:
|
||||||
|
break
|
||||||
|
fetch_offset += len(rows)
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
try:
|
||||||
|
collected.append(_build_history_line(row, table_ctx, names, id_to_username))
|
||||||
|
except Exception as e:
|
||||||
|
failures.append(
|
||||||
|
f"{table_ctx['display_name']} local_id={row[0]} create_time={row[2]}: {e}"
|
||||||
|
)
|
||||||
|
if len(collected) - collected_before_table >= candidate_limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
if len(rows) < batch_size:
|
||||||
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
failures.append(f"{table_ctx['db_path']}: {e}")
|
failures.append(f"{table_ctx['db_path']}: {e}")
|
||||||
|
|
||||||
|
|
@ -1221,16 +1242,16 @@ def get_chat_history(chat_name: str, limit: int = 50, offset: int = 0, start_tim
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
chat_name: 聊天对象的名字、备注名或wxid,自动模糊匹配
|
chat_name: 聊天对象的名字、备注名或wxid,自动模糊匹配
|
||||||
limit: 返回的消息数量,默认50,最大500
|
limit: 返回的消息数量,默认50;支持较大的值,建议配合 offset 分页使用
|
||||||
offset: 分页偏移量,默认0
|
offset: 分页偏移量,默认0
|
||||||
start_time: 起始时间,支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS
|
start_time: 起始时间,支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS
|
||||||
end_time: 结束时间,支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS
|
end_time: 结束时间,支持 YYYY-MM-DD / YYYY-MM-DD HH:MM / YYYY-MM-DD HH:MM:SS
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
_validate_pagination(limit, offset)
|
_validate_pagination(limit, offset, limit_max=None)
|
||||||
start_ts, end_ts = _parse_time_range(start_time, end_time)
|
start_ts, end_ts = _parse_time_range(start_time, end_time)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return f"错误: {e}"
|
return f"错误: {e}"
|
||||||
|
|
||||||
ctx = _resolve_chat_context(chat_name)
|
ctx = _resolve_chat_context(chat_name)
|
||||||
if not ctx:
|
if not ctx:
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,10 @@ class SearchMessagesTests(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(ValueError, "limit 不能大于 500"):
|
with self.assertRaisesRegex(ValueError, "limit 不能大于 500"):
|
||||||
mcp_server._validate_pagination(501, 0)
|
mcp_server._validate_pagination(501, 0)
|
||||||
|
|
||||||
|
def test_validate_pagination_allows_large_limit_when_limit_is_unbounded(self):
|
||||||
|
# get_chat_history 允许更大的 limit,只校验正数和 offset。
|
||||||
|
mcp_server._validate_pagination(999999, 0, limit_max=None)
|
||||||
|
|
||||||
def test_page_search_entries_returns_chronological_results_with_offset(self):
|
def test_page_search_entries_returns_chronological_results_with_offset(self):
|
||||||
# 结果应先按最新时间分页,再把当前页恢复成时间正序输出。
|
# 结果应先按最新时间分页,再把当前页恢复成时间正序输出。
|
||||||
entries = [(1, "a"), (5, "e"), (3, "c"), (4, "d"), (2, "b")]
|
entries = [(1, "a"), (5, "e"), (3, "c"), (4, "d"), (2, "b")]
|
||||||
|
|
@ -377,6 +381,43 @@ class SearchMessagesTests(unittest.TestCase):
|
||||||
self.assertIn("new message", result)
|
self.assertIn("new message", result)
|
||||||
self.assertNotIn("old message", result)
|
self.assertNotIn("old message", result)
|
||||||
|
|
||||||
|
def test_get_chat_history_large_limit_reads_all_rows_across_shards(self):
|
||||||
|
# 大 limit 下,跨分片历史查询不能只返回较旧分片里的少量消息。
|
||||||
|
older_messages = [
|
||||||
|
(index, 1000 + index, f"old shard message {index}")
|
||||||
|
for index in range(1, 18)
|
||||||
|
]
|
||||||
|
newer_messages = [
|
||||||
|
(index, 2000 + index, f"new shard message {index}")
|
||||||
|
for index in range(1, 296)
|
||||||
|
]
|
||||||
|
older_db = self.create_db("history_cross_shard_older.db", {"alice": older_messages})
|
||||||
|
newer_db = self.create_db("history_cross_shard_newer.db", {"alice": newer_messages})
|
||||||
|
ctx = {
|
||||||
|
"query": "Alice",
|
||||||
|
"username": "alice",
|
||||||
|
"display_name": "Alice",
|
||||||
|
"db_path": newer_db,
|
||||||
|
"table_name": _msg_table_name("alice"),
|
||||||
|
"message_tables": [
|
||||||
|
{"db_path": newer_db, "table_name": _msg_table_name("alice")},
|
||||||
|
{"db_path": older_db, "table_name": _msg_table_name("alice")},
|
||||||
|
],
|
||||||
|
"is_group": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch.object(mcp_server, "get_contact_names", return_value={"alice": "Alice"}), patch.object(
|
||||||
|
mcp_server, "_resolve_chat_context", return_value=ctx
|
||||||
|
):
|
||||||
|
result = mcp_server.get_chat_history("Alice", limit=500, offset=0)
|
||||||
|
|
||||||
|
self.assertIn("Alice 的消息记录(返回 312 条,offset=0, limit=500)", result)
|
||||||
|
self.assertIn("new shard message 295", result)
|
||||||
|
self.assertIn("old shard message 17", result)
|
||||||
|
|
||||||
|
body = result.split(":\n\n", 1)[1]
|
||||||
|
self.assertEqual(len(body.splitlines()), 312)
|
||||||
|
|
||||||
def test_get_chat_history_uses_bounded_sql_pagination(self):
|
def test_get_chat_history_uses_bounded_sql_pagination(self):
|
||||||
# 历史查询应把 offset+limit 下推到 SQL,避免把整张消息表读出来后再切片。
|
# 历史查询应把 offset+limit 下推到 SQL,避免把整张消息表读出来后再切片。
|
||||||
db_path = self.create_db(
|
db_path = self.create_db(
|
||||||
|
|
@ -419,6 +460,36 @@ class SearchMessagesTests(unittest.TestCase):
|
||||||
self.assertNotIn("oldest", result)
|
self.assertNotIn("oldest", result)
|
||||||
self.assertEqual(calls, [(_msg_table_name("alice"), 3, 0)])
|
self.assertEqual(calls, [(_msg_table_name("alice"), 3, 0)])
|
||||||
|
|
||||||
|
def test_get_chat_history_allows_large_limit_values(self):
|
||||||
|
# 历史查询不应再把大 limit 直接拒绝掉。
|
||||||
|
db_path = self.create_db(
|
||||||
|
"history_large_limit.db",
|
||||||
|
{
|
||||||
|
"alice": [
|
||||||
|
(1, 200, "message 1"),
|
||||||
|
(2, 100, "message 2"),
|
||||||
|
]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
ctx = {
|
||||||
|
"query": "Alice",
|
||||||
|
"username": "alice",
|
||||||
|
"display_name": "Alice",
|
||||||
|
"db_path": db_path,
|
||||||
|
"table_name": _msg_table_name("alice"),
|
||||||
|
"message_tables": [{"db_path": db_path, "table_name": _msg_table_name("alice")}],
|
||||||
|
"is_group": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch.object(mcp_server, "get_contact_names", return_value={"alice": "Alice"}), patch.object(
|
||||||
|
mcp_server, "_resolve_chat_context", return_value=ctx
|
||||||
|
):
|
||||||
|
result = mcp_server.get_chat_history("Alice", limit=999999, offset=0)
|
||||||
|
|
||||||
|
self.assertNotIn("错误:", result)
|
||||||
|
self.assertIn("message 1", result)
|
||||||
|
self.assertIn("message 2", result)
|
||||||
|
|
||||||
def test_get_chat_history_keeps_partial_results_when_formatting_fails(self):
|
def test_get_chat_history_keeps_partial_results_when_formatting_fails(self):
|
||||||
# 单条坏消息不应让整个历史查询失败,已有结果仍应返回并附带失败说明。
|
# 单条坏消息不应让整个历史查询失败,已有结果仍应返回并附带失败说明。
|
||||||
db_path = self.create_db(
|
db_path = self.create_db(
|
||||||
|
|
@ -452,6 +523,31 @@ class SearchMessagesTests(unittest.TestCase):
|
||||||
self.assertIn("查询失败:", result)
|
self.assertIn("查询失败:", result)
|
||||||
self.assertIn("bad row", result)
|
self.assertIn("bad row", result)
|
||||||
|
|
||||||
|
def test_get_chat_history_does_not_truncate_long_messages(self):
|
||||||
|
# 历史记录应返回完整消息内容,而不是固定截断到 500 字符。
|
||||||
|
long_message = "x" * 600
|
||||||
|
db_path = self.create_db(
|
||||||
|
"history_long_message.db",
|
||||||
|
{"alice": [(1, 200, long_message)]},
|
||||||
|
)
|
||||||
|
ctx = {
|
||||||
|
"query": "Alice",
|
||||||
|
"username": "alice",
|
||||||
|
"display_name": "Alice",
|
||||||
|
"db_path": db_path,
|
||||||
|
"table_name": _msg_table_name("alice"),
|
||||||
|
"message_tables": [{"db_path": db_path, "table_name": _msg_table_name("alice")}],
|
||||||
|
"is_group": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch.object(mcp_server, "get_contact_names", return_value={"alice": "Alice"}), patch.object(
|
||||||
|
mcp_server, "_resolve_chat_context", return_value=ctx
|
||||||
|
):
|
||||||
|
result = mcp_server.get_chat_history("Alice", limit=1, offset=0)
|
||||||
|
|
||||||
|
self.assertIn(long_message, result)
|
||||||
|
self.assertNotIn(("x" * 500) + "...", result)
|
||||||
|
|
||||||
def test_search_messages_single_chat_merges_sharded_message_tables(self):
|
def test_search_messages_single_chat_merges_sharded_message_tables(self):
|
||||||
# 单聊搜索也要跨分片合并,否则最近消息可能查不到。
|
# 单聊搜索也要跨分片合并,否则最近消息可能查不到。
|
||||||
older_db = self.create_db("search_older.db", {"alice": [(1, 100, "foo old")]})
|
older_db = self.create_db("search_older.db", {"alice": [(1, 100, "foo old")]})
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue