- 新增开机自启管理模块,支持静默后台启动
- 创建 `_autostart.bat` 和 `_autostart.vbs` 脚本实现无窗口启动
- 在 UI 设置页面添加开机自启开关控件
- 通过注册表管理自启项,支持启用/禁用状态切换
♻️ refactor(评论): 优化评论解析逻辑并增强 AI 回复自然度
- 重构 `get_feed_comments` 方法,优先从结构化 JSON 提取评论数据
- 改进 `_parse_comments` 方法,支持多种嵌套格式的评论列表解析
- 新增 `_humanize` 和 `_humanize_content` 方法,去除 AI 生成内容的书面痕迹
- 调整多个提示词模板,强调真人化、口语化的写作风格,避免 AI 特征
- 提高生成回复和评论时的温度参数,增加输出多样性
482 lines
17 KiB
Python
482 lines
17 KiB
Python
"""
|
||
小红书 MCP HTTP 客户端
|
||
封装对 xiaohongshu-mcp 服务 (http://localhost:18060/mcp) 的调用
|
||
"""
|
||
import requests
|
||
import json
|
||
import logging
|
||
import uuid
|
||
import base64
|
||
import re
|
||
import io
|
||
from PIL import Image
|
||
|
||
# Defaults used to reach the locally running xiaohongshu-mcp service.
MCP_TIMEOUT = 60  # seconds
MCP_DEFAULT_URL = "http://localhost:18060/mcp"

logger = logging.getLogger(__name__)

# Module-wide client cache: one MCPClient per base URL, so the MCP
# "initialize" handshake is not repeated for every caller.
_client_cache: dict[str, "MCPClient"] = {}
|
||
|
||
|
||
def get_mcp_client(base_url: str = MCP_DEFAULT_URL) -> "MCPClient":
    """Return the shared MCP client for ``base_url``.

    The first request for a URL constructs an ``MCPClient`` and stores it in
    the module-level cache; later requests for the same URL get that same
    instance back.
    """
    try:
        return _client_cache[base_url]
    except KeyError:
        # First request for this URL: build the client and remember it.
        client = _client_cache[base_url] = MCPClient(base_url)
        return client
|
||
|
||
|
||
class MCPClient:
    """HTTP client wrapper for the xiaohongshu MCP service.

    Requests are sent as JSON-RPC 2.0 payloads over HTTP POST to ``base_url``.
    Methods report failures by returning a dict containing an ``"error"`` key
    instead of raising.
    """

    def __init__(self, base_url: str = MCP_DEFAULT_URL):
        """Create a client bound to ``base_url`` with its own HTTP session."""
        self.base_url = base_url
        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})
        # Last "mcp-session-id" response header seen; echoed on later requests.
        self._session_id = None
        # Set once the initialize handshake has completed without error.
        self._initialized = False

    # ---------- Low-level transport ----------

    def _call(self, method: str, params: dict = None, *,
              is_notification: bool = False) -> dict:
        """Send one JSON-RPC request to the MCP service.

        Args:
            method: JSON-RPC method name.
            params: JSON-RPC params; an empty object is sent when omitted.
            is_notification: When True the payload carries no ``id``
                (JSON-RPC notification) and the response body is not parsed.

        Returns:
            The ``result`` member of the response (or the whole response when
            it has none), ``{"status": "notified"}`` for notifications, or
            ``{"error": ...}`` on a JSON-RPC error, connection failure,
            timeout, or any other exception.
        """
        payload = {
            "jsonrpc": "2.0",
            "method": method,
            "params": params or {},
        }
        # JSON-RPC notifications carry no id.
        if not is_notification:
            payload["id"] = str(uuid.uuid4())

        headers = {}
        if self._session_id:
            headers["mcp-session-id"] = self._session_id

        try:
            resp = self.session.post(
                self.base_url, json=payload, timeout=MCP_TIMEOUT, headers=headers
            )
            # Remember the session id before checking the HTTP status.
            if "mcp-session-id" in resp.headers:
                self._session_id = resp.headers["mcp-session-id"]

            resp.raise_for_status()

            # A notification does not necessarily have a response body.
            if is_notification:
                return {"status": "notified"}

            data = resp.json()
            if "error" in data:
                logger.error("MCP error: %s", data["error"])
                return {"error": data["error"]}
            return data.get("result", data)
        except requests.exceptions.ConnectionError:
            logger.error("MCP 服务未启动或无法连接: %s", self.base_url)
            return {"error": "MCP 服务未启动,请先启动 xiaohongshu-mcp"}
        except requests.exceptions.Timeout:
            logger.error("MCP 请求超时")
            return {"error": "MCP 请求超时,请稍后重试"}
        except Exception as e:
            # HTTP status errors raised by raise_for_status() also land here;
            # _call_tool inspects the resulting message for "400".
            logger.error("MCP 调用异常: %s", e)
            return {"error": str(e)}

    def _ensure_initialized(self):
        """Run the MCP initialize handshake unless it has already succeeded.

        Returns the ``initialize`` result (possibly an error dict), or
        ``{"status": "already_initialized"}`` when no handshake was needed.
        """
        if self._initialized:
            return {"status": "already_initialized"}

        result = self._call("initialize", {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "xhs-autobot", "version": "2.0.0"}
        })
        if "error" not in result:
            # Follow up with the "initialized" notification (no id).
            self._call("notifications/initialized", {},
                       is_notification=True)
            self._initialized = True
        return result

    def _reset(self):
        """Forget the handshake state so the next call initializes again."""
        self._initialized = False
        self._session_id = None

    @staticmethod
    def _flatten_tool_result(result: dict) -> dict:
        """Collapse an MCP tool result into ``{success, text, raw[, images]}``.

        Text items are joined with newlines; image items contribute their
        ``data`` payload to ``images``. ``success`` is False when the result
        carries a truthy ``isError`` flag. Malformed content items are skipped
        instead of raising.
        """
        texts = []
        images = []
        for item in result.get("content") or []:
            if not isinstance(item, dict):
                continue
            kind = item.get("type")
            if kind == "text":
                text = item.get("text")
                if isinstance(text, str):
                    texts.append(text)
            elif kind == "image":
                # The image payload is decoded as base64 by get_login_qrcode.
                img_data = item.get("data", "")
                if img_data:
                    images.append(img_data)

        out = {
            "success": not result.get("isError", False),
            "text": "\n".join(texts),
            "raw": result,
        }
        if images:
            out["images"] = images
        return out

    def _call_tool(self, tool_name: str, arguments: dict = None) -> dict:
        """Invoke an MCP tool, retrying once after a 400-style error.

        Args:
            tool_name: Name of the tool to call.
            arguments: Tool arguments; an empty object is sent when omitted.

        Returns:
            ``{"success", "text", "raw"}`` (plus ``"images"`` when present)
            if the result has a ``content`` list; otherwise the raw result
            from ``_call``, which is ``{"error": ...}`` on failure.
        """
        request = {"name": tool_name, "arguments": arguments or {}}

        self._ensure_initialized()
        result = self._call("tools/call", request)

        # On a 400-related error, reset the session and retry exactly once.
        if isinstance(result, dict) and "error" in result:
            err_msg = str(result["error"])
            if "400" in err_msg or "Bad Request" in err_msg:
                logger.warning("MCP 400 错误,重置会话后重试: %s", tool_name)
                self._reset()
                self._ensure_initialized()
                result = self._call("tools/call", request)

        if isinstance(result, dict) and "content" in result:
            return self._flatten_tool_result(result)
        return result

    # ---------- Login ----------

    def get_login_qrcode(self) -> dict:
        """Fetch the login QR code.

        Returns the tool result with an extra ``qr_image`` key holding a
        ``PIL.Image`` decoded from the first returned image, or ``None`` when
        no image was returned or decoding failed. Error results are returned
        unchanged.
        """
        result = self._call_tool("get_login_qrcode")
        if "error" in result:
            return result

        qr_image = None
        images = result.get("images")
        if images:
            try:
                img_bytes = base64.b64decode(images[0])
                qr_image = Image.open(io.BytesIO(img_bytes))
            except Exception as e:
                logger.warning("二维码图片解析失败: %s", e)
        result["qr_image"] = qr_image
        return result

    def check_login_status(self) -> dict:
        """Check the Xiaohongshu login status."""
        return self._call_tool("check_login_status")

    # ---------- Connection state ----------

    def check_connection(self) -> tuple[bool, str]:
        """Check whether the MCP service is reachable.

        Returns:
            ``(False, message)`` when the probe call fails, otherwise
            ``(True, text)`` with the text returned by the service.
        """
        result = self._call_tool("check_login_status")
        if "error" in result:
            err = result["error"]
            # _call passes the service's "error" member through unchanged;
            # when it is a dict, prefer its "message" so the declared
            # (bool, str) contract holds.
            if isinstance(err, dict):
                err = err.get("message") or err
            return False, str(err)
        return True, result.get("text", "已连接")

    # ---------- Search ----------

    def search_feeds(self, keyword: str, sort_by: str = "综合",
                     note_type: str = "不限", publish_time: str = "不限") -> dict:
        """Search Xiaohongshu content by keyword with optional filters."""
        args = {
            "keyword": keyword,
            "filters": {
                "sort_by": sort_by,
                "note_type": note_type,
                "publish_time": publish_time,
            }
        }
        return self._call_tool("search_feeds", args)

    # ---------- Recommended feed ----------

    def list_feeds(self) -> dict:
        """Fetch the home-page recommended feed list."""
        return self._call_tool("list_feeds")

    # ---------- Feed list parsing ----------

    @staticmethod
    def _parse_feed_entries(text: str) -> list[dict]:
        """Parse feed entries out of the text returned by the MCP service.

        JSON is tried first (either ``{"feeds": [...]}`` or a bare list).
        If that yields nothing, a regex scan over the text is used instead.
        The two strategies never mix their results.

        Returns:
            A list of dicts with keys ``feed_id``, ``xsec_token``, ``title``,
            ``author``, ``user_id``, ``likes`` and ``type``.
        """
        def as_dict(value):
            # Treat null / non-object members as empty objects.
            return value if isinstance(value, dict) else {}

        # Strategy 1: structured JSON.
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            data = None

        if isinstance(data, dict):
            feeds = data.get("feeds")
        elif isinstance(data, list):
            feeds = data
        else:
            feeds = None

        json_entries = []
        if isinstance(feeds, list):
            for feed in feeds:
                if not isinstance(feed, dict):
                    continue
                note = as_dict(feed.get("noteCard"))
                user = as_dict(note.get("user"))
                interact = as_dict(note.get("interactInfo"))
                json_entries.append({
                    "feed_id": feed.get("id", ""),
                    "xsec_token": feed.get("xsecToken", ""),
                    "title": note.get("displayTitle", "未知标题"),
                    "author": user.get("nickname", user.get("nickName", "")),
                    "user_id": user.get("userId", ""),
                    "likes": interact.get("likedCount", "0"),
                    "type": note.get("type", ""),
                })
        if json_entries:
            return json_entries

        # Strategy 2: regex extraction over the plain-text format.
        # Feed id: 24 hex characters. The lookbehinds stop the bare "id"
        # alternative from matching the tail of "userId" / "user_id".
        feed_ids = re.findall(
            r'(?<!user)(?<!user_)(?:feed_id|id)["\s::]+([0-9a-f]{24})', text, re.I)
        # xsecToken
        tokens = re.findall(r'(?:xsec_?[Tt]oken)["\s::]+([A-Za-z0-9+/=_-]{20,})', text, re.I)
        # Title (to end of line)
        titles = re.findall(r'(?:title|标题)["\s::]+(.+?)(?:\n|$)', text, re.I)
        # userId
        user_ids = re.findall(r'(?:user_?[Ii]d|userId)["\s::]+([0-9a-f]{24})', text, re.I)

        # Fields are paired by position; missing ones get placeholders.
        entries = []
        count = max(len(feed_ids), len(tokens))
        for i in range(count):
            entries.append({
                "feed_id": feed_ids[i] if i < len(feed_ids) else "",
                "xsec_token": tokens[i] if i < len(tokens) else "",
                "title": titles[i].strip() if i < len(titles) else f"笔记 {i+1}",
                "author": "",
                "user_id": user_ids[i] if i < len(user_ids) else "",
                "likes": "",
                "type": "",
            })
        return entries

    def list_feeds_parsed(self) -> list[dict]:
        """Fetch the recommended feed and return it as structured entries.

        Returns an empty list when the underlying call reports an error.
        """
        result = self.list_feeds()
        if "error" in result:
            return []
        return self._parse_feed_entries(result.get("text", ""))

    def search_feeds_parsed(self, keyword: str, sort_by: str = "综合") -> list[dict]:
        """Search feeds and return the results as structured entries.

        Returns an empty list when the underlying call reports an error.
        """
        result = self.search_feeds(keyword, sort_by=sort_by)
        if "error" in result:
            return []
        return self._parse_feed_entries(result.get("text", ""))

    @staticmethod
    def _extract_comment_obj(c: dict) -> dict:
        """Build a structured comment record from one comment JSON object."""
        user_info = c.get("userInfo") or c.get("user") or {}
        if not isinstance(user_info, dict):
            # Unexpected user payload: fall back to the defaults below.
            user_info = {}
        return {
            "comment_id": str(c.get("id", c.get("commentId", ""))),
            "user_id": user_info.get("userId", user_info.get("user_id", "")),
            "nickname": user_info.get("nickname", user_info.get("nickName", "未知")),
            "content": c.get("content", ""),
            "sub_comment_count": c.get("subCommentCount", 0),
        }

    @staticmethod
    def _find_comment_list(data: dict) -> list:
        """Locate the comment list inside the supported nested layouts.

        Checked in order:
          1. ``{"data": {"comments": {"list": [...]}}}`` or
             ``{"data": {"comments": [...]}}``
          2. ``{"comments": {"list": [...]}}`` or ``{"comments": [...]}``
          3. ``{"data": [...]}``

        Always returns a list; it is empty when nothing matches.
        """
        if not isinstance(data, dict):
            return []

        inner = data.get("data", {})
        candidates = []
        if isinstance(inner, dict):
            candidates.append(inner.get("comments", {}))
        candidates.append(data.get("comments", {}))

        for cm in candidates:
            if isinstance(cm, dict) and isinstance(cm.get("list"), list):
                return cm["list"]
            if isinstance(cm, list):
                return cm

        if isinstance(inner, list):
            return inner
        return []

    @classmethod
    def _collect_comments(cls, raw_comments) -> list[dict]:
        """Convert raw comment objects, skipping non-dicts and empty content."""
        if not isinstance(raw_comments, list):
            return []
        return [cls._extract_comment_obj(c)
                for c in raw_comments
                if isinstance(c, dict) and c.get("content")]

    @classmethod
    def _parse_comments(cls, text: str) -> list[dict]:
        """Parse the comment list out of a feed-detail text payload.

        JSON is tried first; if it yields no comments, a regex scan is used.

        Returns:
            ``[{comment_id, user_id, nickname, content, sub_comment_count}, ...]``
        """
        # Strategy 1: JSON, in any of the supported nested layouts.
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            data = None

        if isinstance(data, list):
            raw_comments = data
        elif isinstance(data, dict):
            raw_comments = cls._find_comment_list(data)
        else:
            raw_comments = []

        parsed = cls._collect_comments(raw_comments)
        if parsed:
            return parsed

        # Strategy 2: regex fallback. All four fields are captured in a single
        # match so that fields from different comments cannot be paired up.
        comment_blocks = re.finditer(
            r'"id"\s*:\s*"([0-9a-fA-F]{20,26})"[^}]*?'
            r'"content"\s*:\s*"([^"]{1,500})"[^}]*?'
            r'"userInfo"\s*:\s*\{[^}]*?"userId"\s*:\s*"([0-9a-fA-F]{20,26})"'
            r'[^}]*?"nickname"\s*:\s*"([^"]{1,30})"',
            text, re.DOTALL
        )
        return [
            {
                "comment_id": m.group(1),
                "user_id": m.group(3),
                "nickname": m.group(4),
                "content": m.group(2),
                "sub_comment_count": 0,
            }
            for m in comment_blocks
        ]

    # ---------- Feed detail ----------

    def get_feed_detail(self, feed_id: str, xsec_token: str,
                        load_all_comments: bool = False) -> dict:
        """Fetch the detail of one feed (note)."""
        args = {
            "feed_id": feed_id,
            "xsec_token": xsec_token,
            "load_all_comments": load_all_comments,
        }
        return self._call_tool("get_feed_detail", args)

    def get_feed_comments(self, feed_id: str, xsec_token: str,
                          load_all: bool = True) -> list[dict]:
        """Fetch the comments of a feed as structured records.

        Each text item of the raw tool result is tried as JSON first; if none
        yields a comment list, the joined text is parsed instead. Returns an
        empty list when the detail call reports an error.
        """
        result = self.get_feed_detail(feed_id, xsec_token, load_all_comments=load_all)
        if "error" in result:
            return []

        # Prefer the raw structure: the first text item holding a comment list wins.
        raw = result.get("raw", {})
        if raw and isinstance(raw, dict):
            for item in raw.get("content") or []:
                if not isinstance(item, dict) or item.get("type") != "text":
                    continue
                try:
                    data = json.loads(item["text"])
                except (json.JSONDecodeError, KeyError, TypeError):
                    continue
                comment_list = self._find_comment_list(data)
                if comment_list:
                    return self._collect_comments(comment_list)

        # Fall back to parsing the joined text.
        text = result.get("text", "")
        return self._parse_comments(text) if text else []

    # ---------- Publishing ----------

    def publish_content(self, title: str, content: str, images: list[str],
                        tags: list[str] = None, schedule_at: str = None) -> dict:
        """Publish an image-and-text post.

        ``tags`` and ``schedule_at`` are only sent when truthy.
        """
        args = {
            "title": title,
            "content": content,
            "images": images,
        }
        if tags:
            args["tags"] = tags
        if schedule_at:
            args["schedule_at"] = schedule_at
        return self._call_tool("publish_content", args)

    def publish_video(self, title: str, content: str, video_path: str,
                      tags: list[str] = None, schedule_at: str = None) -> dict:
        """Publish a video post.

        ``tags`` and ``schedule_at`` are only sent when truthy.
        """
        args = {
            "title": title,
            "content": content,
            "video": video_path,
        }
        if tags:
            args["tags"] = tags
        if schedule_at:
            args["schedule_at"] = schedule_at
        return self._call_tool("publish_with_video", args)

    # ---------- Comments ----------

    def post_comment(self, feed_id: str, xsec_token: str, comment: str) -> dict:
        """Post a top-level comment on a feed."""
        return self._call_tool("post_comment_to_feed", {
            "feed_id": feed_id,
            "xsec_token": xsec_token,
            "content": comment,
        })

    def reply_comment(self, feed_id: str, xsec_token: str,
                      comment_id: str, user_id: str, content: str) -> dict:
        """Reply to an existing comment on a feed."""
        return self._call_tool("reply_comment_in_feed", {
            "feed_id": feed_id,
            "xsec_token": xsec_token,
            "comment_id": comment_id,
            "user_id": user_id,
            "content": content,
        })

    # ---------- Interactions ----------

    def like_feed(self, feed_id: str, xsec_token: str, unlike: bool = False) -> dict:
        """Like a feed, or remove the like when ``unlike`` is True."""
        return self._call_tool("like_feed", {
            "feed_id": feed_id,
            "xsec_token": xsec_token,
            "unlike": unlike,
        })

    def favorite_feed(self, feed_id: str, xsec_token: str,
                      unfavorite: bool = False) -> dict:
        """Favorite a feed, or remove the favorite when ``unfavorite`` is True."""
        return self._call_tool("favorite_feed", {
            "feed_id": feed_id,
            "xsec_token": xsec_token,
            "unfavorite": unfavorite,
        })

    # ---------- Users ----------

    def get_user_profile(self, user_id: str, xsec_token: str) -> dict:
        """Fetch a user's profile page information."""
        return self._call_tool("user_profile", {
            "user_id": user_id,
            "xsec_token": xsec_token,
        })

    # ---------- Login management ----------

    def delete_cookies(self) -> dict:
        """Delete cookies, resetting the login state."""
        return self._call_tool("delete_cookies", {})
|