xhs_factory/services/hotspot.py
zhoujie 4d83c0f4a9
Some checks failed
CI / Lint (ruff) (push) Has been cancelled
CI / Import Check (push) Has been cancelled
feat(scheduler): 新增热点自动采集功能并优化发布路径
- 新增热点自动采集后台线程,支持定时搜索关键词并执行 AI 分析,结果缓存至结构化状态
- 新增热点分析状态管理接口,提供线程安全的 `get_last_analysis` 和 `set_last_analysis` 方法
- 新增热点数据桥接函数 `feed_hotspot_to_engine`,将分析结果注入 TopicEngine 实现热点加权推荐
- 新增热点选题下拉组件,分析完成后自动填充推荐选题,选中后自动写入选题输入框
- 优化 `generate_from_hotspot` 函数,自动获取结构化分析摘要并增强生成上下文
- 新增热点自动采集配置节点,支持通过 `config.json` 管理关键词和采集间隔

♻️ refactor(queue): 实现智能排期引擎并统一发布路径

- 新增智能排期引擎,基于 `AnalyticsService` 的 `time_weights` 自动计算最优发布时段
- 新增 `PublishQueue.suggest_schedule_time` 和 `auto_schedule_item` 方法,支持时段冲突检测和内容分布控制
- 修改 `generate_to_queue` 函数,新增 `auto_schedule` 和 `auto_approve` 参数,支持自动排期和自动审核
- 重构 `_scheduler_loop` 的自动发布分支,改为调用 `generate_to_queue` 通过队列发布,统一发布路径
- 重构 `auto_publish_once` 函数,移除直接发布逻辑,改为生成内容入队并返回队列信息
- 新增队列时段使用情况查询方法 `get_slot_usage`,支持 UI 热力图展示

📝 docs(openspec): 新增内容排期优化和热点探测优化规范文档

- 新增 `smart-schedule-engine` 规范,定义智能排期引擎的功能需求和场景
- 新增 `unified-publish-path` 规范,定义统一发布路径的改造方案
- 新增 `hotspot-analysis-state` 规范,定义热点分析状态存储的线程安全接口
- 新增 `hotspot-auto-collector` 规范,定义定时热点自动采集的任务流程
- 新增 `hotspot-engine-bridge` 规范,定义热点数据注入 TopicEngine 的桥接机制
- 新增 `hotspot-topic-selector` 规范,定义热点选题下拉组件的交互行为
- 更新 `services-queue`、`services-scheduler` 和 `services-hotspot` 规范,反映功能修改和新增参数

🔧 chore(config): 新增热点自动采集默认配置

- 在 `DEFAULT_CONFIG` 中新增 `hotspot_auto_collect` 配置节点,包含 `enabled`、`keywords` 和 `interval_hours` 字段
- 提供默认关键词列表 `["穿搭", "美妆", "好物"]` 和默认采集间隔 4 小时

🐛 fix(llm): 增强 JSON 解析容错能力

- 新增 `_try_fix_truncated_json` 方法,尝试修复被 token 限制截断的 JSON 输出
- 支持多种截断场景的自动补全,包括字符串值、数组和嵌套对象的截断修复
- 提高 LLM 分析热点等返回 JSON 的函数的稳定性

💄 style(ui): 优化队列管理和热点探测界面

- 在队列生成区域新增自动排期复选框,勾选后隐藏手动排期输入框
- 在日历视图旁新增推荐时段 Markdown 面板,展示各时段权重和建议热力图
- 在热点探测 Tab 新增推荐选题下拉组件,分析完成后动态填充选项
- 在热点探测 Tab 新增热点自动采集控制区域,支持启动、停止和配置采集参数
2026-02-28 22:22:27 +08:00

274 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
services/hotspot.py
热点探测、热点生成、笔记列表缓存(供评论管家主动评论使用)
"""
import copy
import threading
import logging
import gradio as gr
from .llm_service import LLMService
from .mcp_client import get_mcp_client
from .connection import _get_llm_config
from .persona import _resolve_persona
logger = logging.getLogger("autobot")
# ---- Shared: thread-safe caches ----
# Mutex guarding every module-level cache below against racing callbacks
# (all caches deliberately share this single lock).
_cache_lock = threading.RLock()
# Cached note entries used for proactively commenting on others' notes
_cached_proactive_entries: list[dict] = []
# Cached note entries for the user's own notes (comment management)
_cached_my_note_entries: list[dict] = []
# ==================================================
# Tab 2: Hotspot detection
# ==================================================
# Most recent structured LLM hotspot-analysis result (thread-safe; reuses _cache_lock)
_last_analysis: dict | None = None
def get_last_analysis() -> dict | None:
"""线程安全地获取最近一次热点分析结果的深拷贝"""
with _cache_lock:
if _last_analysis is None:
return None
return copy.deepcopy(_last_analysis)
def set_last_analysis(data: dict) -> None:
    """Thread-safely merge a new hotspot-analysis result into the cache.

    Merge rules:
    - ``hot_topics``: ordered union of old and new, de-duplicated.
    - ``suggestions``: de-duplicated by each entry's ``topic`` field.
    - every other key: the newest value wins.

    Fix: values taken from *data* are now deep-copied before being stored.
    Previously merged suggestion dicts (and "other" fields) were stored by
    reference, so a caller mutating its dict afterwards would silently
    corrupt the cached state despite the lock.
    """
    global _last_analysis
    with _cache_lock:
        if _last_analysis is None:
            _last_analysis = copy.deepcopy(data)
            return
        # Merge hot_topics (ordered union, de-duplicated).
        existing_topics = _last_analysis.get("hot_topics", [])
        seen = set(existing_topics)
        for topic in data.get("hot_topics", []):
            if topic not in seen:
                existing_topics.append(copy.deepcopy(topic))
                seen.add(topic)
        _last_analysis["hot_topics"] = existing_topics
        # Merge suggestions, de-duplicated by the "topic" key.
        existing_sug = _last_analysis.get("suggestions", [])
        seen_sug_topics = {s.get("topic", "") for s in existing_sug}
        for sug in data.get("suggestions", []):
            sug_topic = sug.get("topic", "")
            if sug_topic not in seen_sug_topics:
                existing_sug.append(copy.deepcopy(sug))  # copy: don't alias caller's dict
                seen_sug_topics.add(sug_topic)
        _last_analysis["suggestions"] = existing_sug
        # All remaining keys: latest value wins.
        for key in data:
            if key not in ("hot_topics", "suggestions"):
                _last_analysis[key] = copy.deepcopy(data[key])
def search_hotspots(keyword, sort_by, mcp_url):
    """Search XHS trending content for *keyword* via the MCP client.

    Returns a ``(status_message, result_text)`` tuple; failures are
    reported in the status message rather than raised.
    """
    if not keyword:
        return "❌ 请输入搜索关键词", ""
    try:
        client = get_mcp_client(mcp_url)
        result = client.search_feeds(keyword, sort_by=sort_by)
        if "error" in result:
            return f"❌ 搜索失败: {result['error']}", ""
        return "✅ 搜索完成", result.get("text", "无结果")
    except Exception as exc:
        logger.error("热点搜索失败: %s", exc)
        return f"❌ 搜索失败: {exc}", ""
def analyze_and_suggest(model, keyword, search_result):
    """Run LLM analysis over raw search results and render suggestions.

    Returns ``(status, markdown_summary, keyword_passthrough,
    dropdown_update)``. The structured analysis is cached via
    ``set_last_analysis`` so other features (generation, TopicEngine
    bridge) can reuse it.

    Fix: suggestion fields are read with ``.get`` — the LLM may omit
    ``topic``/``reason`` in malformed output, and direct indexing raised
    KeyError where the rest of this function is already defensive.
    """
    if not search_result:
        return "❌ 请先搜索", "", "", gr.update(choices=[], value=None)
    api_key, base_url, _ = _get_llm_config()
    if not api_key:
        return "❌ 请先配置 LLM 提供商", "", "", gr.update(choices=[], value=None)
    try:
        svc = LLMService(api_key, base_url, model)
        analysis = svc.analyze_hotspots(search_result)
        # Cache the structured analysis before rendering Markdown.
        set_last_analysis(analysis)
        topics = "\n".join(str(t) for t in analysis.get("hot_topics", []))
        patterns = "\n".join(str(p) for p in analysis.get("title_patterns", []))
        suggestions_list = analysis.get("suggestions", [])
        suggestions = "\n".join(
            f"**{s.get('topic', '')}** - {s.get('reason', '')}"
            for s in suggestions_list
        )
        structure = analysis.get("content_structure", "")
        summary = (
            f"## 🔥 热门选题\n{topics}\n\n"
            f"## 📝 标题套路\n{patterns}\n\n"
            f"## 📐 内容结构\n{structure}\n\n"
            f"## 💡 推荐选题\n{suggestions}"
        )
        # Build the topic-dropdown choices, preselecting the first suggestion.
        topic_choices = [s["topic"] for s in suggestions_list if s.get("topic")]
        dropdown_update = gr.update(
            choices=topic_choices,
            value=topic_choices[0] if topic_choices else None,
        )
        return "✅ 分析完成", summary, keyword, dropdown_update
    except Exception as e:
        logger.error("热点分析失败: %s", e)
        return f"❌ 分析失败: {e}", "", "", gr.update(choices=[], value=None)
def generate_from_hotspot(model, topic_from_hotspot, style, search_result, sd_model_name, persona_text):
    """Generate XHS copy from a hotspot topic, with enriched context.

    Builds the reference context from the cached structured analysis
    summary plus the raw search snippet (truncated to 3000 chars), then
    delegates to ``LLMService.generate_copy_with_reference``.

    Returns ``(title, content, sd_prompt, tags_csv, status)``.

    Fix: the failure branch now logs the exception — previously it was
    swallowed silently, inconsistent with search_hotspots /
    analyze_and_suggest which both log before reporting.
    """
    if not topic_from_hotspot:
        return "", "", "", "", "❌ 请先选择或输入选题"
    api_key, base_url, _ = _get_llm_config()
    if not api_key:
        return "", "", "", "", "❌ 请先配置 LLM 提供商"
    try:
        svc = LLMService(api_key, base_url, model)
        persona = _resolve_persona(persona_text) if persona_text else None
        # Enhanced reference context: structured analysis summary + raw search snippet.
        analysis = get_last_analysis()
        reference_parts = []
        if analysis:
            topics_str = ", ".join(str(t) for t in analysis.get("hot_topics", [])[:5])
            sug_str = "; ".join(
                s.get("topic", "") for s in analysis.get("suggestions", [])[:5]
            )
            structure = analysis.get("content_structure", "")
            analysis_summary = (
                f"[热点分析摘要] 热门选题: {topics_str}\n"
                f"推荐方向: {sug_str}\n"
                f"内容结构建议: {structure}\n\n"
            )
            reference_parts.append(analysis_summary)
        if search_result:
            reference_parts.append(search_result)
        # Cap the combined context so it stays within the prompt budget.
        combined_reference = "".join(reference_parts)[:3000]
        data = svc.generate_copy_with_reference(
            topic=topic_from_hotspot,
            style=style,
            reference_notes=combined_reference,
            sd_model_name=sd_model_name,
            persona=persona,
        )
        tags = data.get("tags", [])
        return (
            data.get("title", ""),
            data.get("content", ""),
            data.get("sd_prompt", ""),
            ", ".join(tags),
            "✅ 基于热点的文案已生成",
        )
    except Exception as e:
        logger.error("热点文案生成失败: %s", e)
        return "", "", "", "", f"❌ 生成失败: {e}"
def feed_hotspot_to_engine(topic_engine) -> list[dict]:
    """Feed the cached hotspot analysis into *topic_engine*.

    Returns the engine's hotspot-weighted topic recommendations; the
    snapshot may be ``None`` when no analysis has been cached yet.
    """
    snapshot = get_last_analysis()
    return topic_engine.recommend_topics(hotspot_data=snapshot)
# ==================================================
# Tab 3: 评论管家
# ==================================================
# ---- 共用: 笔记列表缓存(线程安全)----
def _set_cache(name: str, entries: list):
    """Thread-safely replace one of the note-list caches.

    ``name == "proactive"`` targets the proactive-comment cache; any other
    value targets the my-notes cache. A shallow copy is stored so the
    cache is detached from the caller's list.
    """
    global _cached_proactive_entries, _cached_my_note_entries
    snapshot = list(entries)
    with _cache_lock:
        if name == "proactive":
            _cached_proactive_entries = snapshot
        else:
            _cached_my_note_entries = snapshot
def _get_cache(name: str) -> list:
    """Return a thread-safe shallow-copy snapshot of a note-list cache."""
    with _cache_lock:
        source = (
            _cached_proactive_entries
            if name == "proactive"
            else _cached_my_note_entries
        )
        return list(source)
def _fetch_and_cache(keyword, mcp_url, cache_name="proactive"):
    """Fetch a note list (keyword search or home feed) and cache it.

    Returns ``(gr.update for the note dropdown, status message)``. On any
    error the cache is cleared so stale entries can't be picked later.

    Fix: the search-source label previously opened a CJK quote 「 without
    closing it; the matching 」 is now appended. ``keyword.strip()`` is
    also computed once instead of three times.
    """
    try:
        client = get_mcp_client(mcp_url)
        kw = keyword.strip() if keyword else ""
        if kw:
            entries = client.search_feeds_parsed(kw)
            src = f"搜索「{kw}」"
        else:
            entries = client.list_feeds_parsed()
            src = "首页推荐"
        _set_cache(cache_name, entries)
        if not entries:
            return gr.update(choices=[], value=None), f"⚠️ 从{src}未找到笔记"
        choices = []
        for i, e in enumerate(entries):
            # Keep labels compact: index + truncated title + author + likes.
            title_short = (e["title"] or "无标题")[:28]
            choices.append(
                f"[{i + 1}] {title_short} | @{e['author'] or '未知'} | ❤ {e['likes']}"
            )
        return (
            gr.update(choices=choices, value=choices[0]),
            f"✅ 从{src}获取 {len(entries)} 条笔记",
        )
    except Exception as e:
        _set_cache(cache_name, [])
        return gr.update(choices=[], value=None), f"{e}"
def _pick_from_cache(selected, cache_name="proactive"):
    """Resolve a dropdown label back to ``(feed_id, xsec_token, title)``.

    First tries to parse the 1-based index from the label's ``[N]`` prefix;
    if that fails, falls back to a fuzzy title-prefix match against the
    cached entries (thread-safe snapshot). Returns three empty strings
    when nothing is selected or nothing matches.

    Fix: the fuzzy fallback now skips entries with an empty title —
    ``""[:15]`` is ``""``, which is a substring of every label, so the
    first untitled note used to match any selection.
    """
    cache = _get_cache(cache_name)
    if not selected or not cache:
        return "", "", ""
    try:
        # Extract the 1-based index from the "[N]" prefix.
        idx = int(selected.split("]")[0].replace("[", "")) - 1
        if 0 <= idx < len(cache):
            e = cache[idx]
            return e["feed_id"], e["xsec_token"], e.get("title", "")
    except (ValueError, IndexError):
        pass
    # Fallback: fuzzy match on a non-empty title prefix.
    for e in cache:
        title_prefix = e.get("title", "")[:15]
        if title_prefix and title_prefix in selected:
            return e["feed_id"], e["xsec_token"], e.get("title", "")
    return "", "", ""
# ---- Module A: proactively commenting on others' notes ----
def fetch_proactive_notes(keyword, mcp_url):
    """Fetch and cache candidate notes for proactive commenting; returns (dropdown update, status)."""
    return _fetch_and_cache(keyword, mcp_url, "proactive")
def on_proactive_note_selected(selected):
    """Resolve the selected dropdown label to (feed_id, xsec_token, title)."""
    return _pick_from_cache(selected, "proactive")