xhs_factory/services/hotspot.py
zhoujie 4d83c0f4a9
Some checks failed
CI / Lint (ruff) (push) Has been cancelled
CI / Import Check (push) Has been cancelled
feat(scheduler): 新增热点自动采集功能并优化发布路径
- 新增热点自动采集后台线程,支持定时搜索关键词并执行 AI 分析,结果缓存至结构化状态
- 新增热点分析状态管理接口,提供线程安全的 `get_last_analysis` 和 `set_last_analysis` 方法
- 新增热点数据桥接函数 `feed_hotspot_to_engine`,将分析结果注入 TopicEngine 实现热点加权推荐
- 新增热点选题下拉组件,分析完成后自动填充推荐选题,选中后自动写入选题输入框
- 优化 `generate_from_hotspot` 函数,自动获取结构化分析摘要并增强生成上下文
- 新增热点自动采集配置节点,支持通过 `config.json` 管理关键词和采集间隔

♻️ refactor(queue): 实现智能排期引擎并统一发布路径

- 新增智能排期引擎,基于 `AnalyticsService` 的 `time_weights` 自动计算最优发布时段
- 新增 `PublishQueue.suggest_schedule_time` 和 `auto_schedule_item` 方法,支持时段冲突检测和内容分布控制
- 修改 `generate_to_queue` 函数,新增 `auto_schedule` 和 `auto_approve` 参数,支持自动排期和自动审核
- 重构 `_scheduler_loop` 的自动发布分支,改为调用 `generate_to_queue` 通过队列发布,统一发布路径
- 重构 `auto_publish_once` 函数,移除直接发布逻辑,改为生成内容入队并返回队列信息
- 新增队列时段使用情况查询方法 `get_slot_usage`,支持 UI 热力图展示

📝 docs(openspec): 新增内容排期优化和热点探测优化规范文档

- 新增 `smart-schedule-engine` 规范,定义智能排期引擎的功能需求和场景
- 新增 `unified-publish-path` 规范,定义统一发布路径的改造方案
- 新增 `hotspot-analysis-state` 规范,定义热点分析状态存储的线程安全接口
- 新增 `hotspot-auto-collector` 规范,定义定时热点自动采集的任务流程
- 新增 `hotspot-engine-bridge` 规范,定义热点数据注入 TopicEngine 的桥接机制
- 新增 `hotspot-topic-selector` 规范,定义热点选题下拉组件的交互行为
- 更新 `services-queue`、`services-scheduler` 和 `services-hotspot` 规范,反映功能修改和新增参数

🔧 chore(config): 新增热点自动采集默认配置

- 在 `DEFAULT_CONFIG` 中新增 `hotspot_auto_collect` 配置节点,包含 `enabled`、`keywords` 和 `interval_hours` 字段
- 提供默认关键词列表 `["穿搭", "美妆", "好物"]` 和默认采集间隔 4 小时

🐛 fix(llm): 增强 JSON 解析容错能力

- 新增 `_try_fix_truncated_json` 方法,尝试修复被 token 限制截断的 JSON 输出
- 支持多种截断场景的自动补全,包括字符串值、数组和嵌套对象的截断修复
- 提高 LLM 分析热点等返回 JSON 的函数的稳定性

💄 style(ui): 优化队列管理和热点探测界面

- 在队列生成区域新增自动排期复选框,勾选后隐藏手动排期输入框
- 在日历视图旁新增推荐时段 Markdown 面板,展示各时段权重和建议热力图
- 在热点探测 Tab 新增推荐选题下拉组件,分析完成后动态填充选项
- 在热点探测 Tab 新增热点自动采集控制区域,支持启动、停止和配置采集参数
2026-02-28 22:22:27 +08:00

274 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
services/hotspot.py
热点探测、热点生成、笔记列表缓存(供评论管家主动评论使用)
"""
import copy
import threading
import logging
import gradio as gr
from .llm_service import LLMService
from .mcp_client import get_mcp_client
from .connection import _get_llm_config
from .persona import _resolve_persona
logger = logging.getLogger("autobot")
# ---- Shared: thread-safe caches ----
# Mutex guarding every module-level cache below against racing callbacks
# (all caches deliberately share this single lock).
_cache_lock = threading.RLock()
# Cached note entries used for proactively commenting on others' notes
_cached_proactive_entries: list[dict] = []
# Cached note entries for the user's own notes (comment management)
_cached_my_note_entries: list[dict] = []
# ==================================================
# Tab 2: Hotspot detection
# ==================================================
# Most recent structured LLM hotspot-analysis result (thread-safe; reuses _cache_lock)
_last_analysis: dict | None = None
def get_last_analysis() -> dict | None:
"""线程安全地获取最近一次热点分析结果的深拷贝"""
with _cache_lock:
if _last_analysis is None:
return None
return copy.deepcopy(_last_analysis)
def set_last_analysis(data: dict) -> None:
    """Thread-safely merge a new hotspot-analysis result into the cache.

    Merge rules:
    - ``hot_topics``: ordered union of old and new, de-duplicated.
    - ``suggestions``: de-duplicated by each entry's ``topic`` field.
    - every other key: the newest value wins.

    Fix: values taken from *data* are now deep-copied before being stored.
    Previously merged suggestion dicts (and "other" fields) were stored by
    reference, so a caller mutating its dict afterwards would silently
    corrupt the cached state despite the lock.
    """
    global _last_analysis
    with _cache_lock:
        if _last_analysis is None:
            _last_analysis = copy.deepcopy(data)
            return
        # Merge hot_topics (ordered union, de-duplicated).
        existing_topics = _last_analysis.get("hot_topics", [])
        seen = set(existing_topics)
        for topic in data.get("hot_topics", []):
            if topic not in seen:
                existing_topics.append(copy.deepcopy(topic))
                seen.add(topic)
        _last_analysis["hot_topics"] = existing_topics
        # Merge suggestions, de-duplicated by the "topic" key.
        existing_sug = _last_analysis.get("suggestions", [])
        seen_sug_topics = {s.get("topic", "") for s in existing_sug}
        for sug in data.get("suggestions", []):
            sug_topic = sug.get("topic", "")
            if sug_topic not in seen_sug_topics:
                existing_sug.append(copy.deepcopy(sug))  # copy: don't alias caller's dict
                seen_sug_topics.add(sug_topic)
        _last_analysis["suggestions"] = existing_sug
        # All remaining keys: latest value wins.
        for key in data:
            if key not in ("hot_topics", "suggestions"):
                _last_analysis[key] = copy.deepcopy(data[key])
def search_hotspots(keyword, sort_by, mcp_url):
    """Search XHS trending content for *keyword* via the MCP client.

    Returns a ``(status_message, result_text)`` tuple; failures are
    reported in the status message rather than raised.
    """
    if not keyword:
        return "❌ 请输入搜索关键词", ""
    try:
        client = get_mcp_client(mcp_url)
        result = client.search_feeds(keyword, sort_by=sort_by)
        if "error" in result:
            return f"❌ 搜索失败: {result['error']}", ""
        return "✅ 搜索完成", result.get("text", "无结果")
    except Exception as exc:
        logger.error("热点搜索失败: %s", exc)
        return f"❌ 搜索失败: {exc}", ""
def analyze_and_suggest(model, keyword, search_result):
    """Run LLM analysis over raw search results and render suggestions.

    Returns ``(status, markdown_summary, keyword_passthrough,
    dropdown_update)``. The structured analysis is cached via
    ``set_last_analysis`` so other features (generation, TopicEngine
    bridge) can reuse it.

    Fix: suggestion fields are read with ``.get`` — the LLM may omit
    ``topic``/``reason`` in malformed output, and direct indexing raised
    KeyError where the rest of this function is already defensive.
    """
    if not search_result:
        return "❌ 请先搜索", "", "", gr.update(choices=[], value=None)
    api_key, base_url, _ = _get_llm_config()
    if not api_key:
        return "❌ 请先配置 LLM 提供商", "", "", gr.update(choices=[], value=None)
    try:
        svc = LLMService(api_key, base_url, model)
        analysis = svc.analyze_hotspots(search_result)
        # Cache the structured analysis before rendering Markdown.
        set_last_analysis(analysis)
        topics = "\n".join(str(t) for t in analysis.get("hot_topics", []))
        patterns = "\n".join(str(p) for p in analysis.get("title_patterns", []))
        suggestions_list = analysis.get("suggestions", [])
        suggestions = "\n".join(
            f"**{s.get('topic', '')}** - {s.get('reason', '')}"
            for s in suggestions_list
        )
        structure = analysis.get("content_structure", "")
        summary = (
            f"## 🔥 热门选题\n{topics}\n\n"
            f"## 📝 标题套路\n{patterns}\n\n"
            f"## 📐 内容结构\n{structure}\n\n"
            f"## 💡 推荐选题\n{suggestions}"
        )
        # Build the topic-dropdown choices, preselecting the first suggestion.
        topic_choices = [s["topic"] for s in suggestions_list if s.get("topic")]
        dropdown_update = gr.update(
            choices=topic_choices,
            value=topic_choices[0] if topic_choices else None,
        )
        return "✅ 分析完成", summary, keyword, dropdown_update
    except Exception as e:
        logger.error("热点分析失败: %s", e)
        return f"❌ 分析失败: {e}", "", "", gr.update(choices=[], value=None)
def generate_from_hotspot(model, topic_from_hotspot, style, search_result, sd_model_name, persona_text):
    """Generate XHS copy from a hotspot topic, with enriched context.

    Builds the reference context from the cached structured analysis
    summary plus the raw search snippet (truncated to 3000 chars), then
    delegates to ``LLMService.generate_copy_with_reference``.

    Returns ``(title, content, sd_prompt, tags_csv, status)``.

    Fix: the failure branch now logs the exception — previously it was
    swallowed silently, inconsistent with search_hotspots /
    analyze_and_suggest which both log before reporting.
    """
    if not topic_from_hotspot:
        return "", "", "", "", "❌ 请先选择或输入选题"
    api_key, base_url, _ = _get_llm_config()
    if not api_key:
        return "", "", "", "", "❌ 请先配置 LLM 提供商"
    try:
        svc = LLMService(api_key, base_url, model)
        persona = _resolve_persona(persona_text) if persona_text else None
        # Enhanced reference context: structured analysis summary + raw search snippet.
        analysis = get_last_analysis()
        reference_parts = []
        if analysis:
            topics_str = ", ".join(str(t) for t in analysis.get("hot_topics", [])[:5])
            sug_str = "; ".join(
                s.get("topic", "") for s in analysis.get("suggestions", [])[:5]
            )
            structure = analysis.get("content_structure", "")
            analysis_summary = (
                f"[热点分析摘要] 热门选题: {topics_str}\n"
                f"推荐方向: {sug_str}\n"
                f"内容结构建议: {structure}\n\n"
            )
            reference_parts.append(analysis_summary)
        if search_result:
            reference_parts.append(search_result)
        # Cap the combined context so it stays within the prompt budget.
        combined_reference = "".join(reference_parts)[:3000]
        data = svc.generate_copy_with_reference(
            topic=topic_from_hotspot,
            style=style,
            reference_notes=combined_reference,
            sd_model_name=sd_model_name,
            persona=persona,
        )
        tags = data.get("tags", [])
        return (
            data.get("title", ""),
            data.get("content", ""),
            data.get("sd_prompt", ""),
            ", ".join(tags),
            "✅ 基于热点的文案已生成",
        )
    except Exception as e:
        logger.error("热点文案生成失败: %s", e)
        return "", "", "", "", f"❌ 生成失败: {e}"
def feed_hotspot_to_engine(topic_engine) -> list[dict]:
    """Feed the cached hotspot analysis into *topic_engine*.

    Returns the engine's hotspot-weighted topic recommendations; the
    snapshot may be ``None`` when no analysis has been cached yet.
    """
    snapshot = get_last_analysis()
    return topic_engine.recommend_topics(hotspot_data=snapshot)
# ==================================================
# Tab 3: 评论管家
# ==================================================
# ---- 共用: 笔记列表缓存(线程安全)----
def _set_cache(name: str, entries: list):
    """Thread-safely replace one of the note-list caches.

    ``name == "proactive"`` targets the proactive-comment cache; any other
    value targets the my-notes cache. A shallow copy is stored so the
    cache is detached from the caller's list.
    """
    global _cached_proactive_entries, _cached_my_note_entries
    snapshot = list(entries)
    with _cache_lock:
        if name == "proactive":
            _cached_proactive_entries = snapshot
        else:
            _cached_my_note_entries = snapshot
def _get_cache(name: str) -> list:
    """Return a thread-safe shallow-copy snapshot of a note-list cache."""
    with _cache_lock:
        source = (
            _cached_proactive_entries
            if name == "proactive"
            else _cached_my_note_entries
        )
        return list(source)
def _fetch_and_cache(keyword, mcp_url, cache_name="proactive"):
    """Fetch a note list (keyword search or home feed) and cache it.

    Returns ``(gr.update for the note dropdown, status message)``. On any
    error the cache is cleared so stale entries can't be picked later.

    Fix: the search-source label previously opened a CJK quote 「 without
    closing it; the matching 」 is now appended. ``keyword.strip()`` is
    also computed once instead of three times.
    """
    try:
        client = get_mcp_client(mcp_url)
        kw = keyword.strip() if keyword else ""
        if kw:
            entries = client.search_feeds_parsed(kw)
            src = f"搜索「{kw}」"
        else:
            entries = client.list_feeds_parsed()
            src = "首页推荐"
        _set_cache(cache_name, entries)
        if not entries:
            return gr.update(choices=[], value=None), f"⚠️ 从{src}未找到笔记"
        choices = []
        for i, e in enumerate(entries):
            # Keep labels compact: index + truncated title + author + likes.
            title_short = (e["title"] or "无标题")[:28]
            choices.append(
                f"[{i + 1}] {title_short} | @{e['author'] or '未知'} | ❤ {e['likes']}"
            )
        return (
            gr.update(choices=choices, value=choices[0]),
            f"✅ 从{src}获取 {len(entries)} 条笔记",
        )
    except Exception as e:
        _set_cache(cache_name, [])
        return gr.update(choices=[], value=None), f"{e}"
def _pick_from_cache(selected, cache_name="proactive"):
    """Resolve a dropdown label back to ``(feed_id, xsec_token, title)``.

    First tries to parse the 1-based index from the label's ``[N]`` prefix;
    if that fails, falls back to a fuzzy title-prefix match against the
    cached entries (thread-safe snapshot). Returns three empty strings
    when nothing is selected or nothing matches.

    Fix: the fuzzy fallback now skips entries with an empty title —
    ``""[:15]`` is ``""``, which is a substring of every label, so the
    first untitled note used to match any selection.
    """
    cache = _get_cache(cache_name)
    if not selected or not cache:
        return "", "", ""
    try:
        # Extract the 1-based index from the "[N]" prefix.
        idx = int(selected.split("]")[0].replace("[", "")) - 1
        if 0 <= idx < len(cache):
            e = cache[idx]
            return e["feed_id"], e["xsec_token"], e.get("title", "")
    except (ValueError, IndexError):
        pass
    # Fallback: fuzzy match on a non-empty title prefix.
    for e in cache:
        title_prefix = e.get("title", "")[:15]
        if title_prefix and title_prefix in selected:
            return e["feed_id"], e["xsec_token"], e.get("title", "")
    return "", "", ""
# ---- Module A: proactively commenting on others' notes ----
def fetch_proactive_notes(keyword, mcp_url):
    """Fetch and cache candidate notes for proactive commenting; returns (dropdown update, status)."""
    return _fetch_and_cache(keyword, mcp_url, "proactive")
def on_proactive_note_selected(selected):
    """Resolve the selected dropdown label to (feed_id, xsec_token, title)."""
    return _pick_from_cache(selected, "proactive")