xhs_factory/services/profile.py
zhoujie b635108b89 refactor: split monolithic main.py into services/ + ui/ modules (improve-maintainability)
- main.py: 4360 → 146 lines (96.6% reduction), entry layer only
- services/: rate_limiter, autostart, persona, connection, profile,
  hotspot, content, engagement, scheduler, queue_ops (10 business modules)
- ui/app.py: all Gradio UI code extracted into build_app(cfg, analytics)
- Fix: with gr.Blocks() indented inside build_app function
- Fix: cfg.all property (not get_all method)
- Fix: STATUS_LABELS, get_persona_keywords, fetch_proactive_notes imports
- Fix: queue_ops module-level set_publish_callback moved into configure()
- Fix: pub_queue.format_*() wrapped as queue_format_table/calendar helpers
- All 14 files syntax-verified, build_app() runtime-verified
- 58/58 tasks complete
2026-02-24 22:50:56 +08:00

177 lines
6.4 KiB
Python

"""
services/profile.py
小红书账号 Profile 解析与可视化函数
"""
import re
import json
import logging
import matplotlib
import matplotlib.pyplot as plt
from mcp_client import get_mcp_client
# Font setup: probe each candidate CJK-capable font and adopt the first one
# matplotlib can actually resolve, so Chinese chart labels render instead of
# showing placeholder boxes.
_font_candidates = ["Microsoft YaHei", "SimHei", "PingFang SC", "WenQuanYi Micro Hei"]
for _candidate in _font_candidates:
    try:
        # Raises when the font is not installed (fallback disabled on purpose).
        matplotlib.font_manager.findfont(_candidate, fallback_to_default=False)
    except Exception:
        continue
    plt.rcParams["font.sans-serif"] = [_candidate]
    break
# Keep the minus sign renderable when a CJK font is active.
plt.rcParams["axes.unicode_minus"] = False
logger = logging.getLogger("autobot")
# ==================================================
# Tab 4: 数据看板 (我的账号)
# ==================================================
def _parse_profile_json(text: str):
"""尝试从文本中解析用户 profile JSON"""
if not text:
return None
# 直接 JSON
try:
return json.loads(text)
except (json.JSONDecodeError, TypeError):
pass
# 可能包含 Markdown 代码块
m = re.search(r'```(?:json)?\s*\n([\s\S]+?)\n```', text)
if m:
try:
return json.loads(m.group(1))
except (json.JSONDecodeError, TypeError):
pass
return None
def _parse_count(val) -> float:
"""解析数字字符串, 支持 '1.2万' 格式"""
if isinstance(val, (int, float)):
return float(val)
s = str(val).strip()
if "" in s:
try:
return float(s.replace("", "")) * 10000
except ValueError:
pass
try:
return float(s)
except ValueError:
return 0.0
def _fmt_count(v: float) -> str:
    """Format a count for chart labels: 'x.x万' at 10k+, else the plain integer.

    FIX: the '万' unit suffix had been corrupted out of the f-string, so
    12,000 rendered as just "1.2".
    """
    return f"{v / 10000:.1f}万" if v >= 10000 else str(int(v))


def _plot_interactions(interactions):
    """Bar chart of the account's core metrics; None when there is no data."""
    if not interactions:
        return None
    inter_data = {i["name"]: _parse_count(i["count"]) for i in interactions}
    fig, ax = plt.subplots(figsize=(4, 3), dpi=100)
    labels = list(inter_data.keys())
    values = list(inter_data.values())
    colors = ["#FF6B6B", "#4ECDC4", "#45B7D1"][:len(labels)]
    ax.bar(labels, values, color=colors, edgecolor="white", linewidth=0.5)
    ax.set_title("账号核心指标", fontsize=12, fontweight="bold")
    peak = max(values)  # hoisted: label offset is relative to the tallest bar
    for idx, v in enumerate(values):
        ax.text(idx, v + peak * 0.02, _fmt_count(v), ha="center", fontsize=9)
    ax.set_ylabel("")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    fig.tight_layout()
    return fig


def _plot_note_likes(feeds):
    """Horizontal bar chart of like counts for up to 15 notes; None if no feeds."""
    if not feeds:
        return None
    titles, likes = [], []
    for feed in feeds[:15]:
        nc = feed.get("noteCard") or {}
        titles.append((nc.get("displayTitle", "") or "无标题")[:12])
        likes.append(_parse_count((nc.get("interactInfo") or {}).get("likedCount", "0")))
    fig, ax = plt.subplots(figsize=(7, 3.5), dpi=100)
    ax.barh(range(len(titles)), likes, color="#FF6B6B", edgecolor="white")
    ax.set_yticks(range(len(titles)))
    ax.set_yticklabels(titles, fontsize=8)
    ax.set_title(f"笔记点赞排行 (Top {len(titles)})", fontsize=12, fontweight="bold")
    ax.invert_yaxis()  # most-liked note on top
    peak = max(likes)  # hoisted out of the label loop
    for idx, v in enumerate(likes):
        ax.text(v + peak * 0.01 if peak > 0 else 0, idx, _fmt_count(v),
                va="center", fontsize=8)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    fig.tight_layout()
    return fig


def _notes_table_md(feeds):
    """Markdown table listing every note's title, type, and raw like count."""
    lines = [
        "### 📋 笔记数据明细",
        "| # | 标题 | 类型 | ❤ 点赞 |",
        "|---|------|------|--------|",
    ]
    for idx, feed in enumerate(feeds):
        nc = feed.get("noteCard") or {}
        title = (nc.get("displayTitle", "") or "无标题")[:25]
        kind = "📹 视频" if nc.get("type") == "video" else "📷 图文"
        liked = (nc.get("interactInfo") or {}).get("likedCount", "0")
        lines.append(f"| {idx + 1} | {title} | {kind} | {liked} |")
    return "\n".join(lines)


def fetch_my_profile(user_id, xsec_token, mcp_url):
    """Fetch my account's profile via MCP and build the dashboard views.

    Args:
        user_id: the account's Xiaohongshu user id.
        xsec_token: credential token required by the profile endpoint.
        mcp_url: base URL of the MCP service.

    Returns:
        A 5-tuple for the Gradio dashboard: (status message, profile
        Markdown, interactions bar chart or None, note-likes chart or
        None, notes Markdown table or None). Never raises: any failure
        is logged and reported in the status message.
    """
    if not user_id or not xsec_token:
        return "❌ 请填写你的用户 ID 和 xsec_token", "", None, None, None
    try:
        client = get_mcp_client(mcp_url)
        result = client.get_user_profile(user_id, xsec_token)
        if "error" in result:
            # FIX: bare f-string had lost its "❌" prefix; restored for
            # consistency with the other error returns.
            return f"❌ {result['error']}", "", None, None, None
        raw = result.get("raw", {})
        text = result.get("text", "")
        # Prefer structured JSON from the raw MCP content items; fall back
        # to parsing the plain-text field.
        data = None
        if raw and isinstance(raw, dict):
            for item in raw.get("content", []):
                if item.get("type") == "text":
                    data = _parse_profile_json(item.get("text", ""))
                    if data:
                        break
        if not data:
            data = _parse_profile_json(text)
        if not data:
            # No parseable JSON anywhere — show the raw text, no charts.
            return "✅ 数据加载完成 (纯文本)", text, None, None, None
        # ---- Basic info (MCP may return null fields for new accounts) ----
        basic = data.get("userBasicInfo") or {}
        interactions = data.get("interactions") or []
        feeds = data.get("feeds") or []
        # FIX: gender labels for 1/2 had been corrupted to empty strings.
        gender_map = {0: "未知", 1: "男", 2: "女"}
        info_lines = [
            f"## 👤 {basic.get('nickname', '未知')}",
            f"- **小红书号**: {basic.get('redId', '-')}",
            f"- **性别**: {gender_map.get(basic.get('gender', 0), '未知')}",
            f"- **IP 属地**: {basic.get('ipLocation', '-')}",
            f"- **简介**: {basic.get('desc', '-')}",
            "",
            "### 📊 核心数据",
        ]
        for inter in interactions:
            info_lines.append(f"- **{inter.get('name', '')}**: {inter.get('count', '0')}")
        info_lines.append(f"\n### 📝 展示笔记: {len(feeds)}")
        profile_md = "\n".join(info_lines)
        # Charts and table are built by focused helpers.
        fig_interact = _plot_interactions(interactions)
        fig_notes = _plot_note_likes(feeds)
        notes_table = _notes_table_md(feeds)
        return "✅ 数据加载完成", profile_md, fig_interact, fig_notes, notes_table
    except Exception as e:
        logger.error(f"获取我的数据失败: {e}")
        # FIX: restored the lost "❌" prefix on the failure status.
        return f"❌ {e}", "", None, None, None