- 新增 `analytics_service.py` 模块,实现笔记数据采集、权重计算与智能分析功能 - 支持定时采集已发布笔记的互动数据(点赞、评论、收藏),并计算主题、风格、标签等多维度权重 - 提供加权随机选题功能,根据历史表现优先生成高互动潜力内容 - 集成 LLM 深度分析,生成内容策略建议与优化报告 - 新增「智能学习」UI 标签页,支持数据采集、权重计算、AI 分析与定时自动学习 ♻️ refactor(llm): 重构 LLM 服务以支持多模型智能适配与加权文案生成 - 扩展 `llm_service.py`,新增 `get_sd_prompt_guide()` 方法,根据当前 SD 模型动态生成绘图提示词指南 - 新增 `PROMPT_PERFORMANCE_ANALYSIS` 与 `PROMPT_WEIGHTED_COPYWRITING` 提示词模板,支持笔记表现分析与加权文案生成 - 重构 `generate_copy()`、`generate_copy_with_reference()` 方法,支持 `sd_model_name` 与 `persona` 参数,实现多模型适配与人设融合 - 新增 `analyze_note_performance()` 与 `generate_weighted_copy()` 方法,实现 AI 深度分析与智能加权创作 ♻️ refactor(sd): 重构 SD 服务以支持多模型配置系统与智能参数适配 - 重构 `sd_service.py`,引入 `SD_MODEL_PROFILES` 配置体系,支持 `majicmixRealistic`、`Realistic Vision`、`Juggernaut XL` 三款模型 - 新增 `detect_model_profile()`、`get_model_profile()`、`get_model_profile_info()` 方法,实现模型自动识别与档案信息展示 - 重构 `txt2img()` 与 `img2img()` 方法,自动根据当前模型应用最优参数、提示词前缀/后缀与反向提示词 - 更新 `get_sd_preset()` 方法,支持模型专属预设参数加载 🎨 style(config): 更新默认配置与人设池 - 更新 `config.json` 与 `config_manager.py`,将默认模型改为 `gemini-3-flash-preview`,默认人设改为「身材管理健身美女」 - 新增 `use_smart_weights` 配置项,控制是否启用智能加权发布 - 扩展 `PERSONA_POOL_MAP`,新增「身材管理健身美女」人设及其对应主题与关键词库 🔧 chore(main): 集成智能学习引擎并扩展自动发布链路 - 在 `main.py` 中实例化 `AnalyticsService`,并集成至各功能模块 - 扩展 `generate_copy()`、`generate_from_hotspot()`、`auto_publish_once()` 等方法,支持 `sd_model_name`、`persona`、`quality_mode_val` 参数传递 - 实现智能加权发布逻辑:当启用权重且数据可用时,自动选择高权重主题、风格与标签,并使用加权文案模板 - 新增「智能学习」标签页相关 UI 组件与事件处理函数,包括数据采集、权重计算、AI 分析、定时学习与加权主题预览 - 更新 SD 模型选择事件,实时显示模型档案信息卡 - 扩展自动调度器,支持智能权重、人设与画质模式的参数传递 📝 docs(changelog): 更新版本日志记录新功能与改进 - 在 `CHANGELOG.md` 中新增 `[2.1.0]` 与 `[2.2.0]` 版本记录 - 详细描述「智能学习引擎」与「多 SD 模型智能适配」两大核心功能 - 列出相关代码重构、配置更新与文件新增情况
625 lines
25 KiB
Python
625 lines
25 KiB
Python
"""
|
||
笔记数据分析 & 智能权重学习模块
|
||
定时抓取已发布笔记的互动数据,自动学习哪些内容受欢迎,生成加权主题池
|
||
"""
|
||
import json
import logging
import math
import os
import random
import re
import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Optional
logger = logging.getLogger(__name__)

# File names (created inside the workspace directory) for persisted state:
# raw per-note metrics and the learned content weights.
ANALYTICS_FILE = "analytics_data.json"
WEIGHTS_FILE = "content_weights.json"
def _safe_int(val) -> int:
|
||
"""将 '1.2万' / '1234' / 1234 等格式转为整数"""
|
||
if isinstance(val, (int, float)):
|
||
return int(val)
|
||
if not val:
|
||
return 0
|
||
s = str(val).strip()
|
||
if "万" in s:
|
||
try:
|
||
return int(float(s.replace("万", "")) * 10000)
|
||
except ValueError:
|
||
return 0
|
||
try:
|
||
return int(float(s))
|
||
except ValueError:
|
||
return 0
|
||
|
||
|
||
class AnalyticsService:
    """Note performance analytics & weight-learning engine.

    Collects interaction data for published notes, derives topic / style /
    tag / title-pattern weights from engagement scores, and offers weighted
    random selection helpers so content generation can favor what performed
    well historically. State is persisted as JSON under the workspace dir.
    """

    def __init__(self, workspace_dir: str = "xhs_workspace"):
        self.workspace_dir = workspace_dir
        self.analytics_path = os.path.join(workspace_dir, ANALYTICS_FILE)
        self.weights_path = os.path.join(workspace_dir, WEIGHTS_FILE)
        # Raw per-note metrics, keyed by note_id.
        self._analytics_data = self._load_json(self.analytics_path, {"notes": {}, "last_analysis": ""})
        # Learned weights per dimension; survives restarts.
        self._weights = self._load_json(self.weights_path, {
            "topic_weights": {},
            "style_weights": {},
            "tag_weights": {},
            "title_pattern_weights": {},
            "time_weights": {},
            "last_updated": "",
            "analysis_history": [],
        })

    # ========== persistence ==========

    @staticmethod
    def _load_json(path: str, default: dict) -> dict:
        """Load JSON from *path*; fall back to a copy of *default* on failure."""
        if os.path.exists(path):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    return json.load(f)
            except (json.JSONDecodeError, IOError) as e:
                logger.warning("加载 %s 失败: %s,使用默认值", path, e)
        return default.copy()

    def _save_analytics(self):
        """Persist raw note metrics to analytics_data.json."""
        os.makedirs(self.workspace_dir, exist_ok=True)
        with open(self.analytics_path, "w", encoding="utf-8") as f:
            json.dump(self._analytics_data, f, ensure_ascii=False, indent=2)

    def _save_weights(self):
        """Persist learned weights to content_weights.json."""
        os.makedirs(self.workspace_dir, exist_ok=True)
        with open(self.weights_path, "w", encoding="utf-8") as f:
            json.dump(self._weights, f, ensure_ascii=False, indent=2)

    # ========== data collection ==========

    def collect_note_performance(self, mcp_client, user_id: str, xsec_token: str) -> dict:
        """Fetch all of the user's notes with interaction data via MCP and
        store them into analytics_data.json.

        Returns {"total": N, "updated": M, "notes": [...]}, or a dict with
        an "error" key when the MCP payload cannot be parsed.
        """
        logger.info("开始采集笔记表现数据 (user_id=%s)", user_id)

        raw = mcp_client.get_user_profile(user_id, xsec_token)
        text = ""
        if isinstance(raw, dict):
            content_list = raw.get("content", [])
            for item in content_list:
                if isinstance(item, dict) and item.get("type") == "text":
                    text = item.get("text", "")
                    break
            if not text:
                text = raw.get("text", "")

        # Parse JSON: try the raw text, then a fenced ```json block, then
        # the outermost {...} span.
        data = None
        for attempt_fn in [
            lambda t: json.loads(t),
            lambda t: json.loads(re.search(r'```(?:json)?\s*\n([\s\S]+?)\n```', t).group(1)),
            lambda t: json.loads(re.search(r'(\{[\s\S]*\})', t).group(1)),
        ]:
            try:
                data = attempt_fn(text)
                if data:
                    break
            except Exception:
                continue

        if not data:
            return {"total": 0, "updated": 0, "error": "无法解析用户数据"}

        feeds = data.get("feeds", [])
        if not feeds:
            return {"total": 0, "updated": 0, "error": "未找到笔记数据"}

        notes_dict = self._analytics_data.get("notes", {})
        updated = 0
        note_summaries = []

        for f in feeds:
            nc = f.get("noteCard") or {}
            note_id = nc.get("noteId") or f.get("noteId", "")
            if not note_id:
                continue

            interact = nc.get("interactInfo") or {}
            liked = _safe_int(interact.get("likedCount", 0))
            # The MCP profile feed usually exposes likedCount only; comment
            # and collect counts live on the detail page, so likes serve as
            # the primary metric here.

            title = nc.get("displayTitle", "") or ""
            note_type = nc.get("type", "normal")  # normal / video

            # Enrich with topic/style/tags from the locally backed-up copy.
            local_meta = self._find_local_meta(title)

            note_data = {
                "note_id": note_id,
                "title": title,
                "type": note_type,
                "likes": liked,
                "topic": local_meta.get("topic", ""),
                "style": local_meta.get("style", ""),
                "tags": local_meta.get("tags", []),
                "sd_prompt": local_meta.get("sd_prompt", ""),
                "collected_at": datetime.now().isoformat(),
            }

            # Insert new notes, or refresh records whose like count changed.
            old = notes_dict.get(note_id, {})
            if old.get("likes", 0) != liked or not old:
                updated += 1
                notes_dict[note_id] = {**old, **note_data}

            note_summaries.append(note_data)

        self._analytics_data["notes"] = notes_dict
        self._analytics_data["last_analysis"] = datetime.now().isoformat()
        self._save_analytics()

        logger.info("采集完成: 共 %d 篇笔记, 更新 %d 篇", len(feeds), updated)
        return {"total": len(feeds), "updated": updated, "notes": note_summaries}

    def collect_note_details(self, mcp_client, note_id: str, xsec_token: str):
        """Fetch per-note detail metrics (likes, comments, collects, shares).

        Returns a dict of counts, or None when the call or parsing fails.
        """
        try:
            result = mcp_client.get_feed_detail(note_id, xsec_token, load_all_comments=False)
            text = ""
            if isinstance(result, dict):
                for item in result.get("content", []):
                    if isinstance(item, dict) and item.get("type") == "text":
                        text = item.get("text", "")
                        break
            if text:
                data = None
                try:
                    data = json.loads(text)
                except Exception:
                    # Fall back to the outermost {...} span.
                    m = re.search(r'(\{[\s\S]*\})', text)
                    if m:
                        try:
                            data = json.loads(m.group(1))
                        except Exception:
                            pass
                if data:
                    interact = data.get("interactInfo") or {}
                    comments = data.get("comments", [])
                    return {
                        "likes": _safe_int(interact.get("likedCount", 0)),
                        "comments_count": _safe_int(interact.get("commentCount", len(comments))),
                        "collects": _safe_int(interact.get("collectedCount", 0)),
                        "shares": _safe_int(interact.get("shareCount", 0)),
                    }
        except Exception as e:
            logger.warning("获取笔记 %s 详情失败: %s", note_id, e)
        return None

    def _find_local_meta(self, title: str) -> dict:
        """Look up the locally backed-up copy matching *title* under the
        workspace dir and extract topic / style / tags / SD prompt from it.
        """
        result = {"topic": "", "style": "", "tags": [], "sd_prompt": ""}
        if not title:
            return result

        # Scan backup directories; best-effort, any failure yields defaults.
        try:
            for dirname in os.listdir(self.workspace_dir):
                dir_path = os.path.join(self.workspace_dir, dirname)
                if not os.path.isdir(dir_path) or dirname.startswith("_"):
                    continue
                txt_path = os.path.join(dir_path, "文案.txt")
                if not os.path.exists(txt_path):
                    continue
                try:
                    with open(txt_path, "r", encoding="utf-8") as f:
                        content = f.read()
                    # Match by title prefix in the copy, or title in dir name.
                    if title[:10] in content or title in dirname:
                        # Extract metadata lines.
                        for line in content.split("\n"):
                            if line.startswith("风格:"):
                                result["style"] = line.split(":", 1)[1].strip()
                            elif line.startswith("主题:"):
                                result["topic"] = line.split(":", 1)[1].strip()
                            elif line.startswith("标签:"):
                                tags_str = line.split(":", 1)[1].strip()
                                result["tags"] = [t.strip() for t in tags_str.split(",") if t.strip()]
                            elif line.startswith("SD Prompt:"):
                                result["sd_prompt"] = line.split(":", 1)[1].strip()
                        break
                except Exception:
                    continue
        except Exception:
            pass
        return result

    # ========== weight calculation ==========

    def calculate_weights(self) -> dict:
        """Compute per-dimension weights from collected note metrics.

        Engagement score = likes * 1.0 + comments * 2.0 + collects * 1.5.
        Persists the result and returns a summary dict (or an "error" key
        when there is nothing to analyze).
        """
        notes = self._analytics_data.get("notes", {})
        if not notes:
            return {"error": "暂无笔记数据,请先采集"}

        # Score every note. Comments weigh 2.0 (deep engagement), collects
        # 1.5, likes 1.0; fall back to raw likes when the sum is zero.
        scored_notes = []
        for nid, note in notes.items():
            likes = note.get("likes", 0)
            comments_count = note.get("comments_count", 0)
            collects = note.get("collects", 0)
            score = likes * 1.0 + comments_count * 2.0 + collects * 1.5
            if score == 0:
                score = likes
            scored_notes.append({**note, "score": score, "note_id": nid})

        if not scored_notes:
            return {"error": "没有可分析的笔记"}

        # Best note's score is the normalization denominator.
        scored_notes.sort(key=lambda x: x["score"], reverse=True)
        max_score = scored_notes[0]["score"] if scored_notes[0]["score"] > 0 else 1

        # ---- topic weights ----
        topic_scores = defaultdict(float)
        topic_counts = defaultdict(int)
        for note in scored_notes:
            topic = note.get("topic", "").strip()
            if topic:
                topic_scores[topic] += note["score"]
                topic_counts[topic] += 1

        topic_weights = {}
        for topic, total_score in topic_scores.items():
            avg_score = total_score / topic_counts[topic]
            # Normalize to 0-100.
            weight = min(100, int((avg_score / max_score) * 100)) if max_score > 0 else 50
            # Bonus for topics validated by multiple notes.
            if topic_counts[topic] >= 3:
                weight = min(100, weight + 10)
            elif topic_counts[topic] >= 2:
                weight = min(100, weight + 5)
            topic_weights[topic] = {
                "weight": weight,
                "count": topic_counts[topic],
                "avg_score": round(avg_score, 1),
                "total_score": round(total_score, 1),
            }

        # ---- style weights ----
        style_scores = defaultdict(float)
        style_counts = defaultdict(int)
        for note in scored_notes:
            style = note.get("style", "").strip()
            if style:
                style_scores[style] += note["score"]
                style_counts[style] += 1

        style_weights = {}
        for style, total_score in style_scores.items():
            avg = total_score / style_counts[style]
            weight = min(100, int((avg / max_score) * 100)) if max_score > 0 else 50
            style_weights[style] = {
                "weight": weight,
                "count": style_counts[style],
                "avg_score": round(avg, 1),
            }

        # ---- tag weights ----
        tag_scores = defaultdict(float)
        tag_counts = defaultdict(int)
        for note in scored_notes:
            for tag in note.get("tags", []):
                tag = tag.strip().lstrip("#")
                if tag:
                    tag_scores[tag] += note["score"]
                    tag_counts[tag] += 1

        tag_weights = {}
        for tag, total_score in tag_scores.items():
            avg = total_score / tag_counts[tag]
            weight = min(100, int((avg / max_score) * 100)) if max_score > 0 else 50
            tag_weights[tag] = {"weight": weight, "count": tag_counts[tag]}

        # Keep only the top 30 tags by weight.
        tag_weights = dict(sorted(tag_weights.items(), key=lambda x: x[1]["weight"], reverse=True)[:30])

        # ---- title pattern weights (emoji / sentence form / length) ----
        title_patterns = defaultdict(list)
        for note in scored_notes:
            title = note.get("title", "")
            if not title:
                continue
            has_emoji = bool(re.search(r'[\U0001F600-\U0001F9FF\u2600-\u27BF]', title))
            has_question = "?" in title or "?" in title
            has_exclaim = "!" in title or "!" in title
            has_ellipsis = "..." in title or "…" in title
            length_bucket = "短(≤10)" if len(title) <= 10 else ("中(11-15)" if len(title) <= 15 else "长(16-20)")

            for feature, val in [
                ("含emoji", has_emoji), ("疑问句式", has_question),
                ("感叹句式", has_exclaim), ("省略句式", has_ellipsis),
            ]:
                if val:
                    title_patterns[feature].append(note["score"])
            title_patterns[f"长度:{length_bucket}"].append(note["score"])

        title_pattern_weights = {}
        for pattern, scores in title_patterns.items():
            avg = sum(scores) / len(scores) if scores else 0
            title_pattern_weights[pattern] = {
                "weight": min(100, int((avg / max_score) * 100)) if max_score > 0 else 50,
                "count": len(scores),
                "avg_score": round(avg, 1),
            }

        # ---- time-of-day weights ----
        # NOTE(review): buckets use `collected_at` (when we scraped the
        # data), not the actual publish time — confirm this is intended.
        time_scores = defaultdict(list)
        for note in scored_notes:
            collected = note.get("collected_at", "")
            if collected:
                try:
                    dt = datetime.fromisoformat(collected)
                    hour_bucket = f"{(dt.hour // 3) * 3:02d}-{(dt.hour // 3) * 3 + 3:02d}时"
                    time_scores[hour_bucket].append(note["score"])
                except Exception:
                    pass

        time_weights = {}
        for bucket, scores in time_scores.items():
            avg = sum(scores) / len(scores) if scores else 0
            time_weights[bucket] = {
                "weight": min(100, int((avg / max_score) * 100)) if max_score > 0 else 50,
                "count": len(scores),
            }

        # ---- persist ----
        self._weights.update({
            "topic_weights": dict(sorted(topic_weights.items(), key=lambda x: x[1]["weight"], reverse=True)),
            "style_weights": dict(sorted(style_weights.items(), key=lambda x: x[1]["weight"], reverse=True)),
            "tag_weights": tag_weights,
            "title_pattern_weights": title_pattern_weights,
            "time_weights": time_weights,
            "last_updated": datetime.now().isoformat(),
            "total_notes_analyzed": len(scored_notes),
            "top_note": {
                "title": scored_notes[0].get("title", ""),
                "score": scored_notes[0].get("score", 0),
                "likes": scored_notes[0].get("likes", 0),
            } if scored_notes else {},
        })

        # Append to analysis history, keeping only the latest 50 entries.
        history = self._weights.get("analysis_history", [])
        history.append({
            "time": datetime.now().isoformat(),
            "total_notes": len(scored_notes),
            "avg_score": round(sum(n["score"] for n in scored_notes) / len(scored_notes), 1),
            "top_topic": list(topic_weights.keys())[0] if topic_weights else "",
        })
        self._weights["analysis_history"] = history[-50:]
        self._save_weights()

        return {
            "total_notes": len(scored_notes),
            "top_topics": list(topic_weights.items())[:10],
            "top_styles": list(style_weights.items())[:5],
            "top_tags": list(tag_weights.items())[:10],
            "title_patterns": title_pattern_weights,
            "top_note": scored_notes[0] if scored_notes else None,
        }

    # ========== weighted topic selection ==========

    def get_weighted_topic(self, base_topics: Optional[list[str]] = None) -> str:
        """Pick a topic by weighted random choice.

        Falls back to uniform choice over *base_topics* (or a default topic)
        when no weight data has been learned yet.
        """
        topic_weights = self._weights.get("topic_weights", {})
        if not topic_weights:
            # No learned weights yet: uniform pick from the base pool.
            return random.choice(base_topics) if base_topics else "日常分享"

        # Merge learned topics with any new topics from the base pool.
        all_topics = {}
        for topic, info in topic_weights.items():
            all_topics[topic] = info.get("weight", 50)

        if base_topics:
            for t in base_topics:
                if t not in all_topics:
                    all_topics[t] = 30  # starter weight for unseen topics

        # Weighted random selection (weights clamped to >= 1).
        topics = list(all_topics.keys())
        weights = [max(1, all_topics[t]) for t in topics]
        chosen = random.choices(topics, weights=weights, k=1)[0]

        logger.info("加权选题: %s (权重: %s)", chosen, all_topics.get(chosen, "?"))
        return chosen

    def get_weighted_style(self, base_styles: Optional[list[str]] = None) -> str:
        """Pick a writing style by weighted random choice (uniform fallback)."""
        style_weights = self._weights.get("style_weights", {})
        if not style_weights:
            return random.choice(base_styles) if base_styles else "真实分享"

        all_styles = {}
        for style, info in style_weights.items():
            all_styles[style] = info.get("weight", 50)

        if base_styles:
            for s in base_styles:
                if s not in all_styles:
                    all_styles[s] = 30

        styles = list(all_styles.keys())
        weights = [max(1, all_styles[s]) for s in styles]
        return random.choices(styles, weights=weights, k=1)[0]

    def get_top_tags(self, n: int = 8) -> list[str]:
        """Return the *n* highest-weighted tags (empty list if no data)."""
        tag_weights = self._weights.get("tag_weights", {})
        if not tag_weights:
            return []
        sorted_tags = sorted(tag_weights.items(), key=lambda x: x[1].get("weight", 0), reverse=True)
        return [t[0] for t in sorted_tags[:n]]

    def get_title_advice(self) -> str:
        """Summarize the strongest title patterns as human-readable advice."""
        patterns = self._weights.get("title_pattern_weights", {})
        if not patterns:
            return "暂无标题分析数据"

        sorted_p = sorted(patterns.items(), key=lambda x: x[1].get("weight", 0), reverse=True)
        advice_parts = []
        for p_name, p_info in sorted_p[:5]:
            advice_parts.append(f" • {p_name}: 权重 {p_info['weight']}分 (出现{p_info['count']}次)")
        return "\n".join(advice_parts)

    # ========== LLM deep analysis ==========

    def generate_llm_analysis_prompt(self) -> str:
        """Render the top 20 notes (by likes) as the data section of an LLM
        performance-analysis prompt. Empty string when no data exists.
        """
        notes = self._analytics_data.get("notes", {})
        if not notes:
            return ""

        sorted_notes = sorted(notes.values(), key=lambda x: x.get("likes", 0), reverse=True)

        lines = []
        for i, note in enumerate(sorted_notes[:20]):
            lines.append(
                f"#{i+1} 「{note.get('title', '无标题')}」\n"
                f"   点赞: {note.get('likes', 0)} | 主题: {note.get('topic', '未知')} | "
                f"风格: {note.get('style', '未知')}\n"
                f"   标签: {', '.join(note.get('tags', []))}"
            )
        return "\n".join(lines)

    # ========== report generation ==========

    def generate_report(self) -> str:
        """Build a Markdown analysis report from the learned weights."""
        weights = self._weights
        notes = self._analytics_data.get("notes", {})

        if not notes:
            return "## 📊 暂无分析数据\n\n请先点击「采集数据」获取笔记表现数据,再点击「计算权重」。"

        total = len(notes)
        last_updated = weights.get("last_updated", "未知")

        # Best-performing note headline.
        top_note = weights.get("top_note", {})
        top_note_str = f"**{top_note.get('title', '')}** (❤️ {top_note.get('likes', 0)})" if top_note else "暂无"

        lines = [
            "## 📊 智能内容学习报告",
            "",
            f"🕐 最后更新: {last_updated[:19] if last_updated else '从未'}",
            f"📝 分析笔记数: **{total}** 篇",
            f"🏆 最佳笔记: {top_note_str}",
            "",
            "---",
            "",
        ]

        # Topic ranking table with a 10-char bar per weight decile.
        topic_w = weights.get("topic_weights", {})
        if topic_w:
            lines.append("### 🎯 主题权重排行")
            lines.append("| 排名 | 主题 | 权重 | 笔记数 | 平均得分 |")
            lines.append("|:---:|------|:---:|:---:|:---:|")
            for idx, (topic, info) in enumerate(list(topic_w.items())[:10]):
                bar = "█" * (info["weight"] // 10) + "░" * (10 - info["weight"] // 10)
                lines.append(
                    f"| {idx+1} | {topic} | {bar} {info['weight']} | {info['count']} | {info['avg_score']} |"
                )
            lines.append("")

        # Style ranking.
        style_w = weights.get("style_weights", {})
        if style_w:
            lines.append("### 🎨 风格权重排行")
            for style, info in list(style_w.items())[:5]:
                bar = "█" * (info["weight"] // 10) + "░" * (10 - info["weight"] // 10)
                lines.append(f"- **{style}**: {bar} {info['weight']}分 ({info['count']}篇)")
            lines.append("")

        # Top tags.
        tag_w = weights.get("tag_weights", {})
        if tag_w:
            lines.append("### 🏷️ 高权重标签 (Top 10)")
            top_tags = list(tag_w.items())[:10]
            tag_strs = [f"`#{t}` ({info['weight']})" for t, info in top_tags]
            lines.append(" | ".join(tag_strs))
            lines.append("")

        # Title patterns.
        title_p = weights.get("title_pattern_weights", {})
        if title_p:
            lines.append("### ✏️ 标题模式分析")
            sorted_p = sorted(title_p.items(), key=lambda x: x[1].get("weight", 0), reverse=True)
            for p_name, p_info in sorted_p[:6]:
                lines.append(f"- **{p_name}**: 权重 {p_info['weight']} (出现 {p_info['count']} 次)")
            lines.append("")

        # Actionable suggestions.
        lines.append("---")
        lines.append("### 💡 智能建议")
        if topic_w:
            top_3 = list(topic_w.keys())[:3]
            lines.append(f"- 📌 **高权重主题**: 优先创作 → {', '.join(top_3)}")
        if tag_w:
            hot_tags = [f"#{t}" for t in list(tag_w.keys())[:5]]
            lines.append(f"- 🏷️ **推荐标签**: {' '.join(hot_tags)}")
        if title_p:
            best_pattern = max(title_p.items(), key=lambda x: x[1].get("weight", 0))
            lines.append(f"- ✏️ **标题建议**: 多用「{best_pattern[0]}」(权重{best_pattern[1]['weight']})")

        lines.append("")
        lines.append("> 💡 启用「智能加权发布」后,自动发布将按权重倾斜生成高表现内容")

        return "\n".join(lines)

    def get_weighted_topics_display(self) -> str:
        """Comma-joined top-15 topics by weight, for UI display."""
        topic_w = self._weights.get("topic_weights", {})
        if not topic_w:
            return ""
        sorted_topics = sorted(topic_w.items(), key=lambda x: x[1].get("weight", 0), reverse=True)
        return ", ".join([t[0] for t in sorted_topics[:15]])

    @property
    def has_weights(self) -> bool:
        """True when topic weights have been learned."""
        return bool(self._weights.get("topic_weights"))

    @property
    def weights_summary(self) -> str:
        """One-line summary: analyzed note count and top topics."""
        tw = self._weights.get("topic_weights", {})
        total = self._weights.get("total_notes_analyzed", 0)
        if not tw:
            return "暂无权重数据"
        top = list(tw.keys())[:3]
        return f"{total}篇笔记 | 热门: {', '.join(top)}"