- 新增智能选题引擎 `TopicEngine`,整合热点数据与历史权重,提供多维度评分和创作角度建议 - 新增内容模板系统 `ContentTemplate`,支持从 JSON 文件加载模板并应用于文案生成 - 新增批量创作功能 `batch_generate_copy`,支持串行生成多篇文案并自动入草稿队列 - 升级文案质量流水线:实现 Prompt 分层架构(基础层 + 风格层 + 人设层)、LLM 自检与改写机制、深度去 AI 化后处理 - 优化图文协同:新增封面图策略选择、SD prompt 与文案语义联动、图文匹配度评估 - 集成数据闭环:在文案生成中自动注入 `AnalyticsService` 权重数据,实现发布 → 数据回收 → 优化创作的完整循环 - 更新 UI 组件:新增选题推荐展示区、批量创作折叠面板、封面图策略选择器和图文匹配度评分展示 ♻️ refactor(llm): 重构 Prompt 架构并增强去 AI 化处理 - 将 `PROMPT_COPYWRITING` 拆分为分层架构(基础层 + 风格层 + 人设层),提高维护性和灵活性 - 增强 `_humanize_content` 方法:新增语气词注入、标点不规范化、段落节奏打散和 emoji 密度控制 - 新增 `_self_check` 和 `_self_check_rewrite` 方法,实现文案 AI 痕迹自检与自动改写 - 新增 `evaluate_image_text_match` 方法,支持文案与 SD prompt 的语义匹配度评估(可选,失败不阻塞) - 新增封面图策略配置 `COVER_STRATEGIES` 和情感基调映射 `EMOTION_SD_MAP` 📝 docs(openspec): 归档内容创作优化提案和详细规格 - 新增 `openspec/changes/archive/2026-02-28-optimize-content-creation/` 目录,包含设计文档、提案、规格说明和任务清单 - 新增 `openspec/specs/` 下的批量创作、文案质量流水线、图文协同、服务内容和智能选题引擎规格文档 - 更新 `openspec/specs/services-content/spec.md`,反映新增的批量创作和智能选题入口函数 🔧 chore(config): 更新服务配置和 UI 集成 - 在 `services/content.py` 中集成权重数据自动注入逻辑,实现数据驱动创作 - 在 `ui/app.py` 中新增选题推荐、批量生成和图文匹配度评估的回调函数 - 在 `ui/tab_create.py` 中新增智能选题推荐区、批量创作面板和图文匹配度评估组件 - 修复 `services/sd_service.py` 中的头像文件路径问题,确保目录存在
463 lines
15 KiB
Python
463 lines
15 KiB
Python
"""
|
||
services/topic_engine.py
|
||
智能选题引擎 — 聚合热点数据 + 历史权重,推荐高潜力选题
|
||
"""
|
||
import logging
|
||
import os
|
||
import json
|
||
import re
|
||
from datetime import datetime, timedelta
|
||
|
||
# Module-level logger. It uses the fixed logger name "autobot" instead of
# this module's own name.
logger = logging.getLogger("autobot")
|
||
|
||
|
||
class TopicEngine:
    """Rule-based topic recommendation engine.

    Combines hotspot-detection results with historical engagement weights to
    rank candidate topics. The engine never calls MCP or an LLM itself; all
    data is read from the injected analytics service (its ``_weights`` and
    ``_analytics_data`` dictionaries).
    """

    # Season keyword -> months (1-12) in which a topic containing the keyword
    # is considered timely.
    _SEASON_MONTHS = {
        "春": (2, 3, 4, 5),
        "夏": (5, 6, 7, 8),
        "秋": (8, 9, 10, 11),
        "冬": (11, 12, 1, 2),
        "早春": (2, 3),
        "初夏": (5, 6),
        "初秋": (8, 9),
    }

    # Festival keyword -> (start_month, start_day, end_month, end_day).
    # A window whose start is later in the year than its end wraps around
    # New Year (e.g. "元旦").
    _FESTIVAL_WINDOWS = {
        "情人节": (2, 10, 2, 18),
        "三八": (3, 1, 3, 12),
        "妇女节": (3, 1, 3, 12),
        "母亲节": (5, 5, 5, 15),
        "618": (6, 1, 6, 20),
        "七夕": (7, 20, 8, 15),
        "中秋": (9, 1, 9, 30),
        "国庆": (9, 25, 10, 10),
        "双十一": (10, 20, 11, 15),
        "双11": (10, 20, 11, 15),
        "双十二": (12, 1, 12, 15),
        "圣诞": (12, 15, 12, 28),
        "元旦": (12, 25, 1, 5),
        "年货": (1, 5, 2, 10),
        "春节": (1, 10, 2, 10),
        "开学": (8, 20, 9, 15),
    }

    # Modifier fragments removed before comparing the "core" of two topics.
    _MODIFIERS = ("早", "初", "晚", "新", "最", "超", "巨", "真的", "必看")

    # Leading list numbering such as "1. ", "2、", "3)" or "【4】". The digits
    # must be followed by a delimiter that is not itself followed by a digit,
    # so meaningful numbers ("618好物", "3.15晚会") are left intact.
    _NUMBERING_PREFIX = re.compile(r'^(?:[((\[【]?\d+[.、))\]】](?!\d)\s*)+')

    # Angle templates keyed by a keyword looked up inside the topic text.
    # Dict order matters: the first keyword found in the topic wins.
    _ANGLE_TEMPLATES = {
        "穿搭": (
            "从预算角度分享{topic}的平替选择",
            "身材不同如何驾驭{topic}",
            "一周{topic}不重样的实穿记录",
        ),
        "美食": (
            "零失败的{topic}详细做法",
            "外卖 vs 自己做{topic}的对比",
            "{topic}的隐藏吃法",
        ),
        "护肤": (
            "不同肤质的{topic}选择指南",
            "踩雷vs回购:{topic}真实体验",
            "平价替代大牌{topic}推荐",
        ),
        "好物": (
            "用了半年的{topic}真实测评",
            "后悔没早买的{topic}清单",
            "从使用场景出发推荐{topic}",
        ),
    }

    # Fallback angle templates used when no keyword above matches.
    _GENERIC_ANGLE_TEMPLATES = (
        "个人真实体验分享{topic}",
        "新手入门{topic}的详细攻略",
        "关于{topic}的冷知识和避坑指南",
    )

    def __init__(self, analytics_service):
        """Store the analytics dependency.

        Args:
            analytics_service: AnalyticsService instance. This class reads its
                ``_weights`` and ``_analytics_data`` dictionaries.
        """
        self.analytics = analytics_service

    # ========== Core: multi-dimensional scoring ==========

    def score_topic(self, topic: str, hotspot_data: dict = None) -> dict:
        """Compute the combined score of a single candidate topic.

        Dimensions:
            - hotspot_score (0-40): current hotspot heat
            - weight_score (0-30): historical engagement weight
            - scarcity_score (0-20): how rarely the topic was covered recently
            - timeliness_score (0-10): seasonal / festival relevance

        Args:
            topic: Candidate topic text.
            hotspot_data: Optional hotspot analysis data; the keys read are
                ``hot_topics`` and ``suggestions``.

        Returns:
            dict with ``total_score`` plus the four per-dimension scores.
        """
        detail = {
            "hotspot_score": self._calc_hotspot_score(topic, hotspot_data),
            "weight_score": self._calc_weight_score(topic),
            "scarcity_score": self._calc_scarcity_score(topic),
            "timeliness_score": self._calc_timeliness_score(topic),
        }
        return {"total_score": sum(detail.values()), **detail}

    # ========== Recommendation list ==========

    def recommend_topics(self, count: int = 5, hotspot_data: dict = None) -> list[dict]:
        """Return the top-ranked topic recommendations.

        Steps:
            1. Collect candidates (hotspot + weight + trend topics).
            2. Score every candidate.
            3. Merge near-duplicate topics, keeping the higher score.
            4. Sort by total score (descending) and keep the first ``count``.
            5. Attach a reason and creation angles to every kept item.

        Args:
            count: Maximum number of recommendations (default 5). Zero or a
                negative value yields an empty list.
            hotspot_data: Optional hotspot analysis data.

        Returns:
            list of dict, each with ``topic``, ``score``, ``source``,
            ``score_detail``, ``reason`` and ``angles``.
        """
        candidates = self._collect_candidates(hotspot_data)
        if not candidates:
            logger.warning("选题引擎: 无候选主题可推荐")
            return []

        scored = []
        for topic, source in candidates:
            detail = self.score_topic(topic, hotspot_data)
            scored.append({
                "topic": topic,
                "score": detail["total_score"],
                "source": source,
                "score_detail": detail,
            })

        scored = self._deduplicate(scored)
        scored.sort(key=lambda entry: entry["score"], reverse=True)
        # max(0, ...) so a negative count cannot turn into a "drop the tail"
        # slice such as scored[:-1].
        scored = scored[:max(0, count)]

        for entry in scored:
            entry["reason"] = self._generate_reason(entry)
            entry["angles"] = self._generate_angles(entry["topic"], entry["source"])

        return scored

    # ========== Candidate collection ==========

    def _collect_candidates(self, hotspot_data: dict = None) -> list[tuple[str, str]]:
        """Collect unique candidate topics as ``[(topic, source), ...]``.

        ``source`` is one of ``"hotspot"``, ``"weight"`` or ``"trend"``.
        Every topic is normalised with ``_clean_topic`` before the uniqueness
        check, and the first source that yields a topic wins.
        """
        candidates = []
        seen = set()

        def add(raw, source):
            cleaned = self._clean_topic(raw)
            if cleaned and cleaned not in seen:
                seen.add(cleaned)
                candidates.append((cleaned, source))

        # 1. Hotspot data: hot topics first, then suggestions.
        if hotspot_data:
            for raw in hotspot_data.get("hot_topics") or []:
                add(raw, "hotspot")
            for suggestion in hotspot_data.get("suggestions") or []:
                add(self._suggestion_topic(suggestion), "hotspot")

        # 2. Topics that already carry a historical weight.
        for raw in self.analytics._weights.get("topic_weights") or {}:
            add(raw, "weight")

        # 3. Top topics of the five most recent analysis runs.
        history = self.analytics._weights.get("analysis_history") or []
        for entry in history[-5:]:
            add(entry.get("top_topic", ""), "trend")

        return candidates

    # ========== Scoring sub-modules ==========

    def _calc_hotspot_score(self, topic: str, hotspot_data: dict = None) -> int:
        """Hotspot heat score (0-40).

        The first similar entry in ``hot_topics`` scores ``40 - 5 * rank``
        (rank 0 is the hottest); a similar entry in ``suggestions`` guarantees
        at least 30. Without hotspot data the score is 0.
        """
        if not hotspot_data:
            return 0

        score = 0
        for rank, hot_topic in enumerate(hotspot_data.get("hot_topics") or []):
            if self._topic_similar(topic, hot_topic):
                # Earlier rank -> higher score; max() keeps deep ranks at 0.
                score = max(score, 40 - rank * 5)
                break

        for suggestion in hotspot_data.get("suggestions") or []:
            if self._topic_similar(topic, self._suggestion_topic(suggestion)):
                score = max(score, 30)
                break

        return min(40, score)

    def _calc_weight_score(self, topic: str) -> int:
        """Historical engagement score (0-30).

        The stored weight is multiplied by 0.3, truncated to an int and
        clamped to 0-30. An exact key match is used when present; otherwise
        the best score among similar topics is returned.
        """
        topic_weights = self.analytics._weights.get("topic_weights", {})
        if not topic_weights:
            return 0

        def to_score(info):
            weight = info.get("weight", 0)
            # Clamp on both sides so a negative weight cannot produce a
            # negative score.
            return max(0, min(30, int(weight * 0.3)))

        if topic in topic_weights:
            return to_score(topic_weights[topic])

        return max(
            (
                to_score(info)
                for existing_topic, info in topic_weights.items()
                if self._topic_similar(topic, existing_topic)
            ),
            default=0,
        )

    def _calc_scarcity_score(self, topic: str) -> int:
        """Content scarcity score (0-20).

        Counts notes whose ``collected_at`` lies within the last 7 days and
        whose topic is similar to ``topic``:
            - none      -> 20
            - one       -> 12
            - two or more -> ``max(0, 5 - count)`` (the more, the lower)
        """
        notes = self.analytics._analytics_data.get("notes", {})
        # ISO-8601 strings are compared as text, as the stored timestamps are.
        seven_days_ago = (datetime.now() - timedelta(days=7)).isoformat()

        recent_count = 0
        for note in notes.values():
            collected = note.get("collected_at", "")
            # Notes with a missing / null timestamp cannot be dated; skip them
            # instead of failing on a str-vs-None comparison.
            if not isinstance(collected, str) or collected < seven_days_ago:
                continue
            if self._topic_similar(topic, note.get("topic", "")):
                recent_count += 1

        if recent_count >= 2:
            return max(0, 5 - recent_count)
        if recent_count == 1:
            return 12
        return 20

    def _calc_timeliness_score(self, topic: str, now: datetime = None) -> int:
        """Timeliness score (0-10).

        Base score is 5. A season keyword whose months include the current
        month raises it to 8. A festival keyword whose window contains the
        current day (both boundary days included) yields 10.

        Args:
            topic: Candidate topic text.
            now: Reference moment; defaults to ``datetime.now()``. Exposed so
                the calculation can be evaluated for a fixed date.
        """
        if now is None:
            now = datetime.now()

        score = 5
        if any(
            keyword in topic and now.month in months
            for keyword, months in self._SEASON_MONTHS.items()
        ):
            score = 8

        # Compare (month, day) pairs so the whole end day counts as inside
        # the window regardless of the time of day.
        today = (now.month, now.day)
        for keyword, (m1, d1, m2, d2) in self._FESTIVAL_WINDOWS.items():
            if keyword not in topic:
                continue
            start, end = (m1, d1), (m2, d2)
            if start > end:
                # Window wraps around New Year.
                in_window = today >= start or today <= end
            else:
                in_window = start <= today <= end
            if in_window:
                return 10

        return score

    # ========== De-duplication ==========

    def _deduplicate(self, scored: list[dict]) -> list[dict]:
        """Merge near-duplicate topics, keeping the higher-scored entry.

        Each not-yet-merged entry acts as an anchor; every later entry that
        ``_topic_similar`` considers similar to the anchor is merged into it.
        Example: "春季穿搭" and "春季穿搭指南" collapse into whichever of the
        two has the higher score.
        """
        if len(scored) <= 1:
            return scored

        result = []
        merged = set()
        for i, anchor in enumerate(scored):
            if i in merged:
                continue
            best = anchor
            for j in range(i + 1, len(scored)):
                if j in merged:
                    continue
                other = scored[j]
                if self._topic_similar(anchor["topic"], other["topic"]):
                    merged.add(j)
                    if other["score"] > best["score"]:
                        best = other
            result.append(best)

        return result

    # ========== Helpers ==========

    @staticmethod
    def _suggestion_topic(suggestion) -> str:
        """Return the topic text of a suggestion given as a dict or a string."""
        value = suggestion.get("topic", "") if isinstance(suggestion, dict) else suggestion
        return value if isinstance(value, str) else ""

    @staticmethod
    def _clean_topic(topic: str) -> str:
        """Normalise topic text.

        Removes leading list numbering ("1. ", "2、", "【3】" ...), bullet
        characters and surrounding whitespace. Returns "" for empty or
        non-string input.
        """
        if not topic or not isinstance(topic, str):
            return ""
        text = TopicEngine._NUMBERING_PREFIX.sub('', topic.strip())
        text = re.sub(r'[•·●]', '', text)
        return text.strip()

    @staticmethod
    def _topic_similar(a: str, b: str) -> bool:
        """Decide whether two topics are close enough to be treated as one.

        Rules, applied in order on stripped, lower-cased text:
            1. identical -> True
            2. one contains the other -> True
            3. identical after removing modifier fragments -> True
            4. character-set overlap (intersection / union) > 0.6 -> True

        Empty, whitespace-only or non-string input is never similar.
        """
        if not isinstance(a, str) or not isinstance(b, str):
            return False

        left = a.strip().lower()
        right = b.strip().lower()
        # Check emptiness after stripping: "" is a substring of everything
        # and would otherwise match any topic in the containment rule.
        if not left or not right:
            return False

        if left == right or left in right or right in left:
            return True

        left_core, right_core = left, right
        for modifier in TopicEngine._MODIFIERS:
            left_core = left_core.replace(modifier, "")
            right_core = right_core.replace(modifier, "")
        if left_core and left_core == right_core:
            return True

        # Per-character sets serve as a crude tokenisation for Chinese text.
        left_chars, right_chars = set(left), set(right)
        if len(left_chars) >= 2 and len(right_chars) >= 2:
            overlap = len(left_chars & right_chars) / len(left_chars | right_chars)
            return overlap > 0.6

        return False

    @staticmethod
    def _generate_reason(item: dict) -> str:
        """Build the human-readable recommendation reason from the scores.

        Each dimension at or above its threshold contributes one phrase. If
        none qualifies, a phrase derived from ``source`` is used, falling back
        to a generic one for unknown sources.
        """
        detail = item.get("score_detail", {})
        thresholds = (
            ("hotspot_score", 25, "当前热点话题"),
            ("weight_score", 15, "历史互动表现好"),
            ("scarcity_score", 15, "内容空白可抢占"),
            ("timeliness_score", 8, "时效性强"),
        )
        parts = [
            phrase
            for key, minimum, phrase in thresholds
            if detail.get(key, 0) >= minimum
        ]

        if not parts:
            by_source = {
                "hotspot": "热点趋势推荐",
                "weight": "基于历史表现推荐",
                "trend": "持续趋势主题",
            }
            parts.append(by_source.get(item.get("source", ""), "综合推荐"))

        return ",".join(parts)

    @staticmethod
    def _generate_angles(topic: str, source: str) -> list[str]:
        """Suggest three creation angles for a topic.

        Purely rule-based (no LLM call): the first category keyword found in
        the topic selects its templates, otherwise generic templates are used.
        Every angle is truncated to at most 30 characters. ``source`` is
        accepted but currently not used.
        """
        templates = TopicEngine._GENERIC_ANGLE_TEMPLATES
        for keyword, keyword_templates in TopicEngine._ANGLE_TEMPLATES.items():
            if keyword in topic:
                templates = keyword_templates
                break

        return [template.format(topic=topic)[:30] for template in templates[:3]]
|