xhs_factory/services/queue_ops.py
zhoujie 4d83c0f4a9
Some checks failed
CI / Lint (ruff) (push) Has been cancelled
CI / Import Check (push) Has been cancelled
feat(scheduler): 新增热点自动采集功能并优化发布路径
- 新增热点自动采集后台线程,支持定时搜索关键词并执行 AI 分析,结果缓存至结构化状态
- 新增热点分析状态管理接口,提供线程安全的 `get_last_analysis` 和 `set_last_analysis` 方法
- 新增热点数据桥接函数 `feed_hotspot_to_engine`,将分析结果注入 TopicEngine 实现热点加权推荐
- 新增热点选题下拉组件,分析完成后自动填充推荐选题,选中后自动写入选题输入框
- 优化 `generate_from_hotspot` 函数,自动获取结构化分析摘要并增强生成上下文
- 新增热点自动采集配置节点,支持通过 `config.json` 管理关键词和采集间隔

♻️ refactor(queue): 实现智能排期引擎并统一发布路径

- 新增智能排期引擎,基于 `AnalyticsService` 的 `time_weights` 自动计算最优发布时段
- 新增 `PublishQueue.suggest_schedule_time` 和 `auto_schedule_item` 方法,支持时段冲突检测和内容分布控制
- 修改 `generate_to_queue` 函数,新增 `auto_schedule` 和 `auto_approve` 参数,支持自动排期和自动审核
- 重构 `_scheduler_loop` 的自动发布分支,改为调用 `generate_to_queue` 通过队列发布,统一发布路径
- 重构 `auto_publish_once` 函数,移除直接发布逻辑,改为生成内容入队并返回队列信息
- 新增队列时段使用情况查询方法 `get_slot_usage`,支持 UI 热力图展示

📝 docs(openspec): 新增内容排期优化和热点探测优化规范文档

- 新增 `smart-schedule-engine` 规范,定义智能排期引擎的功能需求和场景
- 新增 `unified-publish-path` 规范,定义统一发布路径的改造方案
- 新增 `hotspot-analysis-state` 规范,定义热点分析状态存储的线程安全接口
- 新增 `hotspot-auto-collector` 规范,定义定时热点自动采集的任务流程
- 新增 `hotspot-engine-bridge` 规范,定义热点数据注入 TopicEngine 的桥接机制
- 新增 `hotspot-topic-selector` 规范,定义热点选题下拉组件的交互行为
- 更新 `services-queue`、`services-scheduler` 和 `services-hotspot` 规范,反映功能修改和新增参数

🔧 chore(config): 新增热点自动采集默认配置

- 在 `DEFAULT_CONFIG` 中新增 `hotspot_auto_collect` 配置节点,包含 `enabled`、`keywords` 和 `interval_hours` 字段
- 提供默认关键词列表 `["穿搭", "美妆", "好物"]` 和默认采集间隔 4 小时

🐛 fix(llm): 增强 JSON 解析容错能力

- 新增 `_try_fix_truncated_json` 方法,尝试修复被 token 限制截断的 JSON 输出
- 支持多种截断场景的自动补全,包括字符串值、数组和嵌套对象的截断修复
- 提高 LLM 分析热点等返回 JSON 的函数的稳定性

💄 style(ui): 优化队列管理和热点探测界面

- 在队列生成区域新增自动排期复选框,勾选后隐藏手动排期输入框
- 在日历视图旁新增推荐时段 Markdown 面板,展示各时段权重和建议热力图
- 在热点探测 Tab 新增推荐选题下拉组件,分析完成后动态填充选项
- 在热点探测 Tab 新增热点自动采集控制区域,支持启动、停止和配置采集参数
2026-02-28 22:22:27 +08:00

379 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
services/queue_ops.py
发布队列操作:生成入队、状态管理、发布控制
"""
import os
import re
import time
import random
import logging
from PIL import Image
from .config_manager import ConfigManager, OUTPUT_DIR
from .publish_queue import (
PublishQueue, QueuePublisher,
STATUS_DRAFT, STATUS_APPROVED, STATUS_SCHEDULED, STATUS_PUBLISHING,
STATUS_PUBLISHED, STATUS_FAILED, STATUS_REJECTED, STATUS_LABELS,
)
from .llm_service import LLMService
from .sd_service import SDService
from .mcp_client import get_mcp_client
from .connection import _get_llm_config
from .persona import DEFAULT_TOPICS, DEFAULT_STYLES, _resolve_persona
from .content import generate_copy, generate_images
from .rate_limiter import _increment_stat, _clear_error_streak
cfg = ConfigManager()
logger = logging.getLogger("autobot")
# 模块级依赖(通过 configure() 注入)
_pub_queue: "PublishQueue | None" = None
_queue_publisher: "QueuePublisher | None" = None
_analytics = None
_log_fn = None
def configure(pub_queue, queue_publisher, analytics_svc, log_fn=None):
"""从 main.py 初始化段注入队列和分析服务"""
global _pub_queue, _queue_publisher, _analytics, _log_fn
_pub_queue = pub_queue
_queue_publisher = queue_publisher
_analytics = analytics_svc
_log_fn = log_fn
# 注册发布回调(在依赖注入完成后)
_queue_publisher.set_publish_callback(_queue_publish_callback)
_queue_publisher.set_log_callback(_log_fn or _log)
def _log(msg: str):
if _log_fn:
_log_fn(msg)
else:
logger.info("[queue] %s", msg)
# ==================================================
# 发布队列相关函数
# ==================================================
def generate_to_queue(topics_str, sd_url_val, sd_model_name, model, persona_text=None,
quality_mode_val=None, face_swap_on=False, count=1,
scheduled_time=None, auto_schedule=False, auto_approve=False):
"""批量生成内容 → 加入发布队列(不直接发布)
Args:
auto_schedule: 为每篇内容自动分配最优排期时间
auto_approve: 入队后自动审核通过
"""
try:
topics = [t.strip() for t in topics_str.split(",") if t.strip()] if topics_str else DEFAULT_TOPICS
use_weights = cfg.get("use_smart_weights", True) and _analytics.has_weights
api_key, base_url, _ = _get_llm_config()
if not api_key:
return "❌ LLM 未配置"
if not sd_url_val or not sd_model_name:
return "❌ SD WebUI 未连接或未选择模型"
count = max(1, min(int(count), 10))
results = []
for i in range(count):
try:
_log(f"📋 [队列生成] 正在生成第 {i+1}/{count} 篇...")
if use_weights:
topic = _analytics.get_weighted_topic(topics)
style = _analytics.get_weighted_style(DEFAULT_STYLES)
else:
topic = random.choice(topics)
style = random.choice(DEFAULT_STYLES)
svc = LLMService(api_key, base_url, model)
persona = _resolve_persona(persona_text) if persona_text else None
if use_weights:
weight_insights = f"高权重主题: {', '.join(list(_analytics._weights.get('topic_weights', {}).keys())[:5])}\n"
weight_insights += f"权重摘要: {_analytics.weights_summary}"
title_advice = _analytics.get_title_advice()
hot_tags = ", ".join(_analytics.get_top_tags(8))
try:
data = svc.generate_weighted_copy(topic, style, weight_insights, title_advice, hot_tags, sd_model_name=sd_model_name, persona=persona)
except Exception:
data = svc.generate_copy(topic, style, sd_model_name=sd_model_name, persona=persona)
else:
data = svc.generate_copy(topic, style, sd_model_name=sd_model_name, persona=persona)
title = (data.get("title", "") or "")[:20]
content = data.get("content", "")
sd_prompt = data.get("sd_prompt", "")
tags = data.get("tags", [])
if use_weights:
top_tags = _analytics.get_top_tags(5)
for t in top_tags:
if t not in tags:
tags.append(t)
tags = tags[:10]
if not title:
_log(f"⚠️ 第 {i+1} 篇文案生成失败,跳过")
continue
# 生成图片
sd_svc = SDService(sd_url_val)
face_image = None
if face_swap_on:
face_image = SDService.load_face_image()
images = sd_svc.txt2img(prompt=sd_prompt, model=sd_model_name,
face_image=face_image,
quality_mode=quality_mode_val or "快速 (约30秒)",
persona=persona)
if not images:
_log(f"⚠️ 第 {i+1} 篇图片生成失败,跳过")
continue
# 保存备份
ts = int(time.time())
safe_title = re.sub(r'[\\/*?:"<>|]', "", title)[:20]
backup_dir = os.path.join(OUTPUT_DIR, f"{ts}_{safe_title}")
os.makedirs(backup_dir, exist_ok=True)
with open(os.path.join(backup_dir, "文案.txt"), "w", encoding="utf-8") as f:
f.write(f"标题: {title}\n风格: {style}\n主题: {topic}\n\n{content}\n\n标签: {', '.join(tags)}\n\nSD Prompt: {sd_prompt}")
image_paths = []
for idx, img in enumerate(images):
if isinstance(img, Image.Image):
path = os.path.abspath(os.path.join(backup_dir, f"{idx+1}.jpg"))
if img.mode != "RGB":
img = img.convert("RGB")
img.save(path, format="JPEG", quality=95)
image_paths.append(path)
if not image_paths:
continue
# 加入队列
item_id = _pub_queue.add(
title=title, content=content, sd_prompt=sd_prompt,
tags=tags, image_paths=image_paths, backup_dir=backup_dir,
topic=topic, style=style, persona=persona or "",
status=STATUS_DRAFT, scheduled_time=scheduled_time,
)
# 自动排期
sched_msg = ""
if auto_schedule and _analytics:
ok = _pub_queue.auto_schedule_item(item_id, _analytics)
if ok:
item = _pub_queue.get(item_id)
sched_msg = f"{item['scheduled_time'][:16]}" if item else ""
_log(f"🕐 #{item_id} 自动排期{sched_msg}")
# 自动审核
if auto_approve:
_pub_queue.approve(item_id)
_log(f"✅ #{item_id} 自动审核通过")
results.append(f"#{item_id} {title}{sched_msg}")
_log(f"📋 已加入队列 #{item_id}: {title}")
# 多篇间隔
if i < count - 1:
time.sleep(2)
except Exception as e:
_log(f"⚠️ 第 {i+1} 篇生成异常: {e}")
continue
if not results:
return "❌ 所有内容生成失败,请检查配置"
return f"✅ 已生成 {len(results)} 篇内容加入队列:\n" + "\n".join(f" - {r}" for r in results)
except Exception as e:
return f"❌ 批量生成异常: {e}"
def _queue_publish_callback(item: dict) -> tuple[bool, str]:
"""队列发布回调: 从队列项数据发布到小红书"""
try:
mcp_url = cfg.get("mcp_url", "http://localhost:18060/mcp")
client = get_mcp_client(mcp_url)
title = item.get("title", "")
content = item.get("content", "")
image_paths = item.get("image_paths", [])
tags = item.get("tags", [])
if not title or not image_paths:
return False, "标题或图片缺失"
# 验证图片文件存在
valid_paths = [p for p in image_paths if os.path.isfile(p)]
if not valid_paths:
return False, "所有图片文件不存在"
result = client.publish_content(
title=title, content=content, images=valid_paths, tags=tags,
)
if "error" in result:
return False, result["error"]
_increment_stat("publishes")
_clear_error_streak()
return True, result.get("text", "发布成功")
except Exception as e:
return False, str(e)
def queue_format_table():
"""返回当前队列的完整表格(不过滤)"""
return _pub_queue.format_queue_table() if _pub_queue else ""
def queue_format_calendar():
"""返回未来14天的日历视图"""
return _pub_queue.format_calendar(14) if _pub_queue else ""
def queue_refresh_table(status_filter):
"""刷新队列表格"""
statuses = None
if status_filter and status_filter != "全部":
status_map = {v: k for k, v in STATUS_LABELS.items()}
if status_filter in status_map:
statuses = [status_map[status_filter]]
return _pub_queue.format_queue_table(statuses)
def queue_refresh_calendar():
"""刷新日历视图"""
return _pub_queue.format_calendar(14)
def queue_preview_item(item_id_str):
"""预览队列项"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
return _pub_queue.format_preview(item_id)
except (ValueError, TypeError):
return "❌ 请输入有效的队列项 ID数字"
def queue_approve_item(item_id_str, scheduled_time_str):
"""审核通过"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
sched = scheduled_time_str.strip() if scheduled_time_str else None
ok = _pub_queue.approve(item_id, scheduled_time=sched)
if ok:
status = "已排期" if sched else "待发布"
return f"✅ #{item_id} 已审核通过 → {status}"
return f"❌ #{item_id} 无法审核(可能不是草稿/失败状态)"
except (ValueError, TypeError):
return "❌ 请输入有效的 ID"
def queue_reject_item(item_id_str):
"""拒绝队列项"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
ok = _pub_queue.reject(item_id)
return f"✅ #{item_id} 已拒绝" if ok else f"❌ #{item_id} 无法拒绝"
except (ValueError, TypeError):
return "❌ 请输入有效的 ID"
def queue_delete_item(item_id_str):
"""删除队列项"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
ok = _pub_queue.delete(item_id)
return f"✅ #{item_id} 已删除" if ok else f"❌ #{item_id} 无法删除(可能正在发布中)"
except (ValueError, TypeError):
return "❌ 请输入有效的 ID"
def queue_retry_item(item_id_str):
"""重试失败项"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
ok = _pub_queue.retry(item_id)
return f"✅ #{item_id} 已重新加入待发布" if ok else f"❌ #{item_id} 无法重试(不是失败状态)"
except (ValueError, TypeError):
return "❌ 请输入有效的 ID"
def queue_publish_now(item_id_str):
"""立即发布队列项"""
try:
item_id = int(str(item_id_str).strip().replace("#", ""))
return _queue_publisher.publish_now(item_id)
except (ValueError, TypeError):
return "❌ 请输入有效的 ID"
def queue_start_processor():
"""启动队列后台处理器"""
if _queue_publisher.is_running:
return "⚠️ 队列处理器已在运行中"
_queue_publisher.start(check_interval=60)
return "✅ 队列处理器已启动,每分钟检查待发布项"
def queue_stop_processor():
"""停止队列后台处理器"""
if not _queue_publisher.is_running:
return "⚠️ 队列处理器未在运行"
_queue_publisher.stop()
return "🛑 队列处理器已停止"
def queue_get_status():
"""获取队列状态摘要"""
counts = _pub_queue.count_by_status()
running = "🟢 运行中" if _queue_publisher.is_running else "⚪ 未启动"
parts = [f"**队列处理器**: {running}"]
for s, label in STATUS_LABELS.items():
cnt = counts.get(s, 0)
if cnt > 0:
parts.append(f"{label}: {cnt}")
total = sum(counts.values())
parts.append(f"**合计**: {total}")
return " · ".join(parts)
def queue_batch_approve(status_filter):
"""批量审核通过所有草稿"""
items = _pub_queue.list_by_status([STATUS_DRAFT])
if not items:
return "📭 没有待审核的草稿"
approved = 0
for item in items:
if _pub_queue.approve(item["id"]):
approved += 1
return f"✅ 已批量审核通过 {approved}"
def queue_generate_and_refresh(topics_str, sd_url_val, sd_model_name, model,
persona_text, quality_mode_val, face_swap_on,
gen_count, gen_schedule_time, auto_schedule=False):
"""生成内容到队列 + 刷新表格"""
msg = generate_to_queue(
topics_str, sd_url_val, sd_model_name, model,
persona_text=persona_text, quality_mode_val=quality_mode_val,
face_swap_on=face_swap_on, count=gen_count,
scheduled_time=gen_schedule_time.strip() if gen_schedule_time else None,
auto_schedule=auto_schedule,
)
table = _pub_queue.format_queue_table()
calendar = _pub_queue.format_calendar(14)
status = queue_get_status()
return msg, table, calendar, status
# 调度器下次执行时间追踪