xhs_factory/publish_queue.py
zhoujie 5ee26cb782 feat(publish): 新增内容发布队列与排期系统
- 新增发布队列模块 `publish_queue.py`,实现基于 SQLite 的持久化队列管理
- 支持内容草稿、审核、排期、自动发布、失败重试等全流程状态管理
- 新增批量生成到队列功能,支持智能权重选择和主题池随机选取
- 新增队列后台处理器,支持定时检查和自动发布已排期内容
- 新增内容排期日历视图,可视化展示近期的发布计划
- 新增队列管理界面,包含状态筛选、单项操作和批量审核功能
- 新增发布回调机制,将队列项数据发布到小红书平台
- 新增配置项 `use_smart_weights` 控制是否启用智能权重生成
- 更新主界面,新增第八个标签页“内容排期”,集成所有队列相关功能
2026-02-10 21:57:26 +08:00

584 lines
23 KiB
Python

"""
发布队列模块
SQLite 持久化的内容排期 + 发布队列,支持草稿预审、定时发布、失败重试
"""
import sqlite3
import json
import os
import time
import logging
import threading
from datetime import datetime, timedelta
from typing import Optional
logger = logging.getLogger(__name__)
# 队列项状态
STATUS_DRAFT = "draft" # 草稿 — 待审核
STATUS_APPROVED = "approved" # 已审核 — 待排期或立即可发布
STATUS_SCHEDULED = "scheduled" # 已排期 — 定时发布
STATUS_PUBLISHING = "publishing" # 发布中
STATUS_PUBLISHED = "published" # 已发布
STATUS_FAILED = "failed" # 发布失败
STATUS_REJECTED = "rejected" # 已拒绝/丢弃
ALL_STATUSES = [STATUS_DRAFT, STATUS_APPROVED, STATUS_SCHEDULED,
STATUS_PUBLISHING, STATUS_PUBLISHED, STATUS_FAILED, STATUS_REJECTED]
STATUS_LABELS = {
STATUS_DRAFT: "📝 草稿",
STATUS_APPROVED: "✅ 待发布",
STATUS_SCHEDULED: "🕐 已排期",
STATUS_PUBLISHING: "🚀 发布中",
STATUS_PUBLISHED: "✅ 已发布",
STATUS_FAILED: "❌ 失败",
STATUS_REJECTED: "🚫 已拒绝",
}
MAX_RETRIES = 2
class PublishQueue:
"""发布队列管理器 (SQLite 持久化)"""
def __init__(self, workspace_dir: str):
self.db_path = os.path.join(workspace_dir, "publish_queue.db")
os.makedirs(workspace_dir, exist_ok=True)
self._init_db()
# 发布中状态恢复 (启动时把 publishing → failed)
self._recover_stale()
def _get_conn(self) -> sqlite3.Connection:
conn = sqlite3.connect(self.db_path, timeout=10)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
return conn
def _init_db(self):
"""初始化数据库表"""
conn = self._get_conn()
try:
conn.execute("""
CREATE TABLE IF NOT EXISTS queue (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
content TEXT NOT NULL DEFAULT '',
sd_prompt TEXT DEFAULT '',
tags TEXT DEFAULT '[]',
image_paths TEXT DEFAULT '[]',
backup_dir TEXT DEFAULT '',
status TEXT NOT NULL DEFAULT 'draft',
scheduled_time TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
published_at TEXT,
topic TEXT DEFAULT '',
style TEXT DEFAULT '',
persona TEXT DEFAULT '',
error_message TEXT DEFAULT '',
retry_count INTEGER DEFAULT 0,
publish_result TEXT DEFAULT ''
)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_queue_status ON queue(status)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_queue_scheduled ON queue(scheduled_time)
""")
conn.commit()
finally:
conn.close()
def _recover_stale(self):
"""启动时将残留的 publishing 状态恢复为 failed"""
conn = self._get_conn()
try:
conn.execute(
"UPDATE queue SET status = ?, error_message = '程序重启,发布中断' "
"WHERE status = ?",
(STATUS_FAILED, STATUS_PUBLISHING),
)
conn.commit()
finally:
conn.close()
# ---------- CRUD ----------
def add(self, title: str, content: str, sd_prompt: str = "",
tags: list = None, image_paths: list = None,
backup_dir: str = "", topic: str = "", style: str = "",
persona: str = "", status: str = STATUS_DRAFT,
scheduled_time: str = None) -> int:
"""添加一个队列项,返回 ID"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn = self._get_conn()
try:
cur = conn.execute(
"""INSERT INTO queue (title, content, sd_prompt, tags, image_paths,
backup_dir, status, scheduled_time, created_at, updated_at,
topic, style, persona)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(title, content, sd_prompt,
json.dumps(tags or [], ensure_ascii=False),
json.dumps(image_paths or [], ensure_ascii=False),
backup_dir, status, scheduled_time, now, now,
topic, style, persona),
)
conn.commit()
item_id = cur.lastrowid
logger.info("📋 队列添加 #%d: %s [%s]", item_id, title[:20], status)
return item_id
finally:
conn.close()
def get(self, item_id: int) -> Optional[dict]:
"""获取单个队列项"""
conn = self._get_conn()
try:
row = conn.execute("SELECT * FROM queue WHERE id = ?", (item_id,)).fetchone()
return self._row_to_dict(row) if row else None
finally:
conn.close()
def update_status(self, item_id: int, status: str,
error_message: str = "", publish_result: str = ""):
"""更新状态"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn = self._get_conn()
try:
fields = "status = ?, updated_at = ?"
params = [status, now]
if error_message:
fields += ", error_message = ?"
params.append(error_message)
if publish_result:
fields += ", publish_result = ?"
params.append(publish_result)
if status == STATUS_PUBLISHED:
fields += ", published_at = ?"
params.append(now)
params.append(item_id)
conn.execute(f"UPDATE queue SET {fields} WHERE id = ?", params)
conn.commit()
finally:
conn.close()
def update_content(self, item_id: int, title: str = None, content: str = None,
sd_prompt: str = None, tags: list = None,
scheduled_time: str = None):
"""更新内容 (仅 draft/approved/scheduled/failed 状态可编辑)"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn = self._get_conn()
try:
row = conn.execute("SELECT status FROM queue WHERE id = ?", (item_id,)).fetchone()
if not row or row["status"] in (STATUS_PUBLISHING, STATUS_PUBLISHED):
return False
sets, params = ["updated_at = ?"], [now]
if title is not None:
sets.append("title = ?"); params.append(title)
if content is not None:
sets.append("content = ?"); params.append(content)
if sd_prompt is not None:
sets.append("sd_prompt = ?"); params.append(sd_prompt)
if tags is not None:
sets.append("tags = ?"); params.append(json.dumps(tags, ensure_ascii=False))
if scheduled_time is not None:
sets.append("scheduled_time = ?"); params.append(scheduled_time)
params.append(item_id)
conn.execute(f"UPDATE queue SET {', '.join(sets)} WHERE id = ?", params)
conn.commit()
return True
finally:
conn.close()
def delete(self, item_id: int) -> bool:
"""删除队列项 (仅非 publishing 状态可删)"""
conn = self._get_conn()
try:
row = conn.execute("SELECT status FROM queue WHERE id = ?", (item_id,)).fetchone()
if not row or row["status"] == STATUS_PUBLISHING:
return False
conn.execute("DELETE FROM queue WHERE id = ?", (item_id,))
conn.commit()
return True
finally:
conn.close()
def approve(self, item_id: int, scheduled_time: str = None) -> bool:
"""审核通过 → 进入待发布或排期"""
conn = self._get_conn()
try:
row = conn.execute("SELECT status FROM queue WHERE id = ?", (item_id,)).fetchone()
if not row or row["status"] not in (STATUS_DRAFT, STATUS_FAILED):
return False
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
new_status = STATUS_SCHEDULED if scheduled_time else STATUS_APPROVED
conn.execute(
"UPDATE queue SET status = ?, scheduled_time = ?, updated_at = ?, "
"error_message = '', retry_count = 0 WHERE id = ?",
(new_status, scheduled_time, now, item_id),
)
conn.commit()
return True
finally:
conn.close()
def reject(self, item_id: int) -> bool:
"""拒绝/丢弃"""
return self._set_status_if(item_id, STATUS_REJECTED,
allowed_from=[STATUS_DRAFT, STATUS_APPROVED, STATUS_SCHEDULED, STATUS_FAILED])
def retry(self, item_id: int) -> bool:
"""失败项重试"""
conn = self._get_conn()
try:
row = conn.execute("SELECT status, retry_count FROM queue WHERE id = ?", (item_id,)).fetchone()
if not row or row["status"] != STATUS_FAILED:
return False
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn.execute(
"UPDATE queue SET status = ?, updated_at = ?, error_message = '' WHERE id = ?",
(STATUS_APPROVED, now, item_id),
)
conn.commit()
return True
finally:
conn.close()
def _set_status_if(self, item_id: int, new_status: str, allowed_from: list) -> bool:
conn = self._get_conn()
try:
row = conn.execute("SELECT status FROM queue WHERE id = ?", (item_id,)).fetchone()
if not row or row["status"] not in allowed_from:
return False
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn.execute("UPDATE queue SET status = ?, updated_at = ? WHERE id = ?",
(new_status, now, item_id))
conn.commit()
return True
finally:
conn.close()
# ---------- 查询 ----------
def list_by_status(self, statuses: list = None, limit: int = 50) -> list[dict]:
"""按状态查询队列项"""
conn = self._get_conn()
try:
if statuses:
placeholders = ",".join("?" * len(statuses))
rows = conn.execute(
f"SELECT * FROM queue WHERE status IN ({placeholders}) "
"ORDER BY created_at DESC LIMIT ?",
statuses + [limit],
).fetchall()
else:
rows = conn.execute(
"SELECT * FROM queue ORDER BY created_at DESC LIMIT ?", (limit,)
).fetchall()
return [self._row_to_dict(r) for r in rows]
finally:
conn.close()
def get_pending_publish(self) -> list[dict]:
"""获取待发布项: approved 或 scheduled 且已到时间"""
conn = self._get_conn()
try:
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
rows = conn.execute(
"""SELECT * FROM queue
WHERE (status = ? OR (status = ? AND scheduled_time <= ?))
ORDER BY
CASE WHEN scheduled_time IS NOT NULL THEN scheduled_time
ELSE created_at END ASC
LIMIT 10""",
(STATUS_APPROVED, STATUS_SCHEDULED, now),
).fetchall()
return [self._row_to_dict(r) for r in rows]
finally:
conn.close()
def count_by_status(self) -> dict:
"""统计各状态数量"""
conn = self._get_conn()
try:
rows = conn.execute(
"SELECT status, COUNT(*) as cnt FROM queue GROUP BY status"
).fetchall()
return {r["status"]: r["cnt"] for r in rows}
finally:
conn.close()
def get_calendar_data(self, days: int = 30) -> list[dict]:
"""获取日历数据 (最近 N 天的发布/排期概览)"""
conn = self._get_conn()
try:
cutoff = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d %H:%M:%S")
rows = conn.execute(
"""SELECT id, title, status, scheduled_time, published_at, created_at
FROM queue
WHERE created_at >= ? OR scheduled_time >= ? OR published_at >= ?
ORDER BY COALESCE(scheduled_time, published_at, created_at) ASC""",
(cutoff, cutoff, cutoff),
).fetchall()
return [dict(r) for r in rows]
finally:
conn.close()
# ---------- 辅助 ----------
@staticmethod
def _row_to_dict(row: sqlite3.Row) -> dict:
"""Row → dict, 并解析 JSON 字段"""
d = dict(row)
for key in ("tags", "image_paths"):
if key in d and isinstance(d[key], str):
try:
d[key] = json.loads(d[key])
except json.JSONDecodeError:
d[key] = []
return d
def format_queue_table(self, statuses: list = None, limit: int = 30) -> str:
"""生成 Markdown 格式的队列表格"""
items = self.list_by_status(statuses, limit)
if not items:
return "📭 队列为空"
lines = ["| # | 状态 | 标题 | 主题 | 排期时间 | 创建时间 |",
"|---|------|------|------|----------|----------|"]
for item in items:
status_label = STATUS_LABELS.get(item["status"], item["status"])
sched = item.get("scheduled_time") or ""
if sched != "":
sched = sched[:16] # 去掉秒
created = item["created_at"][:16] if item.get("created_at") else ""
title_short = (item.get("title") or "")[:18]
topic_short = (item.get("topic") or "")[:10]
lines.append(f"| {item['id']} | {status_label} | {title_short} | {topic_short} | {sched} | {created} |")
# 统计摘要
counts = self.count_by_status()
summary_parts = []
for s, label in STATUS_LABELS.items():
cnt = counts.get(s, 0)
if cnt > 0:
summary_parts.append(f"{label}: {cnt}")
summary = " · ".join(summary_parts) if summary_parts else "全部为空"
return f"**队列统计**: {summary}\n\n" + "\n".join(lines)
def format_calendar(self, days: int = 14) -> str:
"""生成简易日历视图 (Markdown)"""
data = self.get_calendar_data(days)
if not data:
return "📅 暂无排期数据"
# 按日期分组
by_date = {}
for item in data:
# 优先用排期时间,其次发布时间,最后创建时间
dt_str = item.get("scheduled_time") or item.get("published_at") or item["created_at"]
date_key = dt_str[:10] if dt_str else "未知"
by_date.setdefault(date_key, []).append(item)
lines = ["### 📅 内容日历 (近 %d 天)\n" % days]
today = datetime.now().strftime("%Y-%m-%d")
for date_key in sorted(by_date.keys()):
marker = " 📌 **今天**" if date_key == today else ""
lines.append(f"**{date_key}**{marker}")
for item in by_date[date_key]:
status_icon = STATUS_LABELS.get(item["status"], "")
time_part = ""
if item.get("scheduled_time"):
time_part = f"{item['scheduled_time'][11:16]}"
elif item.get("published_at"):
time_part = f"{item['published_at'][11:16]}"
title_short = (item.get("title") or "无标题")[:20]
lines.append(f" - {status_icon} #{item['id']} {title_short}{time_part}")
lines.append("")
return "\n".join(lines)
def format_preview(self, item_id: int) -> str:
"""生成单个项目的详细预览 (Markdown)"""
item = self.get(item_id)
if not item:
return "❌ 未找到该队列项"
status_label = STATUS_LABELS.get(item["status"], item["status"])
tags = item.get("tags", [])
tags_str = " ".join(f"#{t}" for t in tags) if tags else "无标签"
images = item.get("image_paths", [])
img_count = len(images) if images else 0
lines = [
f"## {status_label} #{item['id']}",
f"### 📌 {item.get('title', '无标题')}",
"",
item.get("content", "无正文"),
"",
f"---",
f"**主题**: {item.get('topic', '')} · **风格**: {item.get('style', '')}",
f"**标签**: {tags_str}",
f"**图片**: {img_count}",
f"**人设**: {(item.get('persona') or '')[:30]}",
]
if item.get("scheduled_time"):
lines.append(f"**排期**: {item['scheduled_time']}")
if item.get("error_message"):
lines.append(f"**错误**: ❌ {item['error_message']}")
if item.get("backup_dir"):
lines.append(f"**备份**: `{item['backup_dir']}`")
lines.extend([
f"**创建**: {item.get('created_at', '')}",
f"**更新**: {item.get('updated_at', '')}",
])
if item.get("published_at"):
lines.append(f"**发布**: {item['published_at']}")
return "\n".join(lines)
class QueuePublisher:
"""后台队列发布处理器"""
def __init__(self, queue: PublishQueue):
self.queue = queue
self._running = threading.Event()
self._thread = None
self._publish_fn = None # 由外部注册的发布回调
self._log_fn = None # 日志回调
def set_publish_callback(self, fn):
"""注册发布回调: fn(item: dict) -> (success: bool, message: str)"""
self._publish_fn = fn
def set_log_callback(self, fn):
"""注册日志回调: fn(msg: str)"""
self._log_fn = fn
def _log(self, msg: str):
logger.info(msg)
if self._log_fn:
try:
self._log_fn(msg)
except Exception:
pass
def start(self, check_interval: int = 60):
"""启动后台队列处理"""
if self._running.is_set():
return
self._running.set()
self._thread = threading.Thread(
target=self._loop, args=(check_interval,), daemon=True
)
self._thread.start()
self._log("📋 发布队列处理器已启动")
def stop(self):
"""停止队列处理"""
self._running.clear()
self._log("📋 发布队列处理器已停止")
@property
def is_running(self) -> bool:
return self._running.is_set()
def _loop(self, interval: int):
while self._running.is_set():
try:
self._process_pending()
except Exception as e:
self._log(f"❌ 队列处理异常: {e}")
logger.error("队列处理异常: %s", e, exc_info=True)
# 等待,但可中断
for _ in range(interval):
if not self._running.is_set():
break
time.sleep(1)
def _process_pending(self):
"""处理所有待发布项"""
if not self._publish_fn:
return
pending = self.queue.get_pending_publish()
if not pending:
return
for item in pending:
if not self._running.is_set():
break
item_id = item["id"]
title = item.get("title", "")[:20]
self._log(f"📋 队列发布 #{item_id}: {title}")
# 标记为发布中
self.queue.update_status(item_id, STATUS_PUBLISHING)
try:
success, message = self._publish_fn(item)
if success:
self.queue.update_status(item_id, STATUS_PUBLISHED, publish_result=message)
self._log(f"✅ 队列发布成功 #{item_id}: {title}")
else:
retry_count = item.get("retry_count", 0) + 1
if retry_count <= MAX_RETRIES:
# 还有重试机会 → approved 状态等下一轮
self.queue.update_status(item_id, STATUS_APPROVED, error_message=f"{retry_count}次失败: {message}")
conn = self.queue._get_conn()
try:
conn.execute("UPDATE queue SET retry_count = ? WHERE id = ?",
(retry_count, item_id))
conn.commit()
finally:
conn.close()
self._log(f"⚠️ #{item_id} 发布失败 (重试 {retry_count}/{MAX_RETRIES}): {message}")
else:
self.queue.update_status(item_id, STATUS_FAILED, error_message=message)
self._log(f"❌ #{item_id} 发布失败已达重试上限: {message}")
except Exception as e:
self.queue.update_status(item_id, STATUS_FAILED, error_message=str(e))
self._log(f"❌ #{item_id} 发布异常: {e}")
# 发布间隔 (模拟真人)
import random
wait = random.randint(5, 15)
self._log(f"⏳ 等待 {wait}s 后处理下一项...")
for _ in range(wait):
if not self._running.is_set():
break
time.sleep(1)
def publish_now(self, item_id: int) -> str:
"""立即发布指定项 (不经过后台循环)"""
if not self._publish_fn:
return "❌ 发布回调未注册"
item = self.queue.get(item_id)
if not item:
return "❌ 未找到队列项"
if item["status"] not in (STATUS_APPROVED, STATUS_SCHEDULED, STATUS_FAILED):
return f"❌ 当前状态 [{STATUS_LABELS.get(item['status'], item['status'])}] 不可发布"
self.queue.update_status(item_id, STATUS_PUBLISHING)
try:
success, message = self._publish_fn(item)
if success:
self.queue.update_status(item_id, STATUS_PUBLISHED, publish_result=message)
return f"✅ 发布成功: {message}"
else:
self.queue.update_status(item_id, STATUS_FAILED, error_message=message)
return f"❌ 发布失败: {message}"
except Exception as e:
self.queue.update_status(item_id, STATUS_FAILED, error_message=str(e))
return f"❌ 发布异常: {e}"