# File contents: run_phase2_vol1.py
#!/usr/bin/env python3
"""
Run Phase 2 for Volume 1 only.
1. Init project with Bible
2. Run Phase 1 (volume-level outline) via DeepSeek API
3. Run Phase 2 (chapter-level plan) for Volume 1
"""
import sys, json, os, logging, urllib.request, re, time
from pathlib import Path
from dataclasses import asdict
# Configure root logging for this script: INFO and above, each line prefixed
# with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger('run_phase2')
# Put the sibling "core" directory at the front of sys.path so the
# project-local imports that follow (bible, arc_planner) can be resolved.
CORE_DIR = Path(__file__).resolve().parent / "core"
sys.path.insert(0, str(CORE_DIR))
from bible import BibleManager, Character, WorldRule, NovelBible, Arc
from arc_planner import DnDArcPlanner, VolumePlan, ChapterPlan, SenseNovaClient, FullNovelPlan
# ─── DeepSeek LLM Wrapper (swap into arc_planner) ───────────────
# API key read from the environment. The empty-string default means "not
# configured": DeepSeekClient() raises unless a key is passed explicitly.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
# Chat-completions endpoint every request is sent to.
DEEPSEEK_URL = "https://api.deepseek.com/v1/chat/completions"
# Model identifier placed in the "model" field of every request payload.
DEEPSEEK_MODEL = "deepseek-chat"
class DeepSeekClient:
    """DeepSeek API client compatible with SenseNovaClient interface.

    Exposes ``chat`` and ``extract_json`` so an instance can be handed to code
    that expects a ``SenseNovaClient``-shaped object.
    """

    # 4xx statuses that describe a transient condition and are worth retrying.
    _RETRYABLE_4XX = frozenset({408, 429})

    def __init__(self, api_key: str = ""):
        """Store the API key, falling back to the module-level DEEPSEEK_API_KEY.

        Raises:
            RuntimeError: if neither the argument nor the environment supplies a key.
        """
        self.api_key = api_key or DEEPSEEK_API_KEY
        if not self.api_key:
            raise RuntimeError("DEEPSEEK_API_KEY not set")

    @staticmethod
    def _is_retryable(exc: Exception) -> bool:
        """Return True when repeating the request could plausibly succeed.

        HTTP client errors (4xx) other than 408/429 mean the request itself is
        wrong (bad key, bad payload, no balance), so retrying only wastes time.
        Every other failure (5xx, network errors, malformed bodies) is retried.
        """
        # Local import keeps this block self-contained; urllib.error is part of
        # the standard library and is already loaded by urllib.request.
        from urllib.error import HTTPError

        if isinstance(exc, HTTPError):
            return exc.code >= 500 or exc.code in DeepSeekClient._RETRYABLE_4XX
        return True

    def chat(self, messages: list, temp: float = 0.7, max_tokens: int = 8192,
             retries: int = 3) -> str:
        """Send a chat-completion request and return the first choice's text.

        Args:
            messages: list of ``{"role": ..., "content": ...}`` dicts.
            temp: sampling temperature forwarded as ``temperature``.
            max_tokens: completion token cap forwarded as ``max_tokens``.
            retries: maximum number of attempts before giving up.

        Returns:
            The ``content`` string of the first returned choice.

        Raises:
            RuntimeError: when all attempts fail, or immediately when the
                server answers with a non-retryable HTTP status. The
                underlying error is attached as ``__cause__``.
        """
        payload = json.dumps({
            "model": DEEPSEEK_MODEL,
            "messages": messages,
            "temperature": temp,
            "max_tokens": max_tokens,
        }).encode("utf-8")
        last_error = None
        for attempt in range(retries):
            try:
                req = urllib.request.Request(
                    DEEPSEEK_URL, data=payload,
                    headers={
                        "Content-Type": "application/json",
                        "Authorization": f"Bearer {self.api_key}",
                    })
                with urllib.request.urlopen(req, timeout=300) as resp:
                    data = json.loads(resp.read().decode("utf-8"))
                if "choices" in data and data["choices"]:
                    return data["choices"][0]["message"]["content"]
                raise ValueError(f"Unexpected response: {data}")
            except Exception as e:
                # Deliberately broad: this is the retry boundary, and transport,
                # decoding and shape errors are all handled the same way.
                last_error = e
                logger.warning(f"DeepSeek call failed (attempt {attempt+1}/{retries}): {e}")
                if not self._is_retryable(e):
                    raise RuntimeError(
                        f"DeepSeek API rejected the request ({e}); not retrying") from e
                if attempt < retries - 1:
                    # Exponential backoff: 1s, 2s, 4s, ...
                    time.sleep(2 ** attempt)
        raise RuntimeError(f"DeepSeek API failed after {retries} retries") from last_error

    def extract_json(self, text: str) -> dict:
        """Same JSON extract logic as SenseNovaClient.

        Delegates to ``SenseNovaClient.extract_json`` with this instance
        passed in the ``self`` position.
        """
        return SenseNovaClient.extract_json(self, text)
# ─── Setup Project ──────────────────────────────────────────────
# Project root: taken from LOBSTER_NOVEL_DIR, defaulting to the current
# working directory when the variable is unset.
PROJECT_DIR = Path(os.environ.get("LOBSTER_NOVEL_DIR", "."))
# Bible file that setup_project() copies into the project root as bible.json.
BIBLE_SRC = PROJECT_DIR / "tests" / "lobster-test" / "bible.json"
def setup_project():
    """Seed the project root with the Bible and load it through BibleManager.

    Reads BIBLE_SRC, re-serialises it as pretty-printed UTF-8 JSON into
    ``PROJECT_DIR / "bible.json"``, then constructs a ``BibleManager`` on the
    project directory and logs a one-line summary of what it loaded.

    Returns:
        The ``BibleManager`` instance created for PROJECT_DIR.

    Exits the process with status 1 when BIBLE_SRC does not exist.
    """
    if not BIBLE_SRC.exists():
        logger.error(f"Bible source not found: {BIBLE_SRC}")
        sys.exit(1)

    # Round-trip through the json module so the copy is normalised
    # (non-ASCII kept verbatim, two-space indent) rather than byte-copied.
    source_text = BIBLE_SRC.read_text(encoding="utf-8")
    normalised = json.dumps(json.loads(source_text), ensure_ascii=False, indent=2)
    target = PROJECT_DIR / "bible.json"
    target.write_text(normalised, encoding="utf-8")
    logger.info(f"Bible saved to {target}")

    # Loading through the manager doubles as a sanity check of the copy.
    manager = BibleManager(PROJECT_DIR)
    loaded = manager.bible
    title = loaded.title
    character_count = len(loaded.characters)
    rule_count = len(loaded.world_rules)
    logger.info(f"Bible loaded: {title} | {character_count} characters | {rule_count} world rules")
    return manager
def _salvage_chapter_dicts(text: str) -> list:
    """Recover as many complete chapter objects as possible from raw LLM text.

    Strips Markdown code fences, locates the first ``[`` and tries to decode
    the array that starts there. If the array is truncated (the usual failure
    when the model hits its token limit), the elements are decoded one at a
    time and every object that parsed completely is kept.

    Returns:
        A list of decoded elements; empty when nothing usable was found.
    """
    cleaned = text.strip()
    if '```' in cleaned:
        cleaned = re.sub(r'```(?:json)?\s*', '', cleaned)
        cleaned = re.sub(r'\s*```', '', cleaned)
    start = cleaned.find('[')
    if start < 0:
        return []
    decoder = json.JSONDecoder()
    # Fast path: the array is complete (trailing prose after it is ignored).
    try:
        parsed, _ = decoder.raw_decode(cleaned, start)
        return parsed
    except ValueError:
        pass
    # Slow path: walk the array and keep each element that decodes cleanly.
    salvaged = []
    pos = start + 1
    length = len(cleaned)
    while True:
        while pos < length and cleaned[pos] in ' \t\r\n,':
            pos += 1
        if pos >= length or cleaned[pos] != '{':
            break
        try:
            item, pos = decoder.raw_decode(cleaned, pos)
        except ValueError:
            # First incomplete object marks the truncation point.
            break
        salvaged.append(item)
    return salvaged


def _build_chapter_plans(data, default_location: str) -> list:
    """Convert parsed LLM output into a list of ChapterPlan objects.

    Accepts either ``{"chapters": [...]}`` or a bare list of chapter dicts.
    Entries that are not dicts are skipped with a warning, and a ``number``
    that cannot be read as an integer falls back to 0, so one malformed entry
    cannot abort the whole batch or break the later sort and formatting.
    """
    if isinstance(data, dict):
        entries = data.get("chapters", [])
    else:
        entries = data
    if not isinstance(entries, list):
        return []
    plans = []
    for ch in entries:
        if not isinstance(ch, dict):
            logger.warning(f"Skipping malformed chapter entry: {ch!r}")
            continue
        try:
            number = int(ch.get("number", 0))
        except (TypeError, ValueError):
            number = 0
        plans.append(ChapterPlan(
            number=number,
            title=ch.get("title", f"第{number}章"),
            summary=ch.get("summary", ""),
            location=ch.get("location", default_location),
            word_target=4000,
            scenes=ch.get("scenes", 3),
            dramatic_type=ch.get("dramatic_type", "buildup"),
            character_focus=ch.get("character_focus", []),
        ))
    return plans


def run():
    """Generate the volume outline (Phase 1) and Volume 1's chapter plan (Phase 2).

    Steps:
      1. Seed the project with the Bible via ``setup_project``.
      2. Ask the planner for a 7-volume outline and save it to
         ``plans/phase1_volumes.json``.
      3. Expand Volume 1 chapter by chapter in batches of 10 LLM calls,
         recovering what it can from truncated responses.
      4. Save ``plans/volume_01_plan.json`` and print a table of contents
         with summary statistics.

    Raises:
        RuntimeError: if Phase 1 returns no volumes, or if the LLM client
            gives up on a request.
    """
    logger.info("=" * 60)
    logger.info("Planning Phase 1 + Phase 2 for Volume 1")
    logger.info("=" * 60)
    # 1. Setup project (the returned manager is not needed here)
    setup_project()
    # 2. Create LLM client
    llm = DeepSeekClient()
    logger.info(f"Using DeepSeek model: {DEEPSEEK_MODEL}")
    # 3. Create planner
    planner = DnDArcPlanner(PROJECT_DIR, llm=llm)
    # Override the llm's extract_json with SenseNovaClient's version
    llm.extract_json = lambda text: SenseNovaClient.extract_json(llm, text)
    # 4. Run Phase 1 (volume-level outline)
    logger.info("\n>>> Phase 1: Generating 7-volume outline...")
    vol_plans = planner.plan_volumes(
        total_volumes=7,
        total_chapters=476,
        total_words=2000000,
        final_build="1级吟游诗人/10级红龙术士/29级野蛮人",
        final_enemy="魅魔之主美坎修特 (Malcanthet)",
        locations=["费伦大陆主位面", "无底深渊", "九层地狱", "星界"],
    )
    # Save Phase 1 result
    plans_dir = PROJECT_DIR / "plans"
    plans_dir.mkdir(parents=True, exist_ok=True)
    phase1_path = plans_dir / "phase1_volumes.json"
    phase1_data = {
        "volumes": [asdict(v) for v in vol_plans],
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
    }
    phase1_path.write_text(
        json.dumps(phase1_data, ensure_ascii=False, indent=2), encoding="utf-8")
    logger.info(f"\nPhase 1 complete: {len(vol_plans)} volumes")
    for v in vol_plans:
        logger.info(f" 卷{v.number}: {v.title} ({v.chapters}章, 等级{v.level_range[0]}→{v.level_range[1]})")
    # Fail with a clear message instead of an IndexError on vol_plans[0].
    if not vol_plans:
        raise RuntimeError("Phase 1 returned no volumes; cannot plan Volume 1")
    # 5. Run Phase 2 for Volume 1 only — in BATCHES to avoid token limits
    vol1 = vol_plans[0]  # Volume 1
    total_ch = vol1.chapters
    batch_size = 10  # ~10 chapters per LLM call
    batches = [list(range(i+1, min(i+batch_size, total_ch)+1)) for i in range(0, total_ch, batch_size)]
    # Fallback location for chapters that omit one; empty when the volume lists none.
    default_location = vol1.main_locations[0] if vol1.main_locations else ""
    logger.info(f"\n>>> Phase 2: Expanding Volume 1 '{vol1.title}' ({total_ch} chapters in {len(batches)} batches)...")
    all_ch_plans = []
    for batch_idx, chapter_numbers in enumerate(batches):
        logger.info(f" Batch {batch_idx+1}/{len(batches)}: chapters {chapter_numbers[0]}-{chapter_numbers[-1]}...")
        sys_prompt = (
            "你是DND 5e奇幻小说大纲策划师。"
            "你的任务是根据一卷大纲, 将这一卷的特定章节展开为详细的章节计划。"
            "每一章都要有: 标题(4-6字的文采短标题)、内容概要(100-150字)、地点、戏剧张力类型。"
            "确保章节之间有悬念钩子连接。"
        )
        user_prompt = f"""请为卷1 '{vol1.title}' 的第{chapter_numbers[0]}章到第{chapter_numbers[-1]}章编写章节计划(共{len(chapter_numbers)}章)。
卷概要: {vol1.summary[:300]}
主要场景: {' / '.join(vol1.main_locations)}
等级范围: {vol1.level_range[0]}→{vol1.level_range[1]}级
输出JSON:
{{
"chapters": [
{{
"number": 整数,
"title": "4-6字短标题",
"summary": "100-150字内容概要",
"location": "场景地点",
"scenes": 3,
"dramatic_type": "buildup/climax/resolution/twist/reveal",
"character_focus": ["角色1", "角色2"]
}}
]
}}
要求:
- 每章标题4-6字, 有文采
- 概要100-150字, 包含具体情节
- 角色聚焦: 只列出本章出场的角色
- 考虑到卷1整体叙事: 从理查德在绝冬城酒馆的平凡生活开始, 经历龙血觉醒→被迫逃亡→梅丽安出现→前往深水城途中的一系列冒险
- 第1章要有力开场, 本章范围内的最后一章留悬念钩子
"""
        msg = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt},
        ]
        response_text = llm.chat(msg, temp=0.5, max_tokens=4096)
        try:
            data = llm.extract_json(response_text)
        except ValueError as e:
            logger.warning(f"Batch {batch_idx+1} JSON parse failed: {e}")
            # Truncated or wrapped output: keep every chapter object that
            # is complete instead of discarding the whole batch.
            data = _salvage_chapter_dicts(response_text)
            if data:
                logger.info(f"Batch {batch_idx+1}: recovered {len(data)} chapter objects from raw text")
            else:
                logger.error(f"Batch {batch_idx+1}: recovery failed too: no complete chapter objects found")
        ch_this_batch = _build_chapter_plans(data, default_location)
        if len(ch_this_batch) != len(chapter_numbers):
            logger.warning(
                f"Batch {batch_idx+1}: expected {len(chapter_numbers)} chapters, got {len(ch_this_batch)}")
        all_ch_plans.extend(ch_this_batch)
        logger.info(f" Batch {batch_idx+1}: {len(ch_this_batch)} chapters generated")
        time.sleep(1)  # rate limit buffer
    # Sort by chapter number
    all_ch_plans.sort(key=lambda c: c.number)
    logger.info(f"Phase 2 complete: {len(all_ch_plans)}/{total_ch} chapters generated")
    # Save Volume 1 chapter plan
    vol1_data = {
        "volume": asdict(vol1),
        "chapters": [asdict(c) for c in all_ch_plans],
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
    }
    vol1_path = plans_dir / "volume_01_plan.json"
    vol1_path.write_text(
        json.dumps(vol1_data, ensure_ascii=False, indent=2), encoding="utf-8")
    logger.info(f"\nPhase 2 complete: {len(all_ch_plans)} chapters for Volume 1")
    logger.info(f"Saved: {vol1_path}")
    # Summary
    print("\n" + "=" * 60)
    print("📖 第一卷章节目录")
    print("=" * 60)
    print(f"\n卷1: {vol1.title}")
    print(f"等级: {vol1.level_range[0]}→{vol1.level_range[1]} | 章节: {len(all_ch_plans)}章 | 每章目标: ~4000字")
    print(f"概要: {vol1.summary[:200]}")
    print(f"\n--- 章节列表 ---")
    for ch in all_ch_plans:
        print(f" Ch{ch.number:03d}: {ch.title}")
        print(f" {ch.summary[:120]}")
        print(f" [{ch.location}] {' / '.join(ch.character_focus[:3])}")
        print()
    # Print size statistics (character counts, not tokens)
    total_chars = sum(len(v.summary) for v in vol_plans)
    total_chapters_of_v1 = len(all_ch_plans)
    print(f"\n📊 统计")
    print(f" 卷1章节数: {total_chapters_of_v1}")
    print(f" 卷1总字数目标: ~{total_chapters_of_v1 * 4000:,}字")
    print(f" 7卷大纲概要总字符: {total_chars:,}")
    print(f" 大纲文件: {plans_dir.resolve()}")
    print()
# Script entry point: run the full pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()