文件预览

extract_roster_and_hooks.py

查看 lobster-novel 技能包中的文件内容。

文件内容

scripts/extract_roster_and_hooks.py

#!/usr/bin/env python3
"""
《深渊狂嚎》角色名册 + 伏笔追踪 — 最终版

只用确认的角色名单(手动整理),不做自动发现。
伏笔用关键词扫描。
"""
import re, json
from pathlib import Path

NOVEL_DIR = Path(os.environ.get("NOVEL_DIR", "."))
CONTINUITY_DIR = NOVEL_DIR / "continuity"

# ── 手动整理的角色名单 ──
# (名称, 重要性, 描述, 标签)
CHARACTERS = [
    # 主角
    ("理查德·泰森", "protagonist", "灰港镇吟游诗人学徒→红龙血脉觉醒→龙裔战士", ["主角", "龙血", "红龙血脉", "吟游诗人"]),
    ("理查德", "protagonist", "", []),
    ("泰森", "protagonist", "家族姓氏", []),
    
    # 重要配角
    ("梅丽安·卡斯伯特", "major", "法师协会高阶成员,追踪龙血线索三十年", ["法师", "导师", "龙血研究"]),
    ("梅丽安", "major", "", []),
    ("卡斯伯特", "major", "", []),
    
    ("托德·铁砧", "major", "铁匠铺学徒,理查德的发小和铁杆战友", ["铁匠", "战友", "发小"]),
    ("托德", "major", "", []),
    ("铁砧", "major", "家族姓氏", []),
    
    ("铁锤汉克", "major", "破浪者酒馆老板,理查德的养父", ["酒馆老板", "养父", "前雇佣兵"]),
    ("汉克", "major", "", []),
    
    ("美坎修特", "major", "深渊裂隙中的恶魔领主,千年被封印中", ["恶魔领主", "最终BOSS", "深渊"]),
    
    # 其他已确认角色
    ("灰石·泰森", "major", "理查德的父亲,持断刃的神秘人物", ["父亲", "断刃", "龙血"]),
    ("灰石", "major", "", []),
    
    ("艾琳娜·烬羽", "major", "龙裔阵营的核心人物", ["龙裔", "烬羽家族"]),
    
    ("卡珊德拉·维斯特", "major", "", ["法师", "学者"]),
    ("埃德蒙·格雷", "major", "", ["学者", "法师"]),
    
    ("伊姆索瑞斯", "major", "太古红龙,千年前陨落,龙血血脉的源头", ["古龙", "龙血源头", "红龙"]),
    
    ("乌尔里克·铁拳", "minor", "", ["铁拳家族", "战士"]),
    ("伊格纳修斯·炎裔", "minor", "", ["炎裔家族", "封印法师"]),
    ("伊莉安娜·炎裔", "minor", "首席封印法师", ["封印法师", "炎裔家族"]),
    ("理查德·烬羽", "minor", "理查德的全名(烬羽家族路线)", ["烬羽家族", "别名"]),
    ("格罗姆·铁拳", "minor", "", ["铁拳家族"]),
    ("哈克·霜牙", "minor", "", []),
    ("德拉贡·沃克", "minor", "", ["学者"]),
    ("索菲亚·艾德琳", "minor", "", []),
    ("艾德温·灰石勋爵", "minor", "", ["贵族", "灰石"]),
    ("阿尔德里克", "minor", "铁卫军老兵", ["铁卫军", "老兵"]),
    ("银语者", "minor", "", ["神秘存在", "神殿"]),
    ("马里斯船长", "minor", "", ["船长"]),
    ("哈罗德·格雷", "minor", "灰港镇镇长", ["镇长"]),
    ("查克", "minor", "灰港镇酒馆常客", ["渔民"]),
    ("老汤姆", "minor", "", []),
    
    # 组织/群体(作为特殊条目)
    ("黑焰卫", "minor", "敌对组织", ["敌对", "组织"]),
    ("铁卫军", "minor", "帝国北疆精锐军团", ["帝国", "军团"]),
]

# 构建去重角色池
ROSTER_NAMES = {}  # name -> (importance, desc, tags)
for name, imp, desc, tags in CHARACTERS:
    if name not in ROSTER_NAMES:
        ROSTER_NAMES[name] = (imp, desc, tags)


def extract_chapter_info(filename: str, vol: int):
    m = re.search(r'Ch(\d+)_?', filename)
    ch = int(m.group(1)) if m else 0
    if vol == 1:
        return ch + 100, f"V1Ch{ch:03d}"
    return ch, f"V2Ch{ch:03d}"


def sort_files(files, vol):
    return sorted(files, key=lambda f: int(re.search(r'Ch(\d+)', f.name).group(1) or 0))


# ── 伏笔关键词 ──
FORESHADOW_KW = [
    "伏笔", "埋下", "线索", "预兆",
    "隐隐不安", "隐约不对", "觉得不对",
    "不祥的预感", "有种预感", "莫名的预感",
    "预言",
    "深渊低语", "黑暗呢喃", "窃窃私语",
    "封印松动", "裂隙扩大", "深渊异动",
    "不为人知的秘密", "被遗忘的",
    "吊坠发光", "鳞片发烫",
    "似曾相识",
    "不对劲", "不太对劲", "有古怪",
    "从未如此",
    "似乎还藏着什么", "远没有这么简单",
    "黑暗中传来", "阴影中浮现",
    "不详的预感",
    "没人知道",
]

def has_foreshadow(line: str) -> bool:
    line = line.strip()
    if len(line) < 25:
        return False
    for kw in FORESHADOW_KW:
        if kw in line:
            return True
    return False


def main():
    CONTINUITY_DIR.mkdir(parents=True, exist_ok=True)
    
    vol1_dir = NOVEL_DIR / "chapters" / "volume_01"
    vol2_dir = NOVEL_DIR / "chapters" / "volume_02"
    
    vol1_files = sort_files(list(vol1_dir.glob("Ch*.md")), 1) if vol1_dir.exists() else []
    vol2_files = sort_files(list(vol2_dir.glob("Ch*.md")), 2) if vol2_dir.exists() else []
    print(f"卷一: {len(vol1_files)} 章 | 卷二: {len(vol2_files)} 章 | 总计: {len(vol1_files)+len(vol2_files)} 章")
    print(f"角色名单: {len(ROSTER_NAMES)} 个条目")
    
    # 读取所有章节
    chapter_data = []
    for vol, files in [(1, vol1_files), (2, vol2_files)]:
        for fpath in files:
            try:
                text = fpath.read_text(encoding="utf-8")
            except:
                text = fpath.read_text(encoding="gbk", errors="replace")
            ch_num, ch_disp = extract_chapter_info(fpath.name, vol)
            chapter_data.append((ch_num, ch_disp, text, vol))
    chapter_data.sort(key=lambda x: x[0])
    
    # ── 初始化名册 ──
    roster = {}
    for name, (imp, desc, tags) in ROSTER_NAMES.items():
        roster[name] = {
            "name": name,
            "importance": imp,
            "first_appearance": 99999,
            "last_appearance": 0,
            "total_appearances": 0,
            "description": desc,
            "tags": tags,
            "status": "active",
            "disappears_for": 0,
        }
    
    appearances = []
    all_hooks = []
    
    # ── 逐章扫描 ──
    print(f"\n===== 逐章扫描 =====")
    
    for ch_num, ch_disp, text, _ in chapter_data:
        # 出场角色
        chars_in_ch = {n for n in ROSTER_NAMES if n in text}
        
        new_in_ch = set()
        for name in chars_in_ch:
            entry = roster[name]
            if entry["first_appearance"] > ch_num:
                entry["first_appearance"] = ch_num
                new_in_ch.add(name)
            entry["last_appearance"] = ch_num
            entry["total_appearances"] += 1
        
        appearances.append({
            "chapter": ch_num,
            "chapter_display": ch_disp,
            "characters": sorted(chars_in_ch),
            "new_characters": sorted(new_in_ch),
            "total_crowd": len(chars_in_ch),
        })
        
        # 伏笔检测
        lines = text.split('\n')
        ch_hooks = 0
        for line in lines:
            if has_foreshadow(line):
                cat = "plot"
                if any(w in line for w in ["身份", "身世", "来历"]): cat = "character"
                elif any(w in line for w in ["深渊", "裂隙", "封印", "龙血", "古龙", "伊姆索瑞斯"]): cat = "world"
                elif any(w in line for w in ["谜", "秘密"]): cat = "mystery"
                imp = "normal"
                if any(w in line for w in ["死亡", "毁灭", "灾难", "末日", "诅咒"]): imp = "major"
                if any(w in line for w in ["世界", "大陆", "位面", "终局"]): imp = "critical"
                all_hooks.append({
                    "description": line.strip()[:200],
                    "planted_chapter": ch_num,
                    "chapter_display": ch_disp,
                    "expected_payoff": ch_num + 25,
                    "category": cat,
                    "importance": imp,
                    "status": "active",
                })
                ch_hooks += 1
        
        if ch_num % 30 == 0 or ch_num in [1, 101]:
            print(f"  {ch_disp} → 角色{len(chars_in_ch)}人(+{len(new_in_ch)}) | 伏笔{ch_hooks}条")
    
    # 去重伏笔
    seen = set()
    unique_hooks = []
    for h in all_hooks:
        key = h["description"][:70]
        if key not in seen:
            seen.add(key)
            unique_hooks.append(h)
    
    # 消失检测
    max_ch = max(chapter_data, key=lambda x: x[0])[0]
    for entry in roster.values():
        if entry["total_appearances"] > 0 and entry["importance"] in ("minor", "major") and entry["status"] != "dead":
            absent = max_ch - entry["last_appearance"]
            if absent >= 15:
                entry["status"] = "disappeared"
                entry["disappears_for"] = absent
    
    # ── 写入文件 ──
    (CONTINUITY_DIR / "character_roster.json").write_text(
        json.dumps(roster, ensure_ascii=False, indent=2), encoding="utf-8")
    (CONTINUITY_DIR / "chapter_appearances.json").write_text(
        json.dumps(appearances, ensure_ascii=False, indent=2), encoding="utf-8")
    (CONTINUITY_DIR / "hooks.json").write_text(
        json.dumps(unique_hooks, ensure_ascii=False, indent=2), encoding="utf-8")
    
    # ── 摘要输出 ──
    print("\n" + "="*50)
    print("📊 最终摘要")
    print("="*50)
    
    counts = {"protagonist": 0, "major": 0, "minor": 0}
    for e in roster.values():
        if e["importance"] in counts: counts[e["importance"]] += 1
    
    active = sum(1 for e in roster.values() if e["status"] == "active" and e["total_appearances"] > 0)
    disappeared = sum(1 for e in roster.values() if e["status"] == "disappeared")
    unused = sum(1 for e in roster.values() if e["total_appearances"] == 0)
    
    print(f"\n📋 角色统计:")
    print(f"  主角: {counts['protagonist']} 重要配角: {counts['major']} 配角: {counts['minor']}")
    print(f"  活跃{active} 消失{disappeared} 未出场{unused} 已故{sum(1 for e in roster.values() if e['status']=='dead')}")
    
    imp_cn = {"protagonist": "主角", "major": "重配", "minor": "配角"}
    def fmt_ch(x):
        if x >= 99999: return "未出场"
        return f"V{2 if x>100 else 1}Ch{x-100 if x>100 else x}"
    
    top = sorted([e for e in roster.values() if e["total_appearances"] > 0],
                 key=lambda e: -e["total_appearances"])
    
    print(f"\n🏆 出场排行 (按次数):")
    print(f"  {'角色':<16} {'级别':<4} {'出场':>5} {'首次':<10} {'最近':<10} {'状态':<6}")
    print(f"  {'-'*55}")
    for e in top:
        print(f"  {e['name']:<16} {imp_cn.get(e['importance'],'?'):<4} {e['total_appearances']:>5}次 "
              f"{fmt_ch(e['first_appearance']):<10} {fmt_ch(e['last_appearance']):<10} {e['status']:<6}")
    
    hook_stats = {}
    for h in unique_hooks:
        hook_stats[h["category"]] = hook_stats.get(h["category"], 0) + 1
    v1_h = sum(1 for h in unique_hooks if h["planted_chapter"] > 100)
    v2_h = sum(1 for h in unique_hooks if h["planted_chapter"] <= 100)
    print(f"\n🔮 伏笔: {len(unique_hooks)}条 (V1:{v1_h} V2:{v2_h})")
    print(f"  类别: {', '.join(f'{k}:{v}' for k,v in sorted(hook_stats.items()))}")
    
    print(f"\n✅ 文件写入 {CONTINUITY_DIR}/")
    for f in ["character_roster.json", "chapter_appearances.json", "hooks.json"]:
        fpath = CONTINUITY_DIR / f
        if fpath.exists():
            size = fpath.stat().st_size
            print(f"   {f}: {size/1024:.1f}KB")

if __name__ == "__main__":
    main()