文件预览

heiyan-booklist-scraper.js

查看 Story Short Scan 技能包中的文件内容。

文件内容

scripts/heiyan-booklist-scraper.js

#!/usr/bin/env node
/**
 * 黑岩短篇书库列表采集脚本
 *
 * ⚠️ 前置条件(必须):
 *   1. 启动 Chrome CDP 环境
 *   2. 在 Chrome 中手动登录 https://manage.zhangwenpindu.cn
 *      登录后会生成 Admin-Token cookie,脚本需要它调用后端 API
 *      未登录 → 脚本报错「未检测到 Admin-Token」
 *
 * 采集策略:从 Cookie 提取 Bearer token,调用 ms.zhangwenpindu.cn 后端 API
 * 获取结构化 JSON 数据(书名、作者、字数、分类、标签等)。
 *
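 * 用法:
 *   node heiyan-booklist-scraper.js --pages 5              # 采集前5页(每页20条)
 *   node heiyan-booklist-scraper.js --pages 3 --channel male   # 仅男频
 *   node heiyan-booklist-scraper.js --pages 3 --channel female # 仅女频
 *   node heiyan-booklist-scraper.js --pages 2 --detail         # 含逐本详情(标签等)
 *   node heiyan-booklist-scraper.js --port 9222 --outdir ./out # 指定端口与输出目录(默认 9222 和当前目录)
 *
 * 输出:<outdir>/黑岩书库列表_YYYYMMDD.md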
 */

const fs = require("fs");
const path = require("path");
const { ab, sleep, evalJSON, safeStr, getArg } = require("./cdp-utils");

// Management-console page that main() opens before reading the login cookie;
// also printed as the source URL in the generated report.
const BOOKLIST_URL = "https://manage.zhangwenpindu.cn/books/booklist";
// Origin that the list and detail API requests are sent to.
const API_BASE = "https://ms.zhangwenpindu.cn";
// Number of books requested per list page (sent as the `pageSize` form field).
const PAGE_SIZE = 20;

// ---------------------------------------------------------------------------
// API 调用
// ---------------------------------------------------------------------------

/**
 * Read the `Admin-Token` cookie through `evalJSON`.
 *
 * The generated snippet reads `document.cookie` and returns the cookie value
 * JSON-encoded (an empty JSON string when the cookie is absent). The cookie
 * name is anchored to the start of a `name=value` pair so that a cookie whose
 * name merely ends in `Admin-Token` (for example `X-Admin-Token`) is not
 * picked up by mistake.
 *
 * @param {number} port - Port forwarded unchanged to `evalJSON`.
 * @returns {string} The cookie value, or `""` when `evalJSON` yields a falsy
 *   result.
 */
function getToken(port) {
  const js =
    "JSON.stringify((()=>{" +
    // `(?:^|;\s*)` requires the name to begin a cookie pair.
    "var m=document.cookie.match(/(?:^|;\\s*)Admin-Token=([^;]+)/);" +
    "return m?m[1]:''" +
    "})())";
  return evalJSON(port, js) || "";
}

/**
 * Request one page of the book list from the backend.
 *
 * Assembles a JavaScript snippet that POSTs a form-encoded body
 * (`pageNum`, `pageSize`, `language`) to `/manage/book/list` with a Bearer
 * `Authorization` header, then hands the snippet to `evalJSON`.
 *
 * @param {number} port - Port forwarded unchanged to `evalJSON`.
 * @param {string} token - Value placed after `Bearer ` in the header.
 * @param {number} pageNum - Page number to request.
 * @returns {*} Whatever `evalJSON` returns for the snippet (presumably the
 *   parsed JSON response; confirm against ./cdp-utils).
 */
function fetchBookList(port, token, pageNum) {
  const bearer = safeStr(token);
  const endpoint = safeStr(`${API_BASE}/manage/book/list`);
  const formFields =
    `pageNum:${safeStr(pageNum)},pageSize:${safeStr(PAGE_SIZE)},language:'zh_TW'`;

  // Pieces of the `fetch` init object, joined without separators.
  const requestInit = [
    "{method:'POST',",
    "headers:{",
    "'Content-Type':'application/x-www-form-urlencoded',",
    `'Authorization':'Bearer '+${bearer}`,
    "},",
    `body:new URLSearchParams({${formFields}})`,
    "}",
  ].join("");

  const script = `fetch(${endpoint},${requestInit}).then(function(r){return r.json()})`;
  return evalJSON(port, script);
}

/**
 * Request the detail record of a single book from the backend.
 *
 * Assembles a JavaScript snippet that GETs `/manage/book/<bookId>` (the id is
 * URI-component-encoded) with a Bearer `Authorization` header, then hands the
 * snippet to `evalJSON`.
 *
 * @param {number} port - Port forwarded unchanged to `evalJSON`.
 * @param {string} token - Value placed after `Bearer ` in the header.
 * @param {string|number} bookId - Identifier appended to the detail URL.
 * @returns {*} Whatever `evalJSON` returns for the snippet (presumably the
 *   parsed JSON response; confirm against ./cdp-utils).
 */
function fetchBookDetail(port, token, bookId) {
  const bearer = safeStr(token);
  const endpoint = safeStr(`${API_BASE}/manage/book/${encodeURIComponent(bookId)}`);
  const requestInit = `{headers:{'Authorization':'Bearer '+${bearer}}}`;
  const script = `fetch(${endpoint},${requestInit}).then(function(r){return r.json()})`;
  return evalJSON(port, script);
}

// ---------------------------------------------------------------------------
// 主流程
// ---------------------------------------------------------------------------

/**
 * Read a positive-integer command-line option.
 *
 * A missing option yields the fallback silently. A value that does not parse
 * to an integer greater than zero yields the fallback with a warning, so a
 * typo such as `--pages abc` no longer turns into `NaN` and a page loop that
 * never runs.
 *
 * @param {string[]} argv - Command-line arguments without node/script path.
 * @param {string} flag - Option name, e.g. `"--pages"`.
 * @param {number} fallback - Value used when the option is absent or invalid.
 * @returns {number} The parsed value or the fallback.
 */
function readPositiveIntArg(argv, flag, fallback) {
  const raw = getArg(argv, flag);
  if (!raw) return fallback;
  const value = Number.parseInt(raw, 10);
  if (Number.isInteger(value) && value > 0) return value;
  console.warn(`[heiyan] ${flag} 参数无效(${raw}),使用默认值 ${fallback}`);
  return fallback;
}

/**
 * Read the `--channel` option, accepting only `all`, `male` or `female`.
 *
 * Any other value was previously treated as "no filter" without notice; it
 * still is, but a warning is now printed.
 *
 * @param {string[]} argv - Command-line arguments without node/script path.
 * @returns {"all"|"male"|"female"} The selected channel.
 */
function readChannelArg(argv) {
  const raw = getArg(argv, "--channel") || "all";
  if (raw === "all" || raw === "male" || raw === "female") return raw;
  console.warn(`[heiyan] --channel 参数无效(${raw}),可选 all/male/female,按 all 处理`);
  return "all";
}

// Command-line options, resolved once at load time and read by main() and
// buildAndSave().
const args = process.argv.slice(2);
const PORT = readPositiveIntArg(args, "--port", 9222);
const OUTDIR = getArg(args, "--outdir") || ".";
const PAGES = readPositiveIntArg(args, "--pages", 5);
const CHANNEL = readChannelArg(args);
const DETAIL = args.includes("--detail");

/**
 * Render one book as the Markdown lines of its report entry.
 *
 * Every line is collected in a local array, so if any field access throws the
 * caller receives nothing for this book instead of a half-written entry.
 *
 * @param {object} b - Book record as returned by the list API, optionally
 *   enriched with `tags` and `description` by main().
 * @param {number} position - 1-based position of the book inside its group.
 * @returns {string[]} The entry's lines, ending with a blank separator line.
 */
function renderBookEntry(b, position) {
  const entry = [`### #${position} ${b.name}`];

  // Join only the parts that exist, so a missing sub-category does not show
  // up as the literal text "undefined".
  const category = [b.classifyStr, b.typeDesc].filter(Boolean).join("/");
  const meta = [
    b.userName,
    category,
    b.words ? b.words.toLocaleString() + "字" : "",
    b.price ? b.price + "钻" : "",
    b.open ? "公开" : "未公开",
  ].filter(Boolean).join(" · ");
  if (meta) entry.push(`*${meta}*`);

  if (b.createTime) entry.push(`**创建:** ${b.createTime}`);
  if (b.updateTime) entry.push(`**更新:** ${b.updateTime}`);

  // Accept a single string as well as an array; a bare string has no `join`.
  let tags = [];
  if (Array.isArray(b.tags)) {
    tags = b.tags;
  } else if (typeof b.tags === "string" && b.tags) {
    tags = [b.tags];
  }
  if (tags.length) {
    entry.push(`**标签:** ${tags.join("、")}`);
  }

  if (b.description) {
    // Quote at most the first 200 UTF-16 units of the description.
    const excerpt = b.description.substring(0, 200);
    const ellipsis = b.description.length > 200 ? "..." : "";
    entry.push("", `> ${excerpt}${ellipsis}`);
  }

  entry.push("");
  return entry;
}

/**
 * Build the Markdown report for the collected books and write it to disk.
 *
 * Books are grouped by `classifyStr` into 男频, 女频 and, when any remain,
 * 其他. A book whose rendering throws is logged and replaced by a blank line;
 * the remaining books are still written. The target directory is created when
 * it does not exist yet.
 *
 * Reads the file-level `BOOKLIST_URL`, `PAGES` and `DETAIL` values for the
 * report header.
 *
 * @param {object[]} allBooks - Every collected book. Currently unused; kept so
 *   existing callers keep working.
 * @param {number} total - Total entry count reported by the backend.
 * @param {object[]} filtered - Books to include in the report.
 * @param {string} filepath - Destination path of the Markdown file.
 * @returns {void}
 * @throws {Error} When the directory cannot be created or the file cannot be
 *   written.
 */
function buildAndSave(allBooks, total, filtered, filepath) {
  const now = new Date().toISOString();
  const maleBooks = filtered.filter((b) => b.classifyStr === "男频");
  const femaleBooks = filtered.filter((b) => b.classifyStr === "女频");
  const otherBooks = filtered.filter(
    (b) => b.classifyStr !== "男频" && b.classifyStr !== "女频"
  );

  const groups = [
    { label: "男频", books: maleBooks },
    { label: "女频", books: femaleBooks },
  ];
  if (otherBooks.length) {
    groups.push({ label: "其他", books: otherBooks });
  }

  const lines = [
    `# 黑岩 · 书库列表`,
    "",
    `- 来源:${BOOKLIST_URL}`,
    `- 抓取时间:${now}`,
    `- 总条目:${total}`,
    `- 已采集:${filtered.length} 条(${PAGES} 页)`,
    DETAIL ? "- 含详情(标签、简介)" : "- 列表模式(加 --detail 获取标签和简介)",
    "",
    "---",
    "",
  ];

  for (const g of groups) {
    if (!g.books.length) continue;
    lines.push(`## ${g.label}短篇 — ${g.books.length} 本`, "");

    g.books.forEach((book, i) => {
      try {
        lines.push(...renderBookEntry(book, i + 1));
      } catch (bookErr) {
        console.error(`[heiyan] ${g.label} 第${i + 1}条处理出错: ${bookErr.message}`);
        lines.push("");
      }
    });

    lines.push("---", "");
  }

  // `--outdir` may name a directory that has not been created yet.
  fs.mkdirSync(path.dirname(filepath), { recursive: true });
  fs.writeFileSync(filepath, lines.join("\n"), "utf-8");
  console.log(`  ✓ 已保存: ${filepath}`);
}

/**
 * Run the whole scrape: obtain the token, page through the list API,
 * optionally enrich each book with detail data, then write the report.
 *
 * Steps, in order:
 *   1. Open `BOOKLIST_URL` via `ab`, wait, and read the token with `getToken`.
 *   2. Request pages 1..`PAGES` with `fetchBookList`; stop at the first page
 *      that has no response, reports code 401, has no rows, or throws.
 *   3. Filter by `CHANNEL` (`male` / `female`; anything else keeps all books).
 *   4. When `DETAIL` is set, call `fetchBookDetail` per book and copy `tags`,
 *      `description` and `chapterCount` onto the book object.
 *   5. Hand the result to `buildAndSave`.
 *
 * Every failure before step 5 is reported on the console and ends the run
 * early by returning; nothing is written in that case.
 *
 * @returns {void}
 */
function main() {
  console.log("\n→ 采集 黑岩书库列表(API 模式)...");
  console.log(`  计划采集: ${PAGES} 页(每页 ${PAGE_SIZE} 条)`);

  // File name carries the UTC date as YYYYMMDD.
  const date = new Date().toISOString().slice(0, 10).replace(/-/g, "");
  const filename = `黑岩书库列表_${date}.md`;
  const filepath = path.join(OUTDIR, filename);

  // Open the management console first so the login cookie can be read.
  let token;
  try {
    ab(PORT, "open", BOOKLIST_URL);
    sleep(3000);
    token = getToken(PORT);
  } catch (err) {
    console.error(`[heiyan] 页面加载或 token 提取出错: ${err.message}`);
    return;
  }

  if (!token) {
    console.log("  ✗ 未检测到 Admin-Token");
    console.log("  → 请先在 Chrome 中打开 https://manage.zhangwenpindu.cn 并登录");
    console.log("  → 登录后重新运行本脚本");
    return;
  }
  console.log("  ✓ 获取到认证 token");

  // Page-by-page collection.
  const allBooks = [];
  let total = 0;

  for (let p = 1; p <= PAGES; p++) {
    try {
      sleep(800);
      const resp = fetchBookList(PORT, token, p);

      // A missing response is not an authentication problem; report it as
      // what it is.
      if (!resp) {
        console.log(`  ⚠ 第${p}页未收到响应,停止`);
        break;
      }
      if (resp.code === 401) {
        console.log(`  ⚠ 第${p}页认证失败,请重新登录`);
        break;
      }

      const rows = resp.data?.rows;
      if (!Array.isArray(rows) || rows.length === 0) {
        console.log(`  第${p}页无数据,停止`);
        break;
      }

      if (p === 1) {
        total = Number.parseInt(resp.data.total, 10) || 0;
        console.log(`  总条目: ${total}`);
      }

      allBooks.push(...rows);
      console.log(`  第${p}页: ${rows.length} 条 (累计 ${allBooks.length})`);
    } catch (pageErr) {
      // Paging stops here; whatever was collected so far is still used.
      console.error(`[heiyan] 第${p}页采集出错,停止翻页: ${pageErr.message}`);
      if (allBooks.length > 0) {
        console.log(`  已采集 ${allBooks.length} 条,继续处理已有数据`);
      }
      break;
    }
  }

  if (!allBooks.length) {
    // This script talks to the API only, so the likely causes are an expired
    // login or a backend change, not page selectors.
    console.error("[heiyan] 采集失败:未获取到任何书籍数据,请确认已登录(Admin-Token 未过期)且后端 API 可用");
    return;
  }

  // Channel filter.
  let filtered = allBooks;
  if (CHANNEL === "male") {
    filtered = allBooks.filter((b) => b.classifyStr === "男频");
  } else if (CHANNEL === "female") {
    filtered = allBooks.filter((b) => b.classifyStr === "女频");
  }

  // Optional per-book detail lookup (tags and so on).
  if (DETAIL && filtered.length) {
    console.log(`  获取 ${filtered.length} 本详情...`);
    for (let i = 0; i < filtered.length; i++) {
      try {
        sleep(500);
        const detail = fetchBookDetail(PORT, token, filtered[i].id);
        if (detail?.data) {
          filtered[i].tags = detail.data.tags || [];
          filtered[i].description = detail.data.description || "";
          filtered[i].chapterCount = detail.data.chapterCount || 0;
        }
        if ((i + 1) % 10 === 0) {
          console.log(`    已获取 ${i + 1}/${filtered.length}`);
        }
      } catch (detailErr) {
        // One failed lookup only costs that book its detail fields.
        console.error(`[heiyan] 第${i + 1}本详情获取出错,跳过: ${detailErr.message}`);
      }
    }
    console.log("  ✓ 详情获取完成");
  }

  buildAndSave(allBooks, total, filtered, filepath);
}

// Entry point: the scrape starts as soon as this file is loaded.
main();