文件内容
scripts/fanqie-rank-scraper.js
#!/usr/bin/env node
/**
* 番茄小说排行榜采集脚本
*
* 配合 browser-cdp skill 使用。先启动 Chrome CDP 环境,再运行本脚本。
* 采集策略:从页面 __INITIAL_STATE__ 提取结构化数据,通过 fetch 获取真实标题。
* 输出 Markdown 格式匹配 scan-output-format.md 规范。
*
* 用法:
* node fanqie-rank-scraper.js --channel 1 --type 2 # 男频阅读榜
* node fanqie-rank-scraper.js --channel 0 --type 1 # 女频新书榜
* node fanqie-rank-scraper.js --channel 1 --type 2 --outdir ./ # 指定输出目录
* node fanqie-rank-scraper.js --channel all # 全部采集
*
* 前置:
* node {SKILL_DIR}/browser-cdp/scripts/setup-cdp-chrome.js 9222
*/
const fs = require("fs");
const path = require("path");
const { ab, sleep, evalJSON, scrollLoad, getArg } = require("./cdp-utils");
// ---------------------------------------------------------------------------
// 页面提取
// ---------------------------------------------------------------------------
/** 提取侧边菜单品类链接 */
function extractCategories(port, channel, type) {
const prefix = `/rank/${channel}_${type}_`;
const js = "JSON.stringify(Array.from(document.querySelectorAll('a')).filter(a=>a.href&&a.href.indexOf('" + prefix + "')>-1&&a.parentElement&&a.parentElement.classList.contains('arco-menu-item-inner')).map(a=>({name:a.innerText.trim(),href:a.getAttribute('href')})).filter(x=>x.name))";
return evalJSON(port, js) || [];
}
/** 从 __INITIAL_STATE__ 提取当前品类页的作品数据 */
function extractBookList(port) {
const js = "JSON.stringify(window.__INITIAL_STATE__?.rank?.book_list||[])";
const list = evalJSON(port, js);
return Array.isArray(list) ? list : [];
}
/** 批量获取真实书名、作者和简介:用同步 XHR 请求详情页解析 */
function fetchRealTitles(port, bookIds) {
if (!bookIds.length) return {};
const ids = JSON.stringify(bookIds);
const js = "JSON.stringify((()=>{const map={};var ids=" + ids + ";ids.forEach(function(id){try{var x=new XMLHttpRequest();x.open('GET','/page/'+id,false);x.send();var tm=x.responseText.match(/<title>([^<]+?)完整版/);var am=x.responseText.match(/\"author\":\"([^\"]+)\"/);var dm=x.responseText.match(/<meta\\s+name=\"description\"\\s+content=\"([^\"]+)\"/);if(!dm)dm=x.responseText.match(/\"abstract\":\"([^\"]{10,}?)\"/);map[id]={title:tm?tm[1]:'',author:am?am[1]:'',desc:dm?dm[1]:''}}catch(e){map[id]={title:'',author:'',desc:''}}});return map})())";
return evalJSON(port, js) || {};
}
/** 格式化在读数 */
function fmtReads(count) {
if (!count || count === "0") return "未知";
const n = parseInt(count, 10);
if (n >= 10000) return (n / 10000).toFixed(1) + "万";
return String(n);
}
/** 格式化字数 */
function fmtWords(count) {
if (!count) return "未知";
const n = parseInt(count, 10);
if (n >= 10000) return (n / 10000).toFixed(1) + "万";
return String(n);
}
/** 格式化状态 */
function fmtStatus(s) {
if (s === "1") return "连载中";
if (s === "0" || s === "2") return "已完结";
return s || "未知";
}
// ---------------------------------------------------------------------------
// 主流程
// ---------------------------------------------------------------------------
const args = process.argv.slice(2);
const PORT = parseInt(getArg(args, "--port") || "9222", 10);
const OUTDIR = getArg(args, "--outdir") || ".";
const CHANNEL = getArg(args, "--channel") || "1";
const TYPE = getArg(args, "--type") || "2";
function channelLabel(ch) {
return ch === "1" ? "男频" : "女频";
}
function typeLabel(t) {
return t === "2" ? "阅读榜" : "新书榜";
}
function scrapeChannel(ch, type) {
const chLabel = channelLabel(ch);
const tyLabel = typeLabel(type);
console.log(`\n→ 采集 ${chLabel}${tyLabel}...`);
// 用已知品类 ID 作为入口,确保菜单只显示当前频道/类型的品类
const initCatId = ch === "1" ? "1141" : "1139"; // 男频:西方奇幻 / 女频:古风世情
const initUrl = `https://fanqienovel.com/rank/${ch}_${type}_${initCatId}`;
ab(PORT, "open", initUrl);
sleep(3000);
const categories = extractCategories(PORT, ch, type);
if (!categories.length) {
console.log(` ⚠ 未提取到品类,跳过`);
return null;
}
console.log(` 发现 ${categories.length} 个品类`);
const now = new Date().toISOString();
const lines = [
`# 番茄 · ${chLabel}${tyLabel} · 全 ${categories.length} 题材`,
"",
`- 频道参数:channel=${ch},type=${type}`,
`- 抓取时间:${now}`,
`- 每题材上限 ≈ 20`,
"",
"---",
"",
];
for (let ci = 0; ci < categories.length; ci++) {
const cat = categories[ci];
console.log(` [${ci + 1}/${categories.length}] ${cat.name}`);
ab(PORT, "open", `https://fanqienovel.com${cat.href}`);
sleep(2500);
scrollLoad(PORT, 2);
const books = extractBookList(PORT);
if (!Array.isArray(books) || !books.length) {
lines.push(`## ${cat.name} — 0 本`, "", "---", "");
continue;
}
// 批量获取真实标题
const bookIds = books.map((b) => String(b.bookId));
const titles = fetchRealTitles(PORT, bookIds);
lines.push(`## ${cat.name} — ${books.length} 本`, "");
for (let i = 0; i < books.length; i++) {
const b = books[i];
const info = titles[String(b.bookId)] || {};
const title = info.title || `bookId:${b.bookId}`;
const author = info.author || "未知";
lines.push(`### #${i + 1} ${title}`);
lines.push(
`*${author} · ${fmtStatus(b.creationStatus)} · ${fmtReads(b.read_count)} 在读 · ${fmtWords(b.wordNumber)}字*`
);
lines.push(`**最新更新:** ${b.lastChapterTitle || "未知"}`);
lines.push(`[作品页](https://fanqienovel.com/page/${b.bookId})`);
if (info.desc) {
lines.push("");
lines.push("**简介**");
lines.push("");
lines.push(info.desc);
}
lines.push("");
}
lines.push("---", "");
}
return lines.join("\n");
}
function main() {
const channels = CHANNEL === "all" ? ["1", "0"] : [CHANNEL];
const types = TYPE === "all" ? ["2", "1"] : [TYPE];
for (const ch of channels) {
for (const ty of types) {
const content = scrapeChannel(ch, ty);
if (!content) continue;
const date = new Date().toISOString().slice(0, 10).replace(/-/g, "");
const filename = `番茄${channelLabel(ch)}${typeLabel(ty)}_全题材_${date}.md`;
const filepath = path.join(OUTDIR, filename);
fs.writeFileSync(filepath, content, "utf-8");
console.log(` ✓ 已保存: ${filepath}`);
}
}
}
main();