文件预览

extract_youtube_description.py

查看 YouTube Chinese Subtitle Burn-in 技能包中的文件内容。

文件内容

scripts/extract_youtube_description.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
import shutil
import subprocess
import sys
from pathlib import Path


def contains_chinese(text: str) -> bool:
    return bool(re.search(r"[\u4e00-\u9fff]", text))


def load_info(path_or_url: str) -> dict:
    path = Path(path_or_url)
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    if shutil.which("yt-dlp") is None:
        raise SystemExit("yt-dlp is required when input is a URL")
    result = subprocess.run(
        ["yt-dlp", "--skip-download", "--dump-json", path_or_url],
        check=False,
        text=True,
        capture_output=True,
    )
    if result.returncode != 0:
        print(result.stdout, end="")
        print(result.stderr, end="")
        raise SystemExit(result.returncode)
    return json.loads(result.stdout)


def main() -> int:
    parser = argparse.ArgumentParser(description="Extract a YouTube video's metadata and description.")
    parser.add_argument("info_json_or_url", help="yt-dlp info JSON path or YouTube URL")
    parser.add_argument("--out", type=Path, required=True, help="Output Markdown file")
    args = parser.parse_args()

    info = load_info(args.info_json_or_url)
    title = info.get("title") or ""
    channel = info.get("channel") or info.get("uploader") or ""
    url = info.get("webpage_url") or info.get("original_url") or ""
    description = (info.get("description") or "").strip()
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(
        "\n".join(
            [
                f"# {title}",
                "",
                f"- URL: {url}",
                f"- Channel: {channel}",
                f"- Description language: {'Chinese or mixed Chinese' if contains_chinese(description) else 'non-Chinese or empty'}",
                "",
                "## Original Description",
                "",
                description,
                "",
            ]
        ),
        encoding="utf-8",
    )
    if description:
        print(f"PASS: wrote description to {args.out}")
        print("needs_translation:", "no" if contains_chinese(description) else "yes")
        return 0
    print(f"WARN: description is empty; wrote metadata to {args.out}")
    return 0


if __name__ == "__main__":
    sys.exit(main())