文件预览

handler.py

查看「Skills of A-share announcement data released by ft.tech」技能包中的文件内容。

文件内容

sub-skills/stock-reports-specific-url-hash/scripts/handler.py

#!/usr/bin/env python3
"""Download an A-share research report PDF file by its url_hash."""
import argparse
import json
import os
import sys
import urllib.error
import urllib.request

# Scheme and host that the report download endpoint path is appended to.
BASE_URL = "https://market.ft.tech"


def _safe_output_path(path: str, base_dir: str | None = None) -> str:
    """将 output 规范为绝对路径,并限制在 base_dir 内,防止路径遍历。"""
    base_dir = (base_dir or os.getcwd()).rstrip(os.sep)
    base_abs = os.path.abspath(base_dir)
    resolved = os.path.abspath(os.path.normpath(path))
    if os.path.commonpath([base_abs, resolved]) != base_abs:
        print(
            json.dumps({"error": "output path must be under base directory", "base": base_abs},
                ensure_ascii=False),
            file=sys.stderr,
        )
        sys.exit(1)
    return resolved


def _exit_with_error(err) -> None:
    """Print ``err`` as pretty-printed JSON to stderr and exit with status 1."""
    print(json.dumps(err, ensure_ascii=False, indent=2), file=sys.stderr)
    sys.exit(1)


def main():
    """Download an A-share research report PDF identified by ``--url-hash``.

    Command-line arguments:
        --url-hash: Required. Identifier of the report file on the server.
        --output: Destination file name; defaults to ``{url_hash}.pdf``.
            The path must resolve inside the current working directory.
        --timeout: Network timeout in seconds (default 30).

    On success, writes the downloaded bytes to the output file and prints a
    JSON object with ``saved_to`` and ``size_bytes`` to stdout.

    On failure (HTTP error, network error, a JSON body returned instead of a
    file, or a file-system error), prints a JSON error object to stderr and
    exits with status 1.
    """
    # Imported here so the block does not rely on urllib.request having
    # loaded the submodule as a side effect.
    import urllib.parse

    parser = argparse.ArgumentParser(description="通过 url_hash 下载 A 股研报 PDF")
    parser.add_argument("--url-hash", required=True, help="研报文件的 url_hash,从研报列表接口获取")
    parser.add_argument("--output", default=None, help="保存的文件名(默认 {url_hash}.pdf)")
    parser.add_argument("--timeout", type=float, default=30.0, help="请求超时秒数(默认 30)")
    args = parser.parse_args()

    raw_output = args.output or f"{args.url_hash}.pdf"
    output = _safe_output_path(raw_output)
    # Percent-encode the hash so characters such as "/", "?" or "#" cannot
    # change the request path or add a query string.
    quoted_hash = urllib.parse.quote(args.url_hash, safe="")
    url = f"{BASE_URL}/data/api/v1/market/data/report/stock-reports/{quoted_hash}"

    try:
        # Without a timeout a stalled connection would block forever.
        with urllib.request.urlopen(url, timeout=args.timeout) as resp:
            data = resp.read()
    except urllib.error.HTTPError as e:
        # The server may describe the failure as JSON; fall back to raw text.
        body = e.read().decode("utf-8", errors="replace")
        try:
            err = json.loads(body)
        except ValueError:
            err = {"error": body}
        _exit_with_error(err)
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses: report DNS,
        # connection and timeout failures as JSON instead of a traceback.
        _exit_with_error({"error": str(getattr(e, "reason", e))})

    # A body with "{" near its start may be a JSON error payload delivered
    # with a success status; if it parses as JSON, report it as an error.
    if b"{" in data[:10]:
        try:
            err = json.loads(data.decode())
        except ValueError:
            # Not valid UTF-8 / JSON after all: treat it as file content.
            pass
        else:
            _exit_with_error(err)

    try:
        with open(output, "wb") as f:
            f.write(data)
    except OSError as e:
        _exit_with_error({"error": str(e)})

    print(json.dumps({"saved_to": os.path.abspath(output), "size_bytes": len(data)}, ensure_ascii=False))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()