文件预览

inline_images.py

查看 Wechat Article Formatter 技能包中的文件内容。

文件内容

scripts/inline_images.py

#!/usr/bin/env python3
import argparse
import base64
import mimetypes
import os
import re
import sys
from typing import Optional, Sequence
from urllib.parse import unquote

from bs4 import BeautifulSoup

# MIME types used when ``mimetypes`` cannot identify the file extension.
_FALLBACK_MIME_TYPES = {
    ".svg": "image/svg+xml",
    ".webp": "image/webp",
}

# ``src`` prefixes (compared case-insensitively) that are never local files:
# remote URLs, protocol-relative URLs and already-inlined data URLs.
_NON_LOCAL_PREFIXES = ("http://", "https://", "//", "data:")


def _is_local_reference(src: str) -> bool:
    """Return True when *src* should be looked up on the local disk."""
    return not src.lower().startswith(_NON_LOCAL_PREFIXES)


def _resolve_local_image(html_dir: str, src: str) -> Optional[str]:
    """Return the absolute path of the file *src* points at, or None.

    The raw attribute value is tried first so existing behaviour is kept.
    If no such file exists, the value is retried as a URL path: any
    ``?query`` / ``#fragment`` suffix is dropped and percent-escapes
    (e.g. ``%20``) are decoded.
    """
    candidates = [src]
    url_path = unquote(src.split("#", 1)[0].split("?", 1)[0])
    if url_path and url_path != src:
        candidates.append(url_path)

    for candidate in candidates:
        abs_path = os.path.abspath(os.path.join(html_dir, candidate))
        # isfile (not exists): a directory cannot be inlined.
        if os.path.isfile(abs_path):
            return abs_path
    return None


def _guess_image_mime(path: str) -> str:
    """Return the MIME type for *path*, defaulting to ``image/png``."""
    mime_type, _ = mimetypes.guess_type(path)
    if mime_type:
        return mime_type
    ext = os.path.splitext(path)[1].lower()
    return _FALLBACK_MIME_TYPES.get(ext, "image/png")


def inline_local_images(html_path: str, output_path: Optional[str] = None) -> None:
    """Replace local ``<img src>`` references in an HTML file with base64 data URLs.

    Image paths are resolved relative to the directory containing
    *html_path*. Remote images (``http://``, ``https://``, ``//``) and images
    that are already ``data:`` URLs are left untouched. Images that cannot be
    found or read are reported on stdout and skipped.

    Args:
        html_path: Path to the HTML file to process.
        output_path: Where to write the result. When omitted, *html_path* is
            overwritten in place (only if at least one image was inlined).
            When given and different from *html_path*, the file is always
            produced, even if no image needed inlining.

    Raises:
        FileNotFoundError: If *html_path* does not exist.
    """
    if not os.path.exists(html_path):
        raise FileNotFoundError(f"HTML file not found: {html_path}")

    html_dir = os.path.dirname(os.path.abspath(html_path))

    with open(html_path, "r", encoding="utf-8") as html_file:
        content = html_file.read()

    soup = BeautifulSoup(content, "html.parser")
    inlined_count = 0

    for img in soup.find_all("img"):
        src = (img.get("src") or "").strip()
        if not src or not _is_local_reference(src):
            continue

        img_abs_path = _resolve_local_image(html_dir, src)
        if img_abs_path is None:
            missing_path = os.path.abspath(os.path.join(html_dir, src))
            print(f"Warning: Image file not found at: {missing_path}, skipping.")
            continue

        # Only the file read can fail here; keep the try body minimal.
        try:
            with open(img_abs_path, "rb") as img_file:
                img_data = img_file.read()
        except OSError as e:
            print(f"Error processing image {img_abs_path}: {e}")
            continue

        base64_data = base64.b64encode(img_data).decode("ascii")
        mime_type = _guess_image_mime(img_abs_path)
        img["src"] = f"data:{mime_type};base64,{base64_data}"
        inlined_count += 1

    if inlined_count > 0:
        target_path = output_path if output_path else html_path
        # NOTE(review): str(soup) re-serializes the parsed tree, so minor
        # markup details may differ from the input file -- confirm acceptable.
        with open(target_path, "w", encoding="utf-8") as out_file:
            out_file.write(str(soup))
        print(f"Successfully inlined {inlined_count} local image(s) in: {target_path}")
        return

    print("No local images needed inlining.")
    # An explicitly requested output file should exist even when nothing
    # changed, so downstream steps can rely on it; copy the input verbatim.
    if output_path and os.path.abspath(output_path) != os.path.abspath(html_path):
        with open(output_path, "w", encoding="utf-8") as out_file:
            out_file.write(content)
        print(f"Copied unchanged HTML to: {output_path}")

def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point: parse arguments and inline local images.

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]`` when None,
            which allows the function to be called programmatically.

    Exits with status 1 (after printing the error) if processing fails.
    """
    parser = argparse.ArgumentParser(description="Inline local relative images in HTML as base64 Data URLs.")
    parser.add_argument("--html", required=True, help="Path to the target HTML content file.")
    parser.add_argument("--output", help="Optional path to output the modified HTML. If omitted, overwrites the input HTML in-place.")

    args = parser.parse_args(argv)

    # Top-level boundary: report any failure and signal it via the exit code.
    # ``sys`` comes from the module-level import, so this also works when
    # main() is called from an importing module rather than run as a script.
    try:
        inline_local_images(args.html, args.output)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

# Run the CLI only when executed as a script; ``sys`` is imported at the top
# of the file so that main() also works when this module is imported.
if __name__ == "__main__":
    main()