文件内容
scripts/inline_images.py
#!/usr/bin/env python3
import argparse
import base64
import mimetypes
import os
import re
from bs4 import BeautifulSoup
def inline_local_images(html_path: str, output_path: str = None) -> None:
if not os.path.exists(html_path):
raise FileNotFoundError(f"HTML file not found: {html_path}")
html_dir = os.path.dirname(os.path.abspath(html_path))
with open(html_path, "r", encoding="utf-8") as f:
content = f.read()
soup = BeautifulSoup(content, "html.parser")
images = soup.find_all("img")
inlined_count = 0
for img in images:
src = img.get("src")
if not src:
continue
src = src.strip()
# 跳过网络图片和已经内联的 base64 图片
if src.startswith(("http://", "https://", "data:")):
continue
# 拼接本地图片绝对路径
img_abs_path = os.path.abspath(os.path.join(html_dir, src))
if not os.path.exists(img_abs_path):
print(f"Warning: Image file not found at: {img_abs_path}, skipping.")
continue
# 根据后缀推导 mime type,若无法识别则默认 image/png
mime_type, _ = mimetypes.guess_type(img_abs_path)
if not mime_type:
ext = os.path.splitext(img_abs_path)[1].lower()
if ext == '.svg':
mime_type = 'image/svg+xml'
else:
mime_type = 'image/png'
try:
with open(img_abs_path, "rb") as f:
img_data = f.read()
base64_data = base64.b64encode(img_data).decode("utf-8")
data_url = f"data:{mime_type};base64,{base64_data}"
img["src"] = data_url
inlined_count += 1
except Exception as e:
print(f"Error processing image {img_abs_path}: {e}")
if inlined_count > 0:
target_path = output_path if output_path else html_path
# 使用 formatter 写入,保留原始排版
with open(target_path, "w", encoding="utf-8") as f:
f.write(str(soup))
print(f"Successfully inlined {inlined_count} local image(s) in: {target_path}")
else:
print("No local images needed inlining.")
def main():
parser = argparse.ArgumentParser(description="Inline local relative images in HTML as base64 Data URLs.")
parser.add_argument("--html", required=True, help="Path to the target HTML content file.")
parser.add_argument("--output", help="Optional path to output the modified HTML. If omitted, overwrites the input HTML in-place.")
args = parser.parse_args()
try:
inline_local_images(args.html, args.output)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
if __name__ == "__main__":
import sys
main()