文件预览

download_from_urls.sh

查看 web-video-downloader网页视频下载器 技能包中的文件内容。

文件内容

scripts/download_from_urls.sh

#!/bin/bash
# Download a video from a list of captured URLs.
# Usage: bash download_from_urls.sh <urls_json_or_txt> [output_path]
#
# JSON input format (as written by cdp_capture.js):
# {
#   "urls": [
#     { "url": "https://...", "headers": { "Referer": "...", ... } },
#     ...
#   ]
# }
#
# A plain-text URL list (one URL per line) is also accepted.

# NOTE: only -e is enabled. The script relies on pipelines whose first stage
# exits non-zero on purpose (e.g. `ffmpeg -i file | grep ...`), so pipefail
# must stay off.
set -e

URLS_FILE="${1:?用法: download_from_urls.sh <urls_json_or_txt> <output_path>}"
# A tilde is NOT expanded inside double quotes, so the default has to be
# spelled with $HOME; "~/Desktop/..." would create a literal "~" directory.
OUTPUT_PATH="${2:-$HOME/Desktop/video.mp4}"
# Unpredictable scratch directory for downloaded segments (still under /tmp).
OUTPUT_DIR="$(mktemp -d /tmp/video_segments_XXXXXX)"
# Remove the scratch directory on every exit path, including early exits.
trap 'rm -rf -- "$OUTPUT_DIR"' EXIT

# Locate an ffmpeg binary and store the command to run in the global FFMPEG.
#
# Lookup order:
#   1. `ffmpeg` resolvable by the shell
#   2. a previously downloaded /tmp/ffmpeg
#   3. download the static build from evermeet.cx and unpack it to /tmp/ffmpeg
#
# Globals:  FFMPEG (written)
# Returns:  0 on success, 1 if the download or unpacking failed
#
# NOTE(review): /tmp/ffmpeg is a predictable, world-writable location and the
# downloaded archive is not checksum-verified; anything placed there will be
# executed. Consider a private cache directory if this runs on shared hosts.
ensure_ffmpeg() {
  if command -v ffmpeg >/dev/null 2>&1; then
    FFMPEG="ffmpeg"
    return 0
  fi

  if [ -x "/tmp/ffmpeg" ]; then
    FFMPEG="/tmp/ffmpeg"
    return 0
  fi

  echo "[ffmpeg] 下载静态版本..."
  # -f: fail on HTTP errors instead of saving the error page as the archive.
  if ! curl -fsSL 'https://evermeet.cx/ffmpeg/getrelease/ffmpeg/zip' -o /tmp/ffmpeg.zip; then
    echo "[ffmpeg] 下载失败" >&2
    return 1
  fi

  # Unpack with -d rather than `cd /tmp`: changing the working directory here
  # would break relative input/output paths used by the rest of the script.
  if ! unzip -o /tmp/ffmpeg.zip -d /tmp; then
    echo "[ffmpeg] 解压失败" >&2
    return 1
  fi

  chmod +x /tmp/ffmpeg || return 1
  FFMPEG="/tmp/ffmpeg"
}

# Show the run banner with the effective input/output paths, then make sure
# ffmpeg is available and the segment scratch directory exists.
banner_rule="=========================================="
printf '%s\n' "$banner_rule" "  从URL列表下载视频" "$banner_rule"
printf 'URL文件: %s\n' "$URLS_FILE"
printf '输出路径: %s\n\n' "$OUTPUT_PATH"

ensure_ffmpeg
mkdir -p "$OUTPUT_DIR"

# Detect the input format and build the newline-separated URL list.
#
# Reads:   URLS_FILE
# Writes:  urls    - one URL per line
#          REFERER - first non-empty Referer header found in the JSON input,
#                    empty for plain-text input or when none is present
#
# The file name is handed to Python through sys.argv instead of being pasted
# into the Python source, so names containing quotes cannot break (or inject
# into) the embedded code.
if [ ! -r "$URLS_FILE" ]; then
  echo "❌ 无法读取URL文件: $URLS_FILE" >&2
  exit 1
fi

# stderr is silenced only for this probe: a parse error simply means
# "not JSON", which selects the plain-text branch below.
if python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$URLS_FILE" 2>/dev/null; then
  # JSON format (CDP capture output)
  echo "检测到JSON格式URL列表"

  # Entries may be {"url": ...} objects or bare strings; entries without a
  # usable URL are skipped instead of aborting the whole extraction.
  urls=$(python3 -c '
import json
import sys

with open(sys.argv[1]) as f:
    data = json.load(f)
entries = data.get("urls", data) if isinstance(data, dict) else data
if not isinstance(entries, (list, dict)):
    entries = []
for e in entries:
    url = e.get("url") if isinstance(e, dict) else e
    if isinstance(url, str) and url:
        print(url)
' "$URLS_FILE")

  # First non-empty Referer header (header name matched case-insensitively),
  # used later for downloads that require it.
  REFERER=$(python3 -c '
import json
import sys

with open(sys.argv[1]) as f:
    data = json.load(f)
entries = data.get("urls", data) if isinstance(data, dict) else data
if not isinstance(entries, (list, dict)):
    entries = []
for e in entries:
    headers = e.get("headers") if isinstance(e, dict) else None
    if not isinstance(headers, dict):
        continue
    referer = next(
        (v for k, v in headers.items() if k.lower() == "referer" and v), ""
    )
    if referer:
        print(referer)
        break
' "$URLS_FILE")
else
  # Plain-text format: drop comment lines, blank lines and CR from CRLF files.
  echo "检测到文本格式URL列表"
  # grep exits 1 when nothing is left; that must not abort under `set -e`
  # (an empty list is reported by the later stages). Readability of the file
  # was already checked above.
  urls=$(tr -d '\r' < "$URLS_FILE" | grep -v -e '^#' -e '^$' || true)
  REFERER=""
fi

# Sort URLs into the global arrays mp4_urls / m3u8_urls / ts_urls / other_urls.
#
# Arguments: $1 - newline-separated URL list
# Globals:   mp4_urls, m3u8_urls, ts_urls, other_urls (reset and written)
#
# Classification rules:
#   1. The extension at the end of the URL path (query string and fragment
#      removed) decides. This keeps ".../mp4:a.mp4/playlist.m3u8" an HLS
#      playlist and ".../seg.ts?src=a.mp4" a TS segment.
#   2. Otherwise fall back to substring matching on the whole URL. For ".ts"
#      the match must be followed by a URL delimiter (or end of string) so
#      that host names such as "www.tsite.com" are not taken for TS segments.
classify_urls() {
  local url path

  mp4_urls=()
  m3u8_urls=()
  ts_urls=()
  other_urls=()

  while IFS= read -r url; do
    url="${url%$'\r'}" # tolerate CRLF input
    [ -z "$url" ] && continue

    path="${url%%\?*}"
    path="${path%%\#*}"

    case "$path" in
      *.m3u8) m3u8_urls+=("$url") ;;
      *.mp4) mp4_urls+=("$url") ;;
      *.ts) ts_urls+=("$url") ;;
      *)
        case "$url" in
          *.mp4*) mp4_urls+=("$url") ;;
          *.m3u8*) m3u8_urls+=("$url") ;;
          *.ts\?* | *.ts\#* | *.ts\&* | *.ts/*) ts_urls+=("$url") ;;
          *) other_urls+=("$url") ;;
        esac
        ;;
    esac
  done <<< "$1"
}

classify_urls "$urls"

echo "URL分类:"
echo "  MP4:  ${#mp4_urls[@]} 个"
echo "  M3U8: ${#m3u8_urls[@]} 个"
echo "  TS:   ${#ts_urls[@]} 个"
echo "  其他: ${#other_urls[@]} 个"
echo ""

# Strategy 1: if an M3U8 playlist was captured, let ffmpeg download the HLS
# stream (preferred; ffmpeg handles the segment list itself).
# Only the first playlist URL is used. On success the script exits here; on
# failure the partial output is removed and the later strategies get a chance.
if [ ${#m3u8_urls[@]} -gt 0 ]; then
  echo "=== 策略: M3U8/HLS流 ==="
  m3u8_url="${m3u8_urls[0]}"
  echo "M3U8地址: $m3u8_url"

  # -headers is an *input* option: ffmpeg applies options to the next file on
  # the command line, so it has to come before -i or the Referer is never
  # sent. Each header line must be terminated by CRLF.
  hls_input_opts=()
  if [ -n "$REFERER" ]; then
    hls_input_opts=(-headers "Referer: ${REFERER}"$'\r\n')
  fi

  # Run inside `if` so a failing ffmpeg does not abort the script via set -e.
  if "$FFMPEG" -y "${hls_input_opts[@]}" -i "$m3u8_url" \
    -c copy -movflags +faststart "$OUTPUT_PATH" \
    && [ -f "$OUTPUT_PATH" ]; then
    echo "✅ 下载完成: $OUTPUT_PATH"
    rm -rf -- "$OUTPUT_DIR" # early exit: do not leave the scratch dir behind
    exit 0
  fi

  echo "⚠️ M3U8下载失败，尝试其他策略" >&2
  # A truncated file must not be mistaken for a finished download later on.
  rm -f -- "$OUTPUT_PATH"
fi

# Strategy 2: exactly one MP4 URL -> download it directly with curl.
# The download counts as successful only when the file is larger than
# 100000 bytes; in that case the script exits here.
if [ ${#mp4_urls[@]} -eq 1 ]; then
  echo "=== 策略: 单个MP4直链 ==="

  # -f: treat HTTP errors as failures instead of saving the error page.
  single_curl_opts=(-f -L)
  if [ -n "$REFERER" ]; then
    single_curl_opts+=(-H "Referer: $REFERER")
  fi

  # Run inside `if` so a failed transfer does not abort the script via set -e.
  if curl "${single_curl_opts[@]}" -o "$OUTPUT_PATH" "${mp4_urls[0]}"; then
    # BSD stat first (macOS), GNU stat second; 0 if neither works.
    single_size=$(stat -f%z "$OUTPUT_PATH" 2>/dev/null \
      || stat -c%s "$OUTPUT_PATH" 2>/dev/null \
      || echo 0)
    if [ -f "$OUTPUT_PATH" ] && [ "$single_size" -gt 100000 ]; then
      echo "✅ 下载完成: $OUTPUT_PATH"
      rm -rf -- "$OUTPUT_DIR" # early exit: do not leave the scratch dir behind
      exit 0
    fi
  else
    echo "⚠️ MP4下载失败" >&2
    # Drop a partial transfer so it is not reported as a finished download.
    rm -f -- "$OUTPUT_PATH"
  fi
fi

# Strategy 3: several MP4 URLs are treated as consecutive fragments.
# Each fragment is downloaded, remuxed to MPEG-TS, and the TS files are then
# joined with ffmpeg's concat protocol into the final MP4.
# This is best effort: a fragment that fails to download or convert is
# reported and skipped rather than aborting the whole run.
if [ ${#mp4_urls[@]} -gt 1 ]; then
  echo "=== 策略: 多MP4分片下载+合并 ==="

  # -f: treat HTTP errors as failures instead of saving the error page.
  seg_curl_opts=(-f -s -L --max-time 300)
  if [ -n "$REFERER" ]; then
    seg_curl_opts+=(-H "Referer: $REFERER")
  fi

  # Download all fragments.
  seg_total=${#mp4_urls[@]}
  seg_failed=0
  idx=0
  for url in "${mp4_urls[@]}"; do
    # Five-digit zero padding keeps glob order equal to download order even
    # with more than 100 fragments (with %02d, seg_100 sorts before seg_11).
    seg_file="$OUTPUT_DIR/seg_$(printf '%05d' "$idx").mp4"
    printf "  下载分片 %02d/%02d..." "$((idx + 1))" "$seg_total"

    if curl "${seg_curl_opts[@]}" -o "$seg_file" "$url"; then
      size=$(stat -f%z "$seg_file" 2>/dev/null || stat -c%s "$seg_file" 2>/dev/null || echo 0)
    else
      size=0
      rm -f -- "$seg_file" # never feed a partial transfer to ffmpeg
    fi

    if [ "$size" -gt 100000 ]; then
      printf " ✅ %.1fMB\n" "$(echo "$size" | awk '{print $1/1024/1024}')"
    else
      echo " ❌"
      seg_failed=$((seg_failed + 1))
    fi

    idx=$((idx + 1))
  done

  if [ "$seg_failed" -gt 0 ]; then
    echo "⚠️ ${seg_failed} 个分片下载异常，合并结果可能不完整" >&2
  fi

  # MP4 -> TS
  echo "[合并] MP4 → TS..."
  for f in "$OUTPUT_DIR"/seg_*.mp4; do
    [ -f "$f" ] || continue
    # ffmpeg's own diagnostics are silenced as in the original; a failure is
    # reported by the message below instead.
    if ! "$FFMPEG" -y -loglevel warning -i "$f" -c copy \
      -bsf:v h264_mp4toannexb -f mpegts "${f%.mp4}.ts" 2>/dev/null; then
      echo "⚠️ 分片转换失败: $f" >&2
      rm -f -- "${f%.mp4}.ts"
    fi
  done

  # Build the "a|b|c" input list for the concat protocol. Glob expansion is
  # already sorted, so there is no need to parse `ls` output.
  echo "[合并] TS → MP4..."
  ts_list=""
  for f in "$OUTPUT_DIR"/seg_*.ts; do
    [ -f "$f" ] || continue
    ts_list="${ts_list:+${ts_list}|}${f}"
  done

  if [ -z "$ts_list" ]; then
    echo "❌ 没有可合并的分片" >&2
  elif "$FFMPEG" -y -loglevel warning -i "concat:$ts_list" -c copy \
    -bsf:a aac_adtstoasc -movflags +faststart "$OUTPUT_PATH" \
    && [ -f "$OUTPUT_PATH" ]; then
    echo "✅ 下载完成: $OUTPUT_PATH"
  else
    echo "⚠️ 分片合并失败" >&2
    rm -f -- "$OUTPUT_PATH"
  fi

  # Intermediate files are no longer needed and must not be picked up by a
  # later strategy that also writes seg_*.ts into the same directory.
  rm -f -- "$OUTPUT_DIR"/seg_*.mp4 "$OUTPUT_DIR"/seg_*.ts
fi

# Strategy 4: TS segments were captured directly and no output exists yet.
# Download every segment and join them with ffmpeg's concat protocol.
# This is best effort: a segment that fails to download is reported and
# skipped rather than aborting the whole run.
if [ ${#ts_urls[@]} -gt 0 ] && [ ! -f "$OUTPUT_PATH" ]; then
  echo "=== 策略: TS分片下载+合并 ==="

  # Start from a clean slate so stale seg_*.ts files cannot end up in the merge.
  rm -f -- "$OUTPUT_DIR"/seg_*.ts

  # -f: treat HTTP errors as failures instead of saving the error page.
  # The captured Referer is sent as well, matching the multi-MP4 strategy.
  ts_curl_opts=(-f -s -L --max-time 120)
  if [ -n "$REFERER" ]; then
    ts_curl_opts+=(-H "Referer: $REFERER")
  fi

  ts_total=${#ts_urls[@]}
  ts_failed=0
  idx=0
  for url in "${ts_urls[@]}"; do
    # Five-digit zero padding keeps glob order equal to download order even
    # with more than 100 segments (with %02d, seg_100 sorts before seg_11).
    ts_file="$OUTPUT_DIR/seg_$(printf '%05d' "$idx").ts"
    printf "  下载TS分片 %02d/%02d..." "$((idx + 1))" "$ts_total"

    if curl "${ts_curl_opts[@]}" -o "$ts_file" "$url"; then
      size=$(stat -f%z "$ts_file" 2>/dev/null || stat -c%s "$ts_file" 2>/dev/null || echo 0)
    else
      size=0
      rm -f -- "$ts_file" # never merge a partial transfer
    fi

    if [ "$size" -gt 1000 ]; then
      echo " ✅"
    else
      echo " ❌"
      ts_failed=$((ts_failed + 1))
    fi
    idx=$((idx + 1))
  done

  if [ "$ts_failed" -gt 0 ]; then
    echo "⚠️ ${ts_failed} 个TS分片下载异常，合并结果可能不完整" >&2
  fi

  # Build the "a|b|c" input list for the concat protocol. Glob expansion is
  # already sorted, so there is no need to parse `ls` output.
  ts_list=""
  for f in "$OUTPUT_DIR"/seg_*.ts; do
    [ -f "$f" ] || continue
    ts_list="${ts_list:+${ts_list}|}${f}"
  done

  if [ -z "$ts_list" ]; then
    echo "❌ 没有可合并的TS分片" >&2
  elif "$FFMPEG" -y -loglevel warning -i "concat:$ts_list" -c copy \
    -movflags +faststart "$OUTPUT_PATH" \
    && [ -f "$OUTPUT_PATH" ]; then
    echo "✅ 下载完成: $OUTPUT_PATH"
  else
    echo "⚠️ TS分片合并失败" >&2
    rm -f -- "$OUTPUT_PATH"
  fi
fi

# Final verification: report size and stream info of the result, or fail.
# A missing or zero-byte output file counts as a failed download.
if [ -s "$OUTPUT_PATH" ]; then
  # BSD stat first (macOS), GNU stat second; 0 if neither works.
  final_size=$(stat -f%z "$OUTPUT_PATH" 2>/dev/null \
    || stat -c%s "$OUTPUT_PATH" 2>/dev/null \
    || echo 0)
  printf "\n✅ 完成: %.1fMB\n" "$(echo "$final_size" | awk '{print $1/1024/1024}')"
  # `ffmpeg -i` without an output file always exits non-zero; only its
  # Duration/Stream lines are wanted, so the pipeline status is ignored.
  "$FFMPEG" -i "$OUTPUT_PATH" -hide_banner 2>&1 \
    | grep -E "Duration|Stream" \
    | head -5 || true
else
  echo "❌ 下载失败"
  # Clean up on the failure path too instead of leaking the scratch directory.
  rm -rf -- "$OUTPUT_DIR"
  exit 1
fi

# Cleanup
rm -rf -- "$OUTPUT_DIR"
echo "临时文件已清理"