文件预览

download_segmented.sh

查看 web-video-downloader网页视频下载器 技能包中的文件内容。

文件内容

scripts/download_segmented.sh

#!/bin/bash
# 分段视频下载脚本
# 用法: bash download_segmented.sh <api_url_or_vid> <output_path> [--site sohu]
#
# 支持的站点:
#   sohu - 搜狐视频(默认)
#   其他站点可扩展
#
# 示例:
#   bash download_segmented.sh 660968419 ~/Desktop/video.mp4 --site sohu
#   bash download_segmented.sh "https://my.tv.sohu.com/play/videonew.do?vid=660968419" ~/Desktop/video.mp4

set -e

INPUT="${1:?用法: download_segmented.sh <api_url_or_vid> <output_path> [--site <site>]}"
OUTPUT_PATH="${2:-~/Desktop/video.mp4}"
SITE="sohu"

# 解析参数
shift 2 2>/dev/null || true
while [[ $# -gt 0 ]]; do
  case $1 in
    --site) SITE="$2"; shift 2 ;;
    *) shift ;;
  esac
done

OUTPUT_DIR="/tmp/video_segments_$$"
BATCH_SIZE=5  # 每N个分片刷新API token

# 确保ffmpeg可用
ensure_ffmpeg() {
  if command -v ffmpeg &>/dev/null; then
    FFMPEG="ffmpeg"
  elif [ -x "/tmp/ffmpeg" ]; then
    FFMPEG="/tmp/ffmpeg"
  else
    echo "[ffmpeg] 下载静态版本..."
    curl -sL 'https://evermeet.cx/ffmpeg/getrelease/ffmpeg/zip' -o /tmp/ffmpeg.zip
    cd /tmp && unzip -o ffmpeg.zip && chmod +x /tmp/ffmpeg
    FFMPEG="/tmp/ffmpeg"
  fi
}

# ===== 搜狐视频下载 =====
download_sohu() {
  local vid="$1"
  
  # 构造API URL
  if [[ "$vid" == http* ]]; then
    API_URL="$vid"
  else
    API_URL="https://my.tv.sohu.com/play/videonew.do?vid=$vid&ver=1&ssl=1"
  fi
  
  echo "=== 搜狐视频下载 ==="
  echo "API: $API_URL"
  
  # 获取API数据
  get_api_data() {
    curl -s -L --max-time 15 \
      -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" \
      -H "Referer: https://yule.sohu.com/" \
      "$API_URL"
  }
  
  # 获取视频信息
  api_data=$(get_api_data)
  video_name=$(echo "$api_data" | python3 -c "import sys,json; print(json.load(sys.stdin)['data']['tvName'])" 2>/dev/null || echo "未知视频")
  total_segments=$(echo "$api_data" | python3 -c "import sys,json; print(len(json.load(sys.stdin)['data']['mp4PlayUrl']))" 2>/dev/null || echo "25")
  total_duration=$(echo "$api_data" | python3 -c "import sys,json; d=json.load(sys.stdin)['data']; print(sum(d['clipsDuration'])//1000)" 2>/dev/null || echo "?")
  
  echo "视频名称: $video_name"
  echo "分片数量: $total_segments"
  echo "总时长: ${total_duration}秒 ($((total_duration/60))分钟)"
  echo ""
  
  # 下载单个分片
  download_segment() {
    local index=$1
    local api_url=$2
    
    # 获取CDN真实地址
    cdn_url=$(curl -s -L --max-time 15 \
      -H "User-Agent: Mozilla/5.0" \
      -H "Referer: https://yule.sohu.com/" \
      "$api_url" 2>/dev/null | \
      python3 -c "import sys,json; d=json.load(sys.stdin); print(d['servers'][0]['url'] if d.get('servers') else '')" 2>/dev/null)
    
    if [ -z "$cdn_url" ]; then
      echo "  [$index] ❌ CDN获取失败"
      return 1
    fi
    
    # 下载分片
    curl -s -L --max-time 300 \
      -H "User-Agent: Mozilla/5.0" \
      -H "Referer: https://yule.sohu.com/" \
      "$cdn_url" -o "$OUTPUT_DIR/seg_$(printf '%02d' $index).mp4"
    
    local size
    size=$(stat -f%z "$OUTPUT_DIR/seg_$(printf '%02d' $index).mp4" 2>/dev/null || stat -c%s "$OUTPUT_DIR/seg_$(printf '%02d' $index).mp4" 2>/dev/null || echo 0)
    if [ "$size" -gt 100000 ]; then
      printf "  [%02d/%02d] ✅ %.1fMB\n" "$index" "$((total_segments-1))" "$(echo "$size" | awk '{print $1/1024/1024}')"
      return 0
    else
      echo "  [$index] ❌ 下载失败 (${size}B)"
      return 1
    fi
  }
  
  # 下载所有分片
  echo "=== 下载分片 ==="
  success=0
  failed=()
  
  for i in $(seq 0 $((total_segments-1))); do
    # 每BATCH_SIZE个刷新API
    if [ $((i % BATCH_SIZE)) -eq 0 ]; then
      api_data=$(get_api_data)
    fi
    
    api_url=$(echo "$api_data" | python3 -c "import sys,json; print(json.load(sys.stdin)['data']['mp4PlayUrl'][$i])")
    
    if download_segment "$i" "$api_url"; then
      success=$((success + 1))
    else
      failed+=($i)
    fi
  done
  
  echo ""
  echo "下载完成: $success/$total_segments"
  
  if [ ${#failed[@]} -gt 0 ]; then
    echo "失败分片: ${failed[*]}"
    # 重试失败的分片
    echo "重试失败分片..."
    for idx in "${failed[@]}"; do
      api_data=$(get_api_data)
      api_url=$(echo "$api_data" | python3 -c "import sys,json; print(json.load(sys.stdin)['data']['mp4PlayUrl'][$idx])")
      download_segment "$idx" "$api_url" && success=$((success+1))
    done
  fi
  
  if [ $success -lt $((total_segments - 5)) ]; then
    echo "❌ 分片下载不足,无法合并"
    exit 1
  fi
}

# ===== 合并视频 =====
merge_video() {
  echo ""
  echo "=== 合并视频 ==="
  
  # MP4 → TS
  echo "[1/3] MP4 → TS 转换..."
  for f in "$OUTPUT_DIR"/seg_*.mp4; do
    [ -f "$f" ] || continue
    ts_file="${f%.mp4}.ts"
    $FFMPEG -i "$f" -c copy -bsf:v h264_mp4toannexb -f mpegts "$ts_file" -y -loglevel warning 2>/dev/null
    echo "  $(basename "$f") → $(basename "$ts_file")"
  done
  
  # concat protocol 合并
  echo "[2/3] TS → MP4 合并..."
  local ts_list=""
  for f in $(ls "$OUTPUT_DIR"/seg_*.ts 2>/dev/null | sort); do
    [ -n "$ts_list" ] && ts_list="${ts_list}|"
    ts_list="${ts_list}${f}"
  done
  
  if [ -z "$ts_list" ]; then
    echo "❌ 没有TS文件可合并"
    exit 1
  fi
  
  $FFMPEG -i "concat:$ts_list" \
    -c copy -bsf:a aac_adtstoasc \
    -movflags +faststart \
    "$OUTPUT_PATH" -y -loglevel warning
  
  # 验证
  echo "[3/3] 验证..."
  if [ -f "$OUTPUT_PATH" ]; then
    local final_size
    final_size=$(stat -f%z "$OUTPUT_PATH" 2>/dev/null || stat -c%s "$OUTPUT_PATH")
    printf "\n✅ 完成: %.1fMB\n" "$(echo "$final_size" | awk '{print $1/1024/1024}')"
    echo "   文件: $OUTPUT_PATH"
    
    # 显示视频信息
    $FFMPEG -i "$OUTPUT_PATH" -hide_banner 2>&1 | grep -E "Duration|Stream" | head -5
  else
    echo "❌ 合并失败"
    exit 1
  fi
}

# ===== 主流程 =====
echo "=========================================="
echo "  通用网页视频下载器 - 分段模式"
echo "=========================================="
echo ""

ensure_ffmpeg
mkdir -p "$OUTPUT_DIR"

case "$SITE" in
  sohu)
    download_sohu "$INPUT"
    ;;
  *)
    echo "❌ 暂不支持站点: $SITE"
    echo "支持: sohu"
    echo ""
    echo "其他站点请使用CDP监听模式:"
    echo "  node scripts/cdp_capture.js <cdp_ws_url>"
    echo "  bash scripts/download_from_urls.sh /tmp/captured_video_urls.json output.mp4"
    exit 1
    ;;
esac

merge_video

# 清理
echo ""
read -p "清理临时分片文件?[Y/n] " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Nn]$ ]]; then
  rm -rf "$OUTPUT_DIR"
  echo "✅ 临时文件已清理"
else
  echo "📁 临时文件保留在: $OUTPUT_DIR"
fi