Python合并B站APP视频缓存文件为MP4文件并将同一UP主的视频按文件夹归档

MowChan · 发表于 2026-2-21 22:31

功能说明

B 站的 Android App 缓存在手机的视频通常被音视频分离为 audio.m4s 和 video.m4s 两个文件，大多数的视频封面未缓存到手机，弹幕也都是以 danmaku.xml 的文件名存储在各个视频对应的目录中。而下面的代码可遍历 B 站缓存文件的目录，读取每个视频目录下 entry.json 中的元信息，并调用 FFmpeg 将分离的音视频合成为 MP4 格式，同时下载封面、复制弹幕文件，以视频的名称命名，并按 UP 主的 ID 和名称为目录进行归档，将文件修改时间改为与原始视频一致。

其中，代码中的 ts13_to_year、ts13_to_iso8601、set_file_timestamps 函数使用 AI 编写，路径处理时 pathlib 模块的最佳实践写法参考 AI 给出的框架。

代码实测成功，并且完成整个目录的视频转换。经过测试，50GB 大小的视频缓存目录可在 72 分钟内全部转换完成（FFmpeg 未开启多线程视频转换和硬件加速）。

运行环境

Windows 系统
Android 平台的哔哩哔哩
FFmpeg 主流版本
Python 3.10.x 及以上
requests 网络请求库（用于下载封面图片；源代码并未导入或使用 Pillow 图像处理库）

源代码

import os
import re
import time
import json
import shutil
import requests
import datetime
import subprocess
from pathlib import Path

def sanitize_filename(filename):
    """Return *filename* made safe for use as a single Windows path component.

    Every character Windows forbids in a file name (backslash, slash, colon,
    asterisk, question mark, double quote, angle brackets, pipe) and every
    ASCII control character is replaced with an underscore. The result is
    capped at 200 characters and trailing dots/spaces are removed, because
    Windows does not keep them at the end of a name.

    Args:
        filename: Raw name, e.g. a video title or an uploader name.

    Returns:
        The sanitized name. It can be empty when nothing usable remains.
    """
    # The backslash has to be listed on its own (``\\`` inside the class):
    # ``\/`` is merely an escaped forward slash and does not match ``\``.
    illegal_chars = r'[\\/:*?"<>|\x00-\x1f]'
    sanitized = re.sub(illegal_chars, '_', filename)
    # Truncate first so the stripped result can never end in a dot or space.
    return sanitized[:200].rstrip(' .')

def get_ffmpeg_path():
    """Locate the ``ffmpeg`` executable on the system search path.

    Returns:
        The path reported by ``shutil.which("ffmpeg")``.

    Raises:
        FileNotFoundError: If no ``ffmpeg`` executable can be found.
    """
    located = shutil.which("ffmpeg")
    if not located:
        raise FileNotFoundError("未找到 FFmpeg，请确保已安装并添加到系统环境变量")
    return located

def ts13_to_year(ts13):
    """Return the UTC calendar year of a millisecond timestamp as a string.

    Args:
        ts13: Millisecond Unix timestamp (an int, or anything ``int()``
            accepts, such as a numeric string).

    Returns:
        The four-digit year, e.g. ``"2023"``.
    """
    seconds = int(ts13) // 1000
    # datetime.timezone.utc is used instead of the datetime.UTC alias, which
    # only exists on Python 3.11 and later.
    return str(datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc).year)

def ts13_to_iso8601(ts13: int = None) -> str:
    """13位毫秒时间戳转ISO 8601格式"""
    ts = int(ts13) if ts13 else int(datetime.datetime.now().timestamp() * 1000)
    if len(str(ts)) != 13:
        raise ValueError("时间戳必须为13位毫秒级")

    return (datetime.datetime.fromtimestamp(ts//1000, datetime.UTC)
            .replace(microsecond=(ts%1000)*1000)
            .strftime("%Y-%m-%dT%H:%M:%S.%fZ"))

def _set_windows_creation_time(file_path, create_time):
    """Set the creation time of *file_path* through the Win32 SetFileTime API.

    Args:
        file_path: Path of an existing file.
        create_time: Creation time in seconds since the Unix epoch.

    Raises:
        ValueError: If the time cannot be represented as a Windows FILETIME.
        OSError: If the file cannot be opened or its time cannot be written.
    """
    # Imported here because these names are only needed (and WinDLL only
    # exists) on Windows.
    import ctypes
    from ctypes import wintypes

    # FILETIME counts 100-nanosecond ticks since 1601-01-01; the Unix epoch
    # begins 116444736000000000 ticks after that.
    ticks = int(create_time * 10_000_000) + 116_444_736_000_000_000
    if not 0 <= ticks < (1 << 63):
        raise ValueError(f"creation time out of range: {create_time}")
    creation = wintypes.FILETIME(ticks & 0xFFFFFFFF, ticks >> 32)

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.CreateFileW.argtypes = (
        wintypes.LPCWSTR, wintypes.DWORD, wintypes.DWORD, wintypes.LPVOID,
        wintypes.DWORD, wintypes.DWORD, wintypes.HANDLE,
    )
    kernel32.CreateFileW.restype = wintypes.HANDLE
    kernel32.SetFileTime.argtypes = (
        wintypes.HANDLE,
        ctypes.POINTER(wintypes.FILETIME),
        ctypes.POINTER(wintypes.FILETIME),
        ctypes.POINTER(wintypes.FILETIME),
    )
    kernel32.SetFileTime.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.CreateFileW(
        str(file_path),
        0x0100,      # FILE_WRITE_ATTRIBUTES
        0x0007,      # FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
        None,
        3,           # OPEN_EXISTING
        0x02000000,  # FILE_FLAG_BACKUP_SEMANTICS
        None,
    )
    # INVALID_HANDLE_VALUE is (HANDLE)-1.
    if handle is None or handle == ctypes.c_void_p(-1).value:
        raise ctypes.WinError(ctypes.get_last_error())
    try:
        # NULL for the access/write times leaves those two values untouched.
        if not kernel32.SetFileTime(handle, ctypes.byref(creation), None, None):
            raise ctypes.WinError(ctypes.get_last_error())
    finally:
        kernel32.CloseHandle(handle)


def set_file_timestamps(file_path, create_ts, update_ts):
    """Stamp a file with the creation/modification times of the source video.

    The access and modification times are both set to *update_ts* with
    ``os.utime``. ``os.utime`` cannot change the creation time, so on Windows
    it is additionally written through the Win32 API; on other platforms
    *create_ts* is only reported in the printed message.

    Args:
        file_path: File whose timestamps are changed.
        create_ts: Creation time as a millisecond Unix timestamp.
        update_ts: Modification time as a millisecond Unix timestamp.

    Raises:
        OSError: If ``os.utime`` fails (for example, the file is missing).
    """
    create_time = create_ts / 1000
    update_time = update_ts / 1000
    os.utime(str(file_path), (update_time, update_time))

    if os.name == "nt":
        try:
            _set_windows_creation_time(file_path, create_time)
        except (OSError, ValueError) as e:
            # Best effort: the modification time is already in place, so a
            # failure here is reported instead of aborting the batch.
            print(f"警告：无法设置文件创建时间：{e}")
            return

    print(f"成功设置文件时间：创建时间={time.ctime(create_time)}, 修改时间={time.ctime(update_time)}")

def get_filename_without_ext(file_path):
    """Return the final path component of *file_path* minus its last extension.

    Args:
        file_path: A path given as a string or a path object.

    Returns:
        The base name with only the last suffix removed
        (``"x.tar.gz"`` becomes ``"x.tar"``).
    """
    stem, _extension = os.path.splitext(os.path.basename(str(file_path)))
    return stem

def download_cover_image(cover_url, save_path, timeout=10):
    """Download the cover image at *cover_url* and store it at *save_path*.

    The body is streamed into a sibling ``<name>.part`` file and moved onto
    *save_path* only after the whole download succeeded, so a failed or
    interrupted transfer never leaves a truncated image under the final name.

    Args:
        cover_url: URL of the cover image.
        save_path: Destination file (string or path object).
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        True when the image was saved, False when anything went wrong.
    """
    target = Path(save_path)
    partial = target.with_name(target.name + ".part")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    try:
        # The context manager releases the streamed connection even when the
        # status check or the file write fails.
        with requests.get(cover_url, headers=headers, timeout=timeout, stream=True) as response:
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial, target)
    except Exception as e:
        # Deliberately broad: the cover is optional, so no download problem
        # may interrupt the surrounding batch conversion.
        try:
            partial.unlink(missing_ok=True)
        except OSError:
            pass
        print(f"下载封面失败：{e}")
        return False

    print(f"封面图片已下载：{save_path}")
    return True

def get_available_filename(base_path, original_name, part_name=None):
    """Choose the output ``.mp4`` path, asking the user when the name is taken.

    If ``<original_name>.mp4`` does not exist in *base_path* it is returned
    immediately. Otherwise the user is prompted until a valid choice is made:
    1 replaces the existing file, 2 skips the video, 3 renames it. Renaming
    first tries ``<original_name>_<part_name>`` (when a non-blank part name
    is given) and then falls back to ``<original_name>_<n>`` with the
    smallest unused positive ``n``.

    Args:
        base_path: Directory (path object) that will hold the output file.
        original_name: Desired file name without extension.
        part_name: Optional title of the individual part of a multi-part video.

    Returns:
        The path to write to, or None when the user chose to skip.
    """
    default_target = base_path / f"{original_name}.mp4"
    if not default_target.exists():
        return default_target

    print(f"\n文件已存在：{default_target}")
    while True:
        answer = input("请选择处理方式 1.替换；2.忽略；3.重命名").strip()
        if answer == "1":
            return default_target
        if answer == "2":
            return None
        if answer != "3":
            print("输入无效，请选择1、2或3！")
            continue

        # Rename: prefer a name that carries the part title.
        if part_name and part_name.strip():
            part_based = sanitize_filename(f"{original_name}_{part_name}")
            part_target = base_path / f"{part_based}.mp4"
            if not part_target.exists():
                return part_target

        # Otherwise append the first free numeric suffix.
        counter = 1
        candidate = base_path / f"{original_name}_{counter}.mp4"
        while candidate.exists():
            counter += 1
            candidate = base_path / f"{original_name}_{counter}.mp4"
        return candidate

def process_video(root_dir):
    """Merge every cached video under *root_dir* into an archived MP4.

    Each ``entry.json`` found recursively below *root_dir* describes one
    cached video. For every entry the function:

    1. reads the metadata (title, bvid, uploader, timestamps, cover URL);
    2. locates ``audio.m4s`` / ``video.m4s`` in the ``type_tag`` sub-directory
       and skips the entry when either is missing;
    3. creates ``<parent of root_dir>/output/<owner_id>_<owner_name>/``;
    4. resolves file-name collisions interactively;
    5. runs FFmpeg to mux both streams into one MP4 with metadata tags;
    6. on success stamps the file times, downloads the cover and copies
       ``danmaku.xml`` next to the video under the same base name.

    An entry whose ``entry.json`` cannot be read or parsed is reported and
    skipped instead of aborting the whole run.

    Args:
        root_dir: Root directory of the cache.

    Raises:
        FileNotFoundError: If FFmpeg is not available.
    """
    ffmpeg_path = get_ffmpeg_path()
    root_path = Path(root_dir).resolve()

    for entry_path in root_path.rglob("entry.json"):
        try:
            with open(entry_path, 'r', encoding='utf-8') as f:
                entry_data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueError
            # subclasses; one broken entry must not stop the batch.
            print(f"警告：{entry_path} 无法读取或解析，跳过：{e}")
            continue

        # Extract the core metadata.
        now_ms = int(time.time() * 1000)
        title = entry_data.get("title", "")
        bvid = entry_data.get("bvid", "")
        owner_id = entry_data.get("owner_id", "")
        owner_name = entry_data.get("owner_name", "")
        type_tag = entry_data.get("type_tag", "")
        cover_url = entry_data.get("cover", "")
        # "or" also covers keys that are present but null/0.
        time_create_stamp = entry_data.get("time_create_stamp") or now_ms
        time_update_stamp = entry_data.get("time_update_stamp") or now_ms
        creation_time = ts13_to_iso8601(time_create_stamp)
        year = ts13_to_year(time_create_stamp)
        page_data = entry_data.get("page_data") or {}
        part_name = page_data.get("part", "")

        # Build the input paths.
        output_filename = sanitize_filename(title) + ".mp4"
        entry_dir = entry_path.parent
        media_dir = entry_dir / type_tag
        audio_file = media_dir / "audio.m4s"
        video_file = media_dir / "video.m4s"
        danmaku_file = entry_dir / "danmaku.xml"

        # Both streams are required.
        if not audio_file.exists() or not video_file.exists():
            print(f"警告：{entry_path} 音视频文件缺失，跳过")
            continue

        # Create the per-uploader archive directory.
        owner_dir_name = f"{owner_id}_{sanitize_filename(owner_name)}"
        output_dir = root_path.parent / "output" / owner_dir_name
        output_dir.mkdir(parents=True, exist_ok=True)

        # Resolve a file-name collision (may prompt the user).
        output_video_path = get_available_filename(output_dir, get_filename_without_ext(output_filename), part_name)
        if output_video_path is None:
            print(f"跳过处理：{title}")
            continue

        final_filename = get_filename_without_ext(output_video_path)

        # Build the FFmpeg command.
        cmd = [
            ffmpeg_path,
            # Reaching this point with an existing output file means the user
            # chose "replace". Without -y FFmpeg would wait for an overwrite
            # confirmation that is invisible because its output is captured;
            # -nostdin keeps it from reading the console at all.
            "-y",
            "-nostdin",
            "-i", str(video_file),
            "-i", str(audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-metadata", f"title={title}",
            "-metadata", f"comment={bvid}",
            "-metadata", f"artist={owner_name}",
            "-metadata", f"creation_time={creation_time}",
            "-metadata", f"date={year}",
            "-hide_banner",
            "-loglevel", "error",
            str(output_video_path)
        ]

        # Run the merge.
        print(f"正在处理：{title} -> {output_video_path}")
        # errors='replace' keeps undecodable bytes in FFmpeg's messages from
        # raising UnicodeDecodeError.
        result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8', errors='replace')

        if result.returncode != 0:
            print(f"错误：处理 {title} 失败，FFmpeg输出：{result.stderr}")
            continue

        print(f"成功：{title} 已保存到 {output_video_path}")
        set_file_timestamps(output_video_path, time_create_stamp, time_update_stamp)

        # Download the cover.
        if cover_url:
            cover_save_path = output_dir / f"{final_filename}.jpg"
            download_cover_image(cover_url, cover_save_path)

        # Copy the danmaku (comment overlay) file.
        if danmaku_file.exists():
            danmaku_save_path = output_dir / f"{final_filename}.xml"
            shutil.copy2(danmaku_file, danmaku_save_path)
            print(f"弹幕文件已复制：{danmaku_save_path}")
        else:
            print(f"警告：未找到弹幕文件 {danmaku_file}")

def main():
    """Prompt for the cache root directory and convert everything below it.

    The entered path is stripped of surrounding whitespace. If it is not an
    existing directory an error is printed and nothing else happens.
    """
    root_dir = input("请输入要处理的根目录路径：").strip()
    if os.path.isdir(root_dir):
        process_video(root_dir)
        print("\n所有可处理的视频已处理完成！")
    else:
        print("错误：输入的目录不存在！")

# Start the interactive workflow only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

涉及到的目录、数据文件

哔哩哔哩 App 视频缓存目录结构演示

bilibili
├── 1******302
│   └── c1*******94
│       ├── 112
│       │   ├── audio.m4s
│       │   ├── index.json
│       │   └── video.m4s
│       ├── danmaku.xml
│       └── entry.json
├── 1******015
│   └── c1*******71
│       ├── 112
│       │   ├── audio.m4s
│       │   ├── index.json
│       │   └── video.m4s
│       ├── danmaku.xml
│       └── entry.json
├── 1******885
│   └── c2*******27
│       ├── 112
│       │   ├── audio.m4s
│       │   ├── index.json
│       │   └── video.m4s
│       ├── danmaku.xml
│       └── entry.json
├── 1******488
│   ├── c2*******09
│   │   ├── 112
│   │   │   ├── audio.m4s
│   │   │   ├── index.json
│   │   │   └── video.m4s
│   │   ├── danmaku.xml
│   │   └── entry.json
│   └── c2********50
│       ├── 80
│       │   ├── audio.m4s
│       │   ├── index.json
│       │   └── video.m4s
│       ├── danmaku.xml
│       └── entry.json

`entry.json` 文件键值结构

{
  "media_type": 2,
  "has_dash_audio": true,
  "is_completed": true,
  "total_bytes": 787******7,
  "downloaded_bytes": 787******7,
  "title": "【***】********班",
  "type_tag": "112",
  "cover": "http://i2.****.com/bfs/archive/92*********1d.jpg",
  "video_quality": 112,
  "prefered_video_quality": 112,
  "guessed_total_bytes": 0,
  "total_time_milli": 102*****2,
  "danmaku_count": 2,
  "time_update_stamp": 173*****604,
  "time_create_stamp": 173*****605,
  "can_play_in_advance": true,
  "interrupt_transform_temp_file": false,
  "quality_pithy_description": "1080P",
  "quality_superscript": "高码率",
  "variable_resolution_ratio": false,
  "cache_version_code": 8****00,
  "preferred_audio_quality": 0,
  "audio_quality": 0,
  "avid": 113*******9,
  "spid": 0,
  "seasion_id": 0,
  "bvid": "BV19*******kg",
  "owner_id": 4*******92,
  "owner_name": "*******区",
  "is_charge_video": false,
  "verification_code": 0,
  "page_data": {
    "cid": 262*******6,
    "page": 1,
    "from": "vupload",
    "part": "【***】*********班",
    "link": "",
    "rich_vid": "",
    "has_alias": false,
    "tid": 21,
    "width": 1920,
    "height": 1080,
    "rotate": 0,
    "download_title": "视频已缓存完成",
    "download_subtitle": "【***】*********班"
  }
}

代码存在的问题

L108 遇到输出文件名相同的视频时，如果选择“替换”，FFmpeg 可能会一直阻塞、无法完成合成：输出文件已存在时，FFmpeg 默认会等待用户确认是否覆盖，而脚本捕获了它的输出，确认提示不会显示出来。后续将尝试先直接删除现有视频（或为 FFmpeg 加上 -y 参数），验证能否实现替换。

无名 · 发表于 2026-2-21 22:42

一行简短的ffmpeg的事

nknightS · 发表于 2026-2-21 23:07

加油继续探索吧虽然很基础的东西不过比其他只会玩游戏的同学好多了

Pythonless · 发表于 2026-2-22 00:17

提示: 作者被禁止或删除内容自动屏蔽

picoyiyi · 发表于 2026-2-22 00:26

用浏览器插件搭配IDM也可以很完美的满足

52kail · 发表于 2026-2-22 05:24

很欣赏这种研究的精神，加油加油！

yoyomi · 发表于 2026-2-22 10:27

学习了，支持一下

smallsinger629 · 发表于 2026-2-22 10:39

原来还能这样

木头人_01 · 发表于 2026-2-22 20:02

还没有学习到这一步呢加油加油

pptx · 发表于 2026-3-16 14:20

感谢楼主分享，学习了，了解的知识又增加了

帐号		自动登录	找回密码
密码			注册[Register]

Pythonless Pythonless 当前离线好友阅读权限 0 听众最后登录 1970-1-1 头像被屏蔽	Pythonless 发表于 2026-2-22 00:17 《站点帮助文档》有什么问题来这里看看吧，这里有你想知道的内容！提示: 作者被禁止或删除内容自动屏蔽
	呼吁大家发布原创作品添加吾爱破解论坛标识！
	回复举报

[Python 转载] Python合并B站APP视频缓存文件为MP4文件并将同一UP主的视频按文件夹归档

功能说明

运行环境

源代码

涉及到的目录、数据文件

哔哩哔哩 App 视频缓存目录结构演示

`entry.json` 文件键值结构

代码存在的问题

免费评分

[Python 转载] Python合并B站APP视频缓存文件为MP4文件并将同一UP主的视频按文件夹归档

功能说明

运行环境

源代码

涉及到的目录、数据文件

哔哩哔哩 App 视频缓存目录结构演示

entry.json 文件键值结构

代码存在的问题

免费评分

`entry.json` 文件键值结构