功能说明
B 站的 Android App 缓存到手机的视频通常被拆分为 audio.m4s(音频)和 video.m4s(视频)两个文件,大多数视频的封面并未缓存到手机,弹幕则以 danmaku.xml 为文件名存储在各个视频对应的目录中。下面的代码可遍历 B 站缓存文件的目录,读取每个视频目录下 entry.json 中的元信息,调用 FFmpeg 将分离的音视频合成为 MP4 格式,同时下载封面、复制弹幕文件,并以视频的名称命名;输出文件以「UP 主 ID_UP 主名称」作为目录名进行归档,视频文件的修改时间会被改为与原始视频一致。
其中,代码中的 ts13_to_year、ts13_to_iso8601、set_file_timestamps 函数使用 AI 编写,路径处理时 pathlib 模块的最佳实践写法参考 AI 给出的框架。
代码实测成功,并且完成整个目录的视频转换。经过测试,50GB 大小的视频缓存目录可在 72 分钟内全部转换完成(FFmpeg 未开启多线程视频转换和硬件加速)。
运行环境
- Windows 系统
- Android 平台的哔哩哔哩
- FFmpeg 主流版本
- Python 3.10.x 及以上
- requests 网络请求库(源代码通过它下载封面;代码中并未使用 Pillow)
源代码
import os
import re
import time
import json
import shutil
import requests
import datetime
import subprocess
from pathlib import Path
def sanitize_filename(filename):
    """Replace characters that are illegal in Windows file names and cap the length.

    Args:
        filename: Raw name, e.g. a video title or an uploader name.

    Returns:
        The name with every path separator, reserved punctuation character
        and ASCII control character replaced by ``_``, truncated to at most
        200 characters.
    """
    # The backslash is listed explicitly ("\\\\" in the character class) so a
    # title containing "\" cannot introduce an extra path component on
    # Windows. Control characters are rejected by Windows as well.
    illegal_chars = r'[\\/:*?"<>|\x00-\x1f]'
    sanitized = re.sub(illegal_chars, '_', filename)
    return sanitized[:200]
def get_ffmpeg_path():
    """Locate the FFmpeg executable through the system PATH.

    Returns:
        The path reported by ``shutil.which("ffmpeg")``.

    Raises:
        FileNotFoundError: If no ``ffmpeg`` executable can be found.
    """
    located = shutil.which("ffmpeg")
    if not located:
        raise FileNotFoundError("未找到 FFmpeg,请确保已安装并添加到系统环境变量")
    return located
def ts13_to_year(ts13):
    """Return the UTC calendar year of a millisecond Unix timestamp.

    Args:
        ts13: Millisecond timestamp as an int, or anything ``int()`` accepts.

    Returns:
        The four-digit year as a string, evaluated in UTC.
    """
    seconds = int(ts13) // 1000
    # datetime.timezone.utc is used instead of the datetime.UTC alias, which
    # only exists on Python 3.11+.
    moment = datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
    return str(moment.year)
def ts13_to_iso8601(ts13: int = None) -> str:
"""13位毫秒时间戳转ISO 8601格式"""
ts = int(ts13) if ts13 else int(datetime.datetime.now().timestamp() * 1000)
if len(str(ts)) != 13:
raise ValueError("时间戳必须为13位毫秒级")
return (datetime.datetime.fromtimestamp(ts//1000, datetime.UTC)
.replace(microsecond=(ts%1000)*1000)
.strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
def _set_windows_creation_time(path, create_ts_ms):
    """Set the creation time of *path* through the Win32 ``SetFileTime`` API."""
    # Imported locally: WinDLL / WinError only exist on Windows builds.
    import ctypes
    from ctypes import wintypes

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.CreateFileW.argtypes = (
        wintypes.LPCWSTR, wintypes.DWORD, wintypes.DWORD, wintypes.LPVOID,
        wintypes.DWORD, wintypes.DWORD, wintypes.HANDLE,
    )
    kernel32.CreateFileW.restype = wintypes.HANDLE
    kernel32.SetFileTime.argtypes = (
        wintypes.HANDLE,
        ctypes.POINTER(wintypes.FILETIME),
        ctypes.POINTER(wintypes.FILETIME),
        ctypes.POINTER(wintypes.FILETIME),
    )
    kernel32.SetFileTime.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
    kernel32.CloseHandle.restype = wintypes.BOOL

    # FILETIME counts 100 ns ticks since 1601-01-01; 1 ms == 10_000 ticks.
    ticks = int(create_ts_ms) * 10_000 + 116_444_736_000_000_000
    creation = wintypes.FILETIME(ticks & 0xFFFFFFFF, ticks >> 32)

    file_write_attributes = 0x0100
    share_all = 0x7  # FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
    open_existing = 3
    file_attribute_normal = 0x80
    handle = kernel32.CreateFileW(
        path, file_write_attributes, share_all, None,
        open_existing, file_attribute_normal, None,
    )
    if handle is None or handle == wintypes.HANDLE(-1).value:
        raise ctypes.WinError(ctypes.get_last_error())
    try:
        # NULL for the access/write times leaves them untouched.
        if not kernel32.SetFileTime(handle, ctypes.byref(creation), None, None):
            raise ctypes.WinError(ctypes.get_last_error())
    finally:
        kernel32.CloseHandle(handle)


def set_file_timestamps(file_path, create_ts, update_ts):
    """Set a file's modification time and, on Windows, its creation time.

    Args:
        file_path: Path of the file to update.
        create_ts: Creation time as a millisecond Unix timestamp.
        update_ts: Modification time as a millisecond Unix timestamp; it is
            also written as the access time.

    Raises:
        OSError: If the access/modification time cannot be set. A failure to
            set the creation time is reported but not raised.
    """
    create_time = create_ts / 1000
    update_time = update_ts / 1000
    os.utime(str(file_path), (update_time, update_time))

    # os.utime cannot change the creation time; only Windows offers an API
    # for it, so other platforms keep the creation time as it is.
    creation_set = False
    if os.name == "nt":
        try:
            _set_windows_creation_time(str(file_path), create_ts)
            creation_set = True
        except OSError as e:
            print(f"警告:设置创建时间失败:{e}")

    if creation_set:
        print(f"成功设置文件时间:创建时间={time.ctime(create_time)}, 修改时间={time.ctime(update_time)}")
    else:
        print(f"成功设置文件时间:修改时间={time.ctime(update_time)}(创建时间未修改)")
def get_filename_without_ext(file_path):
    """Return the last component of *file_path* with its final extension removed.

    Args:
        file_path: A path string or path-like object; it is converted with
            ``str()`` before processing.

    Returns:
        The base name without the last ``.ext`` suffix.
    """
    stem, _extension = os.path.splitext(os.path.basename(str(file_path)))
    return stem
def download_cover_image(cover_url, save_path, timeout=10):
    """Download a cover image to *save_path* on a best-effort basis.

    Args:
        cover_url: URL of the cover image.
        save_path: Destination file path.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        True if the image was downloaded and saved, False otherwise. Network
        and file-system errors are reported on stdout instead of being raised.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # Stream into a sibling ".part" file first, so a failed or interrupted
    # download never leaves a truncated image or clobbers an existing cover.
    temp_path = Path(f"{save_path}.part")
    try:
        # The context manager releases the streamed connection even when the
        # body is not fully consumed.
        with requests.get(cover_url, headers=headers, timeout=timeout, stream=True) as response:
            response.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(temp_path, save_path)
    except (requests.RequestException, OSError) as e:
        try:
            temp_path.unlink(missing_ok=True)
        except OSError:
            # Cleanup is best effort; the download failure is reported below.
            pass
        print(f"下载封面失败:{str(e)}")
        return False
    print(f"封面图片已下载:{save_path}")
    return True
def get_available_filename(base_path, original_name, part_name=None):
    """Pick the output ``.mp4`` path, asking the user when the name is taken.

    Args:
        base_path: Directory (``Path``) that will hold the output file.
        original_name: Desired file name without extension.
        part_name: Optional part title, used as a suffix when renaming.

    Returns:
        ``base_path / "<original_name>.mp4"`` if that file does not exist or
        the user chooses to replace it; ``None`` if the user chooses to skip;
        otherwise a not-yet-existing renamed path.
    """
    target = base_path / f"{original_name}.mp4"
    if not target.exists():
        return target

    print(f"\n文件已存在:{target}")
    while True:
        answer = input("请选择处理方式 1.替换;2.忽略;3.重命名").strip()
        if answer == "1":
            return target
        if answer == "2":
            return None
        if answer != "3":
            print("输入无效,请选择1、2或3!")
            continue

        # Rename: prefer "<name>_<part>" when a part title is available.
        if part_name and part_name.strip():
            with_part = sanitize_filename(f"{original_name}_{part_name}")
            renamed = base_path / f"{with_part}.mp4"
            if not renamed.exists():
                return renamed

        # Otherwise fall back to the first free "<name>_<n>" with n = 1, 2, ...
        counter = 1
        candidate = base_path / f"{original_name}_{counter}.mp4"
        while candidate.exists():
            counter += 1
            candidate = base_path / f"{original_name}_{counter}.mp4"
        return candidate
def process_video(root_dir):
    """Merge every cached video under *root_dir* into an archived MP4.

    For each ``entry.json`` found recursively, the metadata is read, the
    ``audio.m4s`` / ``video.m4s`` pair in the ``type_tag`` sub-directory is
    muxed with FFmpeg, and the result is stored in
    ``<root parent>/output/<owner_id>_<owner_name>/``. The cover is downloaded
    and ``danmaku.xml`` is copied next to the video under the same base name.
    A problem with one entry is reported and that entry is skipped, so the
    rest of the batch still runs.

    Args:
        root_dir: Root of the cache directory tree.

    Raises:
        FileNotFoundError: If FFmpeg cannot be located.
    """
    ffmpeg_path = get_ffmpeg_path()
    root_path = Path(root_dir).resolve()
    for entry_path in root_path.rglob("entry.json"):
        # A single unreadable or malformed entry must not abort the batch.
        try:
            with open(entry_path, 'r', encoding='utf-8') as f:
                entry_data = json.load(f)
        except (OSError, ValueError) as e:
            print(f"警告:无法读取 {entry_path}({e}),跳过")
            continue
        if not isinstance(entry_data, dict):
            print(f"警告:{entry_path} 内容格式不正确,跳过")
            continue

        # Core metadata; "or" also covers keys that are present but null.
        title = entry_data.get("title") or ""
        bvid = entry_data.get("bvid") or ""
        owner_id = entry_data.get("owner_id", "")
        owner_name = entry_data.get("owner_name") or ""
        type_tag = str(entry_data.get("type_tag") or "")
        cover_url = entry_data.get("cover") or ""
        now_ms = int(time.time() * 1000)
        try:
            time_create_stamp = int(entry_data.get("time_create_stamp") or now_ms)
            time_update_stamp = int(entry_data.get("time_update_stamp") or now_ms)
            creation_time = ts13_to_iso8601(time_create_stamp)
            year = ts13_to_year(time_create_stamp)
        except (TypeError, ValueError) as e:
            print(f"警告:{entry_path} 时间戳无效({e}),跳过")
            continue
        page_data = entry_data.get("page_data") or {}
        part_name = page_data.get("part") or ""

        # Input and output locations.
        output_filename = sanitize_filename(title) + ".mp4"
        entry_dir = entry_path.parent
        media_dir = entry_dir / type_tag
        audio_file = media_dir / "audio.m4s"
        video_file = media_dir / "video.m4s"
        danmaku_file = entry_dir / "danmaku.xml"

        if not audio_file.exists() or not video_file.exists():
            print(f"警告:{entry_path} 音视频文件缺失,跳过")
            continue

        owner_dir_name = f"{owner_id}_{sanitize_filename(owner_name)}"
        output_dir = root_path.parent / "output" / owner_dir_name
        output_dir.mkdir(parents=True, exist_ok=True)

        # Resolve name clashes (may prompt the user).
        output_video_path = get_available_filename(
            output_dir, get_filename_without_ext(output_filename), part_name
        )
        if output_video_path is None:
            print(f"跳过处理:{title}")
            continue
        final_filename = get_filename_without_ext(output_video_path)

        # FFmpeg writes to a temporary file that is moved into place only on
        # success, so a failed run never destroys an existing video. "-y" and
        # a closed stdin keep FFmpeg from blocking on an overwrite prompt.
        temp_video_path = output_video_path.with_name(f"{final_filename}.part.mp4")
        cmd = [
            ffmpeg_path,
            "-y",
            "-hide_banner",
            "-loglevel", "error",
            "-i", str(video_file),
            "-i", str(audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-metadata", f"title={title}",
            "-metadata", f"comment={bvid}",
            "-metadata", f"artist={owner_name}",
            "-metadata", f"creation_time={creation_time}",
            "-metadata", f"date={year}",
            str(temp_video_path),
        ]

        print(f"正在处理:{title} -> {output_video_path}")
        result = subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            encoding='utf-8',
            # FFmpeg may echo file names in the console code page.
            errors='replace',
        )
        if result.returncode != 0:
            print(f"错误:处理 {title} 失败,FFmpeg输出:{result.stderr}")
            temp_video_path.unlink(missing_ok=True)
            continue
        try:
            temp_video_path.replace(output_video_path)
        except OSError as e:
            print(f"错误:处理 {title} 失败,无法写入 {output_video_path}:{e}")
            temp_video_path.unlink(missing_ok=True)
            continue
        print(f"成功:{title} 已保存到 {output_video_path}")
        set_file_timestamps(output_video_path, time_create_stamp, time_update_stamp)

        # Cover image (best effort; failures are reported by the helper).
        if cover_url:
            cover_save_path = output_dir / f"{final_filename}.jpg"
            download_cover_image(cover_url, cover_save_path)

        # Danmaku file.
        if danmaku_file.exists():
            danmaku_save_path = output_dir / f"{final_filename}.xml"
            shutil.copy2(danmaku_file, danmaku_save_path)
            print(f"弹幕文件已复制:{danmaku_save_path}")
        else:
            print(f"警告:未找到弹幕文件 {danmaku_file}")
def main():
    """Ask for the cache root directory and process it if it exists."""
    target_dir = input("请输入要处理的根目录路径:").strip()
    if os.path.isdir(target_dir):
        process_video(target_dir)
        print("\n所有可处理的视频已处理完成!")
    else:
        print("错误:输入的目录不存在!")
# Run the interactive entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
涉及到的目录、数据文件
哔哩哔哩 App 视频缓存目录结构演示
bilibili
├── 1******302
│ └── c1*******94
│ ├── 112
│ │ ├── audio.m4s
│ │ ├── index.json
│ │ └── video.m4s
│ ├── danmaku.xml
│ └── entry.json
├── 1******015
│ └── c1*******71
│ ├── 112
│ │ ├── audio.m4s
│ │ ├── index.json
│ │ └── video.m4s
│ ├── danmaku.xml
│ └── entry.json
├── 1******885
│ └── c2*******27
│ ├── 112
│ │ ├── audio.m4s
│ │ ├── index.json
│ │ └── video.m4s
│ ├── danmaku.xml
│ └── entry.json
├── 1******488
│ ├── c2*******09
│ │ ├── 112
│ │ │ ├── audio.m4s
│ │ │ ├── index.json
│ │ │ └── video.m4s
│ │ ├── danmaku.xml
│ │ └── entry.json
│ └── c2********50
│ ├── 80
│ │ ├── audio.m4s
│ │ ├── index.json
│ │ └── video.m4s
│ ├── danmaku.xml
│ └── entry.json
entry.json 文件键值结构
{
"media_type": 2,
"has_dash_audio": true,
"is_completed": true,
"total_bytes": 787******7,
"downloaded_bytes": 787******7,
"title": "【***】********班",
"type_tag": "112",
"cover": "http://i2.****.com/bfs/archive/92*********1d.jpg",
"video_quality": 112,
"prefered_video_quality": 112,
"guessed_total_bytes": 0,
"total_time_milli": 102*****2,
"danmaku_count": 2,
"time_update_stamp": 173*****604,
"time_create_stamp": 173*****605,
"can_play_in_advance": true,
"interrupt_transform_temp_file": false,
"quality_pithy_description": "1080P",
"quality_superscript": "高码率",
"variable_resolution_ratio": false,
"cache_version_code": 8****00,
"preferred_audio_quality": 0,
"audio_quality": 0,
"avid": 113*******9,
"spid": 0,
"seasion_id": 0,
"bvid": "BV19*******kg",
"owner_id": 4*******92,
"owner_name": "*******区",
"is_charge_video": false,
"verification_code": 0,
"page_data": {
"cid": 262*******6,
"page": 1,
"from": "vupload",
"part": "【***】*********班",
"link": "",
"rich_vid": "",
"has_alias": false,
"tid": 21,
"width": 1920,
"height": 1080,
"rotate": 0,
"download_title": "视频已缓存完成",
"download_subtitle": "【***】*********班"
}
}
代码存在的问题
L108 遇到输出文件名相同的视频时,如果选择替换文件,可能会由于 FFmpeg 线程阻塞而无法正常完成替换;后续将尝试先直接删除现有的视频文件,验证这样是否可以实现替换。