前言
在悬赏问答区看到过有人求书,并提到这些书已经在QQ阅读、微信读书上架,那么怎样才能把电子书下载下来呢?这就是开发这个工具的原因。
解析&流程
因为QQ阅读·机构版可以通过注册国家图书馆账号或者其他机构账号来免费使用,就以QQ阅读·机构版为例(QQ阅读和微信读书流程类似)。
分析网站
其实这类需求的分析方式基本一样,我的另一篇文章里有类似的分析过程,这里直接放上相关接口,供想要学习的伙伴们参考。
书籍相关的接口:
- 获取书籍详情信息:
https://weblicenseapi.qidian.com/book/getbookinfo?cbid=21162892001705206&appflag=clcn&version=1&platform=1&terminal=0
- 获取章节目录:
https://weblicenseapi.qidian.com/chapter/getallchapters?cbid=21162892001705206&appflag=clcn&version=1&platform=1&terminal=0
- 获取章节详情:
https://weblicenseapi.qidian.com/chapter/getchapterinfoandcontent?cbid=21162892001705206&ccid=56884406069021843&appflag=clcn&version=1&platform=1&terminal=0
> 有没有发现接口域名是起点的?!
token生成过程相关接口
经过测试发现token可以复用,我就没在程序里写,感兴趣的伙伴可以根据下面的内容自行添加到程序里,直接放结论,两个接口,分别是:
https://jinghecx.com/api/responsive/mobile-token?cbid=12601159404369906
https://weblicenseapi.qidian.com/public/login?cpusertoken=EBBDAA28370C98C55ACB64FF7B362CEB6108A20A804061A72C711DD25E683C20&appflag=shupu52676&version=1&platform=1&terminal=0
其中cpusertoken生成接口所需要的参数在local storage里,提交的时候需要添加到header里。
综上,分析过程结束,开始梳理编码流程
编码流程
- 获取书籍url(cbid)
- 书籍详情信息
- 获取章节目录
- 遍历获取章节详情
- 处理章节内容
- 图片本地化
- 锚点链接
- 生成epub文件
源码
import os
import re
import json
import hashlib
import requests
from datetime import datetime
from ebooklib import epub
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
class EPUBGenerator:
    """Assemble an EPUB file from book metadata and per-chapter HTML.

    Images referenced by the chapters are downloaded and embedded, in-chapter
    links are reduced to plain anchors, and a stylesheet (remote, cached on
    disk, with a built-in fallback) is attached to every chapter.
    """

    # Seconds to wait on any single HTTP request (stylesheet, image, cover).
    REQUEST_TIMEOUT = 10

    # File extension -> media type declared for the image in the EPUB.
    _IMAGE_MEDIA_TYPES = {
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'gif': 'image/gif',
        'webp': 'image/webp',
        'svg': 'image/svg+xml',
    }

    def __init__(self, book_info, chapters,
                 css_url=None,
                 output_dir='output',
                 cache_dir='.css_cache'):
        """Prepare directories, the empty book and the stylesheet.

        Args:
            book_info: dict with at least ``cbid``, ``title`` and
                ``author_name``; ``intro``, ``publisher``, ``isbn`` and
                ``coverUrl`` are used when present.
            chapters: iterable of dicts with ``chapterSort``, ``chapterName``
                and ``content`` (HTML string).
            css_url: optional URL of a stylesheet to embed.
            output_dir: directory for the generated file; a relative path is
                resolved against this script's directory.
            cache_dir: directory for the downloaded stylesheet cache,
                resolved the same way.
        """
        # abspath keeps the result stable even when __file__ is relative.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        self.book_info = book_info
        self.chapters = sorted(chapters, key=lambda x: x['chapterSort'])
        self.css_url = css_url
        self.output_dir = os.path.join(base_dir, output_dir)
        self.cache_dir = os.path.join(base_dir, cache_dir)
        os.makedirs(self.cache_dir, exist_ok=True)
        os.makedirs(self.output_dir, exist_ok=True)
        self.book = epub.EpubBook()
        # Maps an image's source URL to the file name it got inside the EPUB.
        self.image_cache = {}
        self.css_content = self._load_remote_css()

    def _load_remote_css(self):
        """Return the stylesheet text.

        Uses the on-disk copy when it is younger than 24 hours, otherwise
        downloads ``css_url`` and refreshes the cache. Falls back to the
        built-in stylesheet when no URL is set or anything goes wrong.
        """
        if not self.css_url:
            return self._get_default_css()
        cache_key = hashlib.md5(self.css_url.encode()).hexdigest()
        cache_file = os.path.join(self.cache_dir, f"{cache_key}.css")
        try:
            if os.path.exists(cache_file):
                file_age = datetime.now().timestamp() - os.path.getmtime(cache_file)
                if file_age < 86400:
                    with open(cache_file, 'r', encoding='utf-8') as f:
                        return f.read()
            # The context manager closes the session's connection pool.
            with requests.Session() as session:
                retries = Retry(total=3, backoff_factor=1,
                                status_forcelist=[500, 502, 503, 504])
                session.mount('https://', HTTPAdapter(max_retries=retries))
                response = session.get(self.css_url, timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
            with open(cache_file, 'w', encoding='utf-8') as f:
                f.write(response.text)
            return response.text
        except Exception as e:
            # Best effort: a missing remote stylesheet must not stop the build.
            print(f"远程CSS加载失败: {str(e)},使用备用样式")
            return self._get_default_css()

    def _get_default_css(self):
        """Return the built-in fallback stylesheet."""
        return '''
body {
    font-family: "Microsoft YaHei", sans-serif;
    line-height: 1.6;
    margin: 2em auto;
    max-width: 800px;
    padding: 0 1em;
}
h1, h2 {
    color: #333;
    border-bottom: 1px solid #eee;
    padding-bottom: 0.3em;
}
img {
    max-width: 100%;
    height: auto;
}
'''

    def _create_style_item(self):
        """Wrap the stylesheet text in an EPUB resource item."""
        return epub.EpubItem(
            uid="remote_style",
            file_name="styles/main.css",
            media_type="text/css",
            content=self.css_content
        )

    @classmethod
    def _image_file_info(cls, src, content_type=''):
        """Pick the file extension and media type for an image.

        Args:
            src: the image URL.
            content_type: value of the response's Content-Type header, used
                when the URL has no recognised extension.

        Returns:
            ``(extension, media_type)``; ``('png', 'image/png')`` when neither
            the URL nor the header identifies a known image type.
        """
        # Query string and fragment are not part of the file name.
        path = src.split('#', 1)[0].split('?', 1)[0]
        ext = os.path.splitext(path)[1].lstrip('.').lower()
        if ext in cls._IMAGE_MEDIA_TYPES:
            return ext, cls._IMAGE_MEDIA_TYPES[ext]
        declared = (content_type or '').split(';', 1)[0].strip().lower()
        for known_ext, media_type in cls._IMAGE_MEDIA_TYPES.items():
            if media_type == declared:
                return known_ext, media_type
        return 'png', 'image/png'

    def _process_content_images(self, content):
        """Embed the images of one chapter and point ``<img>`` tags at them.

        Each distinct source URL is downloaded once and added to the book.
        An image that cannot be downloaded keeps its original ``src``.

        Returns:
            The chapter HTML with rewritten image sources.
        """
        soup = BeautifulSoup(content, 'html.parser')
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if not src:
                continue
            if src not in self.image_cache:
                try:
                    response = requests.get(src, timeout=self.REQUEST_TIMEOUT)
                    response.raise_for_status()
                    ext, media_type = self._image_file_info(
                        src, response.headers.get('Content-Type', ''))
                    img_name = f"image_{hashlib.md5(src.encode()).hexdigest()}.{ext}"
                    self.book.add_item(
                        epub.EpubImage(
                            uid=img_name,
                            file_name=f"images/{img_name}",
                            media_type=media_type,
                            content=response.content
                        )
                    )
                    # Record only after the item really is in the book.
                    self.image_cache[src] = img_name
                except Exception as e:
                    print(f"图片下载失败: {str(e)}")
                    continue
            img['src'] = f"images/{self.image_cache[src]}"
        return str(soup)

    def _process_content_links(self, content):
        """Reduce file-qualified anchor links to bare ``#anchor`` links.

        Links containing ``.xhtml#`` are always rewritten. Other links that
        start with ``#``, ``http://`` or ``https://`` are left alone, and any
        remaining link that contains ``#`` is rewritten to its last fragment.
        """
        soup = BeautifulSoup(content, 'html.parser')
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            if '.xhtml#' not in href and href.startswith(('#', 'http://', 'https://')):
                continue
            if '#' in href:
                a_tag['href'] = f"#{href.split('#')[-1]}"
        return str(soup)

    def generate(self):
        """Build the book and write it to the output directory.

        Returns:
            Path of the written ``.epub`` file.

        Raises:
            KeyError: if ``book_info`` lacks ``cbid``, ``title`` or
                ``author_name``.
        """
        info = self.book_info
        self.book.set_identifier(str(info['cbid']))
        self.book.set_title(info['title'])
        self.book.add_author(info['author_name'])
        self.book.set_language('zh-CN')
        # Optional metadata is written only when the source provides it.
        if info.get('intro'):
            self.book.add_metadata('DC', 'description', info['intro'])
        if info.get('publisher'):
            self.book.add_metadata('DC', 'publisher', info['publisher'])
        self.book.add_metadata('DC', 'date', datetime.now().strftime('%Y-%m-%d'))

        cover_url = info.get('coverUrl')
        if cover_url:
            try:
                # NOTE(review): dropping '/180' presumably requests the
                # full-size cover instead of a thumbnail - confirm.
                response = requests.get(cover_url.replace('/180', ''),
                                        timeout=self.REQUEST_TIMEOUT)
                # Without this an HTTP error page would be stored as the cover.
                response.raise_for_status()
                self.book.set_cover(
                    "cover.jpg",
                    response.content,
                    create_page=False
                )
            except Exception as e:
                print(f"封面下载失败: {str(e)}")

        self.book.add_item(self._create_style_item())

        chapter_items = []
        toc = []
        for chapter in self.chapters:
            if not chapter.get('content'):
                continue
            chapter_item = self._create_chapter(chapter)
            if chapter_item:
                self.book.add_item(chapter_item)
                chapter_items.append(chapter_item)
                toc.append(epub.Link(chapter_item.file_name, chapter['chapterName']))

        self.book.add_item(epub.EpubNav())
        self.book.toc = toc
        # Each chapter appears in the reading order exactly once.
        self.book.spine = chapter_items

        name_parts = [info['title'], info['author_name']]
        if info.get('isbn'):
            name_parts.append(info['isbn'])
        safe_parts = [re.sub(r'[\\/*?:"<>|]', "", str(part)) for part in name_parts]
        epub_path = os.path.join(self.output_dir, "_".join(safe_parts) + ".epub")
        epub.write_epub(epub_path, self.book, {})
        return epub_path

    def _create_chapter(self, chapter_info):
        """Create the EPUB chapter item for one chapter.

        Args:
            chapter_info: dict with ``chapterName``, ``chapterSort`` and
                ``content``.

        Returns:
            The chapter item, or ``None`` when it could not be built.
        """
        try:
            content = self._process_content_images(chapter_info['content'])
            content = self._process_content_links(content)
            chapter = epub.EpubHtml(
                title=chapter_info['chapterName'],
                file_name=f"chapter_{chapter_info['chapterSort']}.xhtml",
                lang='zh-CN'
            )
            # No XML declaration here: ebooklib emits its own prolog when it
            # serialises the chapter, and lxml can reject str input that
            # starts with an encoding declaration.
            chapter.content = f'''<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>{chapter_info['chapterName']}</title>
<link href="styles/main.css" rel="stylesheet" type="text/css"/>
</head>
<body>
{content}
</body>
</html>
'''
            # ebooklib builds the chapter <head> from registered links, so the
            # stylesheet has to be declared here to reach the output file.
            chapter.add_link(href='styles/main.css', rel='stylesheet', type='text/css')
            return chapter
        except Exception as e:
            print(f"创建章节失败:{chapter_info['chapterName']} - {str(e)}")
            return None
class QQRead:
    """Client that downloads a book from QQ阅读·机构版 and builds an EPUB.

    Two header sets are used: the ``Responsive-*`` authentication headers
    given to the constructor, and, after login, a ``token`` header for the
    weblicenseapi.qidian.com chapter endpoints.
    """

    default_css_url = 'https://ccstatic-1252317822.file.myqcloud.com/epubpublic/DEDD45/13310610603388406/OEBPS/Styles/stylesheets.css'

    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'

    def __init__(self, responsive_org, responsive_session_id):
        """Store the authentication headers and create a retrying session.

        Args:
            responsive_org: value sent as the ``Responsive-Org`` header.
            responsive_session_id: value sent as the
                ``Responsive-Session-Id`` header.

        Raises:
            ValueError: if either argument is empty.
        """
        if not responsive_org:
            raise ValueError("必须传入 responsive_org 参数!")
        if not responsive_session_id:
            raise ValueError("必须传入 responsive_session_id 参数!")
        # Kept separately so that logging in (which replaces self.headers)
        # never loses the credentials the jinghecx.com endpoints need.
        self._auth_headers = {
            'User-Agent': self.USER_AGENT,
            'Responsive-Org': responsive_org,
            'Responsive-Session-Id': responsive_session_id
        }
        self.headers = dict(self._auth_headers)
        self.app_flag = ''
        self.session = requests.Session()
        retries = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
        self.session.mount('https://', HTTPAdapter(max_retries=retries))

    def _http_get(self, url, timeout=10, headers=None):
        """GET ``url`` and return the decoded JSON body, or ``None`` on failure.

        Args:
            url: full request URL.
            timeout: seconds to wait for the server.
            headers: headers to send; defaults to ``self.headers``.
        """
        try:
            response = self.session.get(
                url,
                headers=self.headers if headers is None else headers,
                timeout=timeout
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"HTTP GET 请求失败: {str(e)}")
            return None

    def _api_data(self, url, headers=None):
        """Return the ``data`` field of a response whose ``code`` is 0, else ``None``."""
        result = self._http_get(url, headers=headers)
        if isinstance(result, dict) and result.get('code') == 0:
            return result.get('data')
        return None

    def _download_image(self, url, destination, timeout=10):
        """Download ``url`` into the file ``destination``.

        Returns:
            ``True`` on success, ``False`` when the download or write failed.
        """
        try:
            response = self.session.get(url, headers=self.headers, timeout=timeout)
            response.raise_for_status()
            with open(destination, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:
            print(f"图片下载失败: {str(e)}")
            return False

    def get_book_info(self, cbid):
        """Return the book detail dict for ``cbid``, or ``None`` on failure."""
        url = f"https://jinghecx.com/api/responsive/book?cbid={cbid}"
        return self._api_data(url, headers=self._auth_headers)

    def get_mobile_token(self, cbid):
        """Return the mobile-token payload for ``cbid``, or ``None`` on failure."""
        url = f"https://jinghecx.com/api/responsive/mobile-token?cbid={cbid}"
        return self._api_data(url, headers=self._auth_headers)

    def _set_headers_token(self, cpusertoken):
        """Log in with ``cpusertoken`` and switch ``self.headers`` to the token.

        Returns:
            The token string, or ``None`` when login failed; ``self.headers``
            is left untouched in that case.
        """
        url = f"https://weblicenseapi.qidian.com/public/login?cpusertoken={cpusertoken}&appflag={self.app_flag}&version=1&platform=1&terminal=0"
        data = self._api_data(url, headers=self._auth_headers)
        token = data.get('token') if isinstance(data, dict) else None
        if not token:
            return None
        self.headers = {
            'User-Agent': self.USER_AGENT,
            'token': token
        }
        return token

    def get_all_chapters(self, cbid):
        """Return the chapter list for ``cbid``, or ``None`` on failure."""
        url = f"https://weblicenseapi.qidian.com/chapter/getallchapters?cbid={cbid}&appflag={self.app_flag}&version=1&platform=1&terminal=0"
        return self._api_data(url)

    def get_chapter_content(self, cbid, ccid):
        """Return the detail dict of one chapter, or ``None`` on failure."""
        url = f"https://weblicenseapi.qidian.com/chapter/getchapterinfoandcontent?cbid={cbid}&ccid={ccid}&appflag={self.app_flag}&version=1&platform=1&terminal=0"
        return self._api_data(url)

    def make_book_epub(self, cbid, output_dir=None):
        """Download every chapter of a book and write it as an EPUB.

        Args:
            cbid: the book id taken from the book page URL.
            output_dir: output directory relative to this script's directory;
                ``'output'`` when omitted.

        Returns:
            Path of the generated file, or ``None`` when no chapter had
            content.

        Raises:
            ValueError: if the book info, mobile token, login token or
                chapter list cannot be obtained, or the book has a non-empty
                ``volume_list``.
        """
        # Start every run from the original credentials so the instance can
        # be reused for several books.
        self.headers = dict(self._auth_headers)

        book_info = self.get_book_info(cbid)
        if not book_info:
            raise ValueError("获取书籍内容失败")
        if book_info.get('volume_list'):
            raise ValueError("暂不支持纯文字类书籍")

        mobile_token = self.get_mobile_token(cbid)
        if not mobile_token:
            raise ValueError("获取mobile-token失败")
        self.app_flag = mobile_token['app_flag']
        token = self._set_headers_token(mobile_token['token'])
        if not token:
            raise ValueError("获取token失败")

        chapters = self.get_all_chapters(cbid)
        if not chapters:
            raise ValueError("获取章节失败")

        chapters_with_content = []
        for chapter in chapters:
            try:
                data = self.get_chapter_content(cbid, chapter['ccid'])
                if data and data.get('content'):
                    chapters_with_content.append({
                        **chapter,
                        'content': data['content']
                    })
                else:
                    print(f"章节内容为空:{chapter['chapterName']}")
            except Exception as e:
                # One bad chapter must not abort the whole book.
                print(f"获取章节失败:{chapter['chapterName']} - {str(e)}")

        if not chapters_with_content:
            print("错误:没有有效的章节内容")
            return None

        # abspath keeps the directory stable even when __file__ is relative.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        final_output_dir = os.path.join(base_dir, output_dir or 'output')
        generator = EPUBGenerator(book_info, chapters_with_content, self.default_css_url, final_output_dir)
        epub_path = generator.generate()
        print(f"EPUB已生成至:{os.path.relpath(epub_path, base_dir)}")
        return epub_path
if __name__ == '__main__':
    # Example run: the two auth values are copied from the browser's
    # localStorage, and the book id comes from the book page URL.
    credentials = {
        'responsive_org': 'zggjtsg',
        'responsive_session_id': 'F973A25A78197090B3B61ABB7EA941E1',
    }
    reader = QQRead(**credentials)
    reader.make_book_epub('29489178703580706', output_dir='my_books')
使用方法
- 注册QQ阅读·机构版
可以通过注册国家图书馆账号从而免费使用,还有很多其他机构都支持,具体方法自行搜索引擎搜索。
- 获取鉴权参数
打开开发者工具(F12),在控制台输入以下代码即可:
// Read the two auth values the web app keeps in localStorage and print them
// as ready-to-paste Python assignments.
const responsive_org = localStorage.getItem('responsive-org');
const responsive_session_id = localStorage.getItem('responsive-session-id');
console.log(`responsive_org = '${responsive_org}'\nresponsive_session_id = '${responsive_session_id}'`);
- 获取所需要的书籍cbid
就是url中cbid后面那串字符。
结果展示
注意
代码是针对出版物类书籍的;部分网络小说的格式不同,程序暂不支持,不过方法类似,可以自行修改,我也会在后续更新。
一些话
分享源码的目的是交流学习,请合理使用,最后希望大家每天都能有所收获吧~