好友
阅读权限 20
听众
最后登录 1970-1-1
本帖最后由 ChinaLee 于 2020-8-27 19:43 编辑
爬取喜马拉雅有声小说音频
如需下载 VIP 资源,请在代码中 headers 的 'Cookie' 字段填入 VIP 账号的 cookies。
[Python] 纯文本查看 复制代码
import requests
from lxml import etree
import os
import time
import json
def Get_ID_Name(url, headers):
    """Fetch an album page and collect track titles and audio-API URLs.

    Each track link on the page is read once, so the title at index ``i``
    always belongs to the API URL at index ``i``.

    Args:
        url: URL of the album page to scrape.
        headers: HTTP headers sent with the page request.

    Returns:
        A ``(Titles, Contents_IDS)`` tuple. ``Titles`` holds the ``title``
        attribute of every track link (the track id is used when a link has
        no title); ``Contents_IDS`` holds the matching
        ``revision/play/v1/audio`` URL built from the last path segment of
        the link's ``href``.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    api_url = 'https://www.ximalaya.com/revision/play/v1/audio?id={}&ptype=1'

    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    html = etree.HTML(r.text)

    Titles = []
    Contents_IDS = []
    # Read title and href from the same <a> element; two separate attribute
    # queries would drift out of step if one link lacked a title.
    anchors = html.xpath('//div[@class="sound-list _Qp"]/ul/li/div[2]/a[@href]')
    for anchor in anchors:
        # rstrip('/') so an href ending in a slash still yields the id.
        track_id = anchor.get('href').rstrip('/').split('/')[-1]
        if not track_id:
            continue
        Titles.append(anchor.get('title') or track_id)
        Contents_IDS.append(api_url.format(track_id))
    return Titles, Contents_IDS
def Json_Get_links(Contents_IDS, headers, delay=1.5, timeout=30):
    """Query the audio API for every track and collect its download link.

    Args:
        Contents_IDS: Iterable of audio-API URLs, one per track.
        headers: HTTP headers sent with every API request.
        delay: Seconds to sleep before each request (default 1.5, the value
            that used to be hard-coded).
        timeout: Per-request timeout in seconds.

    Returns:
        A list with one dict per input URL, in input order, of the form
        ``{'ID': <data.trackId>, 'M4aLinks': <data.src>}``. The ``src``
        value is stored exactly as the API returned it.

    Raises:
        requests.HTTPError: If an API request returns an error status.
        KeyError: If a response lacks ``data``, ``trackId`` or ``src``.
    """
    Itemlists = []
    for n, Contents_ID in enumerate(Contents_IDS, start=1):
        time.sleep(delay)
        r1 = requests.get(Contents_ID, headers=headers, timeout=timeout)
        r1.raise_for_status()
        # Let requests decode the body as JSON instead of going through a
        # guessed text encoding first.
        data = r1.json()['data']
        Itemlists.append({'ID': data['trackId'], 'M4aLinks': data['src']})
        print('已采集{}个链接!'.format(n))
    return Itemlists
def _safe_filename(name):
    """Return *name* as text with characters illegal in file names replaced by '_'."""
    illegal = '\\/:*?"<>|'
    return str(name).translate({ord(ch): '_' for ch in illegal}).strip()


def DownLoadM4A(Itemlists, filename):
    """Download every track into ``./XMLYFM`` as ``<title>.m4a``.

    Args:
        Itemlists: List of dicts carrying the download URL under
            ``'M4aLinks'``.
        filename: Track titles, paired with ``Itemlists`` by position.

    Returns:
        The number of files actually written.

    Raises:
        requests.HTTPError: If a download request returns an error status.
    """
    # Only a User-Agent is sent. A pinned 'If-None-Match' ETag can make the
    # server answer 304 with an empty body (which raise_for_status() does not
    # flag), and a pinned 'Host' breaks links served from any other host;
    # requests sets Host from the URL on its own.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    }
    os.makedirs('./XMLYFM', exist_ok=True)
    count = 0
    for filename1, Itemlist in zip(filename, Itemlists):
        srclinks = Itemlist['M4aLinks']
        if not srclinks:
            # No link available for this track; skip it instead of crashing
            # inside requests.
            print('跳过无下载链接的音频: {}'.format(filename1))
            continue
        print(srclinks)
        r2 = requests.get(srclinks, headers=headers, timeout=60)
        r2.raise_for_status()
        # Titles may contain characters such as '/' or '?' that cannot be
        # part of a file name.
        target = './XMLYFM/' + _safe_filename(filename1) + '.m4a'
        with open(target, 'wb') as f:
            f.write(r2.content)
        count += 1
        print('已下载{}个音频文件!'.format(count))
    print("{}个音频文件已全部下载完成!".format(count))
    return count
if __name__ == '__main__':
    # Script entry point: scrape one album page, resolve every track's
    # audio link, then download the files.
    print('正在加载...')
    url = 'https://www.ximalaya.com/gerenchengzhang/29391994/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        # Left empty here; presumably where a VIP account's cookies go.
        'Cookie': '',
        # The album page doubles as the Referer.
        'Referer': url,
        'Host': 'www.ximalaya.com'
    }
    fileName, IDlinks = Get_ID_Name(url, headers)
    track_items = Json_Get_links(IDlinks, headers)
    DownLoadM4A(track_items, fileName)
感谢评分!
免费评分
参与人数 8 吾爱币 +12
热心值 +7
收起
理由
清华必修
+ 1
+ 1
感谢发布原创作品,吾爱破解论坛因你更精彩!
MSOLX
+ 1
+ 1
我很赞同!
virs520
+ 1
+ 1
我很赞同!
开创者
+ 1
谢谢@Thanks!
苏紫方璇
+ 5
+ 1
感谢发布原创作品,吾爱破解论坛因你更精彩!
yjn866y
+ 1
+ 1
我很赞同!
Joxing
+ 1
+ 1
热心回复!
wangvipxxx
+ 1
+ 1
<font style="vertical-align: inherit;"><font style=
查看全部评分
发帖前要善用【论坛搜索 】 功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。