# [Python] View as plain text / Copy code  (forum paste header; not part of the program)
import requests
import re
import time
# HTTP request headers passed to the requests.get calls in this file.
# The User-Agent value identifies the client as a desktop browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.26 Safari/537.36 '
        'Core/1.63.5796.400 QQBrowser/10.2.2324.400'
    ),
}

# Search endpoint; the search keyword is appended directly after "searchkey=".
ss_url = 'https://www.biquge5200.cc/modules/article/search.php?searchkey='
# fp = open('从前有座灵剑山.txt','w',encoding='gbk')
def book_name(book_name):
    """Open the output text file for a book and return the file object.

    Args:
        book_name: Base name of the output file; ``'.txt'`` is appended.
            A relative name is resolved against the current working
            directory.

    Returns:
        A text file object opened in ``'w'`` mode (an existing file is
        truncated) with GBK encoding.  The caller owns the handle and is
        responsible for closing it.
    """
    path = book_name + '.txt'
    return open(path, 'w', encoding='gbk')
def get_wz(url, title, fp):
    """Download one chapter page and append its text to the output file.

    The chapter body is taken from the first ``<div id="content">`` element
    of the page.  ``<p>`` tags are removed and ``</p>`` tags become newlines;
    any other markup is written through unchanged.  The raw extracted
    content is also echoed to the console.

    Args:
        url: Address of the chapter page.
        title: Chapter title, written above the chapter text.
        fp: Writable text file object that receives the chapter.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # Without a timeout requests waits indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=30)
    res.encoding = 'gbk'  # decode the response body as GBK
    wz = re.findall('<div id="content">(.*?)</div>', res.text, re.S)
    if not wz:
        # No content div on the page: skip this chapter rather than fail
        # the whole download with an IndexError.
        print('未找到章节正文,已跳过: ' + title)
        return

    content = wz[0]
    rule = '-----------------------------------------------------------------------'
    text = content.replace('<p>', '').replace('</p>', '\n')
    fp.write(
        title + '\n' + rule + '\n'
        + text + '\n'
        + rule + '\n' + rule + '\n\n\n'
    )
    print(content)
def get_wz_url(wz_url, fp):
    """Fetch a book's chapter index page and download every chapter listed.

    Each ``<dd>`` entry containing a link is treated as one chapter: the
    link target is the chapter address and the link text is its title.
    Chapters are passed to ``get_wz`` in the order they appear on the page.

    Args:
        wz_url: Address of the book's chapter index page.
        fp: Writable text file object that receives the chapters.

    Raises:
        requests.exceptions.RequestException: If the index request fails or
            does not complete within the timeout.
    """
    from urllib.parse import urljoin

    # Without a timeout requests waits indefinitely on a stalled server.
    res = requests.get(wz_url, headers=headers, timeout=30)
    res.encoding = 'gbk'  # decode the response body as GBK
    chapters = re.findall(
        '<dd>.*?<a href="(.*?)">(.*?)</a>.*?</dd>', res.text, re.S
    )
    for href, chapter_title in chapters:
        # urljoin leaves absolute links untouched and resolves relative
        # ones against the index page, which requests could not fetch as-is.
        get_wz(urljoin(wz_url, href), chapter_title, fp)
def get_book(url, fp):
    """Run a search, list the matching books, and download the chosen one.

    The search result page is scraped with two regular expressions: one for
    each book's link and name, one for four further table cells per book.
    The printed header labels those cells as author, word count, last
    update time and serialization status.  The user is then prompted for the
    ID (row index) of the book to download, and the book's chapters are
    written to ``fp`` via ``get_wz_url``.

    Args:
        url: Full search URL, including the search keyword.
        fp: Writable text file object that receives the downloaded book.

    Raises:
        requests.exceptions.RequestException: If the search request fails or
            does not complete within the timeout.
    """
    from urllib.parse import urljoin

    # Without a timeout requests waits indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=30)
    res.encoding = 'gbk'  # decode the response body as GBK
    title = re.findall('<td class="odd">.*?<a href="(.*?)">(.*?)</a>.*?</td>.*?<td class="even"><a href=.*?>(.*?)</a></td>', res.text, re.S)
    author = re.findall('<td class="even">.*?</td>.*?<td class="odd">(.*?)</td>.*?<td class="even">(.*?)</td>.*?<td class="odd" align="center">(.*?)</td>.*?<td class="even" align="center">(.*?)</td>', res.text, re.S)
    print('正在查找中...')
    time.sleep(2)  # pause kept from the original; the request has already completed

    # zip() stops at the shorter list, so a mismatch between the two regex
    # results can no longer raise IndexError while printing.
    rows = list(zip(title, author))
    if not rows:
        print('没有找到相关书籍')
        return

    book_url = []
    print('书籍ID\t', '书籍名称\t\t\t', '书籍作者\t\t', '书籍字数\t', '最后更新时间\t', '连载状态\t', '书籍链接\t', '\n')
    for i, (book, info) in enumerate(rows):
        print(i, '\t\t', book[1], '\t\t', info[0], '\t', info[1], '\t', '\t', info[2], '\t', info[3], '\t', book[0], '\n')
        # Remember each book link, resolved against the search URL in case
        # the page uses relative links (absolute links are unchanged).
        book_url.append(urljoin(url, book[0]))

    # Re-prompt until the input is an integer that matches a listed ID.
    while True:
        try:
            xs_id = int(input('请输入要下载的小说ID:\n'))
        except ValueError:
            print('请输入有效的数字ID')
            continue
        if 0 <= xs_id < len(book_url):
            break
        print('ID超出范围,请重新输入')

    get_wz_url(book_url[xs_id], fp)
if __name__ == '__main__':
    # Script entry point: ask for a book name, search for it, and download
    # the selected result into "<book name>.txt".
    book = input('请输入你想要查找的书名:\n')
    book_url = ss_url + book
    # The context manager flushes and closes the output file even when the
    # download is interrupted by an exception.
    with book_name(book) as fp:
        get_book(book_url, fp)