from
PIL
import
Image
from
io
import
BytesIO
import
requests,re,os,json
from
urllib.parse
import
urlparse
# Example config.js URL of a book. The functions in this section do not read
# it (x0 takes its own ``u`` parameter, which shadows this name).
u = 'https://book.yunzhan365.com/zaidx/hxmv/mobile/javascript/config.js?VAWSLzFTvgCWKoouv0cMeg=='

# Request headers sent with every HTTP call made through ``ss``.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
}

# One shared HTTP session reused for the index page, the config and all images.
ss = requests.session()

# Shared download state: 'p' holds the downloaded page images (filled by x0).
# 'title' and 'page' are not touched by the code visible in this section.
z = dict(title='', page=0, p=[])
def getimg(n, s, timeout=30):
    """Download one page image and return it as a PIL image.

    Args:
        n: Page number; printed (followed by a comma) as a progress marker.
        s: URL of the image to download.
        timeout: Seconds to wait on the server before giving up, so that a
            stalled connection cannot hang the whole download.

    Returns:
        The image object produced by ``Image.open`` from the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = ss.get(s, headers=headers, timeout=timeout)
    # Report the HTTP failure itself instead of letting PIL fail later on an
    # error page that is not image data.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    # Progress output: "1,2,3,..." on a single line.
    print(n, end=',', flush=True)
    return img
def _safe_filename(title):
    """Return *title* made safe for use as a single file name component."""
    # Path separators and characters Windows forbids are replaced so that a
    # remote-supplied title cannot escape the working directory or break save().
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(title)).strip()
    return cleaned or 'book'


def _page_names(config):
    """Return the image file name of every page described by *config*."""
    html5_pages = config.get('fliphtml5_pages', 0)
    if html5_pages:
        print("找到html5列表,获取图片中...")
        return [page['n'][0] for page in html5_pages]
    # No explicit list: pages are named 1.jpg .. <totalPageCount>.jpg.
    total = config['bookConfig']['totalPageCount']
    return [f"{number}.jpg" for number in range(1, total + 1)]


def _large_path(config):
    """Return the large-image path from *config* with every '..' removed."""
    large = config['bookConfig']['largePath']
    # The config stores either a single path or a list of paths.
    if isinstance(large, list):
        large = large[0]
    return large.replace('..', '')


def x0(u):
    """Download every page of a yunzhan365 book and merge them into one PDF.

    The book's index page is fetched to find the query token of its
    ``config.js``; the JSON object inside that script supplies the title, the
    page list and the image directory. Each page image is downloaded with
    ``getimg`` and all of them are written to ``<title>.pdf`` in the current
    working directory, whose full path is printed at the end.

    If the index page has no config reference, the URL path has fewer than two
    components, the config holds no JSON object, or the book lists no pages,
    ``u`` is printed together with '识别错误' and nothing is saved.

    Args:
        u: URL of the book's index page, e.g.
            ``https://book.yunzhan365.com/<a>/<b>/mobile/index.html``.

    Side effects:
        ``z['p']`` is replaced by the list of images downloaded for this book.
    """
    # A stalled server should fail the run rather than hang it forever.
    timeout = 30

    index_page = ss.get(u, headers=headers, timeout=timeout)
    tokens = re.findall(
        r'src="javascript/config\.js\?(.+?)"></script>', index_page.text, re.S
    )
    segments = urlparse(u).path.split('/')
    # segments[0] is the empty string before the leading '/', so two real
    # path components are needed to rebuild the book's base URL.
    if not tokens or len(segments) < 3:
        print(u, '识别错误')
        return

    base = f'https://book.yunzhan365.com/{segments[1]}/{segments[2]}'
    config_page = ss.get(
        f'{base}/mobile/javascript/config.js?{tokens[0]}',
        headers=headers,
        timeout=timeout,
    )
    # config.js wraps the settings as "... = {...};" - take the object literal.
    payload = re.search(r'(\{.*\});', config_page.text)
    if payload is None:
        print(u, '识别错误')
        return
    config = json.loads(payload.group(1))

    title = config['meta']['title']
    pagelist = _page_names(config)
    print(f"{title} / 共{len(pagelist)}页")
    if not pagelist:
        # Nothing to download, so there is no first page to save a PDF from.
        print(u, '识别错误')
        return

    image_dir = _large_path(config)
    # Collect into a fresh list so pages of a previously downloaded book never
    # leak into this PDF.
    images = []
    for n, name in enumerate(pagelist, 1):
        images.append(getimg(n, f"{base}{image_dir}{name}"))
    z['p'] = images
    print('')

    print("开始制作并合并成PDF...")
    out_path = os.path.join(os.getcwd(), f"{_safe_filename(title)}.pdf")
    first, *rest = images
    first.save(out_path, "PDF", resolution=100.0, save_all=True, append_images=rest)
    print(out_path)
if __name__ == '__main__':
    # Script entry point: ask for the book's index-page URL and convert it.
    # Surrounding whitespace from a pasted URL is dropped so it does not end
    # up inside the request URL.
    _id = input("输入书本网址:[例:https://book.yunzhan365.com/zaidx/hxmv/mobile/index.html]\n").strip()
    x0(_id)