import
requests
import
time
import
json
import
re
def crawl_jd_books():
    """Crawl the first 5 pages of JD.com's book-ranking JSONP API.

    Returns:
        list[tuple]: one ``(book_id, book_name, sell_price, old_price,
        publisher, item_url)`` tuple per book found. ``old_price`` is 0
        when the API reports an empty ``definePrice``.
    """
    headers = {
        # BUGFIX: the original value embedded a stray "user-agent: "
        # prefix *inside* the header value, so the server received
        # "User-Agent: user-agent: Mozilla/5.0 ...".
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/89.0.4389.90 Safari/537.36'),
        'referer': 'https://book.jd.com/',
    }
    # Loop-invariant values hoisted out of the page loop.
    url = 'https://gw-e.jd.com/client.action'
    # The endpoint wraps JSON in a JSONP callback: func({...}). The lazy
    # group stops at the first "})", so the trailing "}" is re-appended
    # below. BUGFIX: raw string — '\(' / '\}' in a plain string are
    # invalid escape sequences (SyntaxWarning, future error).
    jsonp_pattern = re.compile(r'func\((.*?)\}\)')
    books_info = []
    for page in range(1, 6):
        body = json.dumps({
            "moduleType": 1,
            "page": page,
            "pageSize": 20,
            "scopeType": 1,
        })
        param = {
            "callback": "func",
            "body": body,
            "functionId": "bookRank",
            "client": "e.jd.com",
            # Cache-buster: current time in milliseconds.
            "_": int(round(time.time() * 1000)),
        }
        response = requests.get(url, params=param, headers=headers)
        match = jsonp_pattern.search(response.text)
        if match is None:
            # Unexpected payload (rate-limited / API change): skip this
            # page instead of crashing with AttributeError on .group().
            continue
        json_back = json.loads(match.group(1) + '}')
        book_list = json_back['data']['books']
        for book in book_list:
            book_id = book['bookId']
            define_price = book['definePrice']
            # Empty definePrice means "no list price"; record 0 instead.
            old_price = 0 if define_price == '' else define_price
            books_info.append((
                book_id,
                book['bookName'],
                book['sellPrice'],
                old_price,
                book['publisher'],
                f'https://item.jd.com/{book_id}.html',
            ))
    return books_info
# Script entry point: run the crawler when executed directly.
if __name__ == "__main__":
    crawl_jd_books()