import
requests
from
requests.exceptions
import
RequestException
from
bs4
import
BeautifulSoup
import
re
import
time
# HTTP headers passed to requests.get() in get_html(). The User-Agent value
# identifies the client as desktop Chrome 72 on 64-bit Windows.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
}
def get_html(url, timeout=10):
    """Download a page and return its body decoded as GB2312.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout a stalled connection would block
            the caller indefinitely.

    Returns:
        The decoded response text, or None when the request fails (network
        error, timeout, or an HTTP error status). A failure notice is
        printed in that case.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except RequestException:
        # Only request-level failures are treated as "fetch failed"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print('请求失败')
        return None

    # Force the decoding instead of relying on the charset requests guesses.
    # NOTE(review): presumably the target pages are GB2312/GBK encoded - confirm.
    response.encoding = 'GB2312'
    return response.text
def parse_html(html):
    """Extract thread titles from a listing page and append them to a file.

    The page is searched for the element with id ``threadlisttableid``;
    inside it, every element whose id matches ``normalthread`` is treated as
    one thread row, and the text of the row's third ``<a>`` tag is taken as
    the title. Titles are appended, one per line, to ``wuai_statistics.txt``
    (UTF-8).

    Args:
        html: Page markup as a string. ``None`` or an empty string (for
            example after a failed download) is accepted and ignored.

    Returns:
        None. Rows that have fewer than three links, or whose third link has
        no single text child, are skipped.
    """
    if not html:
        # Nothing was downloaded; there is nothing to parse.
        return

    soup = BeautifulSoup(html, "lxml")
    tables = soup.select('#threadlisttableid')
    if not tables:
        # The page does not contain the thread list (error page, layout
        # change, ...); skip it instead of raising IndexError.
        return
    rows = tables[0].find_all(id=re.compile('normalthread'))

    lines = []
    for row in rows:
        links = row.select('a')
        if len(links) < 3:
            continue
        title = links[2].string
        if title is None:
            # .string is None when the tag does not have exactly one text
            # child; such rows were silently dropped before as well.
            continue
        lines.append(title + '\n')

    if not lines:
        return

    # Open the output once per page rather than once per row.
    try:
        with open('wuai_statistics.txt', 'a', encoding='utf-8') as f:
            f.writelines(lines)
    except OSError as exc:
        # Keep the crawl going, but make the failure visible.
        print('写入失败', exc)
def main(page):
    """Fetch one forum listing page and save the thread titles it contains.

    Args:
        page: Page number; it is converted with ``str`` and appended to the
            ``page=`` query parameter of the listing URL.

    Returns:
        None. When the download fails the page is skipped.
    """
    url = 'https://www.52pojie.cn/forum.php?mod=forumdisplay&fid=8&specialtype=reward&filter=specialtype&specialtype=reward&rewardtype=1&page=' + str(page)
    html = get_html(url)
    if html is None:
        # get_html already reported the failure; parsing None would raise.
        return
    parse_html(html)
if __name__ == '__main__':
    # Script entry point: process listing pages 1 through 200 in order,
    # printing a progress line before each page is handled.
    for page_no in range(1, 201):
        print('正在保存第', page_no, '页', sep='')
        main(page_no)
        # NOTE(review): the original indentation was lost; the success
        # message is assumed to be printed once per page - confirm.
        print('成功')