"""Crawl a 52pojie.cn forum collection, extract Baidu netdisk share links
and their extraction codes from each thread, verify that every share is
still alive, and append the live ones to result.txt."""
import json
import re
import time

import requests
from bs4 import BeautifulSoup
from requests import exceptions
class Baiduyun:
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    def __init__(self, url):
        self.url = url

    def get_link(self):
        # Extract the 22-character share id from a pan.baidu.com link.
        match = re.search(r'https://pan\.baidu\.com/s/1(.{22})', self.url, re.S)
        if match:
            return match.group(1)

    def verify(self):
        # A share is still alive if its init page asks for the extraction
        # code; dead shares show an error notice instead.
        share_id = self.get_link()
        if not share_id:
            return False
        init_url = 'https://pan.baidu.com/share/init?surl=' + share_id
        response = requests.get(init_url, headers=Baiduyun.headers)
        if response.status_code == 200:
            response.encoding = 'utf-8'
            soup = BeautifulSoup(response.text, 'lxml')
            picks = soup.select('dl.pickpw.clearfix')
            if picks:
                notice = picks[0].dt.string
                # '请输入提取码' ("please enter the extraction code") only
                # appears on shares that are still valid.
                if notice and '请输入提取码' in notice:
                    print('valid')
                    return True
            print('expired')
            return False
        print(response.status_code)
        print('expired')
        return False
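
# Minimal usage sketch for the class above (the share id here is
# hypothetical, for illustration only):
#   checker = Baiduyun('https://pan.baidu.com/s/1aBcDeFgHiJkLmNoPqRsTuV')
#   checker.verify()  # True while the share page still asks for a code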

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

def get_index(url):
    # Collect the detail-page URL and title of every thread listed on one
    # collection page of the 52pojie forum.
    index_list = []
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'lxml')
        bm_c = soup.select('#ct > div.mn > div.tl.bm > div.bm_c')[0]
        items = bm_c.select('th > a')
        for item in items:
            detail_info = {}
            detail_info['detail_url'] = 'https://www.52pojie.cn/' + item['href']
            detail_info['title'] = item.string
            index_list.append(detail_info)
    return index_list

def get_detail(url):
    # Pull the Baidu pan link and its 4-character extraction code out of a
    # thread page. Posts label the code as either 提取码 or 密码.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        response.encoding = 'GB2312'
        baiduyun_link_match = re.search(
            r'(https://pan\.baidu\.com/s/1.{22}).*?(?:提取码|密码).*?([A-Za-z0-9]{4})',
            response.text, re.S)
        if baiduyun_link_match:
            return {
                'url': baiduyun_link_match.group(1),
                'password': baiduyun_link_match.group(2),
            }
    else:
        print('status_code', response.status_code)

def save_result(content):
    # Append one JSON record per line; ensure_ascii=False keeps the Chinese
    # titles readable in the output file.
    with open('result.txt', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')

if __name__ == "__main__":
    for i in range(1, 18):
        try:
            url = 'https://www.52pojie.cn/forum.php?mod=collection&action=view&ctid=1667&page={}'.format(i)
            print(url)
            index_list = get_index(url)
            for detail_info in index_list:
                detail_url = detail_info['detail_url']
                print(detail_info['title'])
                result = get_detail(detail_url)
                if result:
                    result['title'] = detail_info['title']
                    print(result)
                    # Only keep links whose share page is still alive.
                    test_valid = Baiduyun(result['url'])
                    if test_valid.verify():
                        save_result(result)
                time.sleep(1)
        except exceptions.RequestException as e:
            # Network trouble: back off, report, and move on to the next page.
            time.sleep(10)
            print(e)
            continue
        except Exception:
            time.sleep(10)
            continue