本帖最后由 18382747915 于 2018-9-28 12:49 编辑
废话少说,看代码:
[Python] 纯文本查看 复制代码
import re,requests
def maoyan(url, i):
    """Download one Maoyan board page and print every film found on it.

    Args:
        url: Full URL of the board page to request.
        i: The ``offset`` value of that page. It is only used to build the
            ``Referer`` header: 40 when ``i`` is 0, otherwise ``i - 10``.

    Returns:
        None. For each film a separator line, the rank, the title, the
        contents of the ``star`` paragraph and the contents of the
        ``releasetime`` paragraph are printed to stdout.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # The Referer points at another offset of the same board: ten lower than
    # the requested one, or 40 when the first page (offset 0) is requested.
    referer_offset = 40 if i == 0 else i - 10
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Host': 'maoyan.com',
        'Referer': 'http://maoyan.com/board/4?offset=%s' % referer_offset,
    }

    # Without a timeout an unresponsive server would block the script forever.
    response = requests.get(url, headers=header, timeout=10)
    html = response.text

    # Each findall returns one entry per film, in page order.
    ranks = re.findall(
        r'<i class="board-index board-index-(.*?)">(.*?)</i>', html, re.S)
    titles = re.findall(
        r'<p class="name"><a href="/films/(.*?)" title="(.*?)" '
        r'data-act="boarditem-click" data-val="{movieId:(.*?)}">(.*?)</a></p>',
        html, re.S)
    casts = re.findall(r'<p class="star">(.*?)</p>', html, re.S)
    releases = re.findall(r'<p class="releasetime">(.*?)</p>', html, re.S)

    # zip stops at the shortest list, so a film missing one of the fields
    # cannot trigger an IndexError the way positional indexing did.
    for rank, title, cast, release in zip(ranks, titles, casts, releases):
        print("--------------------------------")
        print("排名:第%s名" % rank[0])   # first capture group: the rank number
        print("电影名称:%s" % title[1])  # second capture group: the title attribute
        print(cast.strip())
        print(release)
if __name__ == '__main__':
    # Request one board page per offset: 0, 10, ..., 90.
    for num in range(0, 100, 10):
        maoyan("http://maoyan.com/board/4?offset=%s" % num, num)