# (Forum paste artifact: "[Asm] plain-text view / copy code" -- not part of the script.)
import requests
# Download occupational skill standards (PDF files) from www.osta.org.cn
def save_paf(name, data):
    """Write ``data`` to the file ``<name>.pdf``.

    Args:
        name: Output path without the ``.pdf`` extension; the extension is
            appended here.
        data: Bytes to store. The file is opened in binary mode and any
            existing file with the same name is overwritten.
    """
    target = name + '.pdf'
    with open(target, 'wb') as pdf_file:
        pdf_file.write(data)
def get_data(standardInfo, timeout=30):
    """Download one standard document and return its raw bytes.

    Args:
        standardInfo: Value sent as the ``fileName`` query parameter of the
            download endpoint (callers in this file pass the
            ``standardInfo`` field of a list entry).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    headers = {
        'Host': 'www.osta.org.cn',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Accept': 'application/json, text/plain, */*',
        'Referer': 'http://www.osta.org.cn/skillStandard',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '_gscu_486005091=457139136nfl9219; Hm_lvt_e85984af56dd04582a569a53719e397f=1745713914,1745738702,1746495986',
    }
    params = {
        'fileName': standardInfo,
    }
    response = requests.get(
        'http://www.osta.org.cn/api/sys/downloadFile/decrypt',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of returning the error page body,
    # which the caller would otherwise save to disk as if it were a PDF.
    response.raise_for_status()
    return response.content
def get_url_name(page):
    """Download every standard listed on one page of the standards index.

    Requests page ``page`` (10 entries per page) of the skill-standard list,
    prints each entry, downloads its document with ``get_data`` and stores it
    with ``save_paf`` under the name ``<name>-<issueNumber>``.

    Args:
        page: Page number sent as the ``pageNum`` query parameter.

    Raises:
        requests.HTTPError: If the list endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': 'http://www.osta.org.cn/skillStandard',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Cookie': '_gscu_486005091=457139136nfl9219; Hm_lvt_e85984af56dd04582a569a53719e397f=1745713914,1745738702,1746495986',
    }
    params = {
        'pageSize': '10',
        'pageNum': page,
        'total': '0',
        'nameCode': '',
        'status': '1',
    }
    # No ``verify=False`` here: the URL is plain HTTP, and disabling
    # certificate checks would only matter (harmfully) after an HTTPS redirect.
    response = requests.get(
        'http://www.osta.org.cn/api/public/skillStandardList',
        params=params,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()

    # Path separators and characters that are illegal in Windows file names
    # are replaced so remote data cannot break or redirect the output path.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    # Tolerate a missing or null "body"/"list" (e.g. a page past the end).
    entries = (response.json().get("body") or {}).get("list") or []
    for entry in entries:
        print(entry)
        file_ref = entry.get('standardInfo')
        if not file_ref:
            # Entry has no downloadable document reference.
            continue
        # Skip absent name parts instead of failing on string concatenation.
        name_parts = [
            str(part)
            for part in (entry.get("name"), entry.get('issueNumber'))
            if part
        ]
        if not name_parts:
            continue
        name = "-".join(name_parts).translate(unsafe_chars)
        base_data = get_data(file_ref)
        save_paf(name, base_data)
if __name__ == '__main__':
    # Number of index pages to crawl, counted from the first page.
    # get_url_name() handles a single page number, so iterate over 1..N
    # rather than passing the count itself (which fetched only page N).
    page_count = 3
    for page in range(1, page_count + 1):
        get_url_name(page)