# [Python] plain-text view / copy code  (forum paste header; not part of the program)
import re
import requests
import pandas as pd
class ChaoXing:
    """Interactive client for a ChaoXing (Xuexitong) account.

    It lists the account's courses, collects the attachment files found on
    the assignment pages of one chosen course, and downloads one chosen
    attachment into the current directory.

    Expected call order: ``session_token()``, ``class_list()``,
    ``class_data()``, ``download_data()``. All HTTP traffic goes through one
    shared ``requests`` session stored in ``self.session``.
    """

    # One course entry on the course-list page: link target plus title text.
    # Whitespace between tokens is matched with \s* so the pattern works on
    # the page as served as well as on text whose spaces were removed.
    _COURSE_PATTERN = re.compile(
        r'"color1"\s*href="(?P<src>.*?)".*?break-all;"\s*title="(?P<name>.*?)">'
        r'.*?<i\s*class="icon-news"></i>',
        re.S,
    )

    # Attachment markup on an assignment page (two layouts, see
    # _parse_attachments). Compiled once instead of once per page.
    _ATTACHMENT_GENERIC = re.compile(
        r'<h2 class="mark_title">(?P<name>.*?)</h2>.*?data="(?P<enc>.*?)" type="(?P<type>.*?)"',
        re.S,
    )
    _ATTACHMENT_ATTACH_NEW = re.compile(
        r'<h2 class="mark_title">(?P<name>.*?)</h2>.*?<p class="attachNew"><em style="font-style:normal; display:block;min-height:2px;"></em><span data="(?P<enc>.*?)" type="(?P<type>.*?)" name=".*?"',
        re.S,
    )

    # Characters that must not reach the local file system as part of a name.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, user, password):
        """Store the credentials and create the shared HTTP session.

        Args:
            user: Account name sent as ``name`` to the login endpoint.
            password: Account password sent as ``pwd`` to the login endpoint.
        """
        self.password = password
        self.user = user
        # Attachments found by class_data(): dicts with enc / type / file_name.
        self.work_data_list = []
        # Courses found by class_list(): dicts with src / name.
        self.class_lists = []
        # Assignment-list URL of the chosen course; set by class_data() and
        # sent as Referer by download_data().
        self.work_class_url = ""
        self.headers = {
            "User-agent": "Mozilla/4.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36",
        }
        self.session = requests.session()

    @staticmethod
    def _to_index(choice, count):
        """Convert a 1-based menu choice into a 0-based list index.

        Raises:
            IndexError: If ``choice`` is not between 1 and ``count``. This
                also rejects 0 and negative numbers, which plain ``choice - 1``
                indexing would silently resolve from the end of the list.
        """
        if not 1 <= choice <= count:
            raise IndexError(
                "selection {} is out of range; expected a number from 1 to {}".format(
                    choice, count
                )
            )
        return choice - 1

    @staticmethod
    def _first_match(pattern, text, label):
        """Return the first ``re.findall`` hit of ``pattern`` in ``text``.

        Raises:
            IndexError: If nothing matches; the message names ``label`` so
                the missing page element is identifiable.
        """
        found = re.findall(pattern, text, re.S)
        if not found:
            raise IndexError(
                "could not find {} in the page; the login may have failed or "
                "the page layout may have changed".format(label)
            )
        return found[0]

    @classmethod
    def _parse_courses(cls, page_text):
        """Extract ``{"src": ..., "name": ...}`` dicts from the course-list HTML."""
        return [
            {"src": hit.group("src"), "name": hit.group("name")}
            for hit in cls._COURSE_PATTERN.finditer(page_text)
        ]

    @classmethod
    def _parse_attachments(cls, page_text):
        """Extract attachment descriptors from one assignment page.

        Returns:
            A list of dicts with ``enc`` (object id used for the download),
            ``type`` (file extension) and ``file_name`` (assignment title
            plus extension, with the enumeration comma replaced by a dot).
        """
        # NOTE(review): the original condition was an unterminated string
        # literal; it is reconstructed here as the substring `span data="`
        # (an escaped quote apparently lost in transit). The fallback pattern
        # also requires `<span data="`, so confirm the intended condition.
        if 'span data="' in page_text:
            pattern = cls._ATTACHMENT_GENERIC
        else:
            pattern = cls._ATTACHMENT_ATTACH_NEW
        attachments = []
        for hit in pattern.finditer(page_text):
            extension = hit.group("type")
            attachments.append({
                "enc": hit.group("enc"),
                "type": extension,
                "file_name": (hit.group("name") + "." + extension).replace("、", "."),
            })
        return attachments

    @classmethod
    def _safe_file_name(cls, name):
        """Return ``name`` with path separators and reserved characters replaced.

        The name originates from remote HTML, so it must not be able to point
        outside the download directory.
        """
        cleaned = cls._UNSAFE_NAME_CHARS.sub("_", name).strip(" .")
        return cleaned or "download"

    @staticmethod
    def _print_table(rows, column_titles):
        """Print ``rows`` as a 1-based numbered table with renamed columns."""
        table = pd.DataFrame(rows)
        table.index = table.index + 1
        table.rename(columns=column_titles, inplace=True)
        print(table)

    def session_token(self):
        """Call the login endpoint so the shared session is used for the login.

        The response is not inspected here, so a rejected login only shows up
        later when the expected page elements are missing.
        """
        # NOTE(review): the credentials are sent as query-string parameters
        # of a GET request, as this endpoint is called in the original code.
        login_api = "https://passport2.chaoxing.com/api/login"
        params = {
            "name": self.user,  # account name
            "pwd": self.password,  # account password
            "verify": "0",
            "schoolid": "",
        }
        self.session.get(login_api, params=params, headers=self.headers)

    def class_list(self):
        """Fetch the course list and store it in ``self.class_lists``.

        The list is rebuilt on every call, so calling this twice does not
        produce duplicate entries.
        """
        class_url = "http://mooc2-ans.chaoxing.com/visit/courses/list?v=1652629452722&rss=1&start=0&size=500&catalogId=0&searchname="
        page_text = self.session.get(url=class_url, headers=self.headers).text
        self.class_lists = self._parse_courses(page_text)

    def class_data(self):
        """Let the user pick a course and collect its assignment attachments.

        Prints the course table, reads a 1-based course number from stdin,
        walks every assignment page of that course and fills
        ``self.work_data_list``; finally prints the attachment table.

        Raises:
            IndexError: If no courses are loaded, the number is out of range,
                or an expected element is missing from a fetched page.
            ValueError: If the typed value is not an integer.
        """
        if not self.class_lists:
            raise IndexError("no courses available; call class_list() after a successful login")
        self._print_table(self.class_lists, {"name": "课程名称"})
        choice = int(input("请输入课程序号:"))
        course = self.class_lists[self._to_index(choice, len(self.class_lists))]

        course_page = self.session.get(url=course["src"], headers=self.headers).text
        work_url = self._first_match(r'title="作业" data-url="(.*?)">', course_page, "the assignment tab URL")
        courseid = self._first_match(r'name="courseid" value="(.*?)"/>', course_page, "the course id")
        classid = self._first_match(r'name="clazzid" value="(.*?)"/>', course_page, "the class id")
        workenc = self._first_match(r'name="workEnc" value="(.*?)">', course_page, "the workEnc token")
        self.work_class_url = work_url + "?courseId=" + courseid + "&classId=" + classid + "&enc=" + workenc

        listing = self.session.get(url=self.work_class_url, headers=self.headers).text
        task_urls = re.findall(r'<li onclick="goTask.*?data="(.*?)">', listing, re.S)

        # Start from a clean list so indexes printed below always refer to
        # the course that was just selected.
        self.work_data_list = []
        for task_url in task_urls:
            page_text = self.session.get(url=task_url, headers=self.headers).text
            self.work_data_list.extend(self._parse_attachments(page_text))
        self._print_table(self.work_data_list, {"file_name": "文件名称"})

    def download_data(self):
        """Let the user pick an attachment and save it to the current directory.

        Returns:
            The path the file was written to.

        Raises:
            IndexError: If no attachments are loaded, the number is out of
                range, or the preview page has no download link.
            ValueError: If the typed value is not an integer.
            requests.HTTPError: If the file request returns an error status.
        """
        if not self.work_data_list:
            raise IndexError("no attachments available; call class_data() first")
        choice = int(input("请输入你想下载文件的序号:"))
        item = self.work_data_list[self._to_index(choice, len(self.work_data_list))]

        work_url = "https://mooc1.chaoxing.com/ueditorupload/read?objectId=" + item['enc']
        # Same User-agent as every other request, plus the Referer of the
        # assignment list when it is known.
        headers = dict(self.headers)
        if self.work_class_url:
            headers['Referer'] = self.work_class_url

        if item['type'] == 'zip':
            # Zip attachments are fetched from the read URL directly.
            response = self.session.get(url=work_url, headers=headers, stream=True)
        else:
            # Other types go through a preview page that carries the real link.
            preview = self.session.get(url=work_url, headers=self.headers).text
            file_url = self._first_match(
                r'<a class="btnDown" href="(.*?)"><span>', preview, "the download link"
            )
            response = self.session.get(url=file_url, headers=headers, stream=True)

        work_path = "./" + self._safe_file_name(item['file_name'])
        with response:  # releases the streamed connection when done
            # Fail before creating the file so an error page is never saved
            # under the attachment's name.
            response.raise_for_status()
            with open(work_path, 'wb') as f:
                for chunk in response.iter_content(1024 * 1024 * 2):
                    if chunk:
                        f.write(chunk)
        return work_path
def main():
    """Run the interactive flow: log in, list courses, list attachments, download one."""
    # The credentials are empty placeholders in this script.
    client = ChaoXing(user="", password="")
    pipeline = (
        client.session_token,
        client.class_list,
        client.class_data,
        client.download_data,
    )
    for step in pipeline:
        step()


if __name__ == "__main__":
    main()