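# [Python] "View as plain text / Copy code" -- appears to be a header left over from a forum copy-paste; not part of the program.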
import os
import requests
from lxml import etree
import re
from io import BytesIO
from PIL import Image
# Default HTTP headers for page requests: a desktop browser user-agent string.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59',
}
# Save files
def xzwj(xz, head, path):
    """Download the image at ``xz`` and save it in ``path`` as a JPEG.

    Args:
        xz: URL of the image. Its last path segment, with the extension
            replaced by ``.jpg``, becomes the file name.
        head: HTTP headers sent with the request.
        path: Directory that receives the file; it must already exist.

    Raises:
        requests.RequestException: The request failed, timed out, or
            returned an HTTP error status.
        OSError: The payload is not a readable image, or the file cannot
            be written.
    """
    # Last URL segment minus its extension; everything is stored as .jpg.
    w_na = xz.split("/")[-1].rsplit(".", 1)[0] + '.jpg'
    print("正在下载,请耐心等待。。。")
    w_rar = requests.get(xz, headers=head, timeout=30)
    # Fail here instead of handing an HTML error page to the image decoder.
    w_rar.raise_for_status()
    im = Image.open(BytesIO(w_rar.content))
    has_alpha = im.mode in ("RGBA", "LA") or (
        im.mode == "P" and "transparency" in im.info
    )
    if has_alpha:
        # JPEG cannot store transparency: composite the picture onto a
        # white canvas and save that canvas, not the original image.
        im = im.convert("RGBA")
        background = Image.new("RGB", im.size, (255, 255, 255))
        background.paste(im, mask=im.split()[3])
        im = background
    elif im.mode not in ("RGB", "L", "CMYK"):
        # Remaining modes (e.g. palette images) are not accepted by the
        # JPEG writer and need an explicit conversion.
        im = im.convert("RGB")
    im.save(os.path.join(path, w_na), 'JPEG')
def xzwj2(xz, head, path):
    """Download ``xz`` byte-for-byte into ``path`` under its original name.

    The response is streamed to disk in chunks so that a large archive is
    never held in memory as a whole.

    Args:
        xz: URL of the file; its last path segment is used as the file name.
        head: HTTP headers sent with the request.
        path: Directory that receives the file; it must already exist.

    Raises:
        requests.RequestException: The request failed, timed out, or
            returned an HTTP error status.
        OSError: The file cannot be written.
    """
    w_na = xz.split("/")[-1]  # last URL segment is the file name on disk
    print("正在下载,请耐心等待。。。")
    with requests.get(xz, headers=head, stream=True, timeout=30) as resp:
        # Do not save an HTML error page under the archive's name.
        resp.raise_for_status()
        with open(os.path.join(path, w_na), 'wb') as f:
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)
    print(w_na, "下载完成")
# Resolve the download URL of the archive file
def rar_xz(rar_url, head):
    """Fetch ``rar_url`` and return the URL its page assigns to ``window.location``.

    Args:
        rar_url: URL of the page containing a ``window.location='...'``
            assignment.
        head: HTTP headers sent with the request.

    Returns:
        The quoted target of the first ``window.location='...'`` assignment
        found in the response text.

    Raises:
        requests.RequestException: The request failed or timed out.
        IndexError: The page contains no such assignment. The exception type
            is the one the previous ``findall(...)[0]`` lookup produced; it
            now carries a message naming the page.
    """
    xz_res = requests.get(rar_url, headers=head, timeout=30).text
    # The dot is escaped so that only a literal "window.location" matches.
    found = re.search(r"window\.location='(.*?)'", xz_res)
    if found is None:
        raise IndexError(f"no window.location redirect found in {rar_url}")
    return found.group(1)
def xzxz(xx):
    """Download the preview images and the archive of list entry ``xx``.

    The entry's title and page URL are read from the module-level ``pna``
    and ``pli`` lists. Files are written to a folder under ``d://美女写真``
    named after the ``title`` attribute of the first image on the page,
    falling back to the list title when the page has none.

    Args:
        xx: 1-based entry number as printed by ``main_xz``. A number outside
            the list is reported and ignored.
    """
    if not 1 <= xx <= min(len(pna), len(pli)):
        # Reject 0, negatives (which would index from the end) and numbers
        # past the end of the list.
        print("序号超出范围,请重新输入。")
        return
    page_url = pli[xx - 1]
    print("要下载的内容和网址是:", pna[xx - 1], page_url)
    print("开始下载展示图片".center(30, '-'))
    r_resp = requests.get(page_url, headers=headers, timeout=30)
    r_tree = etree.HTML(r_resp.text)
    r_imgs = r_tree.xpath('//div/div/p/img/@src')  # URL of every preview image
    r_nas = r_tree.xpath('//div/div/p/img/@title')  # image titles
    rar_links = r_tree.xpath('//div/div[@class="pay-box"]/a/@href')  # archive page link(s)

    title = r_nas[0] if r_nas else pna[xx - 1]
    # Replace characters that Windows does not allow in a folder name.
    folder = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or str(xx)
    path = f"d://美女写真//{folder}"
    os.makedirs(path, exist_ok=True)

    # Same headers as the page request plus the page itself as referer.
    head = {**headers, 'referer': page_url}

    done = 0
    for n, img in enumerate(r_imgs, start=1):
        try:
            xzwj(img, head, path)
        except (requests.RequestException, OSError) as err:
            # One broken image must not abort the remaining downloads.
            print(f"第{n}张下载失败:{err}")
            continue
        done += 1
        print(f"{title[:-2]},第{n}张下载完成---")
    print("共", done, "张下载完成")

    if not rar_links:
        print("未找到压缩文件下载链接,已跳过。")
        return
    print("============开始下载压缩文件(文件很大)==========")
    try:
        xz = rar_xz(rar_links[0], head)
        xzwj2(xz, head, path)
    except (requests.RequestException, OSError, IndexError) as err:
        print(f"压缩文件下载失败:{err}")
def main_xz():
    """Ask for a category, print its numbered entries, and return them.

    The prompt repeats until the user enters a number from 1 to 4. The
    chosen category page is fetched and every ``<h2 class="entry-title">``
    heading on it is listed.

    Returns:
        A ``(pna, pli)`` tuple of parallel lists: ``pna[i]`` is the text of
        entry ``i + 1`` as printed, ``pli[i]`` is the ``href`` of that entry.

    Raises:
        requests.RequestException: The category page could not be fetched.
    """
    urls = {
        1: "https://dimgw.us/xinggan",
        2: "https://dimgw.us/qc",
        3: "https://dimgw.us/yj",
        4: "https://dimgw.us/zf",
    }
    print("1.性感美女 2.清纯可爱 3.性感御姐 4.制服诱惑")
    while True:
        ms = input("请选择分类:")
        try:
            url = urls[int(ms)]
        except (ValueError, KeyError):
            # Not a number, or a number without a category: ask again.
            print("输入无效,请输入 1-4 之间的数字。")
            continue
        break

    resp = requests.get(url, headers=headers, timeout=30)
    t = re.findall(r'<h2 class="entry-title">(.*?)</a></h2>', resp.text, re.S)
    pna = []
    pli = []
    print("此类共", len(t), "页!")
    print("*" * 50 + '\n')
    for i in t:
        t1 = re.search(r'>(.*)', i, re.S)  # text after the first tag
        t2 = re.search(r'href="(.*?)"', i, re.S)  # first link in the heading
        if t1 is None or t2 is None:
            continue
        # Exactly one title and one link per entry keeps both lists aligned
        # with the number printed below.
        pna.append(t1.group(1))
        pli.append(t2.group(1))
        print(str(len(pna)) + ')', pna[-1])
    return pna, pli
# Fetch the listing once; xzxz reads pna and pli as module-level globals.
pna, pli = main_xz()
if not pna:
    print("没有找到可下载的内容。")

# Accept up to len(pna) downloads, or stop as soon as the user enters 0.
var = 0
while var < len(pna):
    xx = input("请输入要下载的序号(0退出):").strip()
    if xx == '':
        continue
    try:
        choice = int(xx)
    except ValueError:
        print("请输入数字序号。")
        continue
    if choice == 0:
        print("已退出!")
        break
    if not 1 <= choice <= len(pna):
        print("序号超出范围,请重新输入。")
        continue
    xzxz(choice)
    var += 1