好友
阅读权限 10
听众
最后登录 1970-1-1
本帖最后由 被遗忘的路人 于 2020-6-22 15:56 编辑
前段时间弄那个python爬妹子图,有个前端的小伙伴问我能不能用前端的语言写一份,今天补上
本来上个月都发出来了,弄着弄着IP让禁了,一直没管,今天没事干打开一看好了,接着继续弄!
不会因为新建文件夹名字错误停止运行,小伙伴不用操心!
我这字是不是特别大?没事,我女朋友说我无论哪里都大!
一楼给放个使用教程
[JavaScript] 纯文本查看 复制代码
const fs = require("fs");
const https = require("https");
const cheerio = require("cheerio");
const request = require("request");
// Entry page of the site being crawled. It is also sent as the Referer header and
// used as the prefix when the URLs of the paginated list pages are built.
const requestUrl = "https://www.mzitu.com/";
// Character class of punctuation (ASCII and CJK) that is removed from folder names.
// The pattern carries no "g" flag, so one replace() call removes only the first match.
const regEx = new RegExp("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()&;|{}【】‘;:”“'。,、?]");
// Request options (HTTP headers) passed to every https.get() / request() call in this script.
const option = {
headers: {
"referer": requestUrl,
"pragma": "no-cache",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
}
};
/**
 * Starts the crawl: downloads the entry page and hands its HTML to getPage().
 * A network failure is logged instead of crashing the process with an
 * unhandled "error" event.
 */
const requestPlay = () => {
    https.get(requestUrl, option, (res) => {
        // Decode at stream level so a multi-byte UTF-8 character that is split
        // across two chunks is not turned into replacement characters.
        res.setEncoding("utf8");
        let thisHtml = "";
        res.on("data", (chunk) => {
            thisHtml += chunk;
        });
        res.on("end", () => {
            getPage(thisHtml);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + requestUrl + " " + err.message);
    });
};
/**
 * Reads the total number of list pages from the pagination bar of the entry
 * page, builds the URL of every list page and starts crawling at the first one.
 *
 * @param {string} pageDom HTML of the entry page.
 */
const getPage = (pageDom) => {
    const $ = cheerio.load(pageDom);
    // The element just before the "next" link is read as the total page count.
    const lastPageText = $(".pagination").find(".nav-links").find(".next").prev().text();
    const maxPage = Number(lastPageText);
    // Page 1 is the site root itself; the following pages live under "page/<n>/".
    const pageArr = [requestUrl];
    let pageNo = 2;
    while (pageNo <= maxPage) {
        pageArr.push(`${requestUrl}page/${pageNo}/`);
        pageNo += 1;
    }
    getPicture(0, pageArr);
};
/**
 * Downloads one list page, collects the title and link of every photo set on
 * it and starts processing the first set.
 *
 * @param {number} palyIndex Index into pageArr of the list page to fetch.
 * @param {string[]} pageArr URLs of all list pages.
 */
const getPicture = (palyIndex, pageArr) => {
    // Stop after the last list page; https.get(undefined) would throw.
    if (palyIndex >= pageArr.length) {
        console.info("全部页面已爬取完成!");
        return;
    }
    const pageUrl = pageArr[palyIndex];
    https.get(pageUrl, option, (res) => {
        // Decode at stream level so multi-byte characters split across chunks survive.
        res.setEncoding("utf8");
        let thisPicture = "";
        res.on("data", (chunk) => {
            thisPicture += chunk;
        });
        res.on("end", () => {
            const $ = cheerio.load(thisPicture);
            const items = $("#pins").find("li");
            const pictureArr = [];
            for (let i = 0; i < items.length; i++) {
                const name = items.eq(i).find("img").attr("alt");
                const href = items.eq(i).find("a").attr("href");
                // Entries without a title or link cannot be crawled, so they are skipped.
                if (name && href) {
                    pictureArr.push({ name, href });
                }
            }
            if (pictureArr.length === 0) {
                // Nothing usable on this page: go straight to the next list page.
                getPicture(palyIndex + 1, pageArr);
                return;
            }
            requestPicture(pictureArr, 0, palyIndex + 1, pageArr);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + pageUrl + " " + err.message);
        getPicture(palyIndex + 1, pageArr);
    });
};
/**
 * Downloads the first page of one photo set and passes its HTML to pictureDom().
 *
 * @param {{name: string, href: string}[]} Picture Photo sets found on the current list page.
 * @param {number} pictureIndex Index of the set to request.
 * @param {number} palyIndex Index of the next list page to crawl afterwards.
 * @param {string[]} pageArr URLs of all list pages.
 */
const requestPicture = (Picture, pictureIndex, palyIndex, pageArr) => {
    // Moves on to the next set, or to the next list page when this page is exhausted.
    const skipToNext = () => {
        if (pictureIndex + 1 < Picture.length) {
            requestPicture(Picture, pictureIndex + 1, palyIndex, pageArr);
        } else {
            getPicture(palyIndex, pageArr);
        }
    };
    const current = Picture[pictureIndex];
    if (!current || !current.href) {
        skipToNext();
        return;
    }
    https.get(current.href, option, (res) => {
        // Decode at stream level so multi-byte characters split across chunks survive.
        res.setEncoding("utf8");
        let thisHtml = "";
        res.on("data", (chunk) => {
            thisHtml += chunk;
        });
        res.on("end", () => {
            pictureDom(Picture, pictureIndex, palyIndex, pageArr, thisHtml);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + current.href + " " + err.message);
        skipToNext();
    });
};
/**
 * Handles one page of a photo set: downloads the image shown on it and then
 * either follows the link to the next page of the same set (after a 3 second
 * pause) or moves on to the next set / next list page.
 *
 * @param {{name: string, href: string}[]} Picture Photo sets found on the current list page.
 * @param {number} pictureIndex Index of the set being processed.
 * @param {number} palyIndex Index of the next list page to crawl afterwards.
 * @param {string[]} pageArr URLs of all list pages.
 * @param {string} thisHtml HTML of the current page of the set.
 */
const pictureDom = (Picture, pictureIndex, palyIndex, pageArr, thisHtml) => {
    // Requests the next set with fresh HTML. Re-running this function on the
    // old HTML would save the previous set's last image into the next folder.
    const advance = () => {
        if (pictureIndex + 1 < Picture.length) {
            requestPicture(Picture, pictureIndex + 1, palyIndex, pageArr);
        } else {
            getPicture(palyIndex, pageArr);
        }
    };
    const $ = cheerio.load(thisHtml);
    const current = Picture[pictureIndex];
    // URL of the image displayed on this page; undefined when the markup is missing.
    const pictureSrc = $(".main-image").find("img").attr("src");
    if (pictureSrc) {
        downloadImg(current.name, pictureSrc);
    }
    const nextPage = $(".main-image").find("a").attr("href");
    // The image links to the following page; while that URL still contains the
    // set's own URL we are inside the same set.
    if (nextPage && nextPage.includes(current.href)) {
        setTimeout(() => {
            https.get(nextPage, option, (res) => {
                // Decode at stream level so multi-byte characters split across chunks survive.
                res.setEncoding("utf8");
                let nextHtml = "";
                res.on("data", (chunk) => {
                    nextHtml += chunk;
                });
                res.on("end", () => {
                    pictureDom(Picture, pictureIndex, palyIndex, pageArr, nextHtml);
                });
            }).on("error", (err) => {
                console.error("请求失败: " + nextPage + " " + err.message);
                advance();
            });
        }, 3000);
        return;
    }
    advance();
};
/**
 * Removes from a name every character matched by regEx.
 * The name is processed one UTF-16 code unit at a time, and each unit that
 * regEx matches is replaced by the empty string.
 *
 * @param {string} name Raw name.
 * @returns {string} The name without the matched characters.
 */
const dealNmae = (name) => {
    const cleaned = Array.from(
        { length: name.length },
        (_, idx) => name.charAt(idx).replace(regEx, "")
    );
    return cleaned.join("");
};
/**
 * Downloads a single image into a folder named after its photo set.
 *
 * The folder is created (relative to the current working directory) when it
 * does not exist yet. The file is written only after the server answered with
 * HTTP 200, so error pages are not saved under an image name. Every failure is
 * logged instead of thrown.
 *
 * @param {string} folderName Title of the photo set; sanitised with dealNmae() before use.
 * @param {string} imgSrc URL of the image.
 */
const downloadImg = (folderName, imgSrc) => {
    // Callers pass whatever attr("src") returned, which is undefined when no image was found.
    if (typeof imgSrc !== "string" || imgSrc === "") {
        console.error("图片链接无效: " + imgSrc);
        return;
    }
    // Fall back to a fixed folder when the title is missing or consists only of filtered characters.
    const newFolderName = dealNmae(String(folderName == null ? "" : folderName)) || "unnamed";
    try {
        if (!fs.existsSync(newFolderName)) {
            fs.mkdirSync(newFolderName);
        }
    } catch (err) {
        console.error("创建文件夹失败【" + newFolderName + "】: " + err.message);
        return;
    }
    const imgName = imgSrc.split("/").pop();
    // encoding: null makes request deliver the body as a Buffer instead of a decoded string.
    const imgOption = Object.assign({}, option, { encoding: null });
    request(imgSrc, imgOption, (err, res, body) => {
        if (err || res.statusCode !== 200) {
            console.error("图片下载失败: " + imgSrc + " " + (err ? err.message : "HTTP " + res.statusCode));
            return;
        }
        fs.writeFile(newFolderName + "/" + imgName, body, (writeErr) => {
            if (writeErr) {
                console.error("图片保存失败: " + imgSrc + " " + writeErr.message);
                return;
            }
            console.info("文件夹名称【" + newFolderName + "】图片链接: " + imgSrc + " 已爬取完成!");
        });
    });
};
// Entry point: start the crawl.
requestPlay();
再来一个爬 自拍图 的
[JavaScript] 纯文本查看 复制代码
const fs = require("fs");
const https = require("https");
const cheerio = require("cheerio");
const request = require("request");
// Entry page of the site being crawled; also the prefix for list-page and photo-set URLs.
const requestServerUrl = "https://自拍图.xyz/";
// Character class of punctuation (ASCII and CJK) that is removed from folder names.
// The pattern carries no "g" flag, so one replace() call removes only the first match.
const regEx = new RegExp("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()&;|{}【】‘;:”“'。,、?]");
// Request options (HTTP headers) passed to every https.get() / request() call in this script.
// NOTE(review): the referer looks like the punycode form of requestServerUrl's host — confirm.
const option = {
headers: {
"referer": "https://xn--wcsr8yy8y.xyz/",
"pragma": "no-cache",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
}
};
/**
 * Starts the crawl: downloads the entry page and hands its HTML to getPage().
 * A network failure is logged instead of crashing the process with an
 * unhandled "error" event.
 */
const requestPlay = () => {
    https.get(requestServerUrl, option, (res) => {
        // Decode at stream level so a multi-byte UTF-8 character that is split
        // across two chunks is not turned into replacement characters.
        res.setEncoding("utf8");
        let thisHtml = "";
        res.on("data", (chunk) => {
            thisHtml += chunk;
        });
        res.on("end", () => {
            getPage(thisHtml);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + requestServerUrl + " " + err.message);
    });
};
/**
 * Reads the total number of list pages from the ".num" element of the entry
 * page (the text after its last "/"), builds the URL of every list page and
 * starts crawling at the first one.
 *
 * @param {string} pageDom HTML of the entry page.
 */
const getPage = (pageDom) => {
    const $ = cheerio.load(pageDom);
    const counterParts = $(".num").text().split("/");
    const maxPage = Number(counterParts.pop());
    // Page 1 is the site root itself; the following pages are "index_<n>.html".
    const pageArr = [requestServerUrl];
    let pageNo = 2;
    while (pageNo <= maxPage) {
        pageArr.push(`${requestServerUrl}index_${pageNo}.html`);
        pageNo += 1;
    }
    getPicture(0, pageArr);
};
/**
 * Downloads one list page, collects the title and link of every photo set on
 * it and starts processing the first set.
 *
 * @param {number} palyIndex Index into pageArr of the list page to fetch.
 * @param {string[]} pageArr URLs of all list pages.
 */
const getPicture = (palyIndex, pageArr) => {
    // Stop after the last list page; https.get(undefined) would throw.
    if (palyIndex >= pageArr.length) {
        console.info("全部页面已爬取完成!");
        return;
    }
    const pageUrl = pageArr[palyIndex];
    https.get(pageUrl, option, (res) => {
        // Decode at stream level so multi-byte characters split across chunks survive.
        res.setEncoding("utf8");
        let thisPicture = "";
        res.on("data", (chunk) => {
            thisPicture += chunk;
        });
        res.on("end", () => {
            const $ = cheerio.load(thisPicture);
            const items = $("#tiles").find("li");
            const pictureArr = [];
            for (let i = 0; i < items.length; i++) {
                const item = items.eq(i);
                // The set's path is taken as the first single-quoted token in the
                // <li> markup. The quote is matched both raw and entity-encoded,
                // because the serializer may escape it.
                // NOTE(review): the published split(''') did not parse; the
                // delimiter is reconstructed here — confirm against the live markup.
                const path = $.html(item).split(/'|&apos;|&#39;|&#x27;/i)[1];
                if (!path) {
                    continue;
                }
                pictureArr.push({
                    name: item.find("a").text(),
                    href: requestServerUrl + path
                });
            }
            if (pictureArr.length === 0) {
                // Nothing usable on this page: go straight to the next list page.
                getPicture(palyIndex + 1, pageArr);
                return;
            }
            requestPicture(pictureArr, 0, palyIndex + 1, pageArr);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + pageUrl + " " + err.message);
        getPicture(palyIndex + 1, pageArr);
    });
};
/**
 * Downloads the page of one photo set and passes its HTML to pictureDom().
 *
 * @param {{name: string, href: string}[]} Picture Photo sets found on the current list page.
 * @param {number} pictureIndex Index of the set to request.
 * @param {number} palyIndex Index of the next list page to crawl afterwards.
 * @param {string[]} pageArr URLs of all list pages.
 */
const requestPicture = (Picture, pictureIndex, palyIndex, pageArr) => {
    // Moves on to the next set, or to the next list page when this page is exhausted.
    const skipToNext = () => {
        if (pictureIndex + 1 < Picture.length) {
            requestPicture(Picture, pictureIndex + 1, palyIndex, pageArr);
        } else {
            getPicture(palyIndex, pageArr);
        }
    };
    const current = Picture[pictureIndex];
    if (!current || !current.href) {
        skipToNext();
        return;
    }
    https.get(current.href, option, (res) => {
        // Decode at stream level so multi-byte characters split across chunks survive.
        res.setEncoding("utf8");
        let thisHtml = "";
        res.on("data", (chunk) => {
            thisHtml += chunk;
        });
        res.on("end", () => {
            pictureDom(Picture, pictureIndex, palyIndex, pageArr, thisHtml);
        });
    }).on("error", (err) => {
        console.error("请求失败: " + current.href + " " + err.message);
        skipToNext();
    });
};
/**
 * Handles the page of one photo set: collects the src of every image inside
 * ".artical-content", downloads them one at a time on a 2 second timer and
 * then moves on to the next set, or to the next list page once the sets of
 * the current page are used up.
 *
 * @param {{name: string, href: string}[]} Picture Photo sets found on the current list page.
 * @param {number} pictureIndex Index of the set being processed.
 * @param {number} palyIndex Index of the next list page to crawl afterwards.
 * @param {string[]} pageArr URLs of all list pages.
 * @param {string} thisHtml HTML of the set's page.
 */
const pictureDom = (Picture, pictureIndex, palyIndex, pageArr, thisHtml) => {
    const $ = cheerio.load(thisHtml);
    const images = $(".artical-content").find("img");
    const pictureLen = images.length;
    const sources = [];
    for (let n = 0; n < pictureLen; n += 1) {
        sources.push(images.eq(n).attr("src"));
    }
    let cursor = 0;
    const timer = setInterval(() => {
        // One image per tick keeps the request rate low.
        if (cursor < pictureLen) {
            downloadImg(Picture[pictureIndex].name, sources[cursor]);
            cursor += 1;
            return;
        }
        // All images handed off: stop the timer and continue the crawl.
        clearInterval(timer);
        const nextIndex = pictureIndex + 1;
        if (nextIndex < Picture.length) {
            requestPicture(Picture, nextIndex, palyIndex, pageArr);
            return;
        }
        getPicture(palyIndex, pageArr);
    }, 2000);
};
/**
 * Removes from a name every character matched by regEx.
 * The name is walked one UTF-16 code unit at a time, and each unit that regEx
 * matches is replaced by the empty string.
 *
 * @param {string} name Raw name.
 * @returns {string} The name without the matched characters.
 */
const dealNmae = (name) => {
    const kept = [];
    let pos = 0;
    while (pos < name.length) {
        kept.push(name.charAt(pos).replace(regEx, ""));
        pos += 1;
    }
    return kept.join("");
};
/**
 * Downloads a single image into a folder named after its photo set.
 *
 * The folder is created (relative to the current working directory) when it
 * does not exist yet. The file is written only after the server answered with
 * HTTP 200, so error pages are not saved under an image name. Every failure is
 * logged instead of thrown.
 *
 * @param {string} folderName Title of the photo set; sanitised with dealNmae() before use.
 * @param {string} imgSrc URL of the image.
 */
const downloadImg = (folderName, imgSrc) => {
    // Callers pass whatever attr("src") returned, which is undefined when an <img> has no src.
    if (typeof imgSrc !== "string" || imgSrc === "") {
        console.error("图片链接无效: " + imgSrc);
        return;
    }
    // Fall back to a fixed folder when the title is missing or consists only of filtered characters.
    const newFolderName = dealNmae(String(folderName == null ? "" : folderName)) || "unnamed";
    try {
        if (!fs.existsSync(newFolderName)) {
            fs.mkdirSync(newFolderName);
        }
    } catch (err) {
        console.error("创建文件夹失败【" + newFolderName + "】: " + err.message);
        return;
    }
    const imgName = imgSrc.split("/").pop();
    // encoding: null makes request deliver the body as a Buffer instead of a decoded string.
    const imgOption = Object.assign({}, option, { encoding: null });
    request(imgSrc, imgOption, (err, res, body) => {
        if (err || res.statusCode !== 200) {
            console.error("图片下载失败: " + imgSrc + " " + (err ? err.message : "HTTP " + res.statusCode));
            return;
        }
        fs.writeFile(newFolderName + "/" + imgName, body, (writeErr) => {
            if (writeErr) {
                console.error("图片保存失败: " + imgSrc + " " + writeErr.message);
                return;
            }
            console.info("文件夹名称【" + newFolderName + "】图片链接: " + imgSrc + " 已爬取完成!");
        });
    });
};
// Entry point: start the crawl.
requestPlay();
免费评分
参与人数 5 吾爱币 +5
热心值 +4
收起
理由
tygb
+ 1
我很赞同!学习了
苏紫方璇
+ 3
+ 1
欢迎分析讨论交流,吾爱破解论坛有你更精彩!
公子吖
+ 1
+ 1
热心回复!
王星星
+ 1
热心回复!
漁滒
+ 1
我很赞同!
查看全部评分