吾爱破解 - LCG - LSG |安卓破解|病毒分析|www.52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 4632|回复: 42
收起左侧

[其他转载] 为了爬妹子图去学python, 学麻木了, 用老本行语言试着写了一下

[复制链接]
被遗忘的路人 发表于 2020-6-9 16:24
本帖最后由 被遗忘的路人 于 2020-6-22 15:56 编辑

前段时间弄那个python爬妹子图,有个前端的小伙伴问我能不能用前端的语言写一份,今天补上

本来上个月都发出来了,弄着弄着IP让禁了,一直没管,今天没事干打开一看好了,接着继续弄!


不会因为新建文件夹名字错误停止运行,小伙伴不用操心!


我这字是不是特别大?没事,我女朋友说我无论哪里都大!


一楼给放个使用教程


[JavaScript] 纯文本查看 复制代码
const fs = require("fs");
const https = require("https");
const cheerio = require("cheerio");
const request = require("request");

// Base URL of the site being crawled; it is also sent as the "referer" header below.
const requestUrl = "https://www.mzitu.com/";

// Characters that are stripped from album titles before they are used as folder names.
// The pattern has no "g" flag, so one replace() call removes only the first match.
const regEx = new RegExp("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()&;|{}【】‘;:”“'。,、?]");

// Request options (HTTP headers) passed to every https.get() / request() call in this script.
const option = {
    headers: {
        "referer": requestUrl,
        "pragma": "no-cache",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
    }
};

// Start the crawl: fetch the landing page and hand its HTML to getPage().
// The response chunks are buffered and decoded once at the end, so a multi-byte
// UTF-8 character that is split across two chunks is not corrupted.
const requestPlay = () => {
    const req = https.get(requestUrl, option, (res) => {
        const chunks = [];
        res.on("data", (chunk) => {
            chunks.push(Buffer.from(chunk));
        });
        res.on("end", () => {
            getPage(Buffer.concat(chunks).toString("utf8"));
        });
    });
    // Without a listener a network failure raises an unhandled "error" event.
    req.on("error", (err) => {
        console.error("Request failed: " + requestUrl + " (" + err.message + ")");
    });
};

// Build the list of listing-page URLs and start crawling at the first one.
// The page count is the text of the pagination element that precedes the ".next" link.
const getPage = (pageDom) => {
    const $ = cheerio.load(pageDom);
    const lastPageLabel = $(".pagination").find(".nav-links").find(".next").prev().text();
    const pageCount = Number(lastPageLabel);
    const listingUrls = [requestUrl];
    let pageNo = 2;
    while (pageNo <= pageCount) {
        listingUrls.push(`${requestUrl}page/${pageNo}/`);
        pageNo += 1;
    }
    getPicture(0, listingUrls);
};

// Fetch one listing page and collect the albums shown on it.
//   palyIndex - index into pageArr of the listing page to fetch
//   pageArr   - URLs of all listing pages
// When albums are found, requestPicture() is started on the first one and is
// given the index of the NEXT listing page.
const getPicture = (palyIndex, pageArr) => {
    // Past the last listing page: nothing left to crawl.
    if (palyIndex >= pageArr.length) {
        console.info("All listing pages have been processed.");
        return;
    }
    const listingUrl = pageArr[palyIndex];
    const req = https.get(listingUrl, option, (res) => {
        const chunks = [];
        res.on("data", (chunk) => {
            chunks.push(Buffer.from(chunk));
        });
        res.on("end", () => {
            // Decode once so multi-byte characters split across chunks stay intact.
            const $ = cheerio.load(Buffer.concat(chunks).toString("utf8"));
            const items = $("#pins").find("li");
            const pictureArr = [];
            for (let i = 0; i < items.length; i++) {
                const item = items.eq(i);
                const name = item.find("img").attr("alt");
                const href = item.find("a").attr("href");
                // Entries without a title or a link cannot be downloaded; skip them.
                if (!name || !href) {
                    continue;
                }
                pictureArr.push({ name: name, href: href });
            }
            const nextIndex = palyIndex + 1;
            if (pictureArr.length === 0) {
                // Nothing usable on this page; continue with the next listing page.
                getPicture(nextIndex, pageArr);
                return;
            }
            requestPicture(pictureArr, 0, nextIndex, pageArr);
        });
    });
    // Without a listener a network failure raises an unhandled "error" event.
    req.on("error", (err) => {
        console.error("Request failed: " + listingUrl + " (" + err.message + ")");
    });
};

// Fetch the first page of the album at Picture[pictureIndex] and pass its HTML
// to pictureDom(). The other arguments are forwarded unchanged.
const requestPicture = (Picture, pictureIndex, palyIndex, pageArr) => {
    const albumUrl = Picture[pictureIndex].href;
    const req = https.get(albumUrl, option, (res) => {
        const chunks = [];
        res.on("data", (chunk) => {
            chunks.push(Buffer.from(chunk));
        });
        res.on("end", () => {
            // Decode once so multi-byte characters split across chunks stay intact.
            const html = Buffer.concat(chunks).toString("utf8");
            pictureDom(Picture, pictureIndex, palyIndex, pageArr, html);
        });
    });
    // Without a listener a network failure raises an unhandled "error" event.
    req.on("error", (err) => {
        console.error("Request failed: " + albumUrl + " (" + err.message + ")");
    });
};

// Handle one page of an album: download the image on it, then either follow the
// link to the album's next page, move on to the next album, or go back to the
// next listing page.
//   Picture      - albums of the current listing page ({ name, href })
//   pictureIndex - index of the album being processed
//   palyIndex    - index of the next listing page in pageArr
//   pageArr      - URLs of all listing pages
//   thisHtml     - HTML of the album page to process
const pictureDom = (Picture, pictureIndex, palyIndex, pageArr, thisHtml) => {
    const $ = cheerio.load(thisHtml);
    const album = Picture[pictureIndex];
    const mainImage = $(".main-image");

    // attr() yields undefined when the node is missing; skip the download then.
    const pictureSrc = mainImage.find("img").attr("src");
    if (pictureSrc) {
        downloadImg(album.name, pictureSrc);
    }

    // The image links to the next page; while that link still contains the
    // album URL there are more pages of this album to visit.
    const nextPage = mainImage.find("a").attr("href");
    if (nextPage && nextPage.indexOf(album.href) !== -1) {
        setTimeout(() => {
            const req = https.get(nextPage, option, (res) => {
                const chunks = [];
                res.on("data", (chunk) => {
                    chunks.push(Buffer.from(chunk));
                });
                res.on("end", () => {
                    const html = Buffer.concat(chunks).toString("utf8");
                    pictureDom(Picture, pictureIndex, palyIndex, pageArr, html);
                });
            });
            // Without a listener a network failure raises an unhandled "error" event.
            req.on("error", (err) => {
                console.error("Request failed: " + nextPage + " (" + err.message + ")");
            });
        }, 3000);
        return;
    }

    // Album finished. The next album needs its OWN page fetched; re-parsing the
    // current HTML would skip every remaining album on this listing page.
    const nextIndex = pictureIndex + 1;
    if (nextIndex < Picture.length) {
        requestPicture(Picture, nextIndex, palyIndex, pageArr);
    } else {
        getPicture(palyIndex, pageArr);
    }
};

// Return `name` with every character matched by regEx removed.
// Each character is run through replace() on its own, so every match is removed.
const dealNmae = (name) => {
    const cleanedChars = Array.from(
        { length: name.length },
        (_, pos) => name.charAt(pos).replace(regEx, '')
    );
    return cleanedChars.join("");
};

// Download one image into a folder named after the sanitised album title.
//   folderName - album title; passed through dealNmae() to get the folder name
//   imgSrc     - URL of the image; the file name is its last path segment
const downloadImg = (folderName, imgSrc) => {
    // Callers pass the raw result of attr(), which is undefined for a missing node.
    if (!folderName || !imgSrc) {
        console.warn("Download skipped: missing folder name or image URL.");
        return;
    }
    const newFolderName = dealNmae(folderName);
    try {
        // recursive: true makes this a no-op when the folder already exists.
        fs.mkdirSync(newFolderName, { recursive: true });
    } catch (err) {
        console.error("Cannot create folder \"" + newFolderName + "\": " + err.message);
        return;
    }
    const imgName = imgSrc.split("/").pop();
    const target = fs.createWriteStream(newFolderName + "/" + imgName);
    // An unhandled "error" event on the file stream would terminate the process.
    target.on("error", (err) => {
        console.error("Cannot write " + newFolderName + "/" + imgName + ": " + err.message);
    });
    request(imgSrc, option, (err, res) => {
        if (!err && res.statusCode === 200) {
            console.info("文件夹名称【" + newFolderName + "】图片链接: " + imgSrc + " 已爬取完成!");
        } else {
            const reason = err ? err.message : "HTTP " + res.statusCode;
            console.error("Download failed: " + imgSrc + " (" + reason + ")");
        }
    }).pipe(target);
};

// Entry point: kick off the crawl.
requestPlay();





再来一个爬 自拍图 的

[JavaScript] 纯文本查看 复制代码
const fs = require("fs");
const https = require("https");
const cheerio = require("cheerio");
const request = require("request");

// Base URL of the site being crawled; listing and album URLs are built from it.
const requestServerUrl = "https://自拍图.xyz/";

// Characters that are stripped from album titles before they are used as folder names.
// The pattern has no "g" flag, so one replace() call removes only the first match.
const regEx = new RegExp("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()&;|{}【】‘;:”“'。,、?]");

// Request options (HTTP headers) passed to every https.get() / request() call in this script.
// NOTE(review): the referer host is presumably the Punycode form of requestServerUrl's host — confirm.
const option = {
    headers: {
        "referer": "https://xn--wcsr8yy8y.xyz/",
        "pragma": "no-cache",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
    }
};

// Start the crawl: fetch the landing page and hand its HTML to getPage().
// The body is collected as raw buffers and decoded in one go; decoding chunk by
// chunk would corrupt a multi-byte UTF-8 character that straddles two chunks.
const requestPlay = () => {
    const onResponse = (res) => {
        const parts = [];
        res.on("data", (part) => {
            parts.push(Buffer.from(part));
        });
        res.on("end", () => {
            getPage(Buffer.concat(parts).toString("utf8"));
        });
    };
    // Without an "error" listener a network failure raises an unhandled event.
    https.get(requestServerUrl, option, onResponse).on("error", (err) => {
        console.error("Request failed: " + requestServerUrl + " (" + err.message + ")");
    });
};

// Build the list of listing-page URLs and start crawling at the first one.
// The page count is the part after the last "/" in the text of the ".num" element.
const getPage = (pageDom) => {
    const $ = cheerio.load(pageDom);
    const counterParts = $(".num").text().split("/");
    const totalPages = Number(counterParts[counterParts.length - 1]);
    const listingUrls = [requestServerUrl];
    for (let pageNo = 2; pageNo <= totalPages; pageNo += 1) {
        listingUrls.push(`${requestServerUrl}index_${pageNo}.html`);
    }
    getPicture(0, listingUrls);
};

// Fetch one listing page and collect the albums shown on it.
//   palyIndex - index into pageArr of the listing page to fetch
//   pageArr   - URLs of all listing pages
// When albums are found, requestPicture() is started on the first one and is
// given the index of the NEXT listing page.
const getPicture = (palyIndex, pageArr) => {
    // Past the last listing page: nothing left to crawl.
    if (palyIndex >= pageArr.length) {
        console.info("All listing pages have been processed.");
        return;
    }
    const listingUrl = pageArr[palyIndex];
    const req = https.get(listingUrl, option, (res) => {
        const chunks = [];
        res.on("data", (chunk) => {
            chunks.push(Buffer.from(chunk));
        });
        res.on("end", () => {
            // Decode once so multi-byte characters split across chunks stay intact.
            const $ = cheerio.load(Buffer.concat(chunks).toString("utf8"));
            const items = $("#tiles").find("li");
            const pictureArr = [];
            for (let i = 0; i < items.length; i++) {
                const link = items.eq(i).find("a");
                const name = link.text();
                const href = link.attr("href");
                // Entries without a title or a link cannot be downloaded; skip them.
                if (!name || !href) {
                    continue;
                }
                let albumUrl;
                try {
                    // Resolves relative links against the site root and leaves
                    // absolute links untouched.
                    albumUrl = new URL(href, requestServerUrl).href;
                } catch (err) {
                    continue; // malformed link
                }
                pictureArr.push({ name: name, href: albumUrl });
            }
            const nextIndex = palyIndex + 1;
            if (pictureArr.length === 0) {
                // Nothing usable on this page; continue with the next listing page.
                getPicture(nextIndex, pageArr);
                return;
            }
            requestPicture(pictureArr, 0, nextIndex, pageArr);
        });
    });
    // Without a listener a network failure raises an unhandled "error" event.
    req.on("error", (err) => {
        console.error("Request failed: " + listingUrl + " (" + err.message + ")");
    });
};

// Fetch the page of the album at Picture[pictureIndex] and pass its HTML to
// pictureDom(). The other arguments are forwarded unchanged.
const requestPicture = (Picture, pictureIndex, palyIndex, pageArr) => {
    const albumUrl = Picture[pictureIndex].href;
    const onResponse = (res) => {
        const parts = [];
        res.on("data", (part) => {
            parts.push(Buffer.from(part));
        });
        res.on("end", () => {
            // Decode once so multi-byte characters split across chunks stay intact.
            const html = Buffer.concat(parts).toString("utf8");
            pictureDom(Picture, pictureIndex, palyIndex, pageArr, html);
        });
    };
    // Without an "error" listener a network failure raises an unhandled event.
    https.get(albumUrl, option, onResponse).on("error", (err) => {
        console.error("Request failed: " + albumUrl + " (" + err.message + ")");
    });
};

// Handle an album page: collect the src of every image inside ".artical-content",
// then download them one per timer tick (every 2000 ms). On the tick after the
// last image, stop the timer and continue with the next album, or with the next
// listing page when this was the last album.
const pictureDom = (Picture, pictureIndex, palyIndex, pageArr, thisHtml) => {
    const $ = cheerio.load(thisHtml);
    const images = $(".artical-content").find("img");
    const total = images.length;
    const sources = [];
    for (let k = 0; k < total; k += 1) {
        sources.push(images.eq(k).attr("src"));
    }

    let cursor = 0;
    const timer = setInterval(() => {
        if (cursor < total) {
            downloadImg(Picture[pictureIndex].name, sources[cursor]);
            cursor += 1;
            return;
        }
        clearInterval(timer);
        const nextAlbum = pictureIndex + 1;
        if (nextAlbum < Picture.length) {
            requestPicture(Picture, nextAlbum, palyIndex, pageArr);
            return;
        }
        getPicture(palyIndex, pageArr);
    }, 2000);
};

// Return `name` with every character matched by regEx removed.
// Characters are checked one at a time, so every match is removed.
const dealNmae = (name) => {
    const kept = [];
    for (let pos = 0; pos < name.length; pos += 1) {
        kept.push(name.charAt(pos).replace(regEx, ''));
    }
    return kept.join("");
};

// Download one image into a folder named after the sanitised album title.
//   folderName - album title; passed through dealNmae() to get the folder name
//   imgSrc     - URL of the image; the file name is its last path segment
const downloadImg = (folderName, imgSrc) => {
    // Callers pass the raw result of text()/attr(); attr() is undefined for a
    // missing attribute and text() is "" for an empty link.
    if (!folderName || !imgSrc) {
        console.warn("Download skipped: missing folder name or image URL.");
        return;
    }
    const newFolderName = dealNmae(folderName);
    try {
        // recursive: true makes this a no-op when the folder already exists.
        fs.mkdirSync(newFolderName, { recursive: true });
    } catch (err) {
        console.error("Cannot create folder \"" + newFolderName + "\": " + err.message);
        return;
    }
    const imgName = imgSrc.split("/").pop();
    const target = fs.createWriteStream(newFolderName + "/" + imgName);
    // An unhandled "error" event on the file stream would terminate the process.
    target.on("error", (err) => {
        console.error("Cannot write " + newFolderName + "/" + imgName + ": " + err.message);
    });
    request(imgSrc, option, (err, res) => {
        if (!err && res.statusCode === 200) {
            console.info("文件夹名称【" + newFolderName + "】图片链接: " + imgSrc + " 已爬取完成!");
        } else {
            const reason = err ? err.message : "HTTP " + res.statusCode;
            console.error("Download failed: " + imgSrc + " (" + reason + ")");
        }
    }).pipe(target);
};

// Entry point: kick off the crawl.
requestPlay();




免费评分

参与人数 5吾爱币 +5 热心值 +4 收起 理由
tygb + 1 我很赞同!学习了
苏紫方璇 + 3 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
公子吖 + 1 + 1 热心回复!
王星星 + 1 热心回复!
漁滒 + 1 我很赞同!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

xiaohanGG 发表于 2020-6-22 16:53
本帖最后由 xiaohanGG 于 2020-6-22 16:54 编辑

"C:\Program Files\nodejs\node.exe" E:\webstorm\mzt\index.js
E:\webstorm\mzt\index.js:60
                    href: requestServerUrl + JSON.stringify(pictureDom.eq(i) + '0').split(''')[1]
                                                                                          ^^

SyntaxError: missing ) after argument list
    at wrapSafe (internal/modules/cjs/loader.js:1054:16)
    at Module._compile (internal/modules/cjs/loader.js:1102:27)
    at Object.Module._extensions..js (internal/modules/cjs/loader.js:1158:10)
    at Module.load (internal/modules/cjs/loader.js:986:32)
    at Function.Module._load (internal/modules/cjs/loader.js:879:14)
    at Function.executeUserEntryPoint [as runMain] (internal/modules/run_main.js:71:12)
    at internal/main/run_main_module.js:17:47

进程已结束,退出代码 1
xiaohanGG 发表于 2020-6-22 15:52
大佬看一下。运行不了。报错
"C:\Program Files\nodejs\node.exe" E:\webstorm\mzt\index.js
internal/modules/cjs/loader.js:969
  throw err;
  ^

Error: Cannot find module 'cheerio'
Require stack:
- E:\webstorm\mzt\index.js
    at Function.Module._resolveFilename (internal/modules/cjs/loader.js:966:15)
    at Function.Module._load (internal/modules/cjs/loader.js:842:27)
    at Module.require (internal/modules/cjs/loader.js:1026:19)
    at require (internal/modules/cjs/helpers.js:72:18)
    at Object.<anonymous> (E:\webstorm\mzt\index.js:3:17)
    at Module._compile (internal/modules/cjs/loader.js:1138:30)
    at Object.Module._extensions..js (internal/modules/cjs/loader.js:1158:10)
    at Module.load (internal/modules/cjs/loader.js:986:32)
    at Function.Module._load (internal/modules/cjs/loader.js:879:14)
    at Function.executeUserEntryPoint [as runMain] (internal/modules/run_main.js:71:12) {
  code: 'MODULE_NOT_FOUND',
  requireStack: [ 'E:\\webstorm\\mzt\\index.js' ]
}

进程已结束,退出代码 1
 楼主| 被遗忘的路人 发表于 2020-6-9 16:25
本帖最后由 被遗忘的路人 于 2020-6-10 09:39 编辑

我自己的运行方式:
1. 安装nodeJS 链接:https://nodejs.org/dist/v12.18.0/node-v12.18.0-x64.msi

2.安装webstorm 链接:https://www.jetbrains.com/webstorm/download/download-thanks.html

3.桌面新建 webstorm 文件夹

4.打开安装好的webstorm软件,目录选择刚才新建的 webstorm 文件夹

5.在webstorm文件夹里面新建 项目名 文件夹,比如 mzt

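6.在 项目名 文件夹,右击新建 package.json 文件(内容写 {} 即可)

7.在 项目名 文件夹打开终端,执行 npm install cheerio request 安装依赖(否则运行时会报 Cannot find module 'cheerio')

8.在 项目名 文件夹,右击新建 index.js 文件,并且粘贴代码

9.在 index.js 编辑状态里面 右击 选择 Run 'index.js' 启动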


这好像就是全部了
漁滒 发表于 2020-6-9 16:27
这是nodejs吗?以后有准备学习的计划
流浪星空 发表于 2020-6-9 16:28
社会的进步大多都是因为欲望
1983 发表于 2020-6-9 16:36
哈哈哈,4楼说的很对
 楼主| 被遗忘的路人 发表于 2020-6-9 16:36
aiai 发表于 2020-6-9 16:27
这是nodejs吗?以后有准备学习的计划

是的 nodeJS
ankoku 发表于 2020-6-9 16:37
这是python?怎么感觉像是Nodejs?
kesai 发表于 2020-6-9 16:37
高手高手啊
 楼主| 被遗忘的路人 发表于 2020-6-9 16:37
流浪星空 发表于 2020-6-9 16:28
社会的进步大多都是因为欲望

我是为了学习,欲望是其次
 楼主| 被遗忘的路人 发表于 2020-6-9 16:38

你的头像很好看,小妹妹
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则 警告:本版块禁止灌水或回复与主题无关内容,违者重罚!

快速回复 收藏帖子 返回列表 搜索

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-4-24 20:12

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表