吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 2784|回复: 23
收起左侧

[其他原创] 分享一个用了多年自动下载百度贴吧图片的PHP脚本

  [复制链接]
hualove52pojie 发表于 2023-4-11 21:59
分享一个我经常用的下载百度贴吧图片的PHP脚本。贴吧有防爬机制,目前的做法是下载前设置cookie,每下载完一张图片后停1秒,每抓完一页帖子列表再随机停1-5秒。脚本基于phpspider,把脚本放到官方的demo目录,然后通过PHP命令行执行即可,代码如下:
[PHP] 纯文本查看 复制代码
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// Raise PHP's memory limit to 10240M for this run.
ini_set("memory_limit", "10240M");
// Load the autoloader that lives one directory above this script
// (the post says the script is meant to sit in phpspider's demo directory).
require_once __DIR__ . '/../autoloader.php';
// phpspider classes. requests and selector are used by shezhenba();
// phpspider itself is imported but not referenced in the code shown here.
use phpspider\core\phpspider;
use phpspider\core\requests;
use phpspider\core\selector;
 
// Entry point: start the crawl immediately when the script is executed.
shezhenba();
 
/**
 * Crawl the thread listing of a Baidu Tieba forum and save the images found in each thread.
 *
 * Listing pages are requested for pn = 0, 50, 100, ... up to and including $max_pn.
 * For every thread block on a listing page the first href is followed, the thread page
 * is fetched once, and every src="..." inside its post-content blocks is downloaded into
 * $save_dir unless a file with the same basename already exists there.
 *
 * The request cookie is read from the TIEBA_COOKIE environment variable instead of being
 * embedded in the source, because it contains login credentials (e.g. BDUSS / STOKEN).
 * When the variable is missing, a notice is printed and pages are requested without it.
 *
 * Throttling: 1 second after every image download attempt and a random 1-5 seconds
 * after every listing page.
 *
 * @param string $kw       URL-encoded forum name used as the `kw` query parameter.
 * @param string $save_dir Directory the images are written to; created when missing.
 * @param int    $max_pn   Highest `pn` offset to request (inclusive); offsets advance by 50.
 * @return void
 */
function shezhenba($kw = '%E8%88%8C%E8%AF%8A', $save_dir = 'E:/zhongyiba2/', $max_pn = 500){
    $base_url = "https://tieba.baidu.com";
    // NOTE(review): presumably a pinned/announcement thread of the default forum — confirm.
    $skipped_thread = "/p/5908153839";

    $cookies = getenv('TIEBA_COOKIE');
    $cookies = ($cookies === false) ? '' : trim($cookies);
    if ($cookies === '') {
        echo "TIEBA_COOKIE is not set; requesting pages without a login cookie\n";
    }

    // Normalise to exactly one trailing slash and make sure the directory exists,
    // otherwise every file_put_contents() below would fail.
    $save_dir = rtrim($save_dir, "/\\") . '/';
    if (!is_dir($save_dir) && !mkdir($save_dir, 0777, true) && !is_dir($save_dir)) {
        echo "Cannot create download directory: " . $save_dir . "\n";
        return;
    }

    for ($pn = 0; $pn <= $max_pn; $pn += 50) {
        $page = $base_url . '/f?kw=' . $kw . '&ie=utf-8&pn=' . $pn;
        echo $page, "\n";

        // Re-applied before every listing page, as the original script did.
        if ($cookies !== '') {
            requests::set_cookies($cookies, 'tieba.baidu.com');
        }

        $html = requests::get($page);
        $threads = empty($html)
            ? array()
            : shezhenba_to_list(selector::select($html, "//div[@class='threadlist_lz clearfix']"));

        foreach ($threads as $thread_html) {
            // Only the first href inside a thread block is used as the thread link.
            if (!is_string($thread_html) || !preg_match('/href="(.*?)"/', $thread_html, $link)) {
                continue;
            }
            if (empty($link[1]) || $link[1] === $skipped_thread) {
                continue;
            }
            shezhenba_download_thread($base_url . $link[1], $save_dir);
        }

        // Pause a few seconds between listing pages to avoid triggering Baidu's anti-crawler checks.
        sleep(rand(1, 5));
    }
}

/**
 * Normalise a selector result to a list of strings.
 *
 * Accepts an array (returned as is), a single non-empty string (wrapped in a list),
 * or anything else such as false/null (treated as "no match").
 *
 * @param mixed $selected Value returned by selector::select().
 * @return array
 */
function shezhenba_to_list($selected){
    if (is_array($selected)) {
        return $selected;
    }
    if (is_string($selected) && $selected !== '') {
        return array($selected);
    }
    return array();
}

/**
 * Tell whether an image URL is filtered out by the crawler.
 *
 * A URL is skipped when it contains any of the substrings 'image_', 'i_', 'gif' or 'mp4'.
 *
 * @param string $img_url Candidate URL.
 * @return bool True when the URL must not be downloaded.
 */
function shezhenba_is_skipped_image($img_url){
    foreach (array('image_', 'i_', 'gif', 'mp4') as $marker) {
        if (strpos($img_url, $marker) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Fetch one thread page and download every accepted image of its post-content blocks.
 *
 * @param string $thread_url Absolute URL of the thread page.
 * @param string $save_dir   Existing target directory, ending with a slash.
 * @return void
 */
function shezhenba_download_thread($thread_url, $save_dir){
    $chtml = requests::get($thread_url);
    if (empty($chtml)) {
        return;
    }

    $posts = shezhenba_to_list(selector::select($chtml, "//div[contains(@id,'post_content_')]"));
    foreach ($posts as $content) {
        if (!is_string($content) || !preg_match_all('/src="(.*?)"/', $content, $found)) {
            continue;
        }

        foreach ($found[1] as $img_url) {
            // The value comes out of HTML markup, so undo entity escaping (e.g. &amp;)
            // and give protocol-relative URLs a scheme before using it as a URL.
            $img_url = html_entity_decode($img_url, ENT_QUOTES, 'UTF-8');
            if (strpos($img_url, '//') === 0) {
                $img_url = 'https:' . $img_url;
            }
            if (shezhenba_is_skipped_image($img_url)) {
                continue;
            }

            $path = parse_url($img_url, PHP_URL_PATH);
            $name = is_string($path) ? basename($path) : '';
            if ($name === '') {
                // No usable file name in the URL, nothing sensible to save it as.
                continue;
            }

            echo $img_url, "\n";
            $file = $save_dir . $name;
            if (file_exists($file)) {
                // Already downloaded on an earlier run; no request made, so no pause needed.
                continue;
            }

            $bytes = file_get_contents($img_url);
            if ($bytes === false || $bytes === '') {
                // Do not create an empty file: it would block a retry on the next run.
                echo "Download failed: " . $img_url . "\n";
            } elseif (file_put_contents($file, $bytes) === false) {
                echo "Cannot write file: " . $file . "\n";
            }

            // Pause after each download attempt to avoid triggering Baidu's anti-crawler checks.
            sleep(1);
        }
    }
}


如果有大佬知道更好的应对贴吧防爬机制的方法,请赐教。

免费评分

参与人数 3吾爱币 +1 热心值 +3 收起 理由
d5850951556 + 1 我很赞同!
sht281 + 1 + 1 谢谢@Thanks!
zhangxiaoxiao + 1 谢谢@Thanks!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

cjx09231211 发表于 2023-4-11 22:48
可以啊,感谢感谢
AIIGE 发表于 2023-4-11 23:31
Sm0key 发表于 2023-4-12 00:01
kinghosty 发表于 2023-4-12 07:03
好软件,支持一下。给楼主点个赞。
q393810655 发表于 2023-4-12 07:20
感谢楼主分享~!
daoye9988 发表于 2023-4-12 07:27
必须支持一波
aa2923821a 发表于 2023-4-12 08:40
支持一波  谢谢大佬啦
qqdns 发表于 2023-4-12 09:16
感谢分享,这个不错
8970665 发表于 2023-4-12 10:15
高手快来改良
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-5-30 03:52

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表