吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 8516|回复: 54
上一主题 下一主题
收起左侧

[Python 原创] 抖音搜索视频采集

  [复制链接]
跳转到指定楼层
楼主
13955925361 发表于 2025-4-10 21:21 回帖奖励
本帖最后由 13955925361 于 2025-5-1 13:59 编辑

这次给大家分享抖音搜索数据采集(针对搜索结果中的"视频"栏目)。注意:代码里的 cookies 是我自己的,过期后请换成你自己在浏览器中搜索时产生的 cookies。
使用方法一如既往地简单:输入想搜索的关键词即可,程序会下载搜索到的全部视频,并以视频标题命名保存在文件夹内。


[Python] 纯文本查看 复制代码
import requests
import  os

# NOTE: be sure to use the cookies produced by the search page in your own
# browser.  The values below were captured from the original author's session
# and must be replaced once they expire.
# This dict is passed as ``cookies=`` on the search request.
cookies = {
    'device_web_cpu_core': '32',
    'device_web_memory_size': '8',
    'architecture': 'amd64',
    'csrf_session_id': 'ad3faf6a949237d5af255894374b89ff',
    'webcast_leading_last_show_time': '1706274219677',
    'webcast_leading_total_show_times': '1',
    'webcast_local_quality': 'origin',
    'passport_fe_beating_status': 'true',
    'ttwid': '1%7Cj0AB1Pl_aVbAjGwE4P9_3H5KBw7ZA0yqZ-wsbk--sK0%7C1740372914%7Cd386f650da113d88512b6460bfa04f26f0c825f7a53abb7c264b3f2c2974215c',
    'x-web-secsdk-uid': 'af66562d-005d-4f16-8e4a-271cf2c137f3',
    'hevc_supported': 'true',
    'fpk1': 'U2FsdGVkX19skYOCWrK0wT5e5n6BV8l93DWMmAaH5s98G0h9oYVdqy8gL8FY7f6M2B0NpCTlg/GdPMi86+ev7g==',
    'fpk2': '9258db5fffd4f17a8703a19e760af505',
    'odin_tt': '712ca6a9cffa424266aae08ebe4d32d1e6c0f535013ff0be84d2f1634a07b1cbc45f3cd7a22e3c7df641ef637068418fb780411c8aeffcfd47de0e943823d780f9fcabdb4129bbf4d935be3ac9a836ce',
    'UIFID': '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1',
    'dy_swidth': '1707',
    'dy_sheight': '960',
    's_v_web_id': 'verify_m9a3id25_jv982Snw_4Egf_42Zu_9Osa_L79yf6GHm4Kv',
    'is_dash_user': '1',
    'volume_info': '%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Atrue%2C%22volume%22%3A0.201%7D',
    'xgplayer_user_id': '818224755114',
    'passport_csrf_token': '7ea9d1cb10eb7b782cfee69890eea146',
    'passport_csrf_token_default': '7ea9d1cb10eb7b782cfee69890eea146',
    'FORCE_LOGIN': '%7B%22videoConsumedRemainSeconds%22%3A180%7D',
    '__security_mc_1_s_sdk_crypt_sdk': 'e581bcff-4b0c-821a',
    '__security_mc_1_s_sdk_cert_key': 'd7b521c3-4836-8a84',
    '__security_mc_1_s_sdk_sign_data_key_web_protect': '14766a15-43d8-acf4',
    'bd_ticket_guard_client_web_domain': '2',
    'SearchMultiColumnLandingAbVer': '1',
    'SEARCH_RESULT_LIST_TYPE': '%22multi%22',
    'xgplayer_device_id': '76706908427',
    'stream_recommend_feed_params': '%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1707%2C%5C%22screen_height%5C%22%3A960%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A32%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22',
    'strategyABtestKey': '%221744218661.635%22',
    'SearchColumnSwitchLog': '%5B%7B%22date%22%3A%222025-04-09%22%2C%22latestColumnType%22%3A%22multi%22%7D%2C%7B%22date%22%3A%222025-04-10%22%2C%22latestColumnType%22%3A%22multi%22%7D%5D',
    'download_guide': '%223%2F20250410%2F0%22',
    'home_can_add_dy_2_desktop': '%221%22',
    'biz_trace_id': '10a3bd37',
    'bd_ticket_guard_client_data': 'eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTVJjNHh5RnVOdTFtR0JmV3Z3ZEhaZkp1MUVFVjVWdjJiWHBZNTZ2OHY4YzhJN2VBRkVlZ0VoK1NGY1I5MTFBNWpVcndWeExwcjVVeG5HbG11MUJIZ0E9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D',
    'gulu_source_res': 'eyJwX2luIjoiOWVjNWI1NGQzNWVlMDQ1YzJlZjdlOGUwZDI0NmQ5NTE2MzVmZTQ5YWRiNTA1Njg3ZjFkM2MyMzJhOTc2MzEzOSJ9',
    'xg_device_score': '7.8003162307251666',
    'sdk_source_info': '7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f2732313d313c30313d3731313234272927676c715a75776a716a666a69273f2763646976602778',
    'bit_env': 'RZd5QHG5wBOkX44_tiN7wJ9iXIgwJevXDML38TE3gv7gyTJ5UTUCF_MfgTiqqplcoxoake1d3johbioUxEmlByKaBFBP0sgypqhkd81i9ZH8CddJoFJQatNaJAkEjbNwksTCAk4iTf444nVBPct_lPDuUU1cjwJY4cyEzNXk3xxpMcUzSIp3vlbVEMjTAttSkak9pZNq2sRSJJRgKkebRSDc8QbXqMLoPM1Mpzyrsjl1FTO-baAPjiKeCegJIlvuL04_6eHfLyCisGAtWBtfsAWp-YzTE6dYU0fDTQtUUuaXKJRwsN7uwmRLlfeY0gW2OlQMRKKacp_gTRDzU8YftSiFuEPQKn1wjMzUQnVqvHYKwvxK3ZoE3DzRQ0_E-Mli_rlyGA3_aeoLcQr0dDyfY-7z-ihW_vfJDD2UoUd7NByv-8F-wa619S5IZ_tfraeQAQvVH9_Sp7rfelM2stqrZLW58rqQL7IhjeMyDl3JbZ8pmJ7JrsZBY5QkPOC05fHRp5keyjgT-9X1usg4ZJFhMjCJhly-myvIk22yKr1hgE0%3D',
    'passport_auth_mix_state': 'gf0d1y95s4d4v9cl601scq3cjbvxy9hwo34tnnf2itqhwwok',
    'stream_player_status_params': '%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A0%2C%5C%22is_mute%5C%22%3A1%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A1%7D%22',
    'IsDouyinActive': 'true',
}



class Spider(object):
    """Download the videos returned by a Douyin web video search.

    The search keyword is read from the console when an instance is created.
    Each call to :meth:`parse_start_url` requests one page of results (ten
    items) and stores every video found on it under :attr:`os_path`.
    """

    # Output directory (below the current working directory).  It is created
    # as soon as the class is defined; makedirs(exist_ok=True) avoids the
    # check-then-create race of exists() + mkdir().
    os_path = os.getcwd() + '/抖音搜索视频/'
    os.makedirs(os_path, exist_ok=True)

    # (connect, read) timeouts in seconds so a stalled server cannot hang the
    # crawl forever.
    _TIMEOUT = (10, 60)
    # Cap on the base file name.  80 characters stay below the common
    # 255-byte file name limit even when every character takes 3 UTF-8 bytes
    # and the '.mp4.part' suffix is appended.
    _MAX_NAME_LENGTH = 80
    # Characters that are illegal or troublesome in file names -> '_'.
    _UNSAFE_CHARS = str.maketrans({char: '_' for char in '<>:"/\\|?*\n\r\t'})
    # Identifier captured from the author's browser session; the same value
    # is sent as a request header and as a query parameter.
    _UIFID = '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1'

    def __init__(self):
        """Set up the endpoint and headers, then prompt for the keyword."""
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            # Captured from the author's browser (a search for "张伟"); it is
            # sent unchanged whatever keyword is entered.
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'uifid': self._UIFID,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
        }
        # input() already returns str, so no conversion is needed.
        self.key = input('请输入您想采集的内容: ')

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download its videos.

        Args:
            page_number: Result offset of the page.  It starts at 0 and grows
                by 10 per page because ten items are requested per call.

        Returns:
            The number of videos saved from this page.  0 means the page
            offered nothing to download, which the caller uses as the signal
            to stop paging.

        Raises:
            requests.RequestException: On network failure, timeout or an HTTP
                error status from the search endpoint.
            ValueError: If the response body is not valid JSON.
        """
        # The browser also sends 'msToken' and 'a_bogus', which change from
        # request to request.  The author reports that the endpoint barely
        # validates them, so they are left out here.
        params = (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            ('is_filter_search', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', '10'),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            ('uifid', self._UIFID),
        )

        reply = requests.get(
            self.url,
            headers=self.headers,
            params=params,
            cookies=cookies,
            timeout=self._TIMEOUT,
        )
        # Fail with a clear HTTP error instead of an opaque JSON decode error
        # when the server answers with an error page.
        reply.raise_for_status()
        return self.save_data(reply.json())

    @classmethod
    def _clean_name(cls, desc, aweme_id):
        """Turn a video description into a safe base file name.

        Unsafe characters become '_', the result is capped at
        ``_MAX_NAME_LENGTH`` characters and trailing spaces/dots are removed.
        ``aweme_id`` is used when nothing usable is left.
        """
        name = (desc or '').translate(cls._UNSAFE_CHARS)
        name = name[:cls._MAX_NAME_LENGTH].rstrip(' .')
        return name or str(aweme_id)

    def save_data(self, response):
        """Download every video listed in one decoded search response.

        Args:
            response: Decoded JSON of the search endpoint; the result entries
                are read from its ``'data'`` list.

        Returns:
            The number of videos that were saved successfully.
        """
        saved = 0
        # 'data' may be missing or null (e.g. past the last page).
        for entry in response.get('data') or []:
            info = entry.get('aweme_info')
            if not info:
                continue  # entry carries no video information
            play_addr = (info.get('video') or {}).get('play_addr') or {}
            url_list = play_addr.get('url_list')
            if not url_list:
                continue  # nothing to download for this entry
            name = self._clean_name(info.get('desc'), info.get('aweme_id'))
            mp4_url = url_list[-1]
            print(name, mp4_url)
            try:
                self.save_video(name, mp4_url)
            except OSError as exc:
                # requests' exceptions derive from IOError/OSError, so this
                # covers network and disk errors alike: report the failure
                # and carry on with the remaining videos.
                print(f"视频 :{name} - - - 下载失败: {exc}", '\n\n')
                continue
            saved += 1
        return saved

    def save_video(self, name, mp4_url):
        """Stream one video to ``os_path + name + '.mp4'``.

        The body is written in chunks to a ``.part`` file that is renamed
        once the download is complete, so an interrupted transfer never
        leaves a truncated ``.mp4`` behind.

        Raises:
            requests.RequestException: On network failure, timeout or an HTTP
                error status.
            OSError: If the file cannot be written.
        """
        target = self.os_path + name + '.mp4'
        partial = target + '.part'
        with requests.get(mp4_url, headers=self.headers, stream=True,
                          timeout=self._TIMEOUT) as reply:
            reply.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in reply.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial, target)
        print(f"视频 :{name} - - - 下载完成", '\n\n')


if __name__ == '__main__':
    spider = Spider()

    # parse_start_url(page_number) takes the result offset: it starts at 0
    # and grows by 10 per page.  To resume after an error, restart from the
    # last offset reached (a multiple of 10).
    page_number = 0
    downloaded = 0
    while True:
        saved = spider.parse_start_url(page_number)
        if not saved:
            break  # a page with nothing to download ends the crawl
        downloaded += saved
        page_number += 10
    print(f'已下载{downloaded}个视频')










QQ20250410-211217.png (1.65 MB, 下载次数: 15)

1

1

QQ20250410-210910.png (147.96 KB, 下载次数: 4)

QQ20250410-210910.png

QQ20250411-135210.png (316.64 KB, 下载次数: 5)

QQ20250411-135210.png

免费评分

参与人数 12吾爱币 +15 热心值 +11 收起 理由
AixCrimson + 1 + 1 热心回复!
pythonfun + 1 + 1 我很赞同!
congcong40 + 1 + 1 我很赞同!
小涛g + 1 我很赞同!
z3s + 1 + 1 我很赞同!
苏紫方璇 + 5 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
IcePlume + 1 + 1 我很赞同!
walykyy + 1 我很赞同!
为之奈何? + 1 + 1 我很赞同!
有时候感觉 + 1 + 1 谢谢@Thanks!
wari01 + 1 + 1 热心回复!
Kanchow + 1 + 1 谢谢@Thanks!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

推荐
wari01 发表于 2025-4-10 22:28
不知道有木有大神做好成品
推荐
zzZzzZ124213123 发表于 2025-5-14 10:57
zxhlh 发表于 2025-5-14 10:51
来个成品可以吗?感谢

[Python] 纯文本查看 复制代码
import requests
import  os
import time

def string_to_dict(cookie_string):
    cookie_dict = {}
    for cookie in cookie_string.split(';'):
        if '=' in cookie:
            key, value = cookie.strip().split('=', 1)
            cookie_dict[key] = value
    return cookie_dict
# NOTE: be sure to use the cookies produced by the search page in your own
# browser; the value below was captured from one particular session.
# The literal is written as two adjacent string pieces so that it contains no
# line break: a raw newline inside a single-quoted string is a syntax error.
cookie_str = (
    'UIFID_TEMP=9813fed978a3c771be2b666bdcfa929d1334ed722b7c12524b489df48e588a5c51cb117a18fa5735b4a61dbc57f41dfe5b73b2fdf0e28ff9931f9e523880ce37ed1447cd909e4869637c2adef01c068d; fpk1=U2FsdGVkX1+tFweCK8NMTa4xF84b75AAt0tPzCi5lS/09031IV8EDhEJ5AIx1SdEmiVT9pTMWza7XYZiLdbX6g==; fpk2=0a4ec5943b80707adbc82b48683207dd; UIFID=9813fed978a3c771be2b666bdcfa929d1334ed722b7c12524b489df48e588a5c25cfe601d9a654bcdf603de3057a5ca0e42c655d3730a355c58a5acf3e8567593342557d2bc1e7c4581fc67f55491c54407b25ecddf2ab6b67adf6cad934d3c8c4857a0e790b505bdeaacee778eca9e6cb649d7394bb142724e0e1b544f1b060e17a38a3ab2259409372d999225e429542bcb903565408618e8fc0d2c50c5500; hevc_supported=true; xgplayer_device_id=14451711088; xgplayer_user_id=277492132112; SEARCH_RESULT_LIST_TYPE=%22single%22; is_dash_user=1; passport_csrf_token=f5c2a1a860aca5b614d19eb832404b8d; passport_csrf_token_default=f5c2a1a860aca5b614d19eb832404b8d; __security_mc_1_s_sdk_crypt_sdk=2a3419f7-4a28-9bca; bd_ticket_guard_client_web_domain=2; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A1%7D; s_v_web_id=verify_ma1zdq2m_T58IKTlX_JSyp_4aiD_ASnw_P6zqSHe9CDaA; ttwid=1%7C9J-wbAEBpW1RbU_nrLYSAiTl9ShIyGjygHrlsF-rVio%7C1745939026%7Cc57869d5101670dce1fe34bc9fb4e3632d6a2160f0059810dd915b8f513ea2a6; passport_mfa_token=Cjd6%2Bwrz7RHQuLwuVbMpeJ0xd8Peerc1iMheAUOfq%2BzFgJ48xO4qDeQtMHigHPD3UGTVC31CZFAWGkoKPAAAAAAAAAAAAABO7%2BpfTPg%2BXOZXCB76%2B3eZlgH0CEI9Anla5F4rtgu9yMKnul4nGXGbIOj9KIN94n%2BNfBD4hvANGPax0WwgAiIBA%2Bd22eo%3D; d_ticket=1aefd782c7eac07793b6028676b2011c99105; passport_assist_user=CkGWKuT-Nt6Y-Hxc_VNFM33kHrke1s_-ftti6qnE4YbNimlcOw8DHxQiDSynPbIp7JYqOPXyYxazzBzyVSyiOhHkCBpKCjwqhVj9z6sweHl7zu4u_MmNZdUHBQ2wWH-IfdcGOCKl_eYMq9Wpf1IojNJ6H74pzawMkqw5emrPV2iMUJYQtobwDRiJr9ZUIAEiAQMNlH0J; n_mh=40vgplZPesxOieWeAud-o_mn_8l6Lg33PWrWzo10Rno; passport_auth_status=0a5f27eee16474ad6fe858963787654b%2C; passport_auth_status_ss=0a5f27eee16474ad6fe858963787654b%2C; '
    'sid_guard=d638a6c2f1639b58a0a7d53fa66ea9e6%7C1745939405%7C5184000%7CSat%2C+28-Jun-2025+15%3A10%3A05+GMT; uid_tt=f44a2e5049b95d3f0bd4ff608d9cafc7; uid_tt_ss=f44a2e5049b95d3f0bd4ff608d9cafc7; sid_tt=d638a6c2f1639b58a0a7d53fa66ea9e6; sessionid=d638a6c2f1639b58a0a7d53fa66ea9e6; sessionid_ss=d638a6c2f1639b58a0a7d53fa66ea9e6; is_staff_user=false; sid_ucp_v1=1.0.0-KDdlMDJhOTg2ZmI5NmM5MmQ1MmM3OTg2Mjk0ZTVmNDg2MGU1N2E4OGYKIQiuztCx2435BRDN18PABhjvMSAMMJPH7ZYGOAJA8QdIBBoCaGwiIGQ2MzhhNmMyZjE2MzliNThhMGE3ZDUzZmE2NmVhOWU2; ssid_ucp_v1=1.0.0-KDdlMDJhOTg2ZmI5NmM5MmQ1MmM3OTg2Mjk0ZTVmNDg2MGU1N2E4OGYKIQiuztCx2435BRDN18PABhjvMSAMMJPH7ZYGOAJA8QdIBBoCaGwiIGQ2MzhhNmMyZjE2MzliNThhMGE3ZDUzZmE2NmVhOWU2; login_time=1745939404074; SelfTabRedDotControl=%5B%5D; _bd_ticket_crypt_cookie=379b54ed005dab727dcab14c1d6d32ef; __security_mc_1_s_sdk_sign_data_key_web_protect=068970f3-4a9e-b45b; __security_mc_1_s_sdk_cert_key=0b34d9e3-493f-af14; __security_server_data_status=1; __ac_signature=_02B4Z6wo00f01pp45XgAAIDDiJWtRtq4KPqaWOHAAM6ZVwPvKnx.a-0hkPj74RO7nFTOOydWBG5KS0P5sIpHd9phrR7YhGXdyR.O-dC7dGbFzx7okwxoidJ.4ycY9RnoEp5h9uvJ.7lSKZne80; publish_badge_show_info=%220%2C0%2C0%2C1746606382125%22; download_guide=%223%2F20250508%2F0%22; csrf_session_id=7c0e798ff1522cdbef32df6e26638d9b; IsDouyinActive=true; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTFNyZEZkZ3JlbVdneS9lUk9BZkhoVEZ4a1FPSlhPT0szVnFGK29pSGs1d2RHYTNheThvK2lxSERKV2U3cHV0SnVKaXB4dnltWEpwcldTeGZrTy9sWVk9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D; home_can_add_dy_2_desktop=%221%22; passport_fe_beating_status=true; odin_tt=952beaa801083cb75ef1c2b2859fa58b2e870e9bc2c03e31ee2ac84a6fc6008e223f70f242b544ce07b1de9c0316b56e6cf7f31e9a6e940e4526dff5736a1b0a; __ac_nonce=068228ea3003830f08b71'
)
# Dict form of the same cookies, passed as ``cookies=`` to requests.
cookies = string_to_dict(cookie_str)
# Echo the parsed cookies so the user can check that parsing worked.
print(cookies)

class Spider(object):
    """Download the videos returned by a filtered Douyin web video search.

    The search keyword is read from the console when an instance is created.
    Each call to :meth:`parse_start_url` requests one page of results (ten
    items) and stores every video found on it under :attr:`os_path`.
    """

    # Output directory (next to this script file).  It is created as soon as
    # the class is defined; makedirs(exist_ok=True) avoids the
    # check-then-create race of exists() + mkdir().
    os_path = os.path.dirname(os.path.abspath(__file__)) + '/抖音搜索视频/'
    os.makedirs(os_path, exist_ok=True)

    # (connect, read) timeouts in seconds so a stalled server cannot hang the
    # crawl forever.
    _TIMEOUT = (10, 60)
    # Cap on the base file name.  80 characters stay below the common
    # 255-byte file name limit even when every character takes 3 UTF-8 bytes
    # and the '.mp4.part' suffix is appended.
    _MAX_NAME_LENGTH = 80
    # Characters that are illegal or troublesome in file names -> '_'.
    _UNSAFE_CHARS = str.maketrans({char: '_' for char in '<>:"/\\|?*\n\r\t'})
    # Identifier captured from a browser session; the same value is sent as
    # a request header and as a query parameter.
    _UIFID = '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1'

    def __init__(self):
        """Set up the endpoint and headers, then prompt for the keyword."""
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            # Captured from a browser session (a search for "张伟"); it is
            # sent unchanged whatever keyword is entered.
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'uifid': self._UIFID,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
            # Raw cookie string for the search request.  save_video() removes
            # this header again before contacting the video host.
            'Cookie': cookie_str,
        }
        # input() already returns str, so no conversion is needed.
        self.key = input('请输入您想采集的内容: ')

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download its videos.

        Args:
            page_number: Result offset of the page.  It starts at 0 and grows
                by 10 per page because ten items are requested per call.

        Returns:
            The number of videos saved from this page.  0 means the page
            offered nothing to download, which the caller uses as the signal
            to stop paging.

        Raises:
            requests.RequestException: On network failure, timeout or an HTTP
                error status from the search endpoint.
            ValueError: If the response body is not valid JSON.
        """
        params = (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            # Filtered search: these three parameters were added in this
            # variant of the script.
            ('is_filter_search', '1'),
            ('sort_type', '1'),
            ('publish_time', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', '10'),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            ('uifid', self._UIFID),
            # 'msToken' and 'a_bogus' change from request to request in the
            # browser.  The values captured once are replayed here; the
            # original author reports that they are barely validated.
            ('msToken',
             'yJ3OCyQmLkA3aYgRBx4gEVXtArb0A68nWhwJneWw-OOjdFLqZGeNcovnGg9P8_fAqKJGiYCZ761Fl8QV84CMjju1_LSCbefoH_DcswS2-3B0cUcRc6uX5SJdzU5NlvT3w5T3DcMnBlttd9yzIlgvj7F76SmQ0kY20KxpALs7e8UVBrReUA=='),
            ('a_bogus',
             'mJsRgtWjxd/VPdFbmcTGt-3lrAgANP8yKpTdWCPT9NFEGwtcE8PIpxSXJxuPm7WR4mBkioNH4d-AafdcQTX0Z99kqmpkSwUSe42IVUfoMqiIbtwkLrfhSL8zqwBSUbwil/nWiIW5Us0EIEI5INAmApIGC5zLQmg2SqZCp2Y9JDSWps6TVn/1Ca26'),
        )

        reply = requests.get(
            self.url,
            headers=self.headers,
            params=params,
            cookies=cookies,
            timeout=self._TIMEOUT,
        )
        # Fail with a clear HTTP error instead of an opaque JSON decode error
        # when the server answers with an error page.
        reply.raise_for_status()
        return self.save_data(reply.json())

    @classmethod
    def _clean_name(cls, desc, aweme_id):
        """Turn a video description into a safe base file name.

        Unsafe characters become '_', the result is capped at
        ``_MAX_NAME_LENGTH`` characters and trailing spaces/dots are removed.
        ``aweme_id`` is used when nothing usable is left.
        """
        name = (desc or '').translate(cls._UNSAFE_CHARS)
        name = name[:cls._MAX_NAME_LENGTH].rstrip(' .')
        return name or str(aweme_id)

    def save_data(self, response):
        """Download every video listed in one decoded search response.

        Args:
            response: Decoded JSON of the search endpoint; the result entries
                are read from its ``'data'`` list.

        Returns:
            The number of videos that were saved successfully.
        """
        saved = 0
        # 'data' may be missing or null (e.g. past the last page).
        for entry in response.get('data') or []:
            info = entry.get('aweme_info')
            if not info:
                continue  # entry carries no video information
            play_addr = (info.get('video') or {}).get('play_addr') or {}
            url_list = play_addr.get('url_list')
            if not url_list:
                continue  # nothing to download for this entry
            name = self._clean_name(info.get('desc'), info.get('aweme_id'))
            mp4_url = url_list[-1]
            print(name, mp4_url)
            try:
                self.save_video(name, mp4_url)
            except OSError as exc:
                # requests' exceptions derive from IOError/OSError, so this
                # covers network and disk errors alike: report the failure
                # and carry on with the remaining videos.
                print(f"视频 :{name} - - - 下载失败: {exc}", '\n\n')
                continue
            saved += 1
        return saved

    def save_video(self, name, mp4_url):
        """Stream one video to ``os_path + name + '.mp4'``.

        The body is written in chunks to a ``.part`` file that is renamed
        once the download is complete, so an interrupted transfer never
        leaves a truncated ``.mp4`` behind.

        Raises:
            requests.RequestException: On network failure, timeout or an HTTP
                error status.
            OSError: If the file cannot be written.
        """
        # Do not forward the session cookies to the video host: the download
        # URL comes from the API response and need not be a douyin.com host.
        download_headers = {
            key: value for key, value in self.headers.items()
            if key.lower() != 'cookie'
        }
        target = self.os_path + name + '.mp4'
        partial = target + '.part'
        with requests.get(mp4_url, headers=download_headers, stream=True,
                          timeout=self._TIMEOUT) as reply:
            reply.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in reply.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial, target)
        print(f"视频 :{name} - - - 下载完成", '\n\n')


if __name__ == '__main__':
    spider = Spider()

    # parse_start_url(page_number) takes the result offset: it starts at 0
    # and grows by 10 per page.  To resume after an error, restart from the
    # last offset reached (a multiple of 10).
    # NOTE(review): 60 skips the first 60 results - presumably left over from
    # resuming an interrupted run; set it to 0 for a full crawl.
    page_number = 60
    downloaded = 0
    while True:
        saved = spider.parse_start_url(page_number)
        if not saved:
            break  # a page with nothing to download ends the crawl
        downloaded += saved
        page_number += 10
    print(f'已下载{downloaded}个视频')
3#
Kanchow 发表于 2025-4-10 22:16
4#
caivi 发表于 2025-4-10 22:18
感谢分享,试下效果
5#
xiaoniaoyou325 发表于 2025-4-10 22:27
Kanchow 发表于 2025-4-10 22:16
可以用这个好工具下载学习资料了

真的可以吗?什么原理,任何平台都能用吗
6#
jtui6999 发表于 2025-4-10 23:45
感恩分享   
7#
havealook 发表于 2025-4-10 23:49
感谢分享!
8#
walykyy 发表于 2025-4-11 08:12
太强了,这个必须赞一个
9#
IcePlume 发表于 2025-4-11 08:17
狗头萝莉,有点意思
10#
xiaopeng928 发表于 2025-4-11 08:28
感谢分享学习交流
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - 52pojie.cn ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2026-5-16 14:36

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表