吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 2503|回复: 49
上一主题 下一主题
收起左侧

[Python 原创] 抖音搜索视频采集

[复制链接]
跳转到指定楼层
楼主
13955925361 发表于 2025-4-10 21:21 回帖奖励
本帖最后由 13955925361 于 2025-5-1 13:59 编辑

这次给大家分享抖音搜索数据采集,注意采集的是视频区的搜索结果。代码里用的是我自己的cookies,如果已经过期,请换成你自己浏览器网页上产生的cookies。
使用方法一如既往地简单:输入想搜索的关键词即可,程序会获取全部视频,并以视频标题命名保存在文件夹内。


[Python] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import requests
import  os
 
# NOTE: be sure to replace these with the cookies your own browser produces
# when you run a search on the Douyin web page. The values below are the
# original author's captured session and stop working once they expire.
# The dict is sent as the ``cookies=`` argument of the search request.
cookies = {
    'device_web_cpu_core': '32',
    'device_web_memory_size': '8',
    'architecture': 'amd64',
    'csrf_session_id': 'ad3faf6a949237d5af255894374b89ff',
    'webcast_leading_last_show_time': '1706274219677',
    'webcast_leading_total_show_times': '1',
    'webcast_local_quality': 'origin',
    'passport_fe_beating_status': 'true',
    'ttwid': '1%7Cj0AB1Pl_aVbAjGwE4P9_3H5KBw7ZA0yqZ-wsbk--sK0%7C1740372914%7Cd386f650da113d88512b6460bfa04f26f0c825f7a53abb7c264b3f2c2974215c',
    'x-web-secsdk-uid': 'af66562d-005d-4f16-8e4a-271cf2c137f3',
    'hevc_supported': 'true',
    'fpk1': 'U2FsdGVkX19skYOCWrK0wT5e5n6BV8l93DWMmAaH5s98G0h9oYVdqy8gL8FY7f6M2B0NpCTlg/GdPMi86+ev7g==',
    'fpk2': '9258db5fffd4f17a8703a19e760af505',
    'odin_tt': '712ca6a9cffa424266aae08ebe4d32d1e6c0f535013ff0be84d2f1634a07b1cbc45f3cd7a22e3c7df641ef637068418fb780411c8aeffcfd47de0e943823d780f9fcabdb4129bbf4d935be3ac9a836ce',
    'UIFID': '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1',
    'dy_swidth': '1707',
    'dy_sheight': '960',
    's_v_web_id': 'verify_m9a3id25_jv982Snw_4Egf_42Zu_9Osa_L79yf6GHm4Kv',
    'is_dash_user': '1',
    'volume_info': '%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Atrue%2C%22volume%22%3A0.201%7D',
    'xgplayer_user_id': '818224755114',
    'passport_csrf_token': '7ea9d1cb10eb7b782cfee69890eea146',
    'passport_csrf_token_default': '7ea9d1cb10eb7b782cfee69890eea146',
    'FORCE_LOGIN': '%7B%22videoConsumedRemainSeconds%22%3A180%7D',
    '__security_mc_1_s_sdk_crypt_sdk': 'e581bcff-4b0c-821a',
    '__security_mc_1_s_sdk_cert_key': 'd7b521c3-4836-8a84',
    '__security_mc_1_s_sdk_sign_data_key_web_protect': '14766a15-43d8-acf4',
    'bd_ticket_guard_client_web_domain': '2',
    'SearchMultiColumnLandingAbVer': '1',
    'SEARCH_RESULT_LIST_TYPE': '%22multi%22',
    'xgplayer_device_id': '76706908427',
    'stream_recommend_feed_params': '%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1707%2C%5C%22screen_height%5C%22%3A960%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A32%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22',
    'strategyABtestKey': '%221744218661.635%22',
    'SearchColumnSwitchLog': '%5B%7B%22date%22%3A%222025-04-09%22%2C%22latestColumnType%22%3A%22multi%22%7D%2C%7B%22date%22%3A%222025-04-10%22%2C%22latestColumnType%22%3A%22multi%22%7D%5D',
    'download_guide': '%223%2F20250410%2F0%22',
    'home_can_add_dy_2_desktop': '%221%22',
    'biz_trace_id': '10a3bd37',
    'bd_ticket_guard_client_data': 'eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTVJjNHh5RnVOdTFtR0JmV3Z3ZEhaZkp1MUVFVjVWdjJiWHBZNTZ2OHY4YzhJN2VBRkVlZ0VoK1NGY1I5MTFBNWpVcndWeExwcjVVeG5HbG11MUJIZ0E9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D',
    'gulu_source_res': 'eyJwX2luIjoiOWVjNWI1NGQzNWVlMDQ1YzJlZjdlOGUwZDI0NmQ5NTE2MzVmZTQ5YWRiNTA1Njg3ZjFkM2MyMzJhOTc2MzEzOSJ9',
    'xg_device_score': '7.8003162307251666',
    'sdk_source_info': '7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f2732313d313c30313d3731313234272927676c715a75776a716a666a69273f2763646976602778',
    'bit_env': 'RZd5QHG5wBOkX44_tiN7wJ9iXIgwJevXDML38TE3gv7gyTJ5UTUCF_MfgTiqqplcoxoake1d3johbioUxEmlByKaBFBP0sgypqhkd81i9ZH8CddJoFJQatNaJAkEjbNwksTCAk4iTf444nVBPct_lPDuUU1cjwJY4cyEzNXk3xxpMcUzSIp3vlbVEMjTAttSkak9pZNq2sRSJJRgKkebRSDc8QbXqMLoPM1Mpzyrsjl1FTO-baAPjiKeCegJIlvuL04_6eHfLyCisGAtWBtfsAWp-YzTE6dYU0fDTQtUUuaXKJRwsN7uwmRLlfeY0gW2OlQMRKKacp_gTRDzU8YftSiFuEPQKn1wjMzUQnVqvHYKwvxK3ZoE3DzRQ0_E-Mli_rlyGA3_aeoLcQr0dDyfY-7z-ihW_vfJDD2UoUd7NByv-8F-wa619S5IZ_tfraeQAQvVH9_Sp7rfelM2stqrZLW58rqQL7IhjeMyDl3JbZ8pmJ7JrsZBY5QkPOC05fHRp5keyjgT-9X1usg4ZJFhMjCJhly-myvIk22yKr1hgE0%3D',
    'passport_auth_mix_state': 'gf0d1y95s4d4v9cl601scq3cjbvxy9hwo34tnnf2itqhwwok',
    'stream_player_status_params': '%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A0%2C%5C%22is_mute%5C%22%3A1%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A1%7D%22',
    'IsDouyinActive': 'true',
}
 
 
 
class Spider(object):
    """Search Douyin for videos matching a keyword and download every hit.

    The keyword is read from stdin when the instance is created. Each call to
    ``parse_start_url`` fetches one page of search results and saves the
    videos it lists into ``os_path``.
    """

    # Download directory below the current working directory. It is created
    # while the class body executes, i.e. as soon as the module is loaded.
    os_path = os.getcwd() + '/抖音搜索视频/'
    # exist_ok avoids the race between an exists() check and the creation.
    os.makedirs(os_path, exist_ok=True)

    # Seconds to wait for the server before a request is abandoned; without
    # a timeout a stalled connection would hang the crawl forever.
    request_timeout = 30
    # Upper bound for the UTF-8 size of a file stem. Common file systems cap
    # a name at 255 bytes/characters, so this leaves room for the suffix.
    max_name_bytes = 200

    def __init__(self):
        """Set up the endpoint and browser-like headers, then ask for the keyword."""
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'uifid': '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
        }
        self.key = str(input('请输入您想采集的内容: '))

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download the videos on it.

        Args:
            page_number: Offset of the first result to return (0, 10, 20, ...);
                every request asks for 10 results.

        Returns:
            int: Number of result entries the server returned for this page.
            ``0`` means there is nothing (more) to download.

        Raises:
            requests.RequestException: The search request failed or returned
                an HTTP error status.
            ValueError: The response body was not valid JSON.
        """
        params = (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            ('is_filter_search', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', '10'),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            # Same value as the 'uifid' request header; reuse it rather than
            # keeping a second copy of the literal.
            ('uifid', self.headers['uifid']),
        )
        # The browser also sends 'msToken' and 'a_bogus', which change on
        # every request. They are deliberately left out: according to the
        # original author the endpoint does not validate them strictly.

        response = requests.get(self.url, headers=self.headers, params=params,
                                cookies=cookies, timeout=self.request_timeout)
        response.raise_for_status()
        return self.save_data(response.json())

    def save_data(self, response):
        """Download every video listed in a decoded search response.

        Entries without ``aweme_info`` or without a playable URL are skipped,
        and a failed download is reported and skipped, so one bad entry does
        not abort the rest of the page.

        Args:
            response: Decoded JSON object of the search endpoint. A missing
                or empty ``data`` member is treated as "no results".

        Returns:
            int: Number of entries in ``response['data']``.
        """
        items = response.get('data') or []
        for data in items:
            info = data.get('aweme_info')
            if not info:
                continue
            try:
                mp4_url = info['video']['play_addr']['url_list'][-1]
            except (KeyError, IndexError, TypeError):
                continue
            # Prefer the description as file name, fall back to the video id.
            name = self._clean_name(info.get('desc') or info.get('aweme_id') or '')
            print(name, mp4_url)
            try:
                self.save_video(name, mp4_url)
            except OSError as exc:
                # requests' exceptions derive from OSError as well, so this
                # covers both network failures and file-system errors.
                print(f"视频 :{name} - - - 下载失败: {exc}", '\n\n')
        return len(items)

    def _clean_name(self, raw):
        """Turn a description or id into a string usable as a file stem."""
        name = str(raw)
        for char in '<>:"/\\|?*\n\r\t':
            name = name.replace(char, '_')
        # Cut on the encoded size; 'ignore' drops a character that the cut
        # split in the middle of its byte sequence.
        name = name.encode('utf-8')[:self.max_name_bytes].decode('utf-8', 'ignore')
        # Windows rejects names that end in a space or a dot.
        name = name.rstrip(' .')
        return name or 'untitled'

    def save_video(self, name, mp4_url):
        """Stream one video to ``<os_path>/<name>.mp4``.

        The body is written to a ``.part`` file first and renamed afterwards,
        so an interrupted transfer never leaves a truncated ``.mp4`` behind.

        Raises:
            requests.RequestException: The download failed or the server
                answered with an HTTP error status.
            OSError: The file could not be written.
        """
        target = self.os_path + name + '.mp4'
        partial = target + '.part'
        with requests.get(mp4_url, headers=self.headers, stream=True,
                          timeout=self.request_timeout) as reply:
            reply.raise_for_status()
            with open(partial, 'wb') as f:
                # Chunked copy keeps memory use flat for large videos.
                for chunk in reply.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial, target)
        print(f"视频 :{name} - - - 下载完成", '\n\n')
 
 
if __name__ == '__main__':
    # Everything that uses `spider` lives inside the guard, so importing this
    # module no longer fails with a NameError on the paging loop.
    spider = Spider()

    # parse_start_url() takes the offset of the first result: it starts at 0
    # and grows by 10 per page. To resume after a mid-run error, restart from
    # the last offset reached (a multiple of 10) instead of 0.
    page_number = 0
    try:
        while True:
            fetched = spider.parse_start_url(page_number)
            # A page that reports zero entries means the search is exhausted.
            # A None result carries no count, so paging simply continues.
            if fetched == 0:
                break
            page_number += 10
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop early; exit without a traceback.
        pass
    finally:
        # Printed on errors as well, so the offset to resume from is known.
        print(f'已下载{page_number}个视频')






QQ20250410-211217.png (1.65 MB, 下载次数: 5)

1

1

QQ20250410-210910.png (147.96 KB, 下载次数: 0)

QQ20250410-210910.png

QQ20250411-135210.png (316.64 KB, 下载次数: 1)

QQ20250411-135210.png

免费评分

参与人数 10吾爱币 +13 热心值 +9 收起 理由
congcong40 + 1 + 1 我很赞同!
小涛g + 1 我很赞同!
z3s + 1 + 1 我很赞同!
苏紫方璇 + 5 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
IcePlume + 1 + 1 我很赞同!
walykyy + 1 我很赞同!
为之奈何? + 1 + 1 我很赞同!
有时候感觉 + 1 + 1 谢谢@Thanks!
wari01 + 1 + 1 热心回复!
Kanchow + 1 + 1 谢谢@Thanks!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

推荐
wari01 发表于 2025-4-10 22:28
不知道有木有大神做好成品
推荐
zzZzzZ124213123 发表于 2025-5-14 10:57
zxhlh 发表于 2025-5-14 10:51
来个成品可以吗?感谢

[Python] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import requests
import  os
import time
 
def string_to_dict(cookie_string):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    Segments are separated by ``;``. A segment without ``=`` is ignored;
    otherwise the segment is stripped and split at its first ``=``. When a
    name occurs more than once, the last value wins.
    """
    pairs = (
        segment.strip().partition('=')
        for segment in cookie_string.split(';')
        if '=' in segment
    )
    return {name: value for name, _, value in pairs}
# NOTE: be sure to replace this with the Cookie header your own browser sends
# when you run a search on the Douyin web page; the value below is a captured
# session.
# The header is written as two adjacent string constants, which Python joins
# into one string: a raw line break inside a single-quoted literal is a
# syntax error.
cookie_str = (
    'UIFID_TEMP=9813fed978a3c771be2b666bdcfa929d1334ed722b7c12524b489df48e588a5c51cb117a18fa5735b4a61dbc57f41dfe5b73b2fdf0e28ff9931f9e523880ce37ed1447cd909e4869637c2adef01c068d; fpk1=U2FsdGVkX1+tFweCK8NMTa4xF84b75AAt0tPzCi5lS/09031IV8EDhEJ5AIx1SdEmiVT9pTMWza7XYZiLdbX6g==; fpk2=0a4ec5943b80707adbc82b48683207dd; UIFID=9813fed978a3c771be2b666bdcfa929d1334ed722b7c12524b489df48e588a5c25cfe601d9a654bcdf603de3057a5ca0e42c655d3730a355c58a5acf3e8567593342557d2bc1e7c4581fc67f55491c54407b25ecddf2ab6b67adf6cad934d3c8c4857a0e790b505bdeaacee778eca9e6cb649d7394bb142724e0e1b544f1b060e17a38a3ab2259409372d999225e429542bcb903565408618e8fc0d2c50c5500; hevc_supported=true; xgplayer_device_id=14451711088; xgplayer_user_id=277492132112; SEARCH_RESULT_LIST_TYPE=%22single%22; is_dash_user=1; passport_csrf_token=f5c2a1a860aca5b614d19eb832404b8d; passport_csrf_token_default=f5c2a1a860aca5b614d19eb832404b8d; __security_mc_1_s_sdk_crypt_sdk=2a3419f7-4a28-9bca; bd_ticket_guard_client_web_domain=2; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A1%7D; s_v_web_id=verify_ma1zdq2m_T58IKTlX_JSyp_4aiD_ASnw_P6zqSHe9CDaA; ttwid=1%7C9J-wbAEBpW1RbU_nrLYSAiTl9ShIyGjygHrlsF-rVio%7C1745939026%7Cc57869d5101670dce1fe34bc9fb4e3632d6a2160f0059810dd915b8f513ea2a6; passport_mfa_token=Cjd6%2Bwrz7RHQuLwuVbMpeJ0xd8Peerc1iMheAUOfq%2BzFgJ48xO4qDeQtMHigHPD3UGTVC31CZFAWGkoKPAAAAAAAAAAAAABO7%2BpfTPg%2BXOZXCB76%2B3eZlgH0CEI9Anla5F4rtgu9yMKnul4nGXGbIOj9KIN94n%2BNfBD4hvANGPax0WwgAiIBA%2Bd22eo%3D; d_ticket=1aefd782c7eac07793b6028676b2011c99105; passport_assist_user=CkGWKuT-Nt6Y-Hxc_VNFM33kHrke1s_-ftti6qnE4YbNimlcOw8DHxQiDSynPbIp7JYqOPXyYxazzBzyVSyiOhHkCBpKCjwqhVj9z6sweHl7zu4u_MmNZdUHBQ2wWH-IfdcGOCKl_eYMq9Wpf1IojNJ6H74pzawMkqw5emrPV2iMUJYQtobwDRiJr9ZUIAEiAQMNlH0J; n_mh=40vgplZPesxOieWeAud-o_mn_8l6Lg33PWrWzo10Rno; passport_auth_status=0a5f27eee16474ad6fe858963787654b%2C; passport_auth_status_ss=0a5f27eee16474ad6fe858963787654b%2C; '
    'sid_guard=d638a6c2f1639b58a0a7d53fa66ea9e6%7C1745939405%7C5184000%7CSat%2C+28-Jun-2025+15%3A10%3A05+GMT; uid_tt=f44a2e5049b95d3f0bd4ff608d9cafc7; uid_tt_ss=f44a2e5049b95d3f0bd4ff608d9cafc7; sid_tt=d638a6c2f1639b58a0a7d53fa66ea9e6; sessionid=d638a6c2f1639b58a0a7d53fa66ea9e6; sessionid_ss=d638a6c2f1639b58a0a7d53fa66ea9e6; is_staff_user=false; sid_ucp_v1=1.0.0-KDdlMDJhOTg2ZmI5NmM5MmQ1MmM3OTg2Mjk0ZTVmNDg2MGU1N2E4OGYKIQiuztCx2435BRDN18PABhjvMSAMMJPH7ZYGOAJA8QdIBBoCaGwiIGQ2MzhhNmMyZjE2MzliNThhMGE3ZDUzZmE2NmVhOWU2; ssid_ucp_v1=1.0.0-KDdlMDJhOTg2ZmI5NmM5MmQ1MmM3OTg2Mjk0ZTVmNDg2MGU1N2E4OGYKIQiuztCx2435BRDN18PABhjvMSAMMJPH7ZYGOAJA8QdIBBoCaGwiIGQ2MzhhNmMyZjE2MzliNThhMGE3ZDUzZmE2NmVhOWU2; login_time=1745939404074; SelfTabRedDotControl=%5B%5D; _bd_ticket_crypt_cookie=379b54ed005dab727dcab14c1d6d32ef; __security_mc_1_s_sdk_sign_data_key_web_protect=068970f3-4a9e-b45b; __security_mc_1_s_sdk_cert_key=0b34d9e3-493f-af14; __security_server_data_status=1; __ac_signature=_02B4Z6wo00f01pp45XgAAIDDiJWtRtq4KPqaWOHAAM6ZVwPvKnx.a-0hkPj74RO7nFTOOydWBG5KS0P5sIpHd9phrR7YhGXdyR.O-dC7dGbFzx7okwxoidJ.4ycY9RnoEp5h9uvJ.7lSKZne80; publish_badge_show_info=%220%2C0%2C0%2C1746606382125%22; download_guide=%223%2F20250508%2F0%22; csrf_session_id=7c0e798ff1522cdbef32df6e26638d9b; IsDouyinActive=true; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTFNyZEZkZ3JlbVdneS9lUk9BZkhoVEZ4a1FPSlhPT0szVnFGK29pSGs1d2RHYTNheThvK2lxSERKV2U3cHV0SnVKaXB4dnltWEpwcldTeGZrTy9sWVk9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D; home_can_add_dy_2_desktop=%221%22; passport_fe_beating_status=true; odin_tt=952beaa801083cb75ef1c2b2859fa58b2e870e9bc2c03e31ee2ac84a6fc6008e223f70f242b544ce07b1de9c0316b56e6cf7f31e9a6e940e4526dff5736a1b0a; __ac_nonce=068228ea3003830f08b71'
)
cookies = string_to_dict(cookie_str)
# NOTE(review): this prints the session cookies to stdout on every start;
# it looks like a debugging aid - consider removing it.
print(cookies)
 
class Spider(object):
    """Search Douyin for videos matching a keyword and download every hit.

    The keyword is read from stdin when the instance is created. Each call to
    ``parse_start_url`` fetches one page of search results and saves the
    videos it lists into ``os_path``.
    """

    # Download directory next to this script. It is created while the class
    # body executes, i.e. as soon as the module is loaded.
    os_path = os.path.dirname(os.path.abspath(__file__)) + '/抖音搜索视频/'
    # exist_ok avoids the race between an exists() check and the creation.
    os.makedirs(os_path, exist_ok=True)

    # Seconds to wait for the server before a request is abandoned; without
    # a timeout a stalled connection would hang the crawl forever.
    request_timeout = 30
    # Upper bound for the UTF-8 size of a file stem. Common file systems cap
    # a name at 255 bytes/characters, so this leaves room for the suffix.
    max_name_bytes = 200

    def __init__(self):
        """Set up the endpoint and browser-like headers, then ask for the keyword."""
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            # NOTE(review): this value differs from the UIFID entry inside
            # cookie_str - confirm which one the server expects.
            'uifid': '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
            'Cookie': f'{cookie_str}'
        }
        self.key = str(input('请输入您想采集的内容: '))

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download the videos on it.

        Args:
            page_number: Offset of the first result to return (0, 10, 20, ...);
                every request asks for 10 results.

        Returns:
            int: Number of result entries the server returned for this page.
            ``0`` means there is nothing (more) to download.

        Raises:
            requests.RequestException: The search request failed or returned
                an HTTP error status.
            ValueError: The response body was not valid JSON.
        """
        params = (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            # Filtered search is switched on here, together with the
            # sort_type / publish_time parameters that follow.
            ('is_filter_search', '1'),
            ('sort_type', '1'),
            ('publish_time', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', '10'),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            # Same value as the 'uifid' request header; reuse it rather than
            # keeping a second copy of the literal.
            ('uifid', self.headers['uifid']),
            # 'msToken' and 'a_bogus' change on every request in the browser.
            # According to the original author the endpoint does not validate
            # them strictly, so values captured once are replayed here.
            ('msToken',
             'yJ3OCyQmLkA3aYgRBx4gEVXtArb0A68nWhwJneWw-OOjdFLqZGeNcovnGg9P8_fAqKJGiYCZ761Fl8QV84CMjju1_LSCbefoH_DcswS2-3B0cUcRc6uX5SJdzU5NlvT3w5T3DcMnBlttd9yzIlgvj7F76SmQ0kY20KxpALs7e8UVBrReUA=='),
            ('a_bogus',
             'mJsRgtWjxd/VPdFbmcTGt-3lrAgANP8yKpTdWCPT9NFEGwtcE8PIpxSXJxuPm7WR4mBkioNH4d-AafdcQTX0Z99kqmpkSwUSe42IVUfoMqiIbtwkLrfhSL8zqwBSUbwil/nWiIW5Us0EIEI5INAmApIGC5zLQmg2SqZCp2Y9JDSWps6TVn/1Ca26'),
        )

        response = requests.get(self.url, headers=self.headers, params=params,
                                cookies=cookies, timeout=self.request_timeout)
        response.raise_for_status()
        return self.save_data(response.json())

    def save_data(self, response):
        """Download every video listed in a decoded search response.

        Entries without ``aweme_info`` or without a playable URL are skipped,
        and a failed download is reported and skipped, so one bad entry does
        not abort the rest of the page.

        Args:
            response: Decoded JSON object of the search endpoint. A missing
                or empty ``data`` member is treated as "no results".

        Returns:
            int: Number of entries in ``response['data']``.
        """
        items = response.get('data') or []
        for data in items:
            info = data.get('aweme_info')
            if not info:
                continue
            try:
                mp4_url = info['video']['play_addr']['url_list'][-1]
            except (KeyError, IndexError, TypeError):
                continue
            # Prefer the description as file name, fall back to the video id.
            name = self._clean_name(info.get('desc') or info.get('aweme_id') or '')
            print(name, mp4_url)
            try:
                self.save_video(name, mp4_url)
            except OSError as exc:
                # requests' exceptions derive from OSError as well, so this
                # covers both network failures and file-system errors.
                print(f"视频 :{name} - - - 下载失败: {exc}", '\n\n')
        return len(items)

    def _clean_name(self, raw):
        """Turn a description or id into a string usable as a file stem."""
        name = str(raw)
        for char in '<>:"/\\|?*\n\r\t':
            name = name.replace(char, '_')
        # Cut on the encoded size; 'ignore' drops a character that the cut
        # split in the middle of its byte sequence.
        name = name.encode('utf-8')[:self.max_name_bytes].decode('utf-8', 'ignore')
        # Windows rejects names that end in a space or a dot.
        name = name.rstrip(' .')
        return name or 'untitled'

    def save_video(self, name, mp4_url):
        """Stream one video to ``<os_path>/<name>.mp4``.

        The body is written to a ``.part`` file first and renamed afterwards,
        so an interrupted transfer never leaves a truncated ``.mp4`` behind.

        Raises:
            requests.RequestException: The download failed or the server
                answered with an HTTP error status.
            OSError: The file could not be written.
        """
        target = self.os_path + name + '.mp4'
        partial = target + '.part'
        # The video URL comes from the search response and may point at a
        # different host, so the account's session cookie is not forwarded.
        # NOTE(review): assumes the video host needs no login cookie - confirm.
        headers = {k: v for k, v in self.headers.items() if k.lower() != 'cookie'}
        with requests.get(mp4_url, headers=headers, stream=True,
                          timeout=self.request_timeout) as reply:
            reply.raise_for_status()
            with open(partial, 'wb') as f:
                # Chunked copy keeps memory use flat for large videos.
                for chunk in reply.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial, target)
        print(f"视频 :{name} - - - 下载完成", '\n\n')
  
  
if __name__ == '__main__':
    # Everything that uses `spider` lives inside the guard, so importing this
    # module no longer fails with a NameError on the paging loop.
    spider = Spider()

    # parse_start_url() takes the offset of the first result; it grows by 10
    # per page. To resume after a mid-run error, restart from the last offset
    # reached (a multiple of 10).
    # NOTE(review): starts at 60 rather than 0 - presumably resuming an
    # earlier run; set it to 0 for a fresh crawl.
    page_number = 60
    try:
        while True:
            fetched = spider.parse_start_url(page_number)
            # A page that reports zero entries means the search is exhausted.
            # A None result carries no count, so paging simply continues.
            if fetched == 0:
                break
            page_number += 10
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop early; exit without a traceback.
        pass
    finally:
        # Printed on errors as well, so the offset to resume from is known.
        print(f'已下载{page_number}个视频')
3#
Kanchow 发表于 2025-4-10 22:16
4#
caivi 发表于 2025-4-10 22:18
感谢分享,试下效果
5#
xiaoniaoyou325 发表于 2025-4-10 22:27
Kanchow 发表于 2025-4-10 22:16
可以用这个好工具下载学习资料了

真的可以吗?什么原理,任何平台都能用吗
6#
jtui6999 发表于 2025-4-10 23:45
感恩分享   
7#
havealook 发表于 2025-4-10 23:49
感谢分享!
8#
hqyt888 发表于 2025-4-11 00:02
大佬牛逼..
9#
xyt210819 发表于 2025-4-11 00:44
感谢大佬分享!
10#
zjtzjt 发表于 2025-4-11 06:30
感谢分享,搜搜好看的小姐姐
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-5-18 03:02

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表