# [Python] plain-text view / copy code  (forum code-box header left over from copy-paste; not part of the script)
import requests
import os
# NOTE: be sure to replace these with the cookies produced by your own browser
# when you run a search on the site; the values below come from one specific
# browser session.
# NOTE(review): several entries look like session / CSRF tokens (e.g.
# 'csrf_session_id', 'passport_csrf_token', 'ttwid') -- consider loading them
# from an untracked file or the environment instead of hard-coding them; confirm
# before publishing this file.
# NOTE(review): 'UIFID' carries the same value as the 'uifid' request header and
# query parameter used by the Spider class -- presumably they must be kept in
# sync; verify when refreshing the cookies.
cookies = {
'device_web_cpu_core': '32',
'device_web_memory_size': '8',
'architecture': 'amd64',
'csrf_session_id': 'ad3faf6a949237d5af255894374b89ff',
'webcast_leading_last_show_time': '1706274219677',
'webcast_leading_total_show_times': '1',
'webcast_local_quality': 'origin',
'passport_fe_beating_status': 'true',
'ttwid': '1%7Cj0AB1Pl_aVbAjGwE4P9_3H5KBw7ZA0yqZ-wsbk--sK0%7C1740372914%7Cd386f650da113d88512b6460bfa04f26f0c825f7a53abb7c264b3f2c2974215c',
'x-web-secsdk-uid': 'af66562d-005d-4f16-8e4a-271cf2c137f3',
'hevc_supported': 'true',
'fpk1': 'U2FsdGVkX19skYOCWrK0wT5e5n6BV8l93DWMmAaH5s98G0h9oYVdqy8gL8FY7f6M2B0NpCTlg/GdPMi86+ev7g==',
'fpk2': '9258db5fffd4f17a8703a19e760af505',
'odin_tt': '712ca6a9cffa424266aae08ebe4d32d1e6c0f535013ff0be84d2f1634a07b1cbc45f3cd7a22e3c7df641ef637068418fb780411c8aeffcfd47de0e943823d780f9fcabdb4129bbf4d935be3ac9a836ce',
'UIFID': '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1',
'dy_swidth': '1707',
'dy_sheight': '960',
's_v_web_id': 'verify_m9a3id25_jv982Snw_4Egf_42Zu_9Osa_L79yf6GHm4Kv',
'is_dash_user': '1',
'volume_info': '%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Atrue%2C%22volume%22%3A0.201%7D',
'xgplayer_user_id': '818224755114',
'passport_csrf_token': '7ea9d1cb10eb7b782cfee69890eea146',
'passport_csrf_token_default': '7ea9d1cb10eb7b782cfee69890eea146',
'FORCE_LOGIN': '%7B%22videoConsumedRemainSeconds%22%3A180%7D',
'__security_mc_1_s_sdk_crypt_sdk': 'e581bcff-4b0c-821a',
'__security_mc_1_s_sdk_cert_key': 'd7b521c3-4836-8a84',
'__security_mc_1_s_sdk_sign_data_key_web_protect': '14766a15-43d8-acf4',
'bd_ticket_guard_client_web_domain': '2',
'SearchMultiColumnLandingAbVer': '1',
'SEARCH_RESULT_LIST_TYPE': '%22multi%22',
'xgplayer_device_id': '76706908427',
'stream_recommend_feed_params': '%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1707%2C%5C%22screen_height%5C%22%3A960%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A32%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22',
'strategyABtestKey': '%221744218661.635%22',
'SearchColumnSwitchLog': '%5B%7B%22date%22%3A%222025-04-09%22%2C%22latestColumnType%22%3A%22multi%22%7D%2C%7B%22date%22%3A%222025-04-10%22%2C%22latestColumnType%22%3A%22multi%22%7D%5D',
'download_guide': '%223%2F20250410%2F0%22',
'home_can_add_dy_2_desktop': '%221%22',
'biz_trace_id': '10a3bd37',
'bd_ticket_guard_client_data': 'eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTVJjNHh5RnVOdTFtR0JmV3Z3ZEhaZkp1MUVFVjVWdjJiWHBZNTZ2OHY4YzhJN2VBRkVlZ0VoK1NGY1I5MTFBNWpVcndWeExwcjVVeG5HbG11MUJIZ0E9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D',
'gulu_source_res': 'eyJwX2luIjoiOWVjNWI1NGQzNWVlMDQ1YzJlZjdlOGUwZDI0NmQ5NTE2MzVmZTQ5YWRiNTA1Njg3ZjFkM2MyMzJhOTc2MzEzOSJ9',
'xg_device_score': '7.8003162307251666',
'sdk_source_info': '7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f2732313d313c30313d3731313234272927676c715a75776a716a666a69273f2763646976602778',
'bit_env': 'RZd5QHG5wBOkX44_tiN7wJ9iXIgwJevXDML38TE3gv7gyTJ5UTUCF_MfgTiqqplcoxoake1d3johbioUxEmlByKaBFBP0sgypqhkd81i9ZH8CddJoFJQatNaJAkEjbNwksTCAk4iTf444nVBPct_lPDuUU1cjwJY4cyEzNXk3xxpMcUzSIp3vlbVEMjTAttSkak9pZNq2sRSJJRgKkebRSDc8QbXqMLoPM1Mpzyrsjl1FTO-baAPjiKeCegJIlvuL04_6eHfLyCisGAtWBtfsAWp-YzTE6dYU0fDTQtUUuaXKJRwsN7uwmRLlfeY0gW2OlQMRKKacp_gTRDzU8YftSiFuEPQKn1wjMzUQnVqvHYKwvxK3ZoE3DzRQ0_E-Mli_rlyGA3_aeoLcQr0dDyfY-7z-ihW_vfJDD2UoUd7NByv-8F-wa619S5IZ_tfraeQAQvVH9_Sp7rfelM2stqrZLW58rqQL7IhjeMyDl3JbZ8pmJ7JrsZBY5QkPOC05fHRp5keyjgT-9X1usg4ZJFhMjCJhly-myvIk22yKr1hgE0%3D',
'passport_auth_mix_state': 'gf0d1y95s4d4v9cl601scq3cjbvxy9hwo34tnnf2itqhwwok',
'stream_player_status_params': '%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A0%2C%5C%22is_mute%5C%22%3A1%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A1%7D%22',
'IsDouyinActive': 'true',
}
class Spider(object):
    """Keyword-search downloader for Douyin videos.

    Constructing a ``Spider`` prompts the user for a search keyword.  Each
    call to :meth:`parse_start_url` then fetches one page of search results
    and downloads every video on that page into :attr:`os_path`.

    The search request sends the module-level ``cookies`` dict, which must be
    defined before :meth:`parse_start_url` is called.
    """

    # Download directory; created as a side effect of defining the class.
    os_path = os.getcwd() + '/抖音搜索视频/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(os_path, exist_ok=True)

    # Same identifier is sent both as a request header and as a query parameter.
    _UIFID = '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1'
    # Characters not allowed in Windows file names, plus line breaks and tabs;
    # each one is replaced by an underscore.
    _NAME_TABLE = dict.fromkeys(map(ord, '<>:"/\\|?*\n\r\t'), '_')
    # Keeps "<name>.mp4" below the 255-byte file-name limit common to most
    # file systems, even when every character needs 4 bytes in UTF-8.
    _MAX_NAME_LENGTH = 60
    # Seconds to wait on a silent connection before giving up.
    _TIMEOUT = 30
    # Bytes written per chunk while streaming a video to disk.
    _CHUNK_SIZE = 1 << 20

    def __init__(self):
        """Set up the endpoint and request headers, then prompt for the keyword."""
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'uifid': self._UIFID,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
        }
        # input() already returns str, so no conversion is needed.
        self.key = input('请输入您想采集的内容: ')

    @staticmethod
    def _sanitize_name(name):
        """Return *name* turned into a safe, reasonably short file-name stem.

        Forbidden characters become ``_``, the result is cut to
        ``_MAX_NAME_LENGTH`` characters, and trailing spaces/dots (which
        Windows does not allow at the end of a file name) are removed.
        May return an empty string.
        """
        cleaned = str(name).translate(Spider._NAME_TABLE)
        return cleaned[:Spider._MAX_NAME_LENGTH].rstrip(' .')

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download its videos.

        Args:
            page_number: Result offset sent as the ``offset`` query
                parameter (0 for the first page; the page size is 10).

        Returns:
            The number of result entries the page contained (0 when the
            page is empty), as reported by :meth:`save_data`.

        Raises:
            requests.RequestException: On network failure, timeout or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        params = (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            ('is_filter_search', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', '10'),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            ('uifid', self._UIFID),
        )
        # The 'msToken' and 'a_bogus' query parameters change on every request
        # in the browser.  Per the original author they are barely validated
        # by the server, so they are deliberately not sent.
        reply = requests.get(
            self.url,
            headers=self.headers,
            params=params,
            cookies=cookies,
            timeout=self._TIMEOUT,
        )
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        reply.raise_for_status()
        return self.save_data(reply.json())

    def save_data(self, response):
        """Download every video listed in one decoded search response.

        Entries without an ``aweme_info`` object or without a playable URL
        are skipped.  A failed download is reported and does not abort the
        remaining entries.

        Args:
            response: Decoded JSON of the search endpoint; videos are read
                from its ``data`` list.

        Returns:
            The number of entries in ``response['data']`` (0 when the key
            is missing or empty), so a caller can detect the last page.
        """
        entries = response.get('data') or []
        for entry in entries:
            info = entry.get('aweme_info')
            if not info:
                # Not a video entry; nothing to download.
                continue
            # Prefer the description as file name; fall back to the video id
            # when the description is empty (or empty after sanitizing).
            name = self._sanitize_name(info.get('desc') or '')
            if not name:
                name = self._sanitize_name(info.get('aweme_id') or '')
            try:
                # The last URL in the list is the one the original script used.
                mp4_url = info['video']['play_addr']['url_list'][-1]
            except (KeyError, IndexError, TypeError):
                print(f"视频 :{name} - - - 未找到下载地址,已跳过")
                continue
            print(name, mp4_url)
            try:
                self.save_video(name, mp4_url)
            except requests.RequestException as err:
                print(f"视频 :{name} - - - 下载失败: {err}")
        return len(entries)

    def save_video(self, name, mp4_url):
        """Stream *mp4_url* to ``<os_path><name>.mp4``.

        The data is first written to a ``.part`` file and renamed on success,
        so an interrupted download never leaves a truncated ``.mp4`` behind.

        Args:
            name: File-name stem (already safe for the file system).
            mp4_url: Direct URL of the video file.

        Raises:
            requests.RequestException: On network failure, timeout or a
                non-2xx HTTP status.
            OSError: If the file cannot be written.
        """
        target = self.os_path + name + '.mp4'
        partial = target + '.part'
        try:
            with requests.get(mp4_url, headers=self.headers, stream=True,
                              timeout=self._TIMEOUT) as reply:
                # Without this an error page would be saved as a ".mp4" file.
                reply.raise_for_status()
                with open(partial, 'wb') as f:
                    for chunk in reply.iter_content(chunk_size=self._CHUNK_SIZE):
                        if chunk:
                            f.write(chunk)
            os.replace(partial, target)
        except Exception:
            # Remove the incomplete file before propagating the error.
            if os.path.exists(partial):
                os.remove(partial)
            raise
        print(f"视频 :{name} - - - 下载完成", '\n\n')
if __name__ == '__main__':
    # Walk through the search results page by page (10 results per page),
    # starting at offset 0, and report the final offset when the run ends.
    spider = Spider()
    page_number = 0
    try:
        while True:
            handled = spider.parse_start_url(page_number)
            # An explicit 0 means the page came back empty, i.e. there are no
            # more results.  Any other value (including None, when no count
            # is reported) keeps the crawl going.
            if handled == 0:
                break
            page_number += 10
    except KeyboardInterrupt:
        # Ctrl-C ends the crawl but still prints the summary below.
        pass
    # The original `while True: ... else:` never reached its else branch, so
    # the summary was never printed; it now runs once the loop has ended.
    print(f'已下载{page_number}个视频')
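# spider.parse_start_url(page_number): page_number is the result offset (the
# number of videos already listed); it starts at 0 and grows by 10 per page.
# This makes it easy to resume after a mid-run error: restart the loop from the
# last page_number reached (always a multiple of 10) and keep going until the
# results run out -- that covers the complete result set.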