本帖最后由 TZ糖纸 于 2023-7-21 13:24 编辑
先上脚本
[JavaScript]
// ==UserScript==
// @name 抖音用户主页抓取
// @namespace http://tampermonkey.net/
// @version 0.1
// @description try to take over the world!
// @author You
// @match https://www.douyin.com/user/*
// @icon https://lf1-cdn-tos.bytegoofy.com/goofy/ies/douyin_web/public/favicon.ico
// @grant none
// ==/UserScript==
(function() {
'use strict';
// Guards the one-time bootstrap that runs at the end of the script.
let isFirst = true;
// Every post object collected so far (embedded page data plus intercepted responses).
let aweme_list = [];
// Nickname read from the embedded page data; stays empty until extraction succeeds.
let nickname = "";
/**
 * Seed the shared state from the JSON embedded in the page's `RENDER_DATA` element.
 *
 * The element's content is URI-decoded and parsed as JSON. Every top-level
 * section except `_location` and `app` is inspected: when it carries
 * `user.user.nickname` the shared `nickname` is updated, and when it carries
 * a `post.data` array those posts are appended to the shared `aweme_list`.
 *
 * Sections that lack these fields are skipped instead of throwing, and a
 * payload that cannot be decoded or parsed is reported and ignored, so a
 * single unexpected section cannot abort the caller.
 *
 * @returns {void}
 */
function extractDataFromScript() {
  const scriptTag = document.getElementById('RENDER_DATA');
  if (!scriptTag) return;

  let json;
  try {
    json = JSON.parse(decodeURIComponent(scriptTag.innerHTML));
  } catch (error) {
    // decodeURIComponent throws URIError and JSON.parse throws SyntaxError on bad input.
    console.warn('RENDER_DATA could not be decoded:', error);
    return;
  }
  if (!json || typeof json !== 'object') return;

  Object.keys(json).forEach((prop) => {
    if (prop === '_location' || prop === 'app') return;

    const section = json[prop];
    if (!section || typeof section !== 'object') return;

    const profile = section.user && section.user.user;
    if (profile && typeof profile.nickname === 'string') {
      nickname = profile.nickname;
    }

    // Only append real arrays: concat(undefined) would add an `undefined` entry.
    const posts = section.post && section.post.data;
    if (Array.isArray(posts)) {
      aweme_list = aweme_list.concat(posts);
    }
  });
}
/**
 * Add the fixed-position export button to the page.
 *
 * The button is pinned to the right edge, 30% above the bottom of the
 * viewport, and runs `buttonClick` when clicked.
 *
 * @returns {void}
 */
function createButton() {
  const exportButton = Object.assign(document.createElement('button'), {
    textContent: '点击我',
  });
  Object.assign(exportButton.style, {
    position: 'fixed',
    right: '20px',
    bottom: '30%',
  });
  exportButton.addEventListener('click', buttonClick);
  document.body.appendChild(exportButton);
}
/**
 * Click handler for the export button: print the raw post list, then a
 * condensed `{ nickname, aweme_list }` summary, to the console.
 *
 * Posts whose type (`aweme_type` or `awemeType`) is 0 or 61 are exported as
 * `{ name, url }` using the first entry of `video.play_addr.url_list`, or
 * `video.playAddr[0].src` when the former is absent. Posts of type 68 are
 * exported as `{ name, urlList }` with the first URL of every image
 * (`url_list` or `urlList`). Any other type is ignored.
 *
 * Entries that are not objects, and posts for which no URL can be found, are
 * skipped with a warning so one malformed post cannot abort the whole export.
 *
 * @returns {void}
 */
function buttonClick() {
  console.log(aweme_list);

  // Type codes this script exports from `item.video` and `item.images` respectively.
  const VIDEO_TYPES = [0, 61];
  const IMAGE_TYPES = [68];

  const firstOf = (list) => (Array.isArray(list) && list.length > 0 ? list[0] : undefined);

  // A post matches when either spelling of the type field carries one of the codes.
  const hasType = (item, codes) =>
    codes.includes(item.aweme_type) || codes.includes(item.awemeType);

  const resolveVideoUrl = (video) => {
    if (!video) return undefined;
    const snakeCaseUrl = firstOf(video.play_addr && video.play_addr.url_list);
    if (snakeCaseUrl) return snakeCaseUrl;
    const camelCaseSource = firstOf(video.playAddr);
    return camelCaseSource ? camelCaseSource.src : undefined;
  };

  const resolveImageUrls = (images) => {
    if (!Array.isArray(images)) return [];
    return images
      .map((img) => (img ? firstOf(img.url_list) || firstOf(img.urlList) : undefined))
      .filter(Boolean);
  };

  const files = [];
  aweme_list.forEach((item) => {
    if (!item || typeof item !== 'object') return;

    if (hasType(item, VIDEO_TYPES)) {
      const url = resolveVideoUrl(item.video);
      if (url) {
        files.push({ name: item.desc, url: url });
      } else {
        console.warn('Skipping video post without a playable URL:', item.desc);
      }
    } else if (hasType(item, IMAGE_TYPES)) {
      const urlList = resolveImageUrls(item.images);
      if (urlList.length > 0) {
        files.push({ name: item.desc, urlList: urlList });
      } else {
        console.warn('Skipping image post without any image URL:', item.desc);
      }
    }
  });

  const data = { nickname: nickname, aweme_list: files };
  console.log(data);
}
/**
 * Patch `XMLHttpRequest.prototype.send` so that every completed request whose
 * URL contains `/aweme/v1/web/aweme/post` has its `aweme_list` array appended
 * to the shared `aweme_list`.
 *
 * The hook is attached with `addEventListener`, so a handler the page
 * assigned to `onreadystatechange` before calling `send()` is left intact.
 * Responses that cannot be parsed, or that carry no `aweme_list` array, are
 * ignored.
 *
 * @returns {void}
 */
function interceptResponse() {
  const POST_LIST_PATH = '/aweme/v1/web/aweme/post';
  const originalSend = XMLHttpRequest.prototype.send;

  XMLHttpRequest.prototype.send = function () {
    const self = this;

    self.addEventListener('load', function () {
      // `responseURL` is the standard property; `_url` is kept as a fallback
      // because nothing in this script sets it (another script would have to).
      const url = String(self.responseURL || self._url || '');
      if (!url.includes(POST_LIST_PATH)) return;

      let payload;
      try {
        // With responseType "json" the body is already parsed; reading
        // responseText for any non-text type throws and is handled below.
        payload = self.responseType === 'json' ? self.response : JSON.parse(self.responseText);
      } catch (error) {
        console.warn('Post list response could not be parsed:', error);
        return;
      }

      // Only append real arrays: concat(undefined) would add an `undefined` entry.
      if (payload && Array.isArray(payload.aweme_list)) {
        aweme_list = aweme_list.concat(payload.aweme_list);
      }
    });

    return originalSend.apply(this, arguments);
  };
}
/**
 * Add a button that, once clicked, scrolls the window to the bottom of the
 * document once per second until the bottom has been reached.
 *
 * Only one scroll timer runs at a time: further clicks while scrolling is in
 * progress are ignored, and the button can be used again after the timer has
 * stopped.
 *
 * @returns {void}
 */
function scrollPageToBottom() {
  const SCROLL_DELAY = 1000; // Delay between scroll attempts, in milliseconds.
  // Id of the running timer, or null while idle.
  let scrollInterval = null;

  function getScrollPosition() {
    return window.scrollY || window.pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0;
  }

  function scrollToBottom() {
    window.scrollTo(0, document.body.scrollHeight);
  }

  function hasReachedBottom() {
    return getScrollPosition() >= (document.body.scrollHeight - window.innerHeight);
  }

  function scrollLoop() {
    if (!hasReachedBottom()) {
      scrollToBottom();
      return;
    }
    console.log("Reached the bottom of the page!");
    clearInterval(scrollInterval);
    // Back to idle so a later click can start a fresh run.
    scrollInterval = null;
  }

  function startScrolling() {
    // Without this guard each click would start another timer and overwrite
    // the stored id, leaving the earlier timers impossible to clear.
    if (scrollInterval !== null) return;
    scrollInterval = setInterval(scrollLoop, SCROLL_DELAY);
  }

  // Named differently from the outer createButton to avoid shadowing it.
  function createScrollButton() {
    const button = document.createElement('button');
    button.textContent = '点击开始下拉';
    button.style.position = 'fixed';
    button.style.right = '20px';
    button.style.bottom = '35%';
    button.addEventListener('click', startScrolling);
    document.body.appendChild(button);
  }

  createScrollButton();
}
// Add the "点击开始下拉" auto-scroll button.
scrollPageToBottom();
if (isFirst) {
  console.log("首次加载");
  isFirst = false;
  // Wait 5000 ms (5 s) before reading the embedded page data and adding the export button.
  setTimeout(function () {
    try {
      extractDataFromScript();
    } catch (error) {
      // A failed extraction must not stop the export button from being
      // created: posts captured by the XHR hook can still be exported.
      console.error('Failed to read the embedded page data:', error);
    }
    createButton();
  }, 5000);
}
// Start capturing post-list responses requested by the page.
interceptResponse();
})();
原理:通过重写 XMLHttpRequest 的 send 方法来拦截作品列表接口的响应
注意!是用户主页!!!
注意!是用户主页!!!
注意!是用户主页!!!
重要的事情说三遍
比如https://www.douyin.com/user/MS4wLjABAAAAd4IEE9JOezbMuKOhRFAEAwlN3D5qgBDvTjjqV2g5FHM?is_search=0&list_name=follow&nt=0
半甜梦.的主页 - 抖音 (douyin.com),进去的话点点赞哈,我看这个女生还挺好看的
页面加载约 5 秒后,会在页面右侧生成一个“点击我”按钮
点击后会在控制台打印两份数据
一份是原始数据,另一份是经过处理的数据(昵称、标题和作品地址)
对应的下载器:https://gitee.com/tjfzeishuai/DouyinDownloader
C# 自行编译
觉得帖子还不错点点赞~
|