本帖最后由 Webrobot 于 2022-5-19 11:06 编辑
最近自己需要用的小东西, 需要的自取
不一定用于检查外链, 自己发挥用途
成品没有添加代理, 需要使用代理的, 请自行修改代码
下载:https://wwd.lanzoul.com/ir9bu0533a5c
密码:3o8d
演示图:
代码:
#-*-coding:UTF-8-*-
import os
import sys
import time
import requests
from tqdm import tqdm
from multiprocessing.dummy import Pool
# 链接检查函数
def link_check(url, timeout=10):
    """Fetch *url* and log it when the expected keyword is missing.

    The request is sent through the hard-coded local SOCKS5 proxy with a
    Googlebot User-Agent. Results are appended to the module-level ``file``
    log handle; the text searched for is the module-level ``keywords``
    string, which the ``__main__`` section assigns before the pool starts.

    Args:
        url: Address to request, exactly as read from the URL list.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled host,
            which would freeze one of the worker threads.

    Returns:
        None. Outcomes are only reported through the log file.
    """
    # Request User-Agent header
    user_agent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    headers = {"User-Agent": user_agent}
    # Proxy settings
    proxies = {
        'http': 'socks5://127.0.0.1:10808',
        'https': 'socks5://127.0.0.1:10808',
    }
    try:
        response = requests.get(
            url, headers=headers, proxies=proxies, timeout=timeout
        )
    except Exception:
        # Best-effort worker: any request failure (timeout, refused
        # connection, malformed URL, proxy error) is logged rather than
        # allowed to abort the whole pool. Unlike a bare ``except`` this
        # still lets KeyboardInterrupt / SystemExit propagate.
        file.write(str(url) + "连接超时,请检查" + "\n")
        return
    # Only a 200 response is inspected; other status codes are ignored.
    if response.status_code == 200 and keywords not in response.text:
        file.write("域名: " + str(url) + " 外链丢失" + "\n")
# Create the URL list (if it does not exist yet) and open the log file,
# both located next to the running script.
# NOTE: the path separator is written as '\\' because '\d' and '\l' are
# invalid escape sequences; the resulting path string is unchanged.
_script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
# Append mode is used only so that a missing domain_list.txt gets created;
# the handle is closed right away instead of being leaked for the whole run.
domain_file = open(_script_dir + '\\domain_list.txt', 'a', encoding='UTF-8')
domain_file.close()
# Shared log handle written to by link_check and by the __main__ section.
file = open(_script_dir + '\\log.txt', 'a', encoding='UTF-8')
if __name__ == '__main__':
    # Script entry point: show the banner, ask for the keyword, check every
    # URL from domain_list.txt on 10 worker threads and write the results
    # to the shared log file.
    # The banner is a raw string because it contains backslashes that would
    # otherwise be invalid escape sequences; the printed text is unchanged.
    print(r"""
_ _ _ ____ _ _
| | (_)_ __ | | __ / ___| |__ ___ ___| | _____ _ __
| | | | '_ \| |/ / | | | '_ \ / _ \/ __| |/ / _ \ '__|
| |___| | | | | < | |___| | | | __/ (__| < __/ |
|_____|_|_| |_|_|\_\ \____|_| |_|\___|\___|_|\_\___|_|
Code By Webrobot
""")
    print("温馨提示:请确认你已经在domain_list.txt输入你需要检查的网址!\n")
    # Keyword to look for in each page; link_check reads this global.
    keywords = input("请输入你要查找的关键字:")
    file.write("-----------------开始检查-----------------\n")
    # Read the site list that sits next to the script.
    domain_path = os.path.dirname(os.path.realpath(sys.argv[0])) + '\\domain_list.txt'
    # 'utf-8-sig' also accepts plain UTF-8 and drops a leading BOM, which
    # would otherwise become part of the first URL.
    with open(domain_path, 'r', encoding='utf-8-sig') as f:
        # One URL per line; blank lines are skipped so they are not
        # requested and reported as failures.
        all_urls = [line.strip() for line in f if line.strip()]
    # Number of URLs, used as the progress-bar total.
    all_urls_len = len(all_urls)
    # Run the checks on a pool of 10 threads.
    with Pool(10) as p:
        # Draining imap through tqdm drives the progress bar; link_check
        # returns nothing worth keeping.
        for _ in tqdm(p.imap(link_check, all_urls), total=all_urls_len):
            pass
    # Write the finish time.
    file.write("------------" + time.strftime('%Y-%m-%d %H:%M:%S') + "-----------\n")
    # Flush so log.txt is complete while the console is waiting on "pause".
    file.flush()
    print("\n检查完毕,请结束本程序查看log.txt")
    os.system("pause")
|