吾爱破解 - LCG - LSG |安卓破解|病毒分析|破解软件|www.52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

搜索
查看: 651|回复: 22
上一主题 下一主题

[Python] 分享百度快排发包python源码核心文件

  [复制链接]
跳转到指定楼层
楼主
为人生而奋斗 发表于 2019-10-10 11:09 回帖奖励
  最近都在研究怎么做快排,分享下成果,可以一起学习交流!
  附上代码:
   
[Python] 纯文本查看 复制代码
# -*- coding: utf-8 -*-
from selenium import webdriver
import time
import requests
import random
import os
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import traceback
import urllib.request
import pymysql
import socket
#import win32api #pip install pypiwin32

#from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#DesiredCapabilities.INTERNETEXPLORER['ignoreProtectedModeSettings'] = True



# Dial the configured broadband (ADSL) entry with the Windows rasdial tool.
def connect():
    """Dial the connection described by the module-level ``g_adsl_account``.

    Runs ``rasdial <name> <username> <password>`` through ``os.system`` and
    then sleeps for 5 seconds before returning.
    """
    account = g_adsl_account
    dial_command = "rasdial {} {} {}".format(
        account['name'], account['username'], account['password']
    )
    os.system(dial_command)
    time.sleep(5)


# Hang up the configured broadband (ADSL) entry ("rasdial <name> /disconnect").
def disconnect():
    """Disconnect the connection named by ``g_adsl_account['name']``.

    Runs ``rasdial <name> /disconnect`` through ``os.system`` and then sleeps
    for 5 seconds before returning.
    """
    hangup_command = "rasdial {} /disconnect".format(g_adsl_account['name'])
    os.system(hangup_command)
    time.sleep(5)
	
# Look up the current public IP address and its reported location.
def get_ip():
    """Return ``[ip, address]`` sliced out of the chinaz "getip" response.

    The response body is decoded as UTF-8 and cut at fixed offsets relative
    to the first occurrences of ``"ip"`` and ``"address"``.

    Returns:
        A two-element list ``[ip, address]`` of strings.

    Raises:
        Whatever ``urllib.request.urlopen`` / ``bytes.decode`` raise on a
        network or decoding failure.
    """
    # The ``with`` block closes the HTTP response even when read()/decode()
    # raises; the original only closed it on the success path.
    with urllib.request.urlopen("http://ip.chinaz.com/getip.aspx") as response:
        body = response.read().decode("utf8")

    ip_pos = body.find("ip")
    address_pos = body.find("address")
    # The offsets presumably match a payload shaped like
    # {ip:'1.2.3.4',address:'...'} (4 == len("ip:'"), 9 == len("address:'"))
    # -- TODO confirm against the live endpoint.
    # NOTE(review): if either marker is missing, find() returns -1 and the
    # slices below yield meaningless text instead of failing.
    ip = body[ip_pos + 4:address_pos - 2]
    address = body[address_pos + 9:-2]
    return [ip, address]

# Insert one run record (IP, keyword, result) into the ip_log table.
def insert_db(ipdate):
    """Insert one row into ``ip_log`` of the local ``zongzong`` database.

    Args:
        ipdate: sequence of seven values in column order
            ``ip, address, keyword, url, error, page, rank``. The current
            local time is added as ``created_at``; the caller's sequence is
            copied, not modified.

    Raises:
        pymysql errors on connection or statement failure. The connection
        and cursor are always released before the error propagates.
    """
    ISOTIMEFORMAT = '%Y-%m-%d %X'
    # Work on a copy so the caller's list does not grow as a side effect.
    row = list(ipdate)
    row.append(time.strftime(ISOTIMEFORMAT, time.localtime()))

    # NOTE(review): the CREATE TABLE statement that used to be kept here as a
    # comment defined only id, ip, address, keyword, url, error, created_at;
    # the INSERT below also needs `page` and `rank` columns -- confirm the
    # live schema.
    conn = pymysql.connect(host='localhost', user='root', passwd='', port=3306, charset='utf8')
    try:
        cur = conn.cursor()
        try:
            cur.execute("USE zongzong")
            cur.execute("INSERT INTO ip_log(ip,address,keyword,url,error,page,rank,created_at) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)", row)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Release the connection even when a statement fails.
        conn.close()


# Collect the result links shown on the current search results page.
def get_search_url(driver):
    """Resolve the result links on the current page and filter them.

    Every ``<a class="c-showurl">`` inside ``div#content_left`` is resolved
    by sending a HEAD request to its ``href`` and reading the ``location``
    response header. Links whose resolved address contains any entry of the
    module-level ``out_urls`` are skipped.

    Args:
        driver: Selenium WebDriver currently showing a results page.

    Returns:
        ``[real_url, click_link]``: two parallel lists holding the resolved
        addresses and the matching anchor elements.
    """
    real_url = []
    click_link = []
    content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
    for link in content.find_elements_by_tag_name("a"):
        if link.get_attribute('class') != "c-showurl":
            continue

        url = link.get_attribute('href')
        # Resolve the redirect link via the "location" header of a HEAD reply.
        location = requests.head(url).headers.get('location')
        if location is None:
            # Not a redirect response: the original raised KeyError here and
            # aborted the whole scan, so skip just this link instead.
            continue
        if any(out_url in location for out_url in out_urls):
            continue

        real_url.append(location)
        # Keep the anchor element so the caller can click it later.
        click_link.append(link)

    return [real_url, click_link]
	
	
#function:解析加密url,剔除竞争对手的url
# def get_real_url(urls):
	# real_url = []
	# for url in urls:
		# header = requests.head(url).headers
		# is_append = True
		# for out_url in out_urls:
			# if out_url in header['location']:
				# is_append = False
				# break
			
		# if is_append == True:
			# real_url.append(header['location'])
	# return real_url

# Locate the target address inside a list of URLs.
def get_urlIndex(tagurl,urls):
    """Return the index of the first URL containing ``tagurl``, or -1.

    Args:
        tagurl: substring to look for.
        urls: iterable of URL strings, searched in order.
    """
    hits = (pos for pos, candidate in enumerate(urls) if tagurl in candidate)
    return next(hits, -1)

	
# Click the "next page" link below the search results.
def click_nextBtn(driver):
    """Click the first pager link whose text is ``"下一页>"``.

    The links inside ``div#page`` are scanned in order and each link's text
    is printed until the next-page link is found and clicked.

    Args:
        driver: Selenium WebDriver currently showing a results page.

    Returns:
        The same ``driver`` object, whether or not a link was clicked.
    """
    pager = driver.find_element_by_css_selector("div[id=\"page\"]")
    for anchor in pager.find_elements_by_tag_name("a"):
        label = anchor.text
        print(label)
        if label == "下一页>":
            anchor.click()
            # Stop right after the click: the remaining elements belong to
            # the page being left, so touching them again may fail or
            # trigger a second click.
            return driver

    return driver

			


# Click the result links whose ordinal position is listed in ``items``.
def click_search_url(driver,items):
    """Click selected result links, then close any window they opened.

    Only ``<a class="c-showurl">`` anchors inside ``div#content_left`` are
    counted; positions start at 0. After each click the function sleeps for
    a random 5-10 seconds, closes every window other than the one it started
    in, and switches back to that window.

    Args:
        driver: Selenium WebDriver currently showing a results page.
        items: container of positions to click.
    """
    content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
    anchors = content.find_elements_by_tag_name("a")
    # Remember the results window so we can return to it after each click.
    search_handle = driver.current_window_handle

    # Lazy filter: the class attribute is read one anchor at a time.
    result_anchors = (a for a in anchors if a.get_attribute('class') == "c-showurl")
    for position, anchor in enumerate(result_anchors):
        if position not in items:
            continue

        print("随机点击item:",position)
        print(anchor.get_attribute('href'),anchor.text)
        anchor.click()
        # Stay on the clicked page for a while.
        time.sleep(random.randint(5,10))

        # Close every window except the results window.
        for handle in driver.window_handles:
            if handle == search_handle:
                continue
            print("切换到当前窗口")
            driver.switch_to_window(handle)
            print("title:",driver.title)
            driver.close()
            print("切换到原来的窗口")
            driver.switch_to_window(search_handle)
            print("title:",driver.title)
        print("本次随机点击完毕!")

			
# Choose which result positions to click on a results page.
def get_random_index(index,len):
    """Return a list of result positions to click.

    Args:
        index: position of the target on the page, or a negative value when
            the target was not found.
        len: number of clickable results on the page (the name shadows the
            builtin, but it is part of the existing signature).

    Returns:
        * ``index >= 8``: one position from 0-4 and one from 5-8.
        * ``4 <= index < 8``: one position from 0-3 and one from 3-``index``.
        * ``0 <= index < 4``: ``[index]``.
        * target not found: one position that exists on the page, taken from
          ``0..len-1`` when ``len <= 5`` and from ``5..len-1`` otherwise;
          ``[]`` when the page has no results.
    """
    if index >= 8:
        return [random.randint(0, 4), random.randint(5, 8)]
    if index >= 4:
        return [random.randint(0, 3), random.randint(3, index)]
    if index >= 0:
        return [index]

    # Target not on this page. Any negative index is treated like -1; the
    # original left the result unbound for values below -1.
    if len <= 0:
        return []
    # randint() includes its upper bound, so stop at len - 1: the original
    # upper bounds (5 and len) could select a position that does not exist.
    if len <= 5:
        return [random.randint(0, len - 1)]
    return [random.randint(5, len - 1)]



def getUA():
    """Return one User-Agent string picked at random from a fixed list.

    Entries for other browsers (Firefox 46, IE 11, IE 8, 2345, Sogou and a
    macOS Chrome build) were listed here before but were disabled.
    """
    candidates = [
        # 360 browser
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        # Opera
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    ]
    return random.choice(candidates)

# Browser window size selection.
def getWindowSize():
    """Return a random ``[width, height]`` pair from three fixed sizes."""
    return random.choice([
        [1920, 1080],
        [1600, 900],
        [1280, 720],
    ])
	
	
# Switch the screen to a randomly chosen resolution.
def setDisplay():
    """Apply a random resolution from a fixed list via ``win32api``.

    NOTE(review): the ``import win32api`` line at the top of the file is
    commented out, so calling this function raises NameError until that
    import is restored.
    """
    width, height = random.choice([
        [1920, 1080],
        [1680, 1050],
        [1600, 900],
        [1440, 900],
        [1400, 1050],
    ])

    dm = win32api.EnumDisplaySettings(None, 0)
    dm.PelsWidth = width
    dm.PelsHeight = height
    dm.BitsPerPel = 32
    dm.DisplayFixedOutput = 0
    win32api.ChangeDisplaySettings(dm, 0)

	
# ADSL dial-up settings read by connect() and disconnect().
# NOTE(review): the account credentials are hard-coded in source; consider
# loading them from configuration kept outside the code.
g_adsl_account = dict(
    name="宽带连接",
    username="19ab68",
    password="643534",
)


# Addresses that must never be clicked (competitor pages); a result is
# skipped when its resolved address contains any of these strings.
out_urls = [
    'zhimo.yuanzhumuban.cc',
    'bbs.yuanzhumuban.cc',
    'http://money.163.com/15/0416/11/ANANRECC00253B0H.html',
]


# Search targets: each entry is [target address fragment, search keyword].
# Any elements after the first two are read by the main loop as "error
# keywords"; no current entry has them.
targetURL = [
    ['http://www.hkuws.com', '注册离岸公司'],
    ['zs.efu.com.cn/mornfeeit/', '梦菲雪'],
    ['zs.efu.com.cn/chengshijiaren/', '城市佳人'],
    ['www.kidsnet.cn/exposition', '童装展会'],
    #['top.kidsnet.cn/','童装加盟排行榜'],
    #['www.nynet.com.cn/','内衣网'],
    #['www.nzw.cn/','女装网'],
    ['zs.efu.com.cn/ks/', '卡索'],
    ['zs.efu.com.cn/distin-kidny/', '迪斯廷凯'],
    ['zs.efu.com.cn/fuzhuang/luyidigao/', '路易迪高童装代{过}{滤}理'],
    ['brand.efu.com.cn/brandshow-1221090.html', '凯帝龙驰'],
    ['zs.efu.com.cn/rabbitjero/', '兔子杰罗'],
    ['zs.efu.com.cn/wmprince/', '西瓜王子'],
    ['zs.efu.com.cn/betu', '百图'],
    ['zs.efu.com.cn/pepco/', '小猪班纳'],

    #['http://news.ifeng.com/a/20160518/48795120_0.shtml','华夏信财'],
    ['http://weibo.com/huaxiafinance', '华夏信财'],
    ['http://p2p.hexun.com/2016-04-26/183531215.html', '华夏信财'],
    #['http://news.xinhuanet.com/fortune/2016-04/26/c_128932834.htm','华夏信财'],
    ['http://www.xcf.cn/gdyw/201605/t20160526_772682.htm', '华夏信财'],
    ['http://www.huaxiaoxia.com/', '华夏信财'],
    #['https://lc.huaxiafinance.com/','华夏信财'],

    ['so.tedu.cn', '网络营销培训机构'],
    ['www.cosatto.net.cn', '个性安全座椅'],
    ['www.kaihuata.com/', '开化旅游'],
    #['www.kaihuata.com/','开化'],
]


# Main loop: for every [target, keyword] entry in targetURL, redial the ADSL
# line, start a PhantomJS browser, search the keyword on Baidu, look for the
# target address in the result pages, click it, and record the outcome
# through insert_db().
# NOTE(review): the outer handler is a bare `except:` that reads `keyword`
# and `target`, which are only assigned midway through the try body. A
# failure before that point raises NameError on the first iteration and logs
# the previous entry's values on later ones. The browser is also never quit
# on the failure path.
for targetInfo in targetURL:
	try:
		# Change IP: hang up and redial
		disconnect()
		connect()
		
		# Redial until DNS resolution of baidu.com succeeds
		while(1):
                    try:
                        socket.gethostbyname("baidu.com")
                        break;
                    except:
                        disconnect()
                        connect()
		# Change screen resolution (disabled)
		#setDisplay()
		
		
		# Start the browser (alternative drivers kept for reference)
		#driver = webdriver.Ie()
		#driver = webdriver.Chrome()
		#driver = webdriver.Firefox()
		
		# Set the PhantomJS user agent
		dcap = dict(DesiredCapabilities.PHANTOMJS)
		user_agent = getUA()
		print(user_agent)
		dcap["phantomjs.page.settings.userAgent"] = (
				user_agent
		)
		#dcap["phantomjs.page.settings.resourceTimeout"] = (15000)
		dcap["phantomjs.page.settings.loadImages"] = (False)
		driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=['--load-images=no'])
		
		
		# UA = getUA()
		# print(UA)
		# webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.User-Agent'] = UA
		# driver = webdriver.PhantomJS()
		
		driver.implicitly_wait(30)
		
		# Clear cookies
		driver.delete_all_cookies()

		#driver.maximize_window() # show the browser full screen

		# Open Baidu
		driver.get("http://www.baidu.com/")
		#driver.get("http://mch.weiba01.com/2.php")
		
		# Set the browser window size
		window_size = getWindowSize()
		driver.set_window_size(window_size[0], window_size[1])
				
				
		# Search for the keyword
		print('打开百度成功',driver.title)
		target = targetInfo[0]
		keyword = targetInfo[1]
		# Optional extra elements of an entry are "error keywords"; one is
		# picked at random and searched first
		if len(targetInfo)>2:
				error_keyword = targetInfo[random.randint(2,len(targetInfo)-1)]
		print(">>>>>>>>>>>>>>>点击的关键词:",keyword,"--->目标地址:",target,">>>>>>>>>>>>>>>>>>>>")
		
		
		if len(targetInfo)>2:
			# Simulate searching a wrong keyword first
			print("点击错误关键词:",error_keyword);
			driver.find_element_by_id("kw").send_keys(error_keyword)
			time.sleep(2)
			driver.find_element_by_id("su").click()
			time.sleep(5)
			driver.find_element_by_id("kw").clear()
			time.sleep(2)
			print("错误关键词点击完毕")
			
		driver.find_element_by_id("kw").send_keys(keyword)
		#time.sleep(2)

		# Click the search button
		print("...开始点击搜索按钮..")
		driver.find_element_by_id("su").click()
		#exit()
		print("...点击完毕..")
		time.sleep(2)

		
		# Fetch the results page: [0] resolved addresses, [1] matching link elements
		urls_res = get_search_url(driver)
		real_urls = urls_res[0]
		#get_search_url(driver)[1][2].click()
		
		
		#real_urls = get_real_url(urls)
		print("搜索出来的可点击着陆页个数:",len(real_urls))
		print(real_urls)
		index = get_urlIndex(target,real_urls)
		print("目标index:",index)

		# Page through the results while the target is not found. `page`
		# ends at 5 after the fourth "next page" click.
		# NOTE(review): a target found on that last scan leaves the loop with
		# page == 5 and is then treated as "not found" by the `page >=5`
		# check below.
		page = 1
		while index == -1 and page <= 4:
			if page == 1:
				# Click some of the first results to imitate a real user
				items = get_random_index(index,len(real_urls))
				#items = [4]
				print(items)
				click_search_url(driver,items)
			
			# Next page
			driver = click_nextBtn(driver)
			time.sleep(3)
			urls_res = get_search_url(driver)
			real_urls = urls_res[0]
			#real_urls = get_real_url(urls)
			print(real_urls)
			index = get_urlIndex(target,real_urls)
			
			page = page+1

			
			
		if index > 4 and page == 1:
			# Target is on the first page below position 4: click one or two other results first
			# NOTE(review): `int` shadows the builtin for the rest of the module
			int = random.randint(1,2)
			if int == 2:
				items = get_random_index(index,len(real_urls))
			else:
				items = [1]
			print(items)
			click_search_url(driver,items)
			
		if page >=5:
			# Give up: record a "no_find" row and move on to the next entry
			print("没有找到目标地址,放弃搜索...")
			print("关闭浏览器")
			driver.quit()
			
			time.sleep(5)
			data = get_ip()
			data.append(keyword)
			data.append(target)
			data.append("no_find")
			data.append(-1)
			data.append(-1)
			insert_db(data)
			continue
		
		print("目标在page",page,"当前排名:",index,real_urls[index])
		print("反问最后的目标页...")
		#driver.get(real_urls[index])
		# Click the link element that corresponds to the target address
		urls_res[1][index].click()
		time.sleep(5)
		
		nowhandle = driver.current_window_handle
		allhandles = driver.window_handles
		# Switch to the target page window, stay a while, then return to the results window
		for handle in allhandles:
			if handle != nowhandle:
				print("切换到当前窗口")
				driver.switch_to_window(handle)
				stime = random.randint(15,25)
				#stime = 5;
				print("目标页title:",driver.title,"停留-->",stime)
				time.sleep(stime)
				# Close the current (target) window
				'''关闭当前窗口'''
				driver.close()
				
				# Go back to the original window
				'''回到原先的窗口'''
				print("切换到原来的窗口")
				driver.switch_to_window(nowhandle)
				print("title:",driver.title)
		
		
		#time.sleep(random.randint(40,60))
		#time.sleep(5)

		# Print the cookies, delete them all, then print again
		print("打印cookie")
		cookie= driver.get_cookies()
		print(cookie)
		print("清除cookie")
		driver.delete_all_cookies()
		print("打印cookie:")
		cookie= driver.get_cookies()
		print(cookie)

		# Close the browser
		print("关闭浏览器")
		time.sleep(5)
		#driver.close()
		driver.quit()
		#time.sleep(5)
		
		# Record the run: [ip, address, keyword, target, status, page, index]
		data = get_ip()
		data.append(keyword)
		data.append(target)
		data.append("success")
		data.append(page)
		data.append(index)
		insert_db(data)
	
	except:
		# Any failure above is recorded as a "faild" row (see the NOTE at the top of the loop)
		data = get_ip()
		data.append(keyword)
		data.append(target)
		data.append("faild")
		data.append(-1)
		data.append(-1)
		insert_db(data)


   附上附件: python_百度快排脚本.zip (6.15 KB, 下载次数: 57)

免费评分

参与人数 3吾爱币 +5 热心值 +3 收起 理由
newbie2019 + 1 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!
苏紫方璇 + 3 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!
99910369 + 1 + 1 谢谢@Thanks!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

推荐
bachelor66 发表于 2019-10-10 12:27
没有ADSL怎么办?                              
推荐
 楼主| 为人生而奋斗 发表于 2019-10-10 11:21 <

对,还是模拟点击,只是一个脚本模拟人点击,用宽带拨号换IP点
4#
luopo 发表于 2019-10-10 11:12
感谢大佬,新手问一下这个有什么作用啊???:
5#
 楼主| 为人生而奋斗 发表于 2019-10-10 11:13 <
luopo 发表于 2019-10-10 11:12
感谢大佬,新手问一下这个有什么作用啊???:

百度快速排名用的
6#
obeina 发表于 2019-10-10 11:14
这是把宽带账号密码都贴出来了?
7#
 楼主| 为人生而奋斗 发表于 2019-10-10 11:15 <
obeina 发表于 2019-10-10 11:14
这是把宽带账号密码都贴出来了?

哈哈,忘记改了
8#
zooo 发表于 2019-10-10 11:20
ADSL  点击  
9#
lovnie 发表于 2019-10-10 11:26
这效果怎么样呢
10#
zooo 发表于 2019-10-10 11:26
ip 还是痛点, 点多了就会被过滤了。
11#
wslaoding 发表于 2019-10-10 11:36
666啊,老铁,代码学习下。
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则 警告:禁止回复与主题无关内容,违者重罚!

快速回复 收藏帖子 返回列表 搜索

RSS订阅|小黑屋|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2019-10-14 22:02

Powered by Discuz!

© 2001-2017 Comsenz Inc.

快速回复 返回顶部 返回列表