吾爱破解 - LCG - LSG |安卓破解|病毒分析|www.52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 2784|回复: 12
收起左侧

[Python 原创] 使用py自带库_自己追更的小说下载脚本

  [复制链接]
onepc 发表于 2022-11-15 13:30
使用ini配置,支持接着上次的进度继续下载,适合网上没有txt下载的刚发布的小说、并且喜欢下载到手机上看txt的这种场景
代码太差,请轻喷。

[Python] 纯文本查看 复制代码
#!/usr/bin/env python
#coding=utf8

import urllib.request,re,os,io,gzip,sys,configparser,time

# Directory part of the path this program was started with (sys.argv[0]).
RootDir = os.path.dirname(sys.argv[0])

# Program file name with a trailing ".py" / ".exe" (any case) removed.
# Raw string: '\.' in a normal string literal is an invalid escape sequence.
TmpName = re.sub(r'\.exe$|\.py$','',os.path.basename(sys.argv[0]),flags=re.I)

# INI file that holds the rules and the per-book progress: <name>.ini next to the program.
ConfigFile = os.path.join(RootDir,'%s.ini' % TmpName )
# Directory the downloaded .txt files are written to.
XsDir = os.path.join(RootDir,'download')

# Log file written by printmsg(): <name>.log next to the program.
XsLogFile = os.path.join(RootDir,'%s.log' % TmpName)

# Filled at start-up with the key/value pairs of the [regular] section.
D_rule = {}
# Section names that hold settings rather than book lists.
L_cfg =['config','regular']

# Request headers sent with every HTTP request.
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',}



def ungzip(data):
  """Return ``data`` gunzipped, or unchanged when it is not valid gzip.

  data: raw bytes of a response body, possibly gzip-compressed.

  Returns the decompressed bytes when gzip.decompress() succeeds,
  otherwise the input object itself.
  """
  import zlib  # local import: only needed for the exception type below
  try:
    return gzip.decompress(data)
  except (OSError, EOFError, zlib.error):
    # OSError (incl. gzip.BadGzipFile): not a gzip stream;
    # EOFError: truncated stream; zlib.error: corrupt compressed data.
    # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
    return data


#def geturldata(url,headers,code):
#  src=''
#  try:
#    request = urllib.request.Request(url=url,headers=headers)
#    response = urllib.request.urlopen(request)
#    src = ungzip(response.read()).decode(code)
#  except urllib.error.HTTPError:
#    print ('%s 网站无法打开.'%url)
#  except urllib.error.URLError:
#    print ('%s URL异常.'%url)
#  except:
#    print('异常无法访问 %s' % url)
#  #finally:
#  #  response.close()
#  return src





def writexs(con,xsrc,code,title,section):
  """Strip HTML line/paragraph tags from a chapter and append it to the book file.

  con: chapter heading; written on its own line before the text.
  xsrc: chapter body as an HTML fragment.
  code: text encoding used for the output file.
  title: output file name; the file lives inside XsDir.
  section: site/section name. For 'bkneng' the <p> and </p> tags are turned
    into newlines, for every other section they are removed.

  Errors from open()/write() propagate to the caller.
  """
  # Only the paragraph-tag replacement differs between sections.
  para = '\n' if section == 'bkneng' else ''
  # NOTE(review): as shown here this replaces one character with a space; the
  # first literal was possibly '&nbsp;' or U+00A0 originally - confirm.
  xsrc = xsrc.replace(' ',' ')#.replace(u'\xa0', u' ')
  for old,new in (('<br/>',''),('<br />',''),('<br>',''),('<p>',para),('</p>',para),('\r','')):
    xsrc = xsrc.replace(old,new)

  # title is built by downxs() from text scraped off a web page; neutralise
  # path separators so it cannot name a location outside XsDir.
  for sep in (os.sep,os.altsep):
    if sep:
      title = title.replace(sep,'_')
  workxs=os.path.join(XsDir,title)

  # Append mode creates the file when it does not exist yet, so no separate
  # "file exists" branch is needed.
  with open(workxs,'a',encoding=code) as f:
    f.writelines((con,'\n',xsrc,'\n'))
      



def getxs(link,cont,code,r_content,title,section):
  """Download one chapter page and hand its text to writexs().

  link: URL of the chapter page.
  cont: chapter heading, passed through to writexs().
  code: character encoding used to decode the page; also passed to writexs().
  r_content: regular expression applied with re.S; the first re.findall()
    result is taken as the chapter body.
  title: output file name, passed through to writexs().
  section: site/section name, passed through to writexs().

  Network and decoding errors propagate to the caller.
  """
  rq = urllib.request.Request(link,headers=headers)
  # The with-block closes the response even when read()/decode() raises.
  with urllib.request.urlopen(rq) as resp:
    contsrc = ungzip(resp.read()).decode(code)
  contxs = re.findall(r_content,contsrc,re.S)
  if not contxs:
    # Report the miss instead of dropping the chapter without a trace.
    print('%s 获取不了正文' % link)
    return
  writexs(cont,contxs[0],code,title,section)
  
def printmsg(a1,a2,a3,logflag):
  """Append one timestamped line "a1 a2 a3" to XsLogFile.

  Nothing is written unless logflag is exactly 'Y'.
  """
  if logflag!='Y':
    return
  stamp = time.strftime('[%Y-%m-%d %H:%M:%S]',time.localtime())
  entry = '%s %s %s %s\n' % (stamp,a1,a2,a3)
  with open(XsLogFile,'a',encoding='utf-8') as logfile:
    logfile.write(entry)
    

def downxs(urllist,code,r_title,r_url,r_content,objcfg,section,logflag):
  """Download the not-yet-fetched chapters of every book of one section.

  urllist: (description, 'index_url,chapters_done') pairs as returned by
    ConfigParser.items() for the section.
  code: character encoding of the site's pages.
  r_title: regex; its first match on the index page is the book name.
  r_url: regex applied to the index page; each result is indexed as
    [0] = chapter link and [1] = chapter name.
  r_content: regex for the chapter body, passed to getxs().
  objcfg: the ConfigParser object; progress is stored in it and written
    back to ConfigFile after every chapter.
  section: section name; also selects the '<section>_tmpurl' rule and the
    link prefix.
  logflag: passed to printmsg().

  Network, decoding and file errors propagate to the caller; progress saved
  for chapters that were already written is kept in ConfigFile.
  """
  # Sections whose chapter links need a fixed prefix; the prefix is prepended
  # verbatim. Any other section uses the link exactly as matched.
  prefixes = {
    'biququ': 'https://www.biququ.com/',
    'bkneng': 'https://wenxue.bkneng.com',
    'xibiquge': 'http://www.xibiquge.com',
  }
  prefix = prefixes.get(section,'')
  tmpurl_key = '%s_tmpurl' % section

  for urlseq in urllist:
    desc = urlseq[0].strip()
    # Split on the LAST comma so a URL that itself contains commas survives
    # the round trip through the config file.
    url,sep,seqtext = urlseq[1].rpartition(',')
    if not sep:
      # No chapter count stored yet: start from the first chapter.
      url,seqtext = urlseq[1],'0'
    url = url.strip()
    seqnum = int(seqtext.strip())

    request = urllib.request.Request(url=url,headers=headers)
    # The with-block closes the response even when read()/decode() raises.
    with urllib.request.urlopen(request) as response:
      src = ungzip(response.read()).decode(code)
    if not src:
      continue

    shuname = re.findall(r_title,src)
    if not shuname:
      print ('%s 获取不了标题' % desc)
      continue
    title = '%s.txt' % shuname[0]

    # Optional '<section>_tmpurl' rule: narrow the page to the part that
    # holds the chapter list before extracting the links.
    listsrc = src
    if tmpurl_key in D_rule:
      xs_1 = re.findall(D_rule[tmpurl_key],src,re.S)
      if not xs_1:
        print ('%s 获取不了章节' % desc)
        continue
      listsrc = xs_1[0]
    xs = re.findall(r_url,listsrc)
    if not xs:
      print ('%s 获取不了章节' % desc)
      continue

    currsj = time.strftime('[%Y-%m-%d %H:%M:%S]',time.localtime())
    print ('%s %s 共%s章,正从第%s章开始下载...' % (currsj,shuname[0],str(len(xs)),str(seqnum+1) ))

    updated = 0
    for kk,chap in enumerate(xs,1):
      if kk<=seqnum:
        continue  # chapter was downloaded in an earlier run
      link = prefix + chap[0]
      printmsg (kk,link,chap[1],logflag)
      getxs(link,chap[1],code,r_content,title,section)
      # Persist progress after every chapter so an interrupted run resumes here.
      objcfg.set(section, desc,'%s,%s' % (url,str(kk)))
      with open(ConfigFile, 'w',encoding='utf-8') as f:
        objcfg.write(f)
      updated += 1

    currsj = time.strftime('[%Y-%m-%d %H:%M:%S]',time.localtime())
    # Count what was really fetched; len(xs)-seqnum would go negative when the
    # stored count exceeds the number of chapters found.
    print ('%s %s 下载完成,共更新%s章!' % (currsj,shuname[0],str(updated)))
    print ('-'*50)


if __name__ == '__main__':
  # Entry point: load the INI file, copy the [regular] rules into D_rule and
  # run downxs() for every book section that has a complete rule set.
  if not os.path.isfile(ConfigFile):
    print('%s 配置文件不存在.' % ConfigFile)
    sys.exit(1)  # non-zero status: the run did not succeed

  os.makedirs(XsDir,exist_ok=True)

  # interpolation=None: the values are regular expressions and URLs, so a
  # literal '%' must be read and written back as-is instead of being parsed
  # as configparser interpolation syntax.
  objconf = configparser.ConfigParser(interpolation=None)
  objconf.read(ConfigFile, encoding="utf8")

  if not objconf.has_section('regular'):
    print('%s 缺少 [regular] 配置段.' % ConfigFile)
    sys.exit(1)
  D_rule.update(objconf.items('regular'))

  # Logging stays off when [config] or its logflag option is missing.
  logflag = objconf.get('config','logflag',fallback='N')

  # Rule suffixes every book section needs in [regular].
  required = ('title','url','content','code')
  for x in objconf.sections():
    if x in L_cfg:
      continue
    l_x = objconf.items(x)
    if l_x and all('%s_%s' % (x,k) in D_rule for k in required):
      downxs(l_x,D_rule['%s_code' % x],D_rule['%s_title' % x],D_rule['%s_url' % x],D_rule['%s_content' % x],objconf,x,logflag)





ini配置文件:
[XML] 纯文本查看 复制代码
[config]
logflag = Y

[regular]
imayitxt_title = <h1 class="page-title ar_titled">(.*)?</h1>
imayitxt_url = <a href="(.*)" class="name">(.*)</a>
imayitxt_content = <div class="page-content " id="ChapterContents">(.*)</div>.*</div></div><div class="ft"><script>
imayitxt_code = utf-8
biququ_title = <dt>(.*)?全部章节</dt>
biququ_url = <dd><a href="(.*)">(.*)</a></dd>
biququ_content = <div id="content">\s*<div class="read_tj">.*?</div>(.*)<script>chaptererror\(\);</script>
biququ_code = utf-8
bkneng_title = <h2 class="left">(.*)?</h2>
bkneng_url = <a href="(.*)" title="(.*)">
bkneng_content = <div class="myContent" flag="1" style="position: relative">(.*?)</div>
bkneng_code = utf-8
xibiquge_title = <dt>《(.*)?》正文</dt>
xibiquge_tmpurl = 正文</dt>(.*)</dl>
xibiquge_url = <dd><a href="(.*)">(.*)</a></dd>
xibiquge_content = <div id="content">(.*?)</div>
xibiquge_code = gbk

[xibiquge]
西游:瞎眼五百年,弟子全是大妖 = http://www.xibiquge.com/33_33623/,189
深海余烬 = http://www.xibiquge.com/28_28931/,227
低调在修仙世界 = http://www.xibiquge.com/19_19514/,211


[imayitxt]
打工先知 = http://www.imayitxt.com/showclist/162715.html,292
开局失业,我让歌坛大魔王回归 = http://www.imayitxt.com/showclist/166684.html,261
我在异界肝经验 = http://www.imayitxt.com/showclist/161789.html,348
修炼从简化功法开始 = http://www.imayitxt.com/showclist/164116.html,334

免费评分

参与人数 1吾爱币 +7 热心值 +1 收起 理由
苏紫方璇 + 7 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

wangwlex1988 发表于 2022-11-15 14:53
学习着,有用
8359 发表于 2022-11-15 15:11
heimaoct 发表于 2022-11-15 15:56
qxlsl 发表于 2022-11-15 16:47
好东西,学到了
qianseshitou 发表于 2022-11-15 17:04
感谢无私分享
monoegod 发表于 2022-11-16 10:26
有点东西
qfxldhw 发表于 2022-11-16 13:10

有点东西
ERMU 发表于 2023-7-18 08:50
学习学习
lingwushexi 发表于 2023-7-18 08:59
先收藏慢慢学习
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则 警告:本版块禁止灌水或回复与主题无关内容,违者重罚!

快速回复 收藏帖子 返回列表 搜索

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-4-25 18:55

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表