import http.client
import os
import re
import smtplib
import urllib.request
from email.header import Header
from email.mime.text import MIMEText

from lxml import etree
# Mail settings consumed by the script entry point at the bottom of the file.

# Host name of the outgoing mail server; send_email opens an SSL connection
# to it on port 465.
SMTP_host = "smtp.163.com"
# Not referenced anywhere else in the visible part of this file.
mail_user = ""
# Password passed to the SMTP login together with from_account (blank here).
from_passwd = ""
# Sender address; it doubles as the SMTP login name.
from_account = ''
# Recipient address handed to send_email as `to_account`; currently a plain
# string despite the "List" in its name.
toAccoutList = ''
def getHtmlCode(url, timeout=1):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up.
            Defaults to 1.

    Returns:
        The decoded page text, or the sentinel string ``"error"`` when the
        request fails or the body is not valid UTF-8.
    """
    request = urllib.request.Request(url)
    # Send a browser-like User-Agent instead of urllib's default one.
    request.add_header('User-Agent', 'Mozilla/6.0')
    try:
        # The context manager closes the response even if read()/decode() raises.
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return response.read().decode('utf-8')
    except (OSError, http.client.HTTPException, UnicodeDecodeError):
        # OSError covers URLError/HTTPError and socket timeouts.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        print("报错IP问题无法获得")
        return "error"
def getInfoByXPath(code):
    """Extract the newest task from the listing page via fixed XPath queries.

    Args:
        code: HTML source of the listing page.

    Returns:
        A ``(title, href, indus_name)`` tuple taken from the first ``<a>``
        element matched by the title XPath: its text with all whitespace
        removed, followed by its ``href`` and ``indus_name`` attributes.

    Raises:
        IndexError: If the title XPath matches nothing.
    """
    html = etree.HTML(code)
    anchors = html.xpath('/html/body/div[10]/div[1]/div[2]/div[4]/div/div/h3/a')
    prices = html.xpath('/html/body/div[10]/div[1]/div[2]/div[4]/div[2]/div[1]/h3/b')
    if not anchors:
        raise IndexError('no task link found at the expected XPath')

    anchor = anchors[0]
    # Drop spaces, newlines and any other whitespace in one pass; an element
    # without text yields an empty title instead of an AttributeError.
    title = ''.join((anchor.text or '').split())
    href = anchor.get('href')
    indus_name = anchor.get('indus_name')
    # The price is only displayed, so a missing node must not abort the lookup.
    price_text = (prices[0].text or '') if prices else ''

    print('通过XPath获取的最新的需求是:')
    print(title)
    print(href)
    print(indus_name)
    print(price_text + '\r\n\r\n\r\n')
    return title, href, indus_name
def getInfoByKeyWord(code):
    """Extract the newest task's URL and title by plain string search.

    The first ``<div class="task_class_list_li_box">`` section of *code* is
    isolated (up to the next such marker, or to the end of the text when the
    page holds a single task) and searched for the task link and the first
    ``title="..."`` attribute.

    Args:
        code: HTML source of the listing page.

    Returns:
        A ``(task_url, task_title)`` tuple of strings.

    Raises:
        ValueError: If the marker, the task URL or the title cannot be found.
    """
    marker = '<div class="task_class_list_li_box">'
    marker_pos = code.find(marker)
    if marker_pos == -1:
        raise ValueError('task list marker not found in page source')
    section_start = marker_pos + len(marker)
    section_end = code.find(marker, section_start)
    if section_end == -1:
        # Only one task box on the page: its section runs to the end.
        section_end = len(code)
    section = code[section_start:section_end]

    url_match = re.search(r'https://task\.epwk\.com/\d*/', section)
    title_match = re.search(r'title="(.*?)"', section)
    if url_match is None or title_match is None:
        raise ValueError('task URL or title not found in the first task section')
    taskUrl = url_match.group(0)
    # The capture group yields the attribute value without the surrounding
    # ``title=`` prefix and quotes.
    taskTitle = title_match.group(1)

    print('通过字符串获取的最新的需求是:')
    print(taskUrl)
    print(taskTitle)
    return taskUrl, taskTitle
def saveTaskByOS(taskInfo, path='设计最新需求.txt'):
    """Overwrite the task record file with *taskInfo*.

    Args:
        taskInfo: Text to store; any previous content of the file is replaced.
        path: File to write, encoded as UTF-8. Defaults to the record file
            in the current working directory.
    """
    with open(path, 'w', encoding='utf-8') as record:
        record.write(taskInfo)
    print('保存成功!')
def getOldTaskInfo(path='设计最新需求.txt'):
    """Return the previously saved task record, creating the file if absent.

    Args:
        path: UTF-8 text file holding the last saved record. Defaults to the
            record file in the current working directory.

    Returns:
        The file's content, or the placeholder ``'null'`` when the file did
        not exist; in that case it is created containing ``'null'``.
    """
    try:
        # Open directly rather than checking for existence first, so there is
        # no window between the check and the read.
        with open(path, 'r', encoding='utf-8') as record:
            old = record.read()
    except FileNotFoundError:
        with open(path, 'w', encoding='utf-8') as record:
            record.write('null')
        # Report the creation before returning so the message is actually shown.
        print('未发现文件,新建成功')
        return 'null'
    print('旧需求是:')
    print(old)
    return old
def send_email(SMTP_host, from_account, from_passwd, to_account, subject, content):
    """Send an HTML e-mail through an SSL-secured SMTP server on port 465.

    Delivery is best effort: failures are printed, not raised.

    Args:
        SMTP_host: Host name of the SMTP server.
        from_account: Sender address, also used as the login name.
        from_passwd: Password for the SMTP login.
        to_account: Recipient address, or an iterable of addresses.
        subject: Subject line of the message.
        content: HTML body of the message.
    """
    # Accept a single address as well as a collection of them.
    recipients = [to_account] if isinstance(to_account, str) else list(to_account)
    recipient_label = ", ".join(recipients)

    message = MIMEText(content, 'HTML', 'utf-8')
    message['Subject'] = subject
    # Give the message explicit sender and recipient headers.
    message['From'] = from_account
    message['To'] = recipient_label
    try:
        # The context manager sends QUIT and closes the socket on every path.
        with smtplib.SMTP_SSL(SMTP_host, 465) as smtpObj:
            smtpObj.login(from_account, from_passwd)
            smtpObj.sendmail(from_account, recipients, message.as_string())
        print("向用户" + recipient_label + "发送邮件成功!")
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers connection-level failures (refused, DNS, timeout),
        # which are reported the same way as protocol errors.
        print("向用户" + recipient_label + "发送邮件失败!")
        print(e)
def main():
    """Check the task listing once and mail a notice when a new task appears.

    The newest task's URL and title are compared with the record saved by the
    previous run; when they differ, the record is overwritten and an HTML
    e-mail carrying the task link and title is sent.
    """
    htmlCode = getHtmlCode("https://task.epwk.com/sj/?o=7")
    if htmlCode == "error":
        # getHtmlCode has already reported the failure; there is no page to
        # parse, so stop instead of feeding the sentinel to the parser.
        return

    taskUrl, taskTitle = getInfoByKeyWord(htmlCode)
    taskInfo = taskUrl + taskTitle

    oldInfo = getOldTaskInfo()
    if taskInfo == oldInfo:
        print('未发现新需求')
        return

    print('发现新需求:' + taskInfo)
    saveTaskByOS(taskInfo)

    # NOTE(review): the initial value of msg and a trailing fragment appended
    # to it are not recoverable from the file as received; the body below is
    # built from the two fragments that are. The doubled </a> is kept as found.
    msg = ''
    msg += '<a href="' + taskUrl + '"> <h1>' + taskTitle + '</h1> </a></a>'
    msg += "<p>需求标题:" + taskTitle + "</p>"
    send_email(SMTP_host, from_account, from_passwd, toAccoutList, taskTitle, msg)


if __name__ == '__main__':
    main()