zl程序教程

您现在的位置是:首页 >  后端

当前栏目

python实现的一个火车票转让信息采集器

Python 实现 一个 信息 采集器 火车票 转让
2023-06-13 09:15:37 时间

好吧,我承认:晚上看到一张合适的转让票,打电话过去却被告知已经被别人买走了,这件事让我非常郁闷。所以写了这个脚本,直接上代码吧。

#coding:utf-8
"""
春运查询火车票转让信息
Author:piglei2007@gmail.com
Date:2011.01.25
"""
import datetime
import os
import re
import socket
import time
import traceback
import urllib2
import urlparse
# Give up on any blocking socket operation after 20 seconds so that a hung
# site cannot stall the whole run.
socket.setdefaulttimeout(20)

# Matches runs of whitespace; used to squeeze listing titles.
BLANK_RE = re.compile(r"\s+")

# Install a process-wide opener that keeps cookies between requests and sends
# browser-like headers with every urllib2.urlopen() call.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    (
        "User-agent",
        # The spaces are part of the browser's User-Agent string.
        "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) "
        "Gecko/20090704 Firefox/3.5",
    ),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

from BeautifulSoup import BeautifulSoup

# Search-page URL templates, keyed by the site name that parse_content()
# dispatches on. "%(train)s" and "%(date)s" are filled in by main().
SOURCE = {
    "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

# File holding the listing URLs already seen, one per line.
RECORD_FILE = "/tmp/ticket_records.txt"

def parse_record():
    """
    Load the set of listing URLs that have already been seen.

    Reads RECORD_FILE, one entry per line, stripping surrounding whitespace
    and ignoring blank lines. If the file cannot be opened, an empty one is
    created and an empty set is returned.

    Returns:
        A set of the recorded entries.
    """
    try:
        with open(RECORD_FILE, "r") as record_file:
            stripped = (line.strip() for line in record_file)
            # Blank lines would otherwise end up as a bogus "" entry.
            return set(entry for entry in stripped if entry)
    except IOError:
        # First run (or unreadable file): start over with an empty file.
        with open(RECORD_FILE, "w"):
            pass
        return set()

def flush_record(records):
    """
    Overwrite RECORD_FILE with the given entries, one per line.

    Args:
        records: Iterable of strings to store.
    """
    # The with-block closes the file, so the data is flushed to disk before
    # returning instead of whenever the handle gets garbage-collected.
    with open(RECORD_FILE, "w") as record_file:
        record_file.write("\n".join(records))

def main(config):
    """
    Fetch every configured search page and mail the listings not seen before.

    For each train/date/site combination the search page is downloaded and
    parsed with parse_content(). Listings whose text contains u"卧" (sleeper)
    and whose URL is not yet in the record file are collected and mailed to
    config["people"]; the record file is then rewritten with all known URLs.

    A page that cannot be downloaded is reported on stderr and skipped, so
    one unreachable site does not suppress alerts from the other.

    Args:
        config: Mapping with the keys "trains" (train numbers), "dates"
            (date strings substituted into the URL templates) and "people"
            (recipient addresses).
    """
    existed = parse_record()
    to_email = []

    for train in config["trains"]:
        for date in config["dates"]:
            for source, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                try:
                    content = urllib2.urlopen(page_url).read()
                except IOError:
                    # urllib2.URLError and socket timeouts derive from
                    # IOError; keep going with the remaining pages.
                    traceback.print_exc()
                    continue
                soup = BeautifulSoup(content)
                for href, text in parse_content(source, soup, train):
                    # Resolve against the page actually fetched, not the
                    # template with its unfilled "%(train)s" placeholders.
                    link = urlparse.urljoin(page_url, href)
                    # Sleeper tickets only!
                    if link not in existed and u"卧" in text:
                        to_email.append([text, link])
                        existed.add(link)
    if to_email:
        # The mail is sent as HTML, so put each listing on its own line.
        content = "<br/>\n".join(
            "|".join(entry) for entry in to_email
        ).encode("utf-8")
        simple_mail(config["people"], content)
    flush_record(existed)

def parse_content(type, soup, train):
    """
    Extract ticket listings from one parsed search page.

    Args:
        type: Site key the page came from, "58" or "ganji"; any other value
            yields an empty list.
        soup: Parsed page offering BeautifulSoup-style find()/findAll().
        train: Train number to look for (only used for "58" pages).

    Returns:
        A list of [href, text] pairs, one per listing link found. The href
        is returned exactly as it appears in the page. Entries without a
        link or without an href attribute are skipped.
    """
    result = []
    if type == "58":
        info_table = soup.find("table", id="infolist")
        if info_table is not None:
            # Escape the train number so it is matched literally; the
            # lookahead rejects "<train>时刻表" (timetable) entries.
            pattern = re.compile(u"%s(?!时刻表)" % re.escape(train), re.I)
            # NOTE(review): with text= BeautifulSoup 3 appears to return the
            # matching strings rather than tags, hence .parent - confirm.
            for x in info_table.findAll("tr", text=pattern):
                a = x.parent
                href = a.get("href") if a is not None else None
                if href is None:
                    continue
                # Titles contain layout whitespace; remove all of it.
                result.append([href, BLANK_RE.sub("", a.text)])
    elif type == "ganji":
        for x in soup.findAll("dl", {"class": "list_piao"}):
            dt = x.dt
            a = dt.a if dt is not None else None
            href = a.get("href") if a is not None else None
            if href is None:
                continue
            result.append([href, a.text])
    return result

# SMTP account used by simple_mail() to send alerts. The user and password
# below are placeholders; fill in a real account before running.
EMAIL_HOST = "smtp.sohu.com"
EMAIL_HOST_USER = "yourname@sohu.com"
EMAIL_HOST_PASSWORD = "yourpassword"
EMAIL_PORT = 25

def simple_mail(to, content):
    """
    Send an HTML alert mail through the configured SMTP account.

    Args:
        to: Sequence of recipient addresses.
        content: UTF-8 encoded HTML body.

    Raises:
        smtplib.SMTPException: If login or delivery fails. The connection
            is closed in that case as well.
    """
    import smtplib
    from email.header import Header
    from email.mime.text import MIMEText

    msgRoot = MIMEText(content, "html", "UTF-8")
    # isoformat() needs a one-character separator; a space gives
    # "YYYY-MM-DD HH:MM:SS.ffffff".
    subject = "[%s]有票来啦!!!!" % datetime.datetime.today().isoformat(" ")
    # The subject is not ASCII, so it has to be RFC 2047 encoded.
    msgRoot["Subject"] = Header(subject, "UTF-8")
    msgRoot["From"] = EMAIL_HOST_USER
    msgRoot["To"] = ",".join(to)

    s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        s.sendmail(EMAIL_HOST_USER, to, msgRoot.as_string())
    finally:
        # Release the socket even when login or sending raised.
        s.close()

def switch_time_zone():
    """
    Switch this process to the Asia/Shanghai time zone.

    Sets the TZ environment variable and, where the platform supports it,
    asks the C library to re-read it.
    """
    os.environ["TZ"] = "Asia/Shanghai"
    # time.tzset() only exists on Unix; without the guard the module-level
    # call below would raise AttributeError on other platforms.
    if hasattr(time, "tzset"):
        time.tzset()


# Applied at import time, before any timestamp is produced.
switch_time_zone()

if __name__ == "__main__":
    # Trains and dates to watch, and who gets the alert mail.
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": (
            "youremail@sohu.com",
        ),
    }
    try:
        main(config)
    except Exception:
        # Top-level boundary: report the failure, then exit non-zero so
        # that cron (or any wrapper) can tell the run did not succeed.
        print(traceback.format_exc())
        raise SystemExit(1)
    else:
        print("%s:ok" % datetime.datetime.today())

然后把这个脚本加入 cron 定时执行即可,你懂的。