一、需求
1、要求每10分钟扒取一次网易彩票网站的大乐透彩种最新期号;
2、把最新的期号插入到数据库,如果不是最新的就不插入;
二、实现
1、方法有很多种,我这里想到的是用urllib获取返回的页面元素,然后对页面元素进行过滤
2、上代码
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from threading import Timer
import urllib
import urllib2
import re
import copy
import time
import MySQLdb
class GetNewIssue:
    '''
    Scrape the newest "Da Le Tou" (dlt) lottery issue from the NetEase
    lottery site and store it in MySQL when it is newer than the stored one.

    Written for Python 2 (uses ``urllib2`` and ``MySQLdb``).

    Class attributes (override on a subclass or instance to reconfigure):
        DEFAULT_URL      page fetched when no ``url`` is given.
        REQUEST_TIMEOUT  socket timeout, in seconds, for the HTTP request.
        DB_CONFIG        keyword arguments passed to ``MySQLdb.connect``.
    '''

    DEFAULT_URL = 'http://caipiao.163.com/order/dlt/#from=leftnav'
    REQUEST_TIMEOUT = 10
    DB_CONFIG = {
        'host': '192.168.0.0',
        'port': 3306,
        'user': 'username',
        'passwd': 'password',
        'db': 'db_name',
        'charset': 'utf8',
    }

    # The capture group replaces the former magic slice [16:21].
    _ISSUE_RE = re.compile(r'id="bet_period">(\d{5})')
    # Captures at most the first 16 characters after the label (the former
    # slice [20:36]); the trailing ``.*`` keeps the "one match per line"
    # behaviour of the original pattern.
    _END_TIME_RE = re.compile(u'id="bet_time">代购截止: (.{0,16}).*')

    def __init__(self, url=0, page=0):
        '''
        Remember the page URL and the last fetched page text.

        url:  address to scrape; a falsy value (the default ``0``) means
              "use DEFAULT_URL".
        page: initial page text; replaced by every call to get_page().
        '''
        self.url = url
        self.page = page

    def get_page(self):
        '''
        Download the lottery page and return it as unicode text.

        On a network error a hint is printed and an empty string is
        returned (and stored in ``self.page``), so that the parsing
        methods receive text and simply find nothing.
        '''
        if not self.url:
            self.url = self.DEFAULT_URL
        try:
            # urllib2.urlopen (unlike urllib.urlopen) raises URLError and
            # accepts a timeout, so a dead server cannot hang the poller.
            response = urllib2.urlopen(self.url, timeout=self.REQUEST_TIMEOUT)
            try:
                self.page = response.read().decode('utf8')
            finally:
                response.close()
        except (urllib2.URLError, IOError):
            # IOError also covers socket errors/timeouts raised by read().
            print(u'请检查URL地址!!!')
            self.page = u''
        return self.page

    @classmethod
    def _parse_issue(cls, page):
        '''Return the last 5-digit issue number found in *page*, or None.'''
        if not page:
            return None
        found = cls._ISSUE_RE.findall(page)
        if not found:
            return None
        return int(found[-1])

    @classmethod
    def _parse_end_time(cls, page):
        '''Return the last sale deadline in *page* with ":00" appended, or None.'''
        if not page:
            return None
        found = cls._END_TIME_RE.findall(page)
        if not found:
            return None
        return str(found[-1] + ':00')

    def get_issue(self):
        '''
        Fetch the page and return the newest issue number as an int.

        Returns None when the page could not be fetched or contains no
        issue number.
        '''
        return self._parse_issue(self.get_page())

    def get_end_time(self):
        '''
        Fetch the page and return the sale deadline of the current issue.

        The result is the first 16 characters after the deadline label
        with ":00" appended. Returns None when the page could not be
        fetched or contains no deadline.
        '''
        return self._parse_end_time(self.get_page())

    @staticmethod
    def get_sys_time():
        '''
        Return the current local time formatted as "YYYY-MM-DD HH:MM:SS".
        '''
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    def send_data(self):
        '''
        Scrape the newest issue and insert it into ``dlt_issue`` if it is
        newer than the highest issue already stored (or the table is empty).

        The page is fetched once so that the issue number and the deadline
        come from the same snapshot. Database errors are printed rather
        than raised, because this method is meant to be called repeatedly
        by a poller; the connection is always closed.
        '''
        page = self.get_page()
        issue = self._parse_issue(page)
        print(u'扒取到的最新期号为:%s' % issue)
        end_time = self._parse_end_time(page)
        if issue is None or end_time is None:
            # Nothing reliable was scraped; do not touch the database.
            print(u'未能从页面解析出期号或截止时间,本次不更新数据库')
            return
        create = self.get_sys_time()
        get_data = [(issue, create, end_time)]

        conn = None
        try:
            conn = MySQLdb.connect(**self.DB_CONFIG)
            cur = conn.cursor()
            try:
                cur.execute("SELECT issue FROM dlt_issue ORDER BY issue DESC LIMIT 1")
                row = cur.fetchone()
                # An empty table yields no row: treat it as "nothing stored".
                stored_issue = row[0] if row else None
                print(u'数据库存储期号为:%s' % stored_issue)
                if stored_issue is not None and int(stored_issue) >= issue:
                    print(u'*****<<数据已经是最新的,不需要更新!>>*****')
                else:
                    # Bound parameters: the driver does the quoting/escaping.
                    cur.execute(
                        "INSERT INTO dlt_issue(issue,create_time,end_time) VALUES(%s,%s,%s)",
                        (issue, create, end_time)
                    )
                    conn.commit()
                    print(u'*****<<更新期号成功,更新内容是:%s>>*****' % str(get_data))
            finally:
                cur.close()
        except Exception as e:
            # Best-effort boundary of the periodic job: report and carry on.
            print(e)
        finally:
            if conn is not None:
                conn.close()
if __name__ == '__main__':
    from threading import Event

    # NOTE(review): the requirement stated at the top of this file is one
    # poll every 10 minutes (600 s). The values below are kept from the
    # original script and look like short demo settings -- confirm before
    # deploying.
    START_DELAY_SECONDS = 1    # wait before the first poll
    POLL_INTERVAL_SECONDS = 3  # pause between two consecutive polls
    TOTAL_RUN_SECONDS = 15     # how long the main thread lets the poller run

    stop_requested = Event()

    def run_func():
        '''
        Run one scrape-and-store cycle, then schedule the next one.

        Each call performs exactly one poll; the chain of timers ends as
        soon as ``stop_requested`` is set.
        '''
        global timer
        if stop_requested.is_set():
            return
        try:
            GetNewIssue().send_data()
        finally:
            # Re-arm even if this cycle failed, so one bad poll does not
            # silently end the whole schedule.
            if not stop_requested.is_set():
                timer = Timer(POLL_INTERVAL_SECONDS, run_func)
                timer.start()

    timer = Timer(START_DELAY_SECONDS, run_func)
    timer.start()
    time.sleep(TOTAL_RUN_SECONDS)
    # Set the flag first: a cycle that is running right now must not re-arm
    # after the pending timer has been cancelled.
    stop_requested.set()
    timer.cancel()