每月 26 号是深圳小汽车增量摇号开始的日子,摇号结束后,都要去看看自己有没有中签。以前微信还没推出城市服务的时候,只能自己去网站查询,还要输入自己的申请号,估计也没多少人知道。嫌麻烦,就写了一个采集程序,功能很简单。用了很久,现在不想维护了,先放在博客做个备忘。下面是最初的 Python 2 版本:
#!/usr/bin/python
# -*-coding:utf-8 -*-
import urllib
import urllib2
import re
def main(url='http://apply.sztb.gov.cn/apply/app/status/norm/person',
         current_range='201801', total_pageNo=612):
    """Scrape the Shenzhen car-quota lottery result pages into a text file.

    Python 2 version (uses ``urllib2``).  The query form is POSTed to
    ``url`` once per result page.  Every ``content_data`` row found in the
    ``ge2_content`` table yields one output line made of the two captured
    cells (a run of digits and the following cell -- presumably the
    application code and the applicant name), separated by a space.  Lines
    are appended to ``f<current_range>.txt`` in the current directory.

    Args:
        url: Address the query form is POSTed to.
        current_range: Value sent as ``issueNumber``; also part of the
            output file name.
        total_pageNo: Number of result pages; pages 1..total_pageNo
            (inclusive) are requested.
    """
    # Compile both patterns once instead of once per page.
    table_re = re.compile(r'<table\s*class="ge2_content".*?</table>', re.S)
    row_re = re.compile(
        r'<tr\s*class="content_data">.*?<td >(\d+)</td>.*?<td >(.*?)</td>.*?</tr>',
        re.S)
    values = {'pageNo': '1', 'issueNumber': current_range, 'applyCode': ''}
    # "with" guarantees the output file is closed even if a request fails.
    with open('f' + current_range + '.txt', 'a') as f:
        # range() excludes its end value, so +1 is needed to fetch the
        # last page as well.
        for num in range(1, total_pageNo + 1):
            values['pageNo'] = num
            data = urllib.urlencode(values)
            response = urllib2.urlopen(urllib2.Request(url, data))
            try:
                content = response.read()
            finally:
                response.close()
            table_match = table_re.search(content)
            if table_match is None:
                # search() returns None when the page carries no result
                # table; report it and move on instead of crashing.
                print('page %d: result table not found, skipped' % num)
                continue
            tablecontent = table_match.group()
            print(tablecontent)
            id_regx = row_re.findall(tablecontent)
            print(id_regx)
            for v1, v2 in id_regx:
                f.write(v1 + ' ' + v2 + '\n')
# Script entry point: start the scrape only when this file is executed
# directly, so that importing it does not trigger any requests.
if __name__ == "__main__":
    main()
补充 Python 3 版本的代码如下:
#!/usr/bin/python
# -*-coding:utf-8 -*-
import urllib
import urllib.request
import re
def main(url='http://apply.sztb.gov.cn/apply/app/status/norm/person',
         current_range='201812', total_pageNo=705):
    """Scrape the Shenzhen car-quota lottery result pages into a text file.

    Python 3 version.  The query form is POSTed to ``url`` once per result
    page.  Every ``content_data`` row found in the ``ge2_content`` table
    yields one output line made of the two captured cells (a run of digits
    and the following cell -- presumably the application code and the
    applicant name), separated by a space.  Lines are appended, UTF-8
    encoded, to ``f<current_range>.txt`` in the current directory.

    Args:
        url: Address the query form is POSTed to.
        current_range: Value sent as ``issueNumber``; also part of the
            output file name.
        total_pageNo: Number of result pages; pages 1..total_pageNo
            (inclusive) are requested.
    """
    # Imported explicitly: the original relied on ``urllib.parse`` being
    # loaded as a side effect of ``import urllib.request``.
    from urllib.parse import urlencode
    from urllib.request import urlopen

    # Compile both patterns once instead of once per page.
    table_re = re.compile(r'<table\s*class="ge2_content".*?</table>', re.S)
    row_re = re.compile(
        r'<tr\s*class="content_data">.*?<td >(\d+)</td>.*?<td >(.*?)</td>.*?</tr>',
        re.S)
    values = {'pageNo': '1', 'issueNumber': current_range, 'applyCode': ''}
    # The page is decoded as UTF-8, so write it back as UTF-8 rather than
    # in the platform's default encoding; "with" closes the file on errors.
    with open('f' + current_range + '.txt', 'a', encoding='utf-8') as f:
        # range() excludes its end value, so +1 is needed to fetch the
        # last page as well.
        for num in range(1, total_pageNo + 1):
            values['pageNo'] = num
            data = urlencode(values).encode('utf-8')
            with urlopen(url, data) as response:
                content = response.read().decode('utf-8')
            table_match = table_re.search(content)
            if table_match is None:
                # search() returns None when the page carries no result
                # table; report it and move on instead of crashing.
                print('page %d: result table not found, skipped' % num)
                continue
            tablecontent = table_match.group()
            print(tablecontent)
            id_regx = row_re.findall(tablecontent)
            print(id_regx)
            for v1, v2 in id_regx:
                f.write(v1 + ' ' + v2 + '\n')
# Script entry point: start the scrape only when this file is executed
# directly, so that importing it does not trigger any requests.
if __name__ == "__main__":
    main()