使用 Python 2.7、urllib2、re
对http://www.txsec.com/inc1/gpdm.asp进行数据获取,见代码:
# -*- coding: utf-8 -*-
import urllib
import urllib2
import re
import csv
# URL the page data is fetched from.
post_url = 'http://www.txsec.com/inc1/gpdm.asp'
# Request headers: a desktop Chrome User-Agent plus an Origin header.
# NOTE(review): Origin names a different host than post_url -- confirm this
# is intentional.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
    'Origin': 'http://www.cninfo.com.cn',
}
try:
    request = urllib2.Request(post_url, headers=header)
    response = urllib2.urlopen(request)
    try:
        # The response body is decoded as GBK into a unicode string.
        pageCode = response.read().decode('gbk')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
except Exception as e:
    # NOTE(review): after a failed fetch pageCode is left undefined, so any
    # later use of it raises NameError -- decide whether to abort here.
    print(e)
pattern = re.compile('<tr .*?>.*?<td .*?>(.*?)</td>.*?<td .*?>(.*?)</td>.*?'+
'<td .*?>(.*?)</td>.*?<td .*?>(.*?)</td>.*?</tr>.*?'+'<tr>.*?<td .*?>(.*?)</td>.*?<td .*?>(.*?)</td>.*?'+
'<td .*?>(.*?)</td>.*?<td .*?>(.*?)</td&