# http://www.cbrc.gov.cn/chinese/jrjg/index.html
# 爬取所有银行的银行名称和官网地址(如果没有官网就忽略),并写入数据库。
import re
from urllib.request import urlopen
from urllib import request
import pymysql
def getbank():
url = 'http://www.cbrc.gov.cn/chinese/jrjg/index.html'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0'
req = request.Request(url, headers={'User-Agent':user_agent})
content = urlopen(req).read().decode('utf-8')
print("正在爬取地址")
pattern = r'<a href="(?P<bankaddress>.+)" target="_blank" style="color:#08619D">\s*(?P<bankname>[\u4e00-\u9fa5]+)\s*</a>'
findbankadd = re.findall(pattern,content)
print(findbankadd)
conn = pymysql.connect(user='root',
password='971203', charset='utf8', autocommit=True)
cur = conn.cursor()
conn.select_db('bank')
create_sql = 'create table