import requests
from bs4 import BeautifulSoup
import bs4
# Fetch the ranking page and collect the first four cells of every table row
# into ``ulist`` (one ``[td0, td1, td2, td3]`` list of cell strings per row).
url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html'
# A timeout keeps the script from hanging forever on an unresponsive server.
ret = requests.get(url=url, timeout=30)
# Fail early with a clear HTTP error instead of trying to parse an error page.
ret.raise_for_status()
# Fix the character encoding: use the encoding detected from the response body.
ret.encoding = ret.apparent_encoding
data = ret.text
# Parse the page text with the 'html.parser' backend.
soup = BeautifulSoup(data, 'html.parser')
# Get all direct children of the first 'tbody' tag.
tr_list = soup.find('tbody').children
ulist = []
for tr in tr_list:
    # Only handle children that are real tag objects (``.children`` can also
    # yield plain text nodes, which have no cells).
    if isinstance(tr, bs4.element.Tag):
        # Collect every 'td' inside this row.
        tds = tr('td')
        # Skip rows with fewer than four cells; indexing them would raise
        # IndexError.
        if len(tds) >= 4:
            ulist.append([tds[0].string, tds[1].string,
                          tds[2].string, tds[3].string])
def print_ulist(ulist, pro):
    """Print a title, a header row, and every row of *ulist* in region *pro*.

    Args:
        ulist: Iterable of rows. Each row is indexable; items 0-3 are printed
            under the header labels (rank, school, region, score) and item 2
            is compared against *pro*.
        pro: Region name. It is inserted into the title line and used to
            select which rows are printed.
    """
    print("中国2019{}地区最好大学排名".format(pro))
    # Column 1 is centred and padded with the fill character passed as
    # argument 4. A fill character is only valid together with an alignment
    # flag, hence the explicit '^'.
    tplt = "{0:10}\t{1:{4}^10}\t{2:10}\t{3:10}"
    # chr(12288) is U+3000 IDEOGRAPHIC SPACE, a full-width blank used as padding.
    fill = chr(12288)
    print(tplt.format('排名', '学校排名', '地区', '评分', fill))
    for u in ulist:
        # Filter on the requested region rather than a hard-coded one.
        if u[2] == pro:
            print(tplt.format(u[0], u[1], u[2], u[3], fill))
# Print the scraped rows for the '浙江' region.
print_ulist(ulist, '浙江')