- 花了一晚上的时间调试、练习，终于把这个程序改好了。
- 通过这次练习，更熟悉了 BeautifulSoup 库的用法，也练习了正则表达式的使用。
import requests
from bs4 import BeautifulSoup
import bs4
import traceback
import re
def getHTMLText(url, code="utf-8"):
    """Download a page over HTTP and return its body as decoded text.

    Args:
        url: Address of the page to fetch.
        code: Encoding used to decode the response body (default "utf-8").

    Returns:
        The decoded response text. If the request fails (connection
        error, timeout, invalid URL, or an error status reported by
        ``raise_for_status``), the literal string "网页访问失败" is
        returned instead of raising.
    """
    # Send a desktop-browser User-Agent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        # Set the caller-supplied encoding before reading .text so the
        # body is decoded with it.
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel string; unrelated
        # errors (KeyboardInterrupt, programming mistakes) propagate
        # instead of being reported as a failed page access.
        return "网页访问失败"
def getFundList(lst, fundURL):
html = getHTMLText(fundURL, "GB2312")
soup = BeautifulSoup(html, 'html.parser')
tr = soup.find_all('tr')
fo