"""Baidu stock-info scraper.

Workflow:
1. Fetch the list of stock codes from East Money (quote.eastmoney.com);
2. For each code, fetch its detail page from Baidu Stock (gupiao.baidu.com);
3. Parse the key figures and store them in a local text file.
"""
import requests
from bs4 import BeautifulSoup
import re
import bs4
import traceback
# 1. Fetch the page at the given URL; return its text, or None on failure.
def getHtmlText(url):
    """Download *url* and return its decoded HTML text.

    Returns None when the request fails, times out, or the server
    answers with a 4xx/5xx status, so callers can test the result
    with a simple truthiness check.
    """
    try:
        r = requests.get(url, timeout=30)
        # Raise for any error status; the except below turns it into None.
        # (The old `if status_code == 200` guard made this call redundant.)
        r.raise_for_status()
        # Sites often mislabel their charset; trust the content-derived guess.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare `except:` so real bugs still surface.
        return None
# 2. Collect stock codes (sh/sz + 6 digits) from the listing page into lst.
def getStockList(lst, stockURL):
    """Append every stock code found on *stockURL* to *lst* (mutated in place).

    Codes look like 'sh600000' / 'sz000001' and are extracted from the
    href attribute of every <a> tag on the page. Returns None.
    """
    html = getHtmlText(stockURL)
    if not html:
        # Download failed (getHtmlText returned None) -> nothing to parse.
        # The old code passed None straight into BeautifulSoup and crashed.
        return
    soup = BeautifulSoup(html, 'html.parser')
    code_pat = re.compile(r"[s][hz]\d{6}")  # hoisted out of the loop
    for a in soup.find_all('a'):
        # .get avoids the KeyError the old bare `except:` was papering over.
        href = a.attrs.get('href', '')
        m = code_pat.search(href)
        if m:
            lst.append(m.group())
# 3. For each stock code, fetch its detail page, parse the key figures,
#    and append them (one dict per line) to the file at fpath.
def getStockInfo(lst, stockURL, fpath):
    """Fetch and store the detail record for every stock code in *lst*.

    For each code, downloads stockURL + code + '.html', extracts the
    stock name and all <dt>/<dd> key-value pairs from the
    div.stock-info block, prints the dict, and appends it as a line
    to *fpath* (UTF-8). Parse errors are logged and skipped.
    """
    for stock in lst:
        url = stockURL + stock + ".html"
        html = getHtmlText(url)
        if not html:
            # getHtmlText returns None (not "") on failure; the old
            # `html == ""` guard never matched, so failed downloads
            # crashed inside BeautifulSoup and spammed tracebacks.
            continue
        try:
            soup = BeautifulSoup(html, 'html.parser')
            stockInfo = soup.find('div', class_='stock-info')
            if not isinstance(stockInfo, bs4.element.Tag):
                continue  # page layout changed or stock not found
            infoDict = {'股票名称': stockInfo.find_all(class_="bets-name")[0].text.split()[0]}
            keylist = stockInfo.find_all('dt')
            valuelist = stockInfo.find_all('dd')
            # dt/dd pairs form the key/value table; zip stays safe even
            # if the two lists differ in length (old indexing could
            # raise IndexError).
            for key_tag, val_tag in zip(keylist, valuelist):
                infoDict[key_tag.text] = val_tag.text
            print(infoDict)
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infoDict) + '\n')
        except Exception:
            # Narrowed from bare `except:`; best-effort per stock.
            traceback.print_exc()
            continue
def main():
    """Entry point: scrape the stock list, then fetch and store details."""
    list_url = "http://quote.eastmoney.com/stock_list.html"
    detail_url = "https://gupiao.baidu.com/stock/"
    out_path = "D://BaiduStockInfo.txt"
    codes = []                       # accumulates every discovered stock code
    getStockList(codes, list_url)    # step 1: harvest codes from East Money
    # step 2+3: pull each stock's detail page and persist it to out_path
    getStockInfo(codes, detail_url, out_path)


if __name__ == '__main__':
    main()