Selenium 网页爬虫:读取基金代码列表文件 flist.txt。
flist.txt 每行一个基金代码(6 位数字)。
先输入起止日期进行查询,再抓取天天基金网上的基金历史净值。
fund3.py
# -*- coding: utf-8 -*-
import os, sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from datetime import date
class Fund(unittest.TestCase):
    """Scrape fund net-value tables from fund.eastmoney.com with Selenium.

    Fund codes are read from ``flist.txt`` (one 6-character code per line).
    For every code the history page ``/f10/jjjz_<code>.html`` is opened, the
    date filter is filled with ``start_date``/``end_date``, and the first four
    cells of each result row are appended to ``<YYYYMMDD>.txt`` (named after
    today's date), prefixed with the fund code.
    """

    # Date range typed into the page's date filter inputs.
    start_date = "2017-06-29"
    end_date = "2017-07-07"

    def setUp(self):
        """Load the fund list, start Chrome and open the output file."""
        today = date.today().strftime("%Y%m%d")
        self.base_url = "http://fund.eastmoney.com"
        self.verificationErrors = []
        self.accept_next_alert = True

        # Read the fund list before launching the browser: unittest does not
        # call tearDown() when setUp() raises, so a missing flist.txt must not
        # leave a Chrome process behind.
        self.flist = []  # fund list
        with open("flist.txt", 'r') as codes:
            for line in codes:
                code = line.strip()
                if len(code) == 6:
                    self.flist.append(code)

        # NOTE(review): webdriver.Chrome() is called without an explicit
        # driver path; presumably chromedriver is expected to be found through
        # this variable or PATH -- confirm.
        ch_driver = os.path.abspath(r"D:\selenium\chromedriver.exe")
        os.environ["webdriver.chrome.driver"] = ch_driver
        self.driver = webdriver.Chrome()
        # Cleanups run even if a later setUp step fails, and in LIFO order:
        # the output file is closed first, then the browser quits.
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(30)

        self.fp = open(today + '.txt', 'w')
        self.addCleanup(self.fp.close)
        print(today + '.txt')

    def _fill_input(self, element_id, value):
        """Clear the input with the given id and type *value* into it."""
        field = self.driver.find_element(By.ID, element_id)
        field.clear()
        field.send_keys(value)

    def test_fund(self):
        """Query every fund in the list and dump its table rows to the file."""
        driver = self.driver
        out = self.fp
        for code in self.flist:
            driver.get(self.base_url + "/f10/jjjz_%s.html" % (code))
            self._fill_input("lsjzSDate", self.start_date)
            self._fill_input("lsjzEDate", self.end_date)
            driver.find_element(By.CSS_SELECTOR, "input.search").click()
            # Fixed pause after submitting the query; presumably the table is
            # refreshed asynchronously -- confirm before replacing with a wait.
            time.sleep(1)
            try:
                container = driver.find_element(By.ID, "jztable")
                tables = container.find_elements(By.TAG_NAME, "table")
                bodies = tables[0].find_elements(By.TAG_NAME, "tbody")
                for row in bodies[0].find_elements(By.TAG_NAME, "tr"):
                    cells = row.find_elements(By.TAG_NAME, "td")
                    fields = [code] + [cell.text for cell in cells[0:4]]
                    # Build the whole line first so an error while reading a
                    # cell cannot leave a half-written row in the output.
                    out.write(' '.join(fields) + ' \n')
            except Exception as msg:
                # Best effort: report the problem and continue with the next
                # fund instead of aborting the whole run.
                print(msg)

    def is_element_present(self, how, what):
        """Return True if an element matching locator (how, what) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if switching to an alert does not raise."""
        try:
            self.driver.switch_to.alert
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when ``accept_next_alert`` is true, otherwise
        dismissed; the flag is reset to True afterwards in every case.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Fail the test if any verification errors were collected.

        The output file and the browser are released by the cleanups
        registered in setUp(), which run after this method.
        """
        self.assertEqual([], self.verificationErrors)
if __name__ == "__main__":
    # Executed as a script: let unittest's command-line runner load and run
    # the test cases defined in this module.
    unittest.main()
flist.txt 一行一个基金代码:6位数字

本文介绍了一个使用Selenium的Python脚本,该脚本能够从天天基金网上爬取特定日期范围内的基金净值数据。首先读取包含基金代码的文本文件,然后针对每个基金代码访问天天基金网并抓取历史净值。
9万+





