python 爬虫Demo 下载每日股票数据

博主曾用Python做股票资料抓取项目,但效率低,跑一圈需几十分钟,仅适合做demo。后来用Java调用sino js函数,25秒就能收集一次全部股票资料并写入Redis。该Python demo涉及数据库操作、定时任务、爬虫等技能,还给出代码下载链接。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

以前写的Python抓取股票资料的项目,因为效率太低后来没有用了,用python跑完一圈大概要几十分钟,没法使用,但是当python的demo还是不错的(后来用java写调用sino js函数的,25秒收集一次全部股票的资料并写入redis,基本够用了),这个demo用到的技能包括

数据库操作,定时任务,用BeautifulSoup爬虫,python的字典,正则等

数据库储存的情况:

项目结构

可以在这里下载代码

https://download.youkuaiyun.com/download/howard789/11222353

定时任务main方法

import datetime
import schedule
import threading
import time

from src.ScrapyStockTradeInfo import collectDailyTradeInfo

def job1():
    print("job1 collectDailyTradeInfo start:", datetime.datetime.now())
    collectDailyTradeInfo();
    print("job1 collectDailyTradeInfo end:", datetime.datetime.now())

def job1_task():
    threading.Thread(target=job1).start()

def run():
    # 立即执行
    if (1==1):
        job1()

    # 定时执行
    if (1==2):
        schedule.every(10).seconds.do(job1)

    # 定时执行
    if (1==2):
        schedule.every().day.at("22:40").do(job1)


    while True:
        schedule.run_pending()
        time.sleep(1)

run()

主方法

#!user/bin/env python3
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup
import time
import re
import traceback
from src import Method, dbUtil

# url,html,soap,div
from src.Method import getDicModel, saveStockInfo
from src.dbUtil import Insert

BB=","
BB2="\',\'"

def _getFileName(market='sh'):
    name = time.strftime('%Y%m%d', time.localtime(time.time()))
    return name + market

def _getStockListFromDatabase(stock_list):
    sql='select stock_id_full from stock_list';
    result=dbUtil.Select(sql)
    for i in result:
        stock_list.append(i[0]);
    return stock_list;

def _getStockList(stock_list, stock_list_url):
    html = getHtml(stock_list_url)
    soup = BeautifulSoup(html, 'html.parser')
    a = soup.find_all('a')
    for i in a:
        try:
            href = i.attrs['href']
            tmp = re.findall(r'sh6\d{5}', href)
            if (len(tmp) > 0):
                stock_list.append(tmp[0])
            tmp2 = re.findall(r'sz\d{6}', href)
            if (len(tmp2) > 0):
                stock_list.append(tmp2[0])
        except:
            traceback.print_exc()


def _getStockInfo(stock_list, stock_info_url, outPutFilePath,tableName):
    # print(now)

    for stockNo in stock_list:
        try:
            url = stock_info_url + stockNo + '.html'
            print(url)
            html = getHtml(url)
            infoDic = {}
            if html == '':
                continue
            soap = BeautifulSoup(html, 'html.parser')
            stock_info = soap.find('div', attrs={'class': 'stock-info'})
            if (stock_info == None):
                continue

            keyList = stock_info.find_all('dt')
            valList = stock_info.find_all('dd')
            for i in range(len(keyList)):
                key = keyList[i].text
                value = valList[i].text.strip()
                infoDic[key] = value
            saveStockInfo(infoDic,stockNo,tableName)
            print('写入'+stockNo)
        except:
            traceback.print_exc()
            print('取得'+stockNo+'失败')


def getHtml(url):
    try:
        user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
        headers = {'User-Agent': user_agent}
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:

        traceback.print_exc()
        return '';

def collectDailyTradeInfo():
    print("start collectDailyTradeInfo")
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    print('p1')
    outPutFilePath = './' + _getFileName() + '.txt'
    stock_list = []
    # _getStockList(stock_list, stock_list_url)

    # 从数据库取得股票列表
    _getStockListFromDatabase(stock_list)

    # 打印列表
    print(stock_list)

    # 创建用于存放数据的新表
    tableName=Method.createTable();

    # 下载解析数据,并将数据写入表中
    _getStockInfo(stock_list, stock_info_url, outPutFilePath,tableName)




if __name__ == "__main__":
    collectDailyTradeInfo()







数据库方法

import pymysql


def getDB():
    db = pymysql.connect(host="localhost", port=3306, user="root", passwd="root", db="test", charset="utf8mb4")
    return db

# 关闭数据库连接
def _Close(db, cursor):
    cursor.close()
    db.close()

def _executeSql(sql):
    db = getDB()
    # 使用cursor()方法获取操作游标
    cursor = db.cursor()
    # SQL 删除语句
    # sql = "DELETE FROM EMPLOYEE WHERE AGE > '%d'" % (20)
    try:
        # 执行SQL语句
        cursor.execute(sql)
        # 提交修改
        db.commit()
    except:
        # 发生错误时回滚
        db.rollback()
    # 关闭连接
    _Close(db,cursor)


def Select(sql):
    db=getDB()
    cursor = db.cursor()
    cursor.execute(sql)
    # 获取所有记录列表
    results = cursor.fetchall()
    _Close(db,cursor)
    return results


def Count(sql):
    db=getDB()
    cursor = db.cursor()
    # sql = "SELECT * FROM main"
    cursor.execute(sql)
    cursor.close()
    db.close()
    # 获取所有记录列表
    results = cursor.fetchone()
    return results

def createTable(tableName):
    sql = "DROP TABLE IF EXISTS "+tableName;
    _executeSql(sql)
    sql=getTemplateCreateTableSql();
    sql=sql.replace("scrapy_example",tableName)
    _executeSql(sql)

# 增
def Insert(sql):
    db=getDB()
    cursor=db.cursor()
    # 执行SQL语句
    cursor.execute(sql)
    # 没有设置默认自动提交,需要主动提交,以保存所执行的语句
    db.commit()
    _Close(db,cursor)


# 删
def Delect(db, cursor):
    sql = "DELETE FROM main WHERE Name = '修改后的名字'"
    cursor.execute(sql)
    db.commit()

# 查
def Select(sql):
    db=getDB()
    cursor=db.cursor()
    cursor.execute(sql)
    # 获取所有记录列表
    results = cursor.fetchall()
    return results

# def insertStockInfo(sql):

def getTemplateCreateTableSql():
    sql = '''
        CREATE TABLE `scrapy_example` (
        `stockId` varchar(32) NOT NULL,
        `open`  varchar(32) DEFAULT NULL COMMENT '今开',
        `volumn`  varchar(32) DEFAULT NULL COMMENT '成交量',
         `high`  varchar(32) DEFAULT NULL COMMENT '最高',
          `limitUp`  varchar(32) DEFAULT NULL,
          `insidePlate`  varchar(32) DEFAULT NULL COMMENT '内盘',
          `turnover`  varchar(32) DEFAULT NULL COMMENT '成交额',
          `committee`  varchar(32) DEFAULT NULL COMMENT '委比',
          `liquidity`  varchar(32) DEFAULT NULL COMMENT '流通市值',
          `peMRQ` varchar(32) DEFAULT NULL COMMENT '市盈率MRQ',
          `eps` varchar(32) DEFAULT NULL COMMENT '每股收益',
          `equity` varchar(32) DEFAULT NULL COMMENT '总股本',
          `previousClose` varchar(32) DEFAULT NULL COMMENT '昨收',
          `turnoverRate` varchar(32) DEFAULT NULL COMMENT '换手率',
          `low` varchar(32) NOT NULL COMMENT '最低',
          `limitDown` varchar(32) DEFAULT NULL COMMENT '跌停',
          `outsidePlate` varchar(32) DEFAULT NULL COMMENT '外盘',
          `dayRange` varchar(32) DEFAULT NULL COMMENT '振幅',
          `qrr` varchar(32) DEFAULT NULL COMMENT '量比quantity relative ratio',
          `marketCap` varchar(32) DEFAULT NULL COMMENT '总市值',
          `pb` varchar(32) DEFAULT NULL COMMENT '市净率',
          `bps` varchar(32) DEFAULT NULL COMMENT '每股净资产',
          `tradableShare` varchar(32) DEFAULT NULL COMMENT '流通股本',
          `date` datetime NOT NULL
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8;'''
    return sql




def getTemplateCreateTableSql_backUp():
    sql = '''
        CREATE TABLE `scrapy_example` (
        `stockId` varchar(10) NOT NULL,
        `open` decimal(10,2) DEFAULT NULL COMMENT '今开',
        `volumn` decimal(20,2) DEFAULT NULL COMMENT '成交量',
         `high` decimal(10,2) DEFAULT NULL COMMENT '最高',
          `limitUp` decimal(10,2) DEFAULT NULL,
          `insidePlate` decimal(10,2) DEFAULT NULL COMMENT '内盘',
          `turnover` decimal(20,2) DEFAULT NULL COMMENT '成交额',
          `committee` decimal(10,2) DEFAULT NULL COMMENT '委比',
          `liquidity` decimal(20,2) DEFAULT NULL COMMENT '流通市值',
          `peMRQ` decimal(10,2) DEFAULT NULL COMMENT '市盈率MRQ',
          `eps` decimal(10,2) DEFAULT NULL COMMENT '每股收益',
          `equity` decimal(20,2) DEFAULT NULL COMMENT '总股本',
          `previousClose` decimal(10,2) DEFAULT NULL COMMENT '昨收',
          `turnoverRate` decimal(10,2) DEFAULT NULL COMMENT '换手率',
          `low` decimal(10,2) NOT NULL COMMENT '最低',
          `limitDown` decimal(10,2) DEFAULT NULL COMMENT '跌停',
          `outsidePlate` decimal(10,2) DEFAULT NULL COMMENT '外盘',
          `dayRange` decimal(10,2) DEFAULT NULL COMMENT '振幅',
          `qrr` decimal(10,2) DEFAULT NULL COMMENT '量比quantity relative ratio',
          `marketCap` decimal(20,2) DEFAULT NULL COMMENT '总市值',
          `pb` decimal(10,2) DEFAULT NULL COMMENT '市净率',
          `bps` double(10,2) DEFAULT NULL COMMENT '每股净资产',
          `tradableShare` double(20,0) DEFAULT NULL COMMENT '流通股本',
          `date` datetime NOT NULL
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8;'''
    return sql

 

字符串操作

import time

from src import dbUtil
from src.dbUtil import Insert


def _parseWanShou(str):
    if(str.strip()=='--'):
        return 0
    # str=str.replace('万手','').strip()
    if('万手' in str):
        str=str.replace('万手','').strip()
    if('手' in str):
        str=str.replace('手','').strip()
    return float(str)

def _parseYi(str):
    if(str.strip()=='--'):
        return 0
    if('亿' in str):
        str=str.replace('亿','').strip()
    if('万' in str):
        str=str.replace('万','').strip()
    return float(str)

def _parsePercent(str):
    if(str.strip()=='--'):
        return 0
    str=str.replace('%','')
    return float(str)

def _parseNum(str):
    if(str.strip()=='--'):
        return 0
    return float(str)

def getDicModel(dic,stockNo):
    rtnDic={}
    rtnDic['stockId']=stockNo
    rtnDic['open']=_parseNum(dic['今开'])
    rtnDic['volumn']=_parseWanShou(dic['成交量'])
    rtnDic['high']=_parseNum(dic['最高'])
    rtnDic['limitUp']=_parseNum(dic['涨停'])
    rtnDic['insidePlate']=_parseWanShou(dic['内盘'])
    rtnDic['turnover']=_parseYi(dic['成交额'])
    rtnDic['committee']=_parsePercent(dic['委比'])
    rtnDic['liquidity']=_parseYi(dic['流通市值'])
    rtnDic['peMRQ']=_parseNum(dic['市盈率MRQ'])
    rtnDic['eps']=_parseNum(dic['每股收益'])
    rtnDic['equity']=_parseYi(dic['总股本'])
    rtnDic['previousClose']=_parseNum(dic['昨收'])
    rtnDic['turnoverRate']=_parsePercent(dic['换手率'])
    rtnDic['low']=_parseNum(dic['最低'])
    rtnDic['limitDown']=_parseNum(dic['跌停'])
    rtnDic['outsidePlate']=_parseWanShou(dic['外盘'])
    rtnDic['dayRange']=_parsePercent(dic['振幅'])
    rtnDic['qrr']=_parseNum(dic['量比'])
    rtnDic['marketCap']=_parseYi(dic['总市值'])
    rtnDic['pb']=_parseNum(dic['市净率'])
    rtnDic['bps']=_parseNum(dic['每股净资产'])
    rtnDic['tradableShare']=_parseYi(dic['流通股本'])
    rtnDic['datetime']=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    return rtnDic

def _getSql(infoDic,tableName):
    BB=","
    sql="INSERT INTO "+ tableName+" VALUES (\'"+infoDic['stockId']+"\',"+str(infoDic['open'])+BB+str(infoDic['volumn'])+BB+ str(infoDic['high'])+BB+ str(infoDic['limitUp'])+BB+ str(infoDic['insidePlate'])+BB+ str(infoDic['turnover'])+BB+ str(infoDic['committee'])+BB+str(infoDic['liquidity'])+BB+ str(infoDic['peMRQ'])+BB+ str(infoDic['eps'])+BB+ str(infoDic['equity'])+BB+str(infoDic['previousClose'])+BB+ str(infoDic['turnoverRate'])+BB+ str(infoDic['low'])+BB+ str(infoDic['limitUp'])+BB+ str(infoDic['outsidePlate'])+BB+ str(infoDic['dayRange'])+BB+ str(infoDic['qrr'])+BB+ str(infoDic['marketCap'])+BB+ str(infoDic['pb'])+BB+ str(infoDic['bps'])+BB+ str(infoDic['tradableShare'])+",\'"+infoDic['datetime']+"\')";
    return sql

# {'stockId': 'sz300059', 'open': 12.7, 'volumn': 164.58, 'high': 13.28,
#  'limitUp': 13.9, 'insidePlate': 77.37, 'turnover': 21.35, 'committee': 67.91,
#  'liquidity': 535.38, 'peMRQ': 55.32, 'eps': 0.07, 'equity': 51.68,
#  'previousClose': 12.64, 'turnoverRate': 3.96, 'low': 12.61,
#  'limitDown': 11.38, 'outsidePlate': 87.21, 'dayRange': 5.3,
#  'qrr': 3.82, 'marketCap': 666.18, 'pb': 4.02, 'bps': 3.21, 'tradableShare': 41.53,
#  'datetime': '2018-07-07 00:31:49'}


def saveStockInfo(dic,stockNo,tableName):
    infoDic=getDicModel(dic,stockNo)
    sql=_getSql(infoDic,tableName)
    Insert(sql)



def createTable():
    tableName=time.strftime('%Y%m%d', time.localtime(time.time()))
    tableName='z_daily_trade_info_'+tableName
    dbUtil.createTable(tableName);
    return tableName

 

解析数据

import time

from src import dbUtil
from src.dbUtil import Insert


def _parseWanShou(str):
    if(str.strip()=='--'):
        return 0
    # str=str.replace('万手','').strip()
    if('万手' in str):
        str=str.replace('万手','').strip()
    if('手' in str):
        str=str.replace('手','').strip()
    return float(str)

def _parseYi(str):
    if(str.strip()=='--'):
        return 0
    if('亿' in str):
        str=str.replace('亿','').strip()
    if('万' in str):
        str=str.replace('万','').strip()
    return float(str)

def _parsePercent(str):
    if(str.strip()=='--'):
        return 0
    str=str.replace('%','')
    return float(str)

def _parseNum(str):
    if(str.strip()=='--'):
        return 0
    return float(str)

def getDicModel(dic,stockNo):
    rtnDic={}
    rtnDic['stockId']=stockNo
    rtnDic['open']=_parseNum(dic['今开'])
    rtnDic['volumn']=_parseWanShou(dic['成交量'])
    rtnDic['high']=_parseNum(dic['最高'])
    rtnDic['limitUp']=_parseNum(dic['涨停'])
    rtnDic['insidePlate']=_parseWanShou(dic['内盘'])
    rtnDic['turnover']=_parseYi(dic['成交额'])
    rtnDic['committee']=_parsePercent(dic['委比'])
    rtnDic['liquidity']=_parseYi(dic['流通市值'])
    rtnDic['peMRQ']=_parseNum(dic['市盈率MRQ'])
    rtnDic['eps']=_parseNum(dic['每股收益'])
    rtnDic['equity']=_parseYi(dic['总股本'])
    rtnDic['previousClose']=_parseNum(dic['昨收'])
    rtnDic['turnoverRate']=_parsePercent(dic['换手率'])
    rtnDic['low']=_parseNum(dic['最低'])
    rtnDic['limitDown']=_parseNum(dic['跌停'])
    rtnDic['outsidePlate']=_parseWanShou(dic['外盘'])
    rtnDic['dayRange']=_parsePercent(dic['振幅'])
    rtnDic['qrr']=_parseNum(dic['量比'])
    rtnDic['marketCap']=_parseYi(dic['总市值'])
    rtnDic['pb']=_parseNum(dic['市净率'])
    rtnDic['bps']=_parseNum(dic['每股净资产'])
    rtnDic['tradableShare']=_parseYi(dic['流通股本'])
    rtnDic['datetime']=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    return rtnDic

def _getSql(infoDic,tableName):
    BB=","
    sql="INSERT INTO "+ tableName+" VALUES (\'"+infoDic['stockId']+"\',"+str(infoDic['open'])+BB+str(infoDic['volumn'])+BB+ str(infoDic['high'])+BB+ str(infoDic['limitUp'])+BB+ str(infoDic['insidePlate'])+BB+ str(infoDic['turnover'])+BB+ str(infoDic['committee'])+BB+str(infoDic['liquidity'])+BB+ str(infoDic['peMRQ'])+BB+ str(infoDic['eps'])+BB+ str(infoDic['equity'])+BB+str(infoDic['previousClose'])+BB+ str(infoDic['turnoverRate'])+BB+ str(infoDic['low'])+BB+ str(infoDic['limitUp'])+BB+ str(infoDic['outsidePlate'])+BB+ str(infoDic['dayRange'])+BB+ str(infoDic['qrr'])+BB+ str(infoDic['marketCap'])+BB+ str(infoDic['pb'])+BB+ str(infoDic['bps'])+BB+ str(infoDic['tradableShare'])+",\'"+infoDic['datetime']+"\')";
    return sql

# {'stockId': 'sz300059', 'open': 12.7, 'volumn': 164.58, 'high': 13.28,
#  'limitUp': 13.9, 'insidePlate': 77.37, 'turnover': 21.35, 'committee': 67.91,
#  'liquidity': 535.38, 'peMRQ': 55.32, 'eps': 0.07, 'equity': 51.68,
#  'previousClose': 12.64, 'turnoverRate': 3.96, 'low': 12.61,
#  'limitDown': 11.38, 'outsidePlate': 87.21, 'dayRange': 5.3,
#  'qrr': 3.82, 'marketCap': 666.18, 'pb': 4.02, 'bps': 3.21, 'tradableShare': 41.53,
#  'datetime': '2018-07-07 00:31:49'}


def saveStockInfo(dic,stockNo,tableName):
    infoDic=getDicModel(dic,stockNo)
    sql=_getSql(infoDic,tableName)
    Insert(sql)



# '成交额': '21.35亿',turnover
# '委比': '67.91%',committee
# '流通市值': '535.38亿',liquidity
# '市盈率MRQ': '55.32',peMRQ
# '每股收益': '0.07',eps
# '总股本': '51.68亿',equity
# '昨收': '12.64', previousClose
# '换手率': '3.96%', turnoverRate
# '最低': '12.61', low
# '跌停': '11.38', limitDown
# '外盘': '87.21万手', outsidePlate
# '振幅': '5.30%', dayRange
# '量比': '3.82', qrr
# '总市值': '666.18亿', marketCap
# '市净率': '4.02', pb
# '每股净资产': '3.21', equityPS
# '流通股本': '41.53亿'tradableShare}



def createTable():
    tableName=time.strftime('%Y%m%d', time.localtime(time.time()))
    tableName='z_daily_trade_info_'+tableName
    dbUtil.createTable(tableName);
    return tableName

Stock List的 sql  Demo


SET FOREIGN_KEY_CHECKS=0;


DROP TABLE IF EXISTS `stock_list`;
CREATE TABLE `stock_list` (
  `stock_id_full` varchar(32) DEFAULT NULL,
  `stock_id_number` varchar(32) DEFAULT NULL,
  `market` varchar(32) DEFAULT NULL,
  `stock_name` varchar(32) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


INSERT INTO `stock_list` VALUES ('sh600000', '600000', 'sh', '浦发银行');
INSERT INTO `stock_list` VALUES ('sh600001', '600001', 'sh', '邯郸钢铁');
INSERT INTO `stock_list` VALUES ('sh600002', '600002', 'sh', '齐鲁石化');
INSERT INTO `stock_list` VALUES ('sh600003', '600003', 'sh', 'ST东北高');
INSERT INTO `stock_list` VALUES ('sh600004', '600004', 'sh', '白云机场');
INSERT INTO `stock_list` VALUES ('sh600005', '600005', 'sh', '武钢股份');
INSERT INTO `stock_list` VALUES ('sh600006', '600006', 'sh', '东风汽车');

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值