Python股票历史数据预处理(二)
从网上下载的股票历史数据往往不能直接使用,需要转换为自己所需要的格式。下面以Python代码编程为工具,将csv文件中存储的股票历史数据提取出来并处理。处理得到的结果是30天涨跌幅子数据库,下载地址为:http://download.youkuaiyun.com/detail/suiyingy/9688605。
主要步骤有(Python csv数据读写):
- #csv文件读取股票历史涨跌幅数据;
- #随机选取30个历史涨跌幅数据;
- #构建自己的数据库;
- #将处理结果保存为新的csv文件。
具体代码如下:
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 17 23:04:33 2016
csv格式股票历史涨跌幅数据处理
@author: yehxqq1513760265
"""
import numpy as np
import pandas as pd
import random
import csv
import sys
# Python 2-only workaround: reloading sys makes setdefaultencoding available
# again so the implicit str/unicode codec can be switched to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 and
# sys.setdefaultencoding does not exist there; this fails on Python 3.
reload ( sys )
sys. setdefaultencoding ( 'utf-8' )
'''
- 加载csv格式数据
'''
def loadCSVfile1(datafile):
    """Load a CSV file into a numpy array of its raw cells.

    Args:
        datafile: Path of the CSV file to read.

    Returns:
        A numpy array built from the list of parsed rows. Cells are kept as
        the strings produced by ``csv.reader``; no numeric conversion is done.
    """
    # Named `csv_handle` rather than `file` so the Python 2 builtin `file`
    # is not shadowed inside this function.
    with open(datafile) as csv_handle:
        filelist = list(csv.reader(csv_handle))
    return np.array(filelist)
#数据处理
#随机选取30个历史涨跌幅数据
#构建自己的数据库
def dataProcess(dataArr, subLen):
    """Randomly sample fixed-length windows from each data column.

    The last row of ``dataArr`` is read as a per-column entry count for the
    columns from index 2 onward. For each such column,
    ``count // (2 * subLen)`` start rows are drawn without replacement and the
    ``subLen`` consecutive cells beginning at each start row are collected.

    Args:
        dataArr: 2-D numpy array. Its last row must hold values accepted by
            ``int()`` in columns 2 and up.
        subLen: Length of every sampled window.

    Returns:
        A numpy array with one sampled window per row, or an empty array when
        no column is long enough to yield a window.
    """
    # Take the shape from the argument itself; reading a module-level `data`
    # here would break any caller that is not the bundled script.
    totLen, totWid = np.shape(dataArr)
    print("%d %d" % (totLen, totWid))
    lenArr = dataArr[totLen - 1, 2:totWid]
    dataOut = []
    # Counts start at column 2, so the column index is enumerated from 2.
    for columnCnt, lenData in enumerate(lenArr, 2):
        validLen = int(lenData)
        # Floor division keeps the count an int on Python 2 and Python 3;
        # random.sample rejects a float sample size.
        N60 = validLen // (2 * subLen)
        print(N60)
        if N60 > 0:
            # Presumably the validLen entries sit directly above the count
            # row; the upper bound keeps each window clear of that last row.
            randIndex = random.sample(
                range(totLen - validLen - 1, totLen - subLen), N60)
            for i in randIndex:
                dataOut.append(dataArr[i:(i + subLen), columnCnt])
    dataOut = np.array(dataOut)
    return dataOut
if __name__ == "__main__":
    # Script entry point: load the raw CSV, sample 30-row windows from it and
    # write the sampled windows to a new CSV file.
    datafile = "00100 (3).csv"
    data = loadCSVfile1(datafile)
    dataOut = dataProcess(data, 30)
    # Save the processed result. The context manager closes the file even if
    # writing fails. Binary mode is what the Python 2 csv module expects; on
    # Python 3 this would need text mode with newline='' instead.
    with open('csvtest.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(dataOut)
"""
Created on Thu Nov 17 23:04:33 2016
csv格式股票历史涨跌幅数据处理
@author: yehxqq1513760265
"""
import numpy as np
import pandas as pd
import random
import csv
import sys
# Python 2-only workaround: reloading sys makes setdefaultencoding available
# again so the implicit str/unicode codec can be switched to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 and
# sys.setdefaultencoding does not exist there; this fails on Python 3.
reload ( sys )
sys. setdefaultencoding ( 'utf-8' )
'''
- 加载csv格式数据
'''
def loadCSVfile1(datafile):
    """Load a CSV file into a numpy array of its raw cells.

    Args:
        datafile: Path of the CSV file to read.

    Returns:
        A numpy array built from the list of parsed rows. Cells are kept as
        the strings produced by ``csv.reader``; no numeric conversion is done.
    """
    # Named `csv_handle` rather than `file` so the Python 2 builtin `file`
    # is not shadowed inside this function.
    with open(datafile) as csv_handle:
        filelist = list(csv.reader(csv_handle))
    return np.array(filelist)
#数据处理
#随机选取30个历史涨跌幅数据
#构建自己的数据库
def dataProcess(dataArr, subLen):
    """Randomly sample fixed-length windows from each data column.

    The last row of ``dataArr`` is read as a per-column entry count for the
    columns from index 2 onward. For each such column,
    ``count // (2 * subLen)`` start rows are drawn without replacement and the
    ``subLen`` consecutive cells beginning at each start row are collected.

    Args:
        dataArr: 2-D numpy array. Its last row must hold values accepted by
            ``int()`` in columns 2 and up.
        subLen: Length of every sampled window.

    Returns:
        A numpy array with one sampled window per row, or an empty array when
        no column is long enough to yield a window.
    """
    # Take the shape from the argument itself; reading a module-level `data`
    # here would break any caller that is not the bundled script.
    totLen, totWid = np.shape(dataArr)
    print("%d %d" % (totLen, totWid))
    lenArr = dataArr[totLen - 1, 2:totWid]
    dataOut = []
    # Counts start at column 2, so the column index is enumerated from 2.
    for columnCnt, lenData in enumerate(lenArr, 2):
        validLen = int(lenData)
        # Floor division keeps the count an int on Python 2 and Python 3;
        # random.sample rejects a float sample size.
        N60 = validLen // (2 * subLen)
        print(N60)
        if N60 > 0:
            # Presumably the validLen entries sit directly above the count
            # row; the upper bound keeps each window clear of that last row.
            randIndex = random.sample(
                range(totLen - validLen - 1, totLen - subLen), N60)
            for i in randIndex:
                dataOut.append(dataArr[i:(i + subLen), columnCnt])
    dataOut = np.array(dataOut)
    return dataOut
if __name__ == "__main__":
    # Script entry point: load the raw CSV, sample 30-row windows from it and
    # write the sampled windows to a new CSV file.
    datafile = "00100 (3).csv"
    data = loadCSVfile1(datafile)
    dataOut = dataProcess(data, 30)
    # Save the processed result. The context manager closes the file even if
    # writing fails. Binary mode is what the Python 2 csv module expects; on
    # Python 3 this would need text mode with newline='' instead.
    with open('csvtest.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(dataOut)