Chapter8.金融时间序列
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
mpl.rcParams['font.sans-serif'] = ['KaiTi']
mpl.rcParams['font.serif'] = ['KaiTi']
mpl.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题,或者转换负号为字符串
sns.set_style("darkgrid",{"font.sans-serif":['KaiTi', 'Arial']})
一、文章结构
1)经济数据
2)移动统计量,如:移动平均
3)相关性分析
4)高频数据
ps:本章节使用pandas 可以阅读《python for analysis》进一步了解
二、经济数据
#使用pandas读取数据更为方便
data = pd.read_csv(r"C:\Users\Administrator\Desktop\source\tr_eikon_eod_data.csv",index_col=0,parse_dates=True)
data #index_col 指定索引列为第几列 parse_dates 是否将类似时间的文本数据,解析为 date数据
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-01-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.4323 | 1096.35 | NaN | NaN |
2010-01-04 | 30.572827 | 30.950 | 20.88 | 133.90 | 173.08 | 113.33 | 1132.99 | 20.04 | 1.4411 | 1120.00 | 47.71 | 109.80 |
2010-01-05 | 30.625684 | 30.960 | 20.87 | 134.69 | 176.14 | 113.63 | 1136.52 | 19.35 | 1.4368 | 1118.65 | 48.17 | 109.70 |
2010-01-06 | 30.138541 | 30.770 | 20.80 | 132.25 | 174.26 | 113.71 | 1137.14 | 19.16 | 1.4412 | 1138.50 | 49.34 | 111.51 |
2010-01-07 | 30.082827 | 30.452 | 20.60 | 130.00 | 177.67 | 114.19 | 1141.69 | 19.06 | 1.4318 | 1131.90 | 49.10 | 110.82 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-06-25 | 182.170000 | 98.390 | 50.71 | 1663.15 | 221.54 | 271.00 | 2717.07 | 17.33 | 1.1702 | 1265.00 | 22.01 | 119.89 |
2018-06-26 | 184.430000 | 99.080 | 49.67 | 1691.09 | 221.58 | 271.60 | 2723.06 | 15.92 | 1.1645 | 1258.64 | 21.95 | 119.26 |
2018-06-27 | 184.160000 | 97.540 | 48.76 | 1660.51 | 220.18 | 269.35 | 2699.63 | 17.91 | 1.1552 | 1251.62 | 21.81 | 118.58 |
2018-06-28 | 185.500000 | 98.630 | 49.25 | 1701.45 | 223.42 | 270.89 | 2716.31 | 16.85 | 1.1567 | 1247.88 | 21.93 | 118.22 |
2018-06-29 | 185.110000 | 98.610 | 49.71 | 1699.80 | 220.57 | 271.28 | 2718.37 | 16.09 | 1.1683 | 1252.25 | 22.31 | 118.65 |
2216 rows × 12 columns
data.head()
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-01-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.4323 | 1096.35 | NaN | NaN |
2010-01-04 | 30.572827 | 30.950 | 20.88 | 133.90 | 173.08 | 113.33 | 1132.99 | 20.04 | 1.4411 | 1120.00 | 47.71 | 109.80 |
2010-01-05 | 30.625684 | 30.960 | 20.87 | 134.69 | 176.14 | 113.63 | 1136.52 | 19.35 | 1.4368 | 1118.65 | 48.17 | 109.70 |
2010-01-06 | 30.138541 | 30.770 | 20.80 | 132.25 | 174.26 | 113.71 | 1137.14 | 19.16 | 1.4412 | 1138.50 | 49.34 | 111.51 |
2010-01-07 | 30.082827 | 30.452 | 20.60 | 130.00 | 177.67 | 114.19 | 1141.69 | 19.06 | 1.4318 | 1131.90 | 49.10 | 110.82 |
data.plot(figsize=(10,12),subplots=True) #横坐标自动按年划分,十分智能化
array([<matplotlib.axes._subplots.AxesSubplot object at 0x0F134390>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0F140690>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EF48AF0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EF65D90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EEB2FF0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EECFCF0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EEDDF50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EF071D0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EF07750>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EF78A70>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EFB7E70>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0EFD5B30>],
dtype=object)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-tmrZRX3H-1581857844288)(output_9_1.png)]
instruments = ['Apple Stock', 'Microsoft Stock',
'Intel Stock', 'Amazon Stock', 'Goldman Sachs Stock',
'SPDR S&P 500 ETF Trust', 'S&P 500 Index',
'VIX Volatility Index', 'EUR/USD Exchange Rate',
'Gold Price', 'VanEck Vectors Gold Miners ETF',
'SPDR Gold Trust']
#将列名与实际名字一一对应,使用zip函数
for ric,name in zip(data.columns,instruments):
print("{:8s}|{}".format(ric,name)) #foramt的正则格式: 8个长度的space
AAPL.O |Apple Stock
MSFT.O |Microsoft Stock
INTC.O |Intel Stock
AMZN.O |Amazon Stock
GS.N |Goldman Sachs Stock
SPY |SPDR S&P 500 ETF Trust
.SPX |S&P 500 Index
.VIX |VIX Volatility Index
EUR= |EUR/USD Exchange Rate
XAU= |Gold Price
GDX |VanEck Vectors Gold Miners ETF
GLD |SPDR Gold Trust
概括型统计量
data.info() #数据大概信息
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2216 entries, 2010-01-01 to 2018-06-29
Data columns (total 12 columns):
AAPL.O 2138 non-null float64
MSFT.O 2138 non-null float64
INTC.O 2138 non-null float64
AMZN.O 2138 non-null float64
GS.N 2138 non-null float64
SPY 2138 non-null float64
.SPX 2138 non-null float64
.VIX 2138 non-null float64
EUR= 2216 non-null float64
XAU= 2211 non-null float64
GDX 2138 non-null float64
GLD 2138 non-null float64
dtypes: float64(12)
memory usage: 225.1 KB
data.describe() #数据的统计信息
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 2138.000000 | 2138.000000 | 2138.000000 | 2138.000000 | 2138.000000 | 2138.000000 | 2138.000000 | 2138.000000 | 2216.000000 | 2211.000000 | 2138.000000 | 2138.00000 |
mean | 93.455973 | 44.561115 | 29.364192 | 480.461251 | 170.216221 | 180.323029 | 1802.713106 | 17.027133 | 1.248587 | 1349.014130 | 33.566525 | 130.08659 |
std | 40.551559 | 19.527712 | 8.172160 | 372.307818 | 42.483935 | 48.190071 | 483.337146 | 5.883428 | 0.113711 | 188.745856 | 15.169564 | 18.78359 |
min | 27.435687 | 23.010000 | 17.665000 | 108.610000 | 87.700000 | 102.200000 | 1022.580000 | 9.140000 | 1.038500 | 1051.360000 | 12.470000 | 100.50000 |
25% | 60.294225 | 28.566875 | 22.510000 | 213.595000 | 146.607500 | 133.987500 | 1338.567500 | 13.072500 | 1.131500 | 1221.525000 | 22.142500 | 117.40250 |
50% | 90.550624 | 39.660000 | 27.330000 | 322.065000 | 164.430000 | 186.320000 | 1863.080000 | 15.585000 | 1.270250 | 1292.610000 | 25.625000 | 124.00500 |
75% | 117.235000 | 54.372500 | 34.710000 | 698.850000 | 192.132500 | 210.987500 | 2108.942500 | 19.067500 | 1.345400 | 1428.240000 | 48.342500 | 139.00250 |
max | 193.980000 | 102.490000 | 57.080000 | 1750.080000 | 273.380000 | 286.580000 | 2872.870000 | 48.000000 | 1.482600 | 1898.990000 | 66.630000 | 184.59000 |
#aggregate,可以缩写为agg 用处比较广泛,参数可以是自定义函数
data.agg([np.min,np.max,np.mean]) #内置的统计函数
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
amin | 27.435687 | 23.010000 | 17.665000 | 108.610000 | 87.700000 | 102.200000 | 1022.580000 | 9.140000 | 1.038500 | 1051.36000 | 12.470000 | 100.50000 |
amax | 193.980000 | 102.490000 | 57.080000 | 1750.080000 | 273.380000 | 286.580000 | 2872.870000 | 48.000000 | 1.482600 | 1898.99000 | 66.630000 | 184.59000 |
mean | 93.455973 | 44.561115 | 29.364192 | 480.461251 | 170.216221 | 180.323029 | 1802.713106 | 17.027133 | 1.248587 | 1349.01413 | 33.566525 | 130.08659 |
#比如:我想自定义一个统计量,如极差
def my_jicha(arr):
return arr.max() - arr.min()
data.agg(my_jicha)
AAPL.O 166.544313
MSFT.O 79.480000
INTC.O 39.415000
AMZN.O 1641.470000
GS.N 185.680000
SPY 184.380000
.SPX 1850.290000
.VIX 38.860000
EUR= 0.444100
XAU= 847.630000
GDX 54.160000
GLD 84.090000
dtype: float64
一段时间内变化
data.diff().mean() #n阶差分,默认值为1
#这个的实际含义:一段时间内,数据与前一数据相比产生的变化,对这个变化求平均;
#狭义理解一下,取绝对值后,数据越大,波动越大;若以股票为例,+代表涨,-代表跌
AAPL.O 0.064737
MSFT.O 0.031246
INTC.O 0.013540
AMZN.O 0.706608
GS.N 0.028224
SPY 0.072103
.SPX 0.732659
.VIX -0.019583
EUR= -0.000119
XAU= 0.041887
GDX -0.015071
GLD -0.003455
dtype: float64
#数值大小往往不具参考意义,我们更喜欢比率的变化
#所以,引入了pct_change 来表示n期的变化比率
bar_data = data.pct_change().mean()
bar_data
AAPL.O 0.000936
MSFT.O 0.000621
INTC.O 0.000499
AMZN.O 0.001332
GS.N 0.000242
SPY 0.000436
.SPX 0.000438
.VIX 0.002888
EUR= -0.000075
XAU= 0.000109
GDX -0.000072
GLD 0.000085
dtype: float64
bar_data.plot(kind="bar") #个人觉得:pd.plot更为方便
<matplotlib.axes._subplots.AxesSubplot at 0x11f83750>
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-UmZxnf23-1581857844293)(output_23_1.png)]
#进行取对差分
rets = np.log(data/data.shift(1)) #由log性质可知,这是将数据取对之后,再进行差分
rets.cumsum().apply(np.exp).plot(figsize=(12,10)) #Q:实际意义是什么?
<matplotlib.axes._subplots.AxesSubplot at 0x13337a30>
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-EnIhSL1o-1581857844294)(output_26_1.png)]
重抽样
#改变时间间隔,比如:原时间序列时间间隔为日,改变为周;原间隔为周,改变为年
data.head()
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-01-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.4323 | 1096.35 | NaN | NaN |
2010-01-04 | 30.572827 | 30.950 | 20.88 | 133.90 | 173.08 | 113.33 | 1132.99 | 20.04 | 1.4411 | 1120.00 | 47.71 | 109.80 |
2010-01-05 | 30.625684 | 30.960 | 20.87 | 134.69 | 176.14 | 113.63 | 1136.52 | 19.35 | 1.4368 | 1118.65 | 48.17 | 109.70 |
2010-01-06 | 30.138541 | 30.770 | 20.80 | 132.25 | 174.26 | 113.71 | 1137.14 | 19.16 | 1.4412 | 1138.50 | 49.34 | 111.51 |
2010-01-07 | 30.082827 | 30.452 | 20.60 | 130.00 | 177.67 | 114.19 | 1141.69 | 19.06 | 1.4318 | 1131.90 | 49.10 | 110.82 |
data.resample("3W",label="right").last() #间隔为3周
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-01-03 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.4323 | 1096.35 | NaN | NaN |
2010-01-24 | 28.249972 | 28.9600 | 19.91 | 121.43 | 154.12 | 109.210 | 1091.76 | 27.31 | 1.4137 | 1092.60 | 43.790 | 107.17 |
2010-02-14 | 28.625686 | 27.9300 | 20.43 | 119.66 | 153.93 | 108.040 | 1075.51 | 22.73 | 1.3615 | 1092.40 | 43.940 | 107.04 |
2010-03-07 | 31.278540 | 28.5875 | 20.79 | 128.91 | 167.18 | 114.250 | 1138.70 | 17.42 | 1.3621 | 1131.65 | 46.430 | 110.81 |
2010-03-28 | 32.985681 | 29.6600 | 22.24 | 135.06 | 172.87 | 116.582 | 1166.59 | 17.77 | 1.3410 | 1105.60 | 43.712 | 108.59 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-04-15 | 174.730000 | 93.0800 | 51.86 | 1430.79 | 255.92 | 265.150 | 2656.30 | 17.41 | 1.2329 | 1345.01 | 22.890 | 127.45 |
2018-05-06 | 183.830000 | 95.1600 | 52.78 | 1580.95 | 234.94 | 266.020 | 2663.42 | 14.77 | 1.1958 | 1314.65 | 22.650 | 124.54 |
2018-05-27 | 188.580000 | 98.3600 | 55.44 | 1610.15 | 235.01 | 272.150 | 2721.33 | 13.22 | 1.1650 | 1301.17 | 22.310 | 123.21 |
2018-06-17 | 188.840000 | 100.1300 | 55.11 | 1715.97 | 231.92 | 277.130 | 2779.66 | 11.98 | 1.1607 | 1279.00 | 22.230 | 121.34 |
2018-07-08 | 185.110000 | 98.6100 | 49.71 | 1699.80 | 220.57 | 271.280 | 2718.37 | 16.09 | 1.1683 | 1252.25 | 22.310 | 118.65 |
149 rows × 12 columns
data.resample("1m",label="left",closed="left").last() #间隔为1个月
AAPL.O | MSFT.O | INTC.O | AMZN.O | GS.N | SPY | .SPX | .VIX | EUR= | XAU= | GDX | GLD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2009-12-31 | 27.437544 | 28.180 | 19.40 | 125.410 | 148.72 | 107.3900 | 1073.87 | 24.62 | 1.3862 | 1081.05 | 40.72 | 105.960 |
2010-01-31 | 29.231399 | 28.670 | 20.53 | 118.400 | 156.35 | 110.7400 | 1104.49 | 19.50 | 1.3625 | 1116.10 | 43.89 | 109.430 |
2010-02-28 | 33.692109 | 29.770 | 22.34 | 136.579 | 171.38 | 117.4000 | 1173.27 | 17.13 | 1.3411 | 1102.60 | 43.86 | 107.970 |
2010-03-31 | 38.377104 | 31.003 | 23.49 | 141.730 | 160.24 | 120.8575 | 1206.78 | 18.44 | 1.3224 | 1166.10 | 50.31 | 114.280 |
2010-04-30 | 36.697106 | 25.800 | 21.42 | 125.460 | 144.26 | 109.3690 | 1089.41 | 32.07 | 1.2267 | 1213.81 | 49.86 | 118.881 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-01-31 | 178.390000 | 94.200 | 49.91 | 1511.980 | 267.93 | 274.4300 | 2744.28 | 18.59 | 1.2231 | 1318.14 | 21.49 | 125.130 |
2018-02-28 | 167.780000 | 91.270 | 52.08 | 1447.340 | 251.86 | 263.1500 | 2640.87 | 19.97 | 1.2321 | 1324.00 | 21.98 | 125.790 |
2018-03-31 | 162.320000 | 95.820 | 52.73 | 1572.620 | 239.80 | 266.5600 | 2669.91 | 15.41 | 1.2128 | 1321.89 | 22.73 | 125.500 |
2018-04-30 | 187.500000 | 98.950 | 55.68 | 1624.890 | 229.16 | 272.6100 | 2724.01 | 14.94 | 1.1661 | 1301.03 | 22.44 | 123.370 |
2018-05-31 | 185.110000 | 98.610 | 49.71 | 1699.800 | 220.57 | 271.2800 | 2718.37 | 16.09 | 1.1683 | 1252.25 | 22.31 | 118.650 |
102 rows × 12 columns
rets.cumsum().apply(np.exp).resample("1m",label="right").last().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1141fb10>
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-XiWauNvR-1581857844296)(output_32_1.png)]
三、移动统计值
data = pd.DataFrame(data['AAPL.O']).dropna()
data.head()
AAPL.O | |
---|---|
Date | |
2010-01-04 | 30.572827 |
2010-01-05 | 30.625684 |
2010-01-06 | 30.138541 |
2010-01-07 | 30.082827 |
2010-01-08 | 30.282827 |
综述
window = 20 #20日的移动
data["min"] = data['AAPL.O'].rolling(window=window).min()
data["median"] = data['AAPL.O'].rolling(window=window).median()
data["max"] = data['AAPL.O'].rolling(window=window).max()
data["std"] = data['AAPL.O'].rolling(window=window).std()
data["ewma"] = data['AAPL.O'].ewm(halflife=0.5,min_periods=window).mean() #加权指数平滑,里面的参数不明白!!!!
data
AAPL.O | min | std | ewma | median | max | |
---|---|---|---|---|---|---|
Date | ||||||
2010-01-04 | 30.572827 | NaN | NaN | NaN | NaN | NaN |
2010-01-05 | 30.625684 | NaN | NaN | NaN | NaN | NaN |
2010-01-06 | 30.138541 | NaN | NaN | NaN | NaN | NaN |
2010-01-07 | 30.082827 | NaN | NaN | NaN | NaN | NaN |
2010-01-08 | 30.282827 | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... |
2018-06-25 | 182.170000 | 182.17 | 3.262549 | 182.907462 | 189.54 | 193.98 |
2018-06-26 | 184.430000 | 182.17 | 3.423931 | 184.049366 | 189.54 | 193.98 |
2018-06-27 | 184.160000 | 182.17 | 3.580486 | 184.132341 | 189.54 | 193.98 |
2018-06-28 | 185.500000 | 182.17 | 3.633386 | 185.158085 | 189.54 | 193.98 |
2018-06-29 | 185.110000 | 182.17 | 3.706497 | 185.122021 | 188.79 | 193.98 |
2138 rows × 6 columns
#选用数据进行作图可视化
%matplotlib notebook
fig1 = plt.figure()
<IPython.core.display.Javascript object>

data[["AAPL.O"]].plot()
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x14e28e10>
data[["min","max","median"]].plot()
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x14dd0e50>
一个技术分析的例子
#利用短期MA线 和 长期MA线 改进策略
#当短期MA 向上突破长期线 : 可考虑买入
#当短期MA 向下突破长期线: 可考虑卖出
data["sma1"] = data["AAPL.O"].rolling(42).mean()
data["sma2"] = data["AAPL.O"].rolling(252).mean()
data[["AAPL.O","sma1","sma2"]].plot(style=["b-","g--","r--"])
<IPython.core.display.Javascript object>
<img src="