时间序列
pandas 提供了简单、强大且高效的重采样功能,用于在频率转换时对数据进行重新采样(例如把按秒采样的数据转换为按5分钟采样的数据)。这类操作在金融领域极为常见,但并不局限于此。
# Fifty timestamps spaced one second apart, beginning at 2012-01-01 00:00:00.
rng = pd.date_range(start='1/1/2012', periods=50, freq='S')
rng  # bare expression: echoes the index when run interactively
'''
DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
'2012-01-01 00:00:02', '2012-01-01 00:00:03',
'2012-01-01 00:00:04', '2012-01-01 00:00:05',
'2012-01-01 00:00:06', '2012-01-01 00:00:07',
'2012-01-01 00:00:08', '2012-01-01 00:00:09',
'2012-01-01 00:00:10', '2012-01-01 00:00:11',
'2012-01-01 00:00:12', '2012-01-01 00:00:13',
'2012-01-01 00:00:14', '2012-01-01 00:00:15',
'2012-01-01 00:00:16', '2012-01-01 00:00:17',
'2012-01-01 00:00:18', '2012-01-01 00:00:19',
'2012-01-01 00:00:20', '2012-01-01 00:00:21',
'2012-01-01 00:00:22', '2012-01-01 00:00:23',
'2012-01-01 00:00:24', '2012-01-01 00:00:25',
'2012-01-01 00:00:26', '2012-01-01 00:00:27',
'2012-01-01 00:00:28', '2012-01-01 00:00:29',
'2012-01-01 00:00:30', '2012-01-01 00:00:31',
'2012-01-01 00:00:32', '2012-01-01 00:00:33',
'2012-01-01 00:00:34', '2012-01-01 00:00:35',
'2012-01-01 00:00:36', '2012-01-01 00:00:37',
'2012-01-01 00:00:38', '2012-01-01 00:00:39',
'2012-01-01 00:00:40', '2012-01-01 00:00:41',
'2012-01-01 00:00:42', '2012-01-01 00:00:43',
'2012-01-01 00:00:44', '2012-01-01 00:00:45',
'2012-01-01 00:00:46', '2012-01-01 00:00:47',
'2012-01-01 00:00:48', '2012-01-01 00:00:49'],
dtype='datetime64[ns]', freq='S')
'''
# One random integer per timestamp, drawn from 0 up to (not including) 500,
# indexed by the per-second DatetimeIndex built above.
ts = pd.Series(data=np.random.randint(0, 500, size=len(rng)), index=rng)
ts  # bare expression: echoes the series when run interactively
'''
2012-01-01 00:00:00 72
2012-01-01 00:00:01 233
2012-01-01 00:00:02 243
2012-01-01 00:00:03 146
2012-01-01 00:00:04 136
2012-01-01 00:00:05 494
2012-01-01 00:00:06 352
2012-01-01 00:00:07 309
2012-01-01 00:00:08 414
2012-01-01 00:00:09 279
2012-01-01 00:00:10 249
2012-01-01 00:00:11 197
2012-01-01 00:00:12 321
2012-01-01 00:00:13 177
2012-01-01 00:00:14 71
2012-01-01 00:00:15 126
2012-01-01 00:00:16 18
2012-01-01 00:00:17 290
2012-01-01 00:00:18 292
2012-01-01 00:00:19 365
2012-01-01 00:00:20 23
2012-01-01 00:00:21 297
2012-01-01 00:00:22 186
2012-01-01 00:00:23 18
2012-01-01 00:00:24 145
2012-01-01 00:00:25 221
2012-01-01 00:00:26 325
2012-01-01 00:00:27 346
2012-01-01 00:00:28 490
2012-01-01 00:00:29 439
2012-01-01 00:00:30 170
2012-01-01 00:00:31 198
2012-01-01 00:00:32 19
2012-01-01 00:00:33 461
2012-01-01 00:00:34 200
2012-01-01 00:00:35 381
2012-01-01 00:00:36 382
2012-01-01 00:00:37 417
2012-01-01 00:00:38 152
2012-01-01 00:00:39 270
2012-01-01 00:00:40 492
2012-01-01 00:00:41 163
2012-01-01 00:00:42 303
2012-01-01 00:00:43 200
2012-01-01 00:00:44 317
2012-01-01 00:00:45 465
2012-01-01 00:00:46 61
2012-01-01 00:00:47 214
2012-01-01 00:00:48 39
2012-01-01 00:00:49 28
Freq: S, dtype: int32
'''
# Downsample the per-second series into 5-minute bins and total each bin.
# The aggregation is chained as a method call: the `how=` keyword of
# resample() was deprecated in pandas 0.18 and later removed, so passing it
# raises a TypeError on current pandas. The result is the same.
ts.resample('5Min').sum()
'''
2012-01-01 12454
Freq: 5T, dtype: int32
'''
时区表示
# Five consecutive calendar days starting on 2012-03-06 (timezone-naive).
rng = pd.date_range(start='3/6/2012', periods=5, freq='D')
rng  # bare expression: echoes the index when run interactively
'''
DatetimeIndex(['2012-03-06', '2012-03-07', '2012-03-08', '2012-03-09',
'2012-03-10'],
dtype='datetime64[ns]', freq='D')
'''
# One standard-normal sample per day in the daily index.
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts  # bare expression: echoes the series when run interactively
'''
2012-03-06 0.512563
2012-03-07 -1.226835
2012-03-08 -0.432962
2012-03-09 -0.332179
2012-03-10 -0.541199
Freq: D, dtype: float64
'''
# Attach the UTC timezone to the naive index; the clock readings themselves
# are unchanged (midnight stays midnight, now tagged +00:00).
ts_utc = ts.tz_localize(tz='UTC')
ts_utc  # bare expression: echoes the series when run interactively
'''
2012-03-06 00:00:00+00:00 0.512563
2012-03-07 00:00:00+00:00 -1.226835
2012-03-08 00:00:00+00:00 -0.432962
2012-03-09 00:00:00+00:00 -0.332179
2012-03-10 00:00:00+00:00 -0.541199
Freq: D, dtype: float64
'''
时区转换
# Re-express the same instants in US/Eastern local time; values are untouched,
# only the index labels shift (result is displayed, not stored).
ts_utc.tz_convert(tz='US/Eastern')
'''
2012-03-05 19:00:00-05:00 0.512563
2012-03-06 19:00:00-05:00 -1.226835
2012-03-07 19:00:00-05:00 -0.432962
2012-03-08 19:00:00-05:00 -0.332179
2012-03-09 19:00:00-05:00 -0.541199
Freq: D, dtype: float64
'''
时间跨度转换
# Five month-end dates, from 2012-01-31 through 2012-05-31.
rng = pd.date_range(start='1/1/2012', periods=5, freq='M')
rng  # bare expression: echoes the index when run interactively
'''
DatetimeIndex(['2012-01-31', '2012-02-29', '2012-03-31', '2012-04-30',
'2012-05-31'],
dtype='datetime64[ns]', freq='M')
'''
# One standard-normal sample per month-end date.
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts  # bare expression: echoes the series when run interactively
'''
2012-01-31 1.246855
2012-02-29 0.623765
2012-03-31 -2.522536
2012-04-30 0.092127
2012-05-31 0.379715
Freq: M, dtype: float64
'''
# Re-index the series by period instead of by timestamp. No frequency is
# passed explicitly (freq=None is the default).
ps = ts.to_period(freq=None)
ps  # bare expression: echoes the series when run interactively
'''
2012-01 1.246855
2012-02 0.623765
2012-03 -2.522536
2012-04 0.092127
2012-05 0.379715
Freq: M, dtype: float64
'''
# Convert the period index back to timestamps, anchored at the start of each
# period ('start' is the default); the result is displayed, not stored.
ps.to_timestamp(how='start')
'''
2012-01-01 1.246855
2012-02-01 0.623765
2012-03-01 -2.522536
2012-04-01 0.092127
2012-05-01 0.379715
Freq: MS, dtype: float64
'''
时期(Period)与时间戳(Timestamp)之间的转换使我们可以使用一些方便的算术函数。例如,下面把以11月为年度结束月的季度频率数据,转换为以各季度最后一个月第一天上午9点为索引的数据。
# Quarterly periods from 1990Q1 through 2000Q4, using a fiscal year that
# ends in November (the 'Q-NOV' frequency).
prng = pd.period_range(start='1990Q1', end='2000Q4', freq='Q-NOV')
prng  # bare expression: echoes the index when run interactively
'''
PeriodIndex(['1990Q1', '1990Q2', '1990Q3', '1990Q4', '1991Q1', '1991Q2',
'1991Q3', '1991Q4', '1992Q1', '1992Q2', '1992Q3', '1992Q4',
'1993Q1', '1993Q2', '1993Q3', '1993Q4', '1994Q1', '1994Q2',
'1994Q3', '1994Q4', '1995Q1', '1995Q2', '1995Q3', '1995Q4',
'1996Q1', '1996Q2', '1996Q3', '1996Q4', '1997Q1', '1997Q2',
'1997Q3', '1997Q4', '1998Q1', '1998Q2', '1998Q3', '1998Q4',
'1999Q1', '1999Q2', '1999Q3', '1999Q4', '2000Q1', '2000Q2',
'2000Q3', '2000Q4'],
dtype='period[Q-NOV]', freq='Q-NOV')
'''
# One standard-normal sample per quarter, indexed by the quarterly periods.
ts = pd.Series(data=np.random.randn(len(prng)), index=prng)
ts  # bare expression: echoes the series when run interactively
'''
1990Q1 1.272780
1990Q2 0.081477
1990Q3 0.857023
1990Q4 -0.731177
1991Q1 -2.023644
1991Q2 0.990468
1991Q3 -1.103367
1991Q4 0.700913
1992Q1 0.298691
1992Q2 -2.032672
1992Q3 0.059544
1992Q4 1.923286
1993Q1 0.108547
1993Q2 -0.587671
1993Q3 0.055746
1993Q4 0.391453
1994Q1 -0.349526
1994Q2 -1.791296
1994Q3 -0.103650
1994Q4 -0.072721
1995Q1 0.083551
1995Q2 -0.643590
1995Q3 -0.590049
1995Q4 1.097337
1996Q1 1.130710
1996Q2 0.022466
1996Q3 -0.237904
1996Q4 -1.132273
1997Q1 -1.045858
1997Q2 -0.817864
1997Q3 -0.220546
1997Q4 0.927126
1998Q1 -1.268038
1998Q2 -0.408925
1998Q3 -0.362332
1998Q4 -1.095481
1999Q1 0.502225
1999Q2 -2.800133
1999Q3 -0.274502
1999Q4 0.021417
2000Q1 -0.555724
2000Q2 -1.321482
2000Q3 1.043381
2000Q4 0.146324
Freq: Q-NOV, dtype: float64
'''
# Relabel every quarter in three steps:
#   1. asfreq('M', 'end')   -> the last month of the quarter
#   2. asfreq('H', 'start') -> the first hour of that month
#   3. + 9                  -> nine hourly periods later, i.e. 09:00
ts.index = prng.asfreq('M', how='end').asfreq('H', how='start') + 9
ts  # bare expression: echoes the re-indexed series when run interactively
'''
1990-02-01 09:00 1.272780
1990-05-01 09:00 0.081477
1990-08-01 09:00 0.857023
1990-11-01 09:00 -0.731177
1991-02-01 09:00 -2.023644
1991-05-01 09:00 0.990468
1991-08-01 09:00 -1.103367
1991-11-01 09:00 0.700913
1992-02-01 09:00 0.298691
1992-05-01 09:00 -2.032672
1992-08-01 09:00 0.059544
1992-11-01 09:00 1.923286
1993-02-01 09:00 0.108547
1993-05-01 09:00 -0.587671
1993-08-01 09:00 0.055746
1993-11-01 09:00 0.391453
1994-02-01 09:00 -0.349526
1994-05-01 09:00 -1.791296
1994-08-01 09:00 -0.103650
1994-11-01 09:00 -0.072721
1995-02-01 09:00 0.083551
1995-05-01 09:00 -0.643590
1995-08-01 09:00 -0.590049
1995-11-01 09:00 1.097337
1996-02-01 09:00 1.130710
1996-05-01 09:00 0.022466
1996-08-01 09:00 -0.237904
1996-11-01 09:00 -1.132273
1997-02-01 09:00 -1.045858
1997-05-01 09:00 -0.817864
1997-08-01 09:00 -0.220546
1997-11-01 09:00 0.927126
1998-02-01 09:00 -1.268038
1998-05-01 09:00 -0.408925
1998-08-01 09:00 -0.362332
1998-11-01 09:00 -1.095481
1999-02-01 09:00 0.502225
1999-05-01 09:00 -2.800133
1999-08-01 09:00 -0.274502
1999-11-01 09:00 0.021417
2000-02-01 09:00 -0.555724
2000-05-01 09:00 -1.321482
2000-08-01 09:00 1.043381
2000-11-01 09:00 0.146324
Freq: H, dtype: float64
'''