#带有重复索引的时间序列
dates=pd.DatetimeIndex(['1/1/2000','1/2/2000','1/2/2000','1/2/2000','1/3/2000'])
dup_ts=Series(np.arange(5),index=dates)
#检查索引的is_unique属性
dup_ts.index.is_unique
False
grouped=dup_ts.groupby(level=0)
grouped.mean()
Out[88]:
2000-01-01 0
2000-01-02 2
2000-01-03 4
dtype: int32
grouped.count()
Out[89]:
2000-01-01 1
2000-01-02 3
2000-01-03 1
dtype: int64
#日期的范围、频率及移动
ts
Out[90]:
2011-01-02 1.582904
2011-01-05 -1.439438
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
2011-01-12 0.432144
dtype: float64
ts.resample('D')
Out[91]: DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]
生成日期范围:date_range()默认按天计算时间点;如果只传入起始日期或结束日期,则还需要再传入一个表示时间段长度的数字(periods)
index=pd.date_range('4/1/2017','6/28/2017')
index
Out[93]:
DatetimeIndex(['2017-04-01', '2017-04-02', '2017-04-03', '2017-04-04',
'2017-04-05', '2017-04-06', '2017-04-07', '2017-04-08',
'2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12',
'2017-04-13', '2017-04-14', '2017-04-15', '2017-04-16',
'2017-04-17', '2017-04-18', '2017-04-19', '2017-04-20',
'2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
'2017-04-25', '2017-04-26', '2017-04-27', '2017-04-28',
'2017-04-29', '2017-04-30', '2017-05-01', '2017-05-02',
'2017-05-03', '2017-05-04', '2017-05-05', '2017-05-06',
'2017-05-07', '2017-05-08', '2017-05-09', '2017-05-10',
'2017-05-11', '2017-05-12', '2017-05-13', '2017-05-14',
'2017-05-15', '2017-05-16', '2017-05-17', '2017-05-18',
'2017-05-19', '2017-05-20', '2017-05-21', '2017-05-22',
'2017-05-23', '2017-05-24', '2017-05-25', '2017-05-26',
'2017-05-27', '2017-05-28', '2017-05-29', '2017-05-30',
'2017-05-31', '2017-06-01', '2017-06-02', '2017-06-03',
'2017-06-04', '2017-06-05', '2017-06-06', '2017-06-07',
'2017-06-08', '2017-06-09', '2017-06-10', '2017-06-11',
'2017-06-12', '2017-06-13', '2017-06-14', '2017-06-15',
'2017-06-16', '2017-06-17', '2017-06-18', '2017-06-19',
'2017-06-20', '2017-06-21', '2017-06-22', '2017-06-23',
'2017-06-24', '2017-06-25', '2017-06-26', '2017-06-27',
'2017-06-28'],
dtype='datetime64[ns]', freq='D')
pd.date_range(start='4/1/2017',periods=20)
Out[94]:
DatetimeIndex(['2017-04-01', '2017-04-02', '2017-04-03', '2017-04-04',
'2017-04-05', '2017-04-06', '2017-04-07', '2017-04-08',
'2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12',
'2017-04-13', '2017-04-14', '2017-04-15', '2017-04-16',
'2017-04-17', '2017-04-18', '2017-04-19', '2017-04-20'],
dtype='datetime64[ns]', freq='D')
pd.date_range(end='6/1/2017',periods=20)
Out[95]:
DatetimeIndex(['2017-05-13', '2017-05-14', '2017-05-15', '2017-05-16',
'2017-05-17', '2017-05-18', '2017-05-19', '2017-05-20',
'2017-05-21', '2017-05-22', '2017-05-23', '2017-05-24',
'2017-05-25', '2017-05-26', '2017-05-27', '2017-05-28',
'2017-05-29', '2017-05-30', '2017-05-31', '2017-06-01'],
dtype='datetime64[ns]', freq='D')
#想要生成一个由每月最后一个工作日组成的日期索引,可传入“BM”频率,结果只包含该时间区间内符合频率要求的日期
pd.date_range('1/1/2000','12/1/2000',freq='BM')
Out[98]:
DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
'2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
'2000-09-29', '2000-10-31', '2000-11-30'],
dtype='datetime64[ns]', freq='BM')
#起始和结束日期有时带有时间信息;若希望产生一组被规范化到午夜(00:00:00)的时间戳,可传入normalize=True实现
pd.date_range('5/2/2017 12:56:31',periods=5,normalize=True)
Out[100]:
DatetimeIndex(['2017-05-02', '2017-05-03', '2017-05-04', '2017-05-05',
'2017-05-06'],
dtype='datetime64[ns]', freq='D')
频率和日期偏移量
#对于每个基础频率都有一个被称为日期偏移量的对象与之对应
from pandas.tseries.offsets import Hour,Minute
hour = Hour()
#传入一个整数即可定义偏移量的倍数
four_hours=Hour(4)
four_hours
<4 * Hours>
pd.date_range('1/1/2017','1/3/2017',freq='4h')
Out[107]:
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 04:00:00',
'2017-01-01 08:00:00', '2017-01-01 12:00:00',
'2017-01-01 16:00:00', '2017-01-01 20:00:00',
'2017-01-02 00:00:00', '2017-01-02 04:00:00',
'2017-01-02 08:00:00', '2017-01-02 12:00:00',
'2017-01-02 16:00:00', '2017-01-02 20:00:00',
'2017-01-03 00:00:00'],
dtype='datetime64[ns]', freq='4H')
Hour(2)+Minute(30)
Out[108]: <150 * Minutes>
WOM(Week Of Month)日期:可以获得诸如“每月第三个星期五”之类的日期
rng=pd.date_range('1/1/2012','9/1/2012',freq='WOM-3FRI')
list(rng)
Out[111]:
[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]
移动(超前和滞后)的数据
ts=Series(np.random.randn(4),index=pd.date_range('1/1/2000',periods=4,freq='M'))
ts
Out[113]:
2000-01-31 -0.098800
2000-02-29 1.454941
2000-03-31 -0.052731
2000-04-30 -0.522302
Freq: M, dtype: float64
ts.shift(2)
Out[114]:
2000-01-31 NaN
2000-02-29 NaN
2000-03-31 -0.098800
2000-04-30 1.454941
Freq: M, dtype: float64
ts.shift(-2)
Out[115]:
2000-01-31 -0.052731
2000-02-29 -0.522302
2000-03-31 NaN
2000-04-30 NaN
Freq: M, dtype: float64
ts/ts.shift(1)-1
Out[117]:
2000-01-31 NaN
2000-02-29 -15.726113
2000-03-31 -1.036243
2000-04-30 8.904943
Freq: M, dtype: float64
ts.shift(2,freq='M')
Out[118]:
2000-03-31 -0.098800
2000-04-30 1.454941
2000-05-31 -0.052731
2000-06-30 -0.522302
Freq: M, dtype: float64
ts.shift(3,freq='D')
Out[119]:
2000-02-03 -0.098800
2000-03-03 1.454941
2000-04-03 -0.052731
2000-05-03 -0.522302
dtype: float64
ts.shift(1,freq='3D')
Out[120]:
2000-02-03 -0.098800
2000-03-03 1.454941
2000-04-03 -0.052731
2000-05-03 -0.522302
dtype: float64
通过偏移量对日期进行位移
from pandas.tseries.offsets import Day,MonthEnd
now=datetime(2011,11,17)
now+3*Day()
Out[123]: Timestamp('2011-11-20 00:00:00')
now+MonthEnd()
Out[124]: Timestamp('2011-11-30 00:00:00')
now+MonthEnd(2)
Out[125]: Timestamp('2011-12-31 00:00:00')
offset=MonthEnd()
offset.rollforward(now)
Out[127]: Timestamp('2011-11-30 00:00:00')
offset.rollback(now)
Out[128]: Timestamp('2011-10-31 00:00:00')
ts=Series(np.random.randn(20),index=pd.date_range('1/15/2000',periods=20,freq='4d'))
ts.groupby(offset.rollforward).mean()
Out[130]:
2000-01-31 -0.081986
2000-02-29 -0.045495
2000-03-31 0.100962
dtype: float64
ts.resample('M',how='mean')
Out[131]:
2000-01-31 -0.081986
2000-02-29 -0.045495
2000-03-31 0.100962
Freq: M, dtype: float64