In [1]:import numpy as np
In [2]:import pandas as pd
In [3]:import matplotlib.pyplot as plt
2. 创建对象
2.1 通过list方式创建(pandas默认自动生成整数索引)
In [4]: s1 = pd.Series([4,3,1,5,np.nan,'a',55])
In [5]: s1
Out[5]:
0      4
1      3
2      1
3      5
4    NaN
5      a
6     55
dtype: object
In [6]: s2 = pd.Series(range(5,10))
In [7]: s2
Out[7]:
0    5
1    6
2    7
3    8
4    9
dtype: int64
In [63]: np.random.seed(1)
In [64]: s2 = pd.Series(np.random.randint(0,7, size=10))
In [65]: s2
Out[65]:
0    5
1    3
2    4
3    0
4    1
5    3
6    5
7    0
8    0
9    1
dtype: int64
3. Series数据操作
3.1 统计
In [66]: s2.value_counts()# 可用于绘制直方图
Out[66]:
0    3
5    2
3    2
1    2
4    1
dtype: int64
3.2 Series中str属性(封装了一系列的字符串操作方法)
In [67]: s = pd.Series(['A','B','C','Aaba','Baca', np.nan,'CABA','dog','cat'])
In [68]: s
Out[68]:
0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object
In [69]: s.str.lower()
Out[69]:
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object
4. 时间序列(索引)
pandas具有简单、强大和高效的时间处理函数,用于在频率转换时执行重采样操作。
In [151]: rng = pd.date_range('1/1/2019', periods=5, freq='D')
In [152]: rng # 年月日时分秒:'Y'、'M'、'D'、'H'、'Min'、'S'
Out[152]:
DatetimeIndex(['2019-01-01','2019-01-02','2019-01-03','2019-01-04','2019-01-05'],
dtype='datetime64[ns]',
freq='D')
In [153]: ts = pd.Series(np.random.randint(0,500,len(rng)),index=rng)
In [154]: ts
Out[154]:
2019-01-01    184
2019-01-02    443
2019-01-03    399
2019-01-04     24
2019-01-05    137
Freq: D, dtype: int64
In [162]: ts1 = ts.resample('9H').sum()# 频率(9H)转换,执行 重采样 操作
In [163]: ts1
Out[163]:
2019-01-01 00:00:00    184
2019-01-01 09:00:00      0
2019-01-01 18:00:00    443
2019-01-02 03:00:00      0
2019-01-02 12:00:00      0
2019-01-02 21:00:00    399
2019-01-03 06:00:00      0
2019-01-03 15:00:00     24
2019-01-04 00:00:00      0
2019-01-04 09:00:00      0
2019-01-04 18:00:00    137
Freq: 9H, dtype: int64
In [157]: tz = ts.tz_localize('UTC')# 时区表示
In [158]: tz.tz_convert('US/Eastern')# 转换成其它时区
Out[158]:
2018-12-31 19:00:00-05:00    184
2019-01-01 19:00:00-05:00    443
2019-01-02 19:00:00-05:00    399
2019-01-03 19:00:00-05:00     24
2019-01-04 19:00:00-05:00    137
Freq: D, dtype: int64
In [178]: ts # 时间跨度转换
Out[178]:
2019-01-01    450
2019-01-02    199
2019-01-03    309
2019-01-04    325
2019-01-05    420
Freq: D, dtype: int64
In [179]: ps = ts.to_period()
In [180]: ps
Out[180]:
2019-01-01    450
2019-01-02    199
2019-01-03    309
2019-01-04    325
2019-01-05    420
Freq: D, dtype: int64
In [181]: ps.to_timestamp()
Out[181]:
2019-01-01    450
2019-01-02    199
2019-01-03    309
2019-01-04    325
2019-01-05    420
Freq: D, dtype: int64
In [196]: ts = pd.Series(np.random.randn(10), index=pd.date_range('6/1/2019', periods=10))
In [197]: ts
Out[197]:
2019-06-01    0.999051
2019-06-02   -0.308171
2019-06-03    0.365838
2019-06-04    1.607507
2019-06-05   -0.238177
2019-06-06   -0.340828
2019-06-07    0.487594
2019-06-08    1.739073
2019-06-09    0.068970
2019-06-10    0.473241
Freq: D, dtype: float64
In [198]: ts = ts.cumsum()
In [199]: ts
Out[199]:
2019-06-01    0.999051
2019-06-02    0.690880
2019-06-03    1.056719
2019-06-04    2.664226
2019-06-05    2.426049
2019-06-06    2.085220
2019-06-07    2.572814
2019-06-08    4.311887
2019-06-09    4.380857
2019-06-10    4.854099
Freq: D, dtype: float64
In [200]: ts.plot()
Out[200]:<matplotlib.axes._subplots.AxesSubplot at 0x7f04999e07b8>
In [201]: plt.show()# 绘图如下: