In [1]: import pandas as pd
In [2]: import numpy as np
In [3]: s = pd.Series([1,3,5,np.nan])
In [4]: dates = pd.date_range("20160301", periods=6)
In [5]: dates
Out[5]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06'],
dtype='datetime64[ns]', freq='D')
In [6]: data = pd.DataFrame(np.random.randn(6,4), index=dates,columns=list("ABCD"))
In [7]: data
Out[7]:
A B C D
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
In [8]: data.shape
Out[8]: (6, 4)
In [9]: data.values
Out[9]:
array([[-0.22208318, -0.96338087, -0.17209986, 0.69057764],
[ 0.01076758, -0.14935166, -0.40825718, 1.34124888],
[ 1.80867181, -0.23454168, -0.56645639, 0.58882996],
[-0.31567662, -1.33944465, 1.15407395, 0.41100917],
[ 0.85052137, -0.30285326, -0.97819807, -0.99148776],
[ 1.50518199, -0.08728479, 1.64141316, 2.31662617]])
In [11]: data.index
Out[11]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06'],
dtype='datetime64[ns]', freq='D')
In [12]: data.columns
Out[12]: Index(['A', 'B', 'C', 'D'], dtype='object')
In [15]: data.describe()
Out[15]:
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean 0.606230 -0.512809 0.111746 0.726134
std 0.916979 0.514009 1.041656 1.093405
min -0.315677 -1.339445 -0.978198 -0.991488
25% -0.163870 -0.798249 -0.526907 0.455464
50% 0.430644 -0.268697 -0.290179 0.639704
75% 1.341517 -0.170649 0.822531 1.178581
max 1.808672 -0.087285 1.641413 2.316626
In [17]: data.T
Out[17]:
2016-03-01 2016-03-02 2016-03-03 2016-03-04 2016-03-05 2016-03-06
A -0.222083 0.010768 1.808672 -0.315677 0.850521 1.505182
B -0.963381 -0.149352 -0.234542 -1.339445 -0.302853 -0.087285
C -0.172100 -0.408257 -0.566456 1.154074 -0.978198 1.641413
D 0.690578 1.341249 0.588830 0.411009 -0.991488 2.316626
In [18]: data
Out[18]:
A B C D
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
In [19]: data.sort_index(axis=1)
Out[19]:
A B C D
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
In [21]: data.sort_index(axis=1,ascending=False)
Out[21]:
D C B A
2016-03-01 0.690578 -0.172100 -0.963381 -0.222083
2016-03-02 1.341249 -0.408257 -0.149352 0.010768
2016-03-03 0.588830 -0.566456 -0.234542 1.808672
2016-03-04 0.411009 1.154074 -1.339445 -0.315677
2016-03-05 -0.991488 -0.978198 -0.302853 0.850521
2016-03-06 2.316626 1.641413 -0.087285 1.505182
In [22]: data.sort_values(by="A")
Out[22]:
A B C D
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
In [23]: data["A"]
Out[23]:
2016-03-01 -0.222083
2016-03-02 0.010768
2016-03-03 1.808672
2016-03-04 -0.315677
2016-03-05 0.850521
2016-03-06 1.505182
Freq: D, Name: A, dtype: float64
In [24]: data[2:4]
Out[24]:
A B C D
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
In [25]: data.loc["2016-03-01":"2016-03-04"]
Out[25]:
A B C D
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
In [26]: data.iloc[2:4]
Out[26]:
A B C D
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
In [27]: data.loc[:,["B","D"]]
Out[27]:
B D
2016-03-01 -0.963381 0.690578
2016-03-02 -0.149352 1.341249
2016-03-03 -0.234542 0.588830
2016-03-04 -1.339445 0.411009
2016-03-05 -0.302853 -0.991488
2016-03-06 -0.087285 2.316626
In [28]: data.loc["2016-03-02":"2016-03-04",["B","D"]]
Out[28]:
B D
2016-03-02 -0.149352 1.341249
2016-03-03 -0.234542 0.588830
2016-03-04 -1.339445 0.411009
In [30]: data.at[pd.Timestamp("2016-03-02"),"B"]
Out[30]: -0.14935166221886448
In [31]: data.iloc[1:3,2:4]
Out[31]:
C D
2016-03-02 -0.408257 1.341249
2016-03-03 -0.566456 0.588830
In [32]: data.iloc[1,1]
Out[32]: -0.14935166221886448
In [33]: data.iat[1,1]
Out[33]: -0.14935166221886448
In [34]: data
Out[34]:
A B C D
2016-03-01 -0.222083 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
In [35]: data.iat[0,0]=100
In [36]: data
Out[36]:
A B C D
2016-03-01 100.000000 -0.963381 -0.172100 0.690578
2016-03-02 0.010768 -0.149352 -0.408257 1.341249
2016-03-03 1.808672 -0.234542 -0.566456 0.588830
2016-03-04 -0.315677 -1.339445 1.154074 0.411009
2016-03-05 0.850521 -0.302853 -0.978198 -0.991488
2016-03-06 1.505182 -0.087285 1.641413 2.316626
In [37]: data.A=range(6)
In [38]: data
Out[38]:
A B C D
2016-03-01 0 -0.963381 -0.172100 0.690578
2016-03-02 1 -0.149352 -0.408257 1.341249
2016-03-03 2 -0.234542 -0.566456 0.588830
2016-03-04 3 -1.339445 1.154074 0.411009
2016-03-05 4 -0.302853 -0.978198 -0.991488
2016-03-06 5 -0.087285 1.641413 2.316626
In [39]: data.B=200
In [40]: data
Out[40]:
A B C D
2016-03-01 0 200 -0.172100 0.690578
2016-03-02 1 200 -0.408257 1.341249
2016-03-03 2 200 -0.566456 0.588830
2016-03-04 3 200 1.154074 0.411009
2016-03-05 4 200 -0.978198 -0.991488
2016-03-06 5 200 1.641413 2.316626