10 Minutes to pandas
请参阅官方文档
In [1]:
```python
# 设置为 inline 风格
%matplotlib inline
```
In [2]:
```python
# 包导入
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
```
创建数据集对象
In [3]:
```python
# Series 对象可以理解为一维数组
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s
```
Out[3]:
```
0     1
1     3
2     5
3   NaN
4     6
5     8
dtype: float64
```
In [4]:
```python
# DataFrame 对象可以理解为二维数组,可以指定索引格式
dates = pd.date_range('20160301', periods=6)
# periods:integer或None,默认值是None,表示你要从这个函数产生多少个日期索引值;如果是None的话,那么start和end必须不能为None。
dates
```
Out[4]:
```
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
               '2016-03-05', '2016-03-06'],
              dtype='datetime64[ns]', freq='D')
```
In [5]:
```python
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df
```
Out[5]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-01 | 1.188983 | -1.150119 | -0.700588 | 0.439065 |
| 2016-03-02 | -2.041544 | 1.084507 | -0.335441 | 1.969754 |
| 2016-03-03 | 1.204151 | -1.277714 | -0.230671 | 0.629063 |
| 2016-03-04 | -0.352351 | -1.701585 | -0.034294 | -0.330139 |
| 2016-03-05 | 0.627601 | -0.292939 | 0.457975 | 2.262402 |
| 2016-03-06 | -1.121869 | -0.533223 | 0.627452 | 0.412665 |
In [6]:
```python
df.values
```
Out[6]:
```
array([[ 1.18898298, -1.15011854, -0.70058776,  0.43906549],
       [-2.04154443,  1.08450747, -0.33544069,  1.96975377],
       [ 1.2041512 , -1.27771421, -0.23067059,  0.62906316],
       [-0.35235094, -1.70158492, -0.03429361, -0.33013878],
       [ 0.62760104, -0.29293918,  0.45797463,  2.26240237],
       [-1.12186945, -0.53322343,  0.6274522 ,  0.41266481]])
```
In [7]:
```python
# 使用字典来创建:key 为 DataFrame 的列;value 为对应列下的值
df = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20160301'),
    'C': range(4),
    'D': np.arange(5, 9),
    'E': 'text',
    'F': ['AA', 'BB', 'CC', 'DD']})
df
```
Out[7]:
| | A | B | C | D | E | F |
|---|---|---|---|---|---|---|
| 0 | 1 | 2016-03-01 | 0 | 5 | text | AA |
| 1 | 1 | 2016-03-01 | 1 | 6 | text | BB |
| 2 | 1 | 2016-03-01 | 2 | 7 | text | CC |
| 3 | 1 | 2016-03-01 | 3 | 8 | text | DD |
In [8]:
```python
df.dtypes
```
Out[8]:
```
A             int64
B    datetime64[ns]
C             int64
D             int64
E            object
F            object
dtype: object
```
In [9]:
```python
df.A
```
Out[9]:
```
0    1
1    1
2    1
3    1
Name: A, dtype: int64
```
In [10]:
```python
type(df.A)
```
Out[10]:
```
pandas.core.series.Series
```
查看数据
In [11]:
```python
# 创建数据集
n_rows = 6
dates = pd.date_range('20160301', periods=n_rows)
df = pd.DataFrame(np.random.randn(n_rows, 4), index=dates, columns=list('ABCD'))
df
```
Out[11]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 |
In [12]:
```python
df.shape
```
Out[12]:
```
(6, 4)
```
In [13]:
```python
df.head()
```
Out[13]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
In [14]:
```python
df.head(3)
```
Out[14]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
In [15]:
```python
df.tail()
```
Out[15]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 |
In [16]:
```python
df.tail(2)
```
Out[16]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 |
In [17]:
```python
df.index
```
Out[17]:
```
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
               '2016-03-05', '2016-03-06'],
              dtype='datetime64[ns]', freq='D')
```
In [18]:
```python
df.columns
```
Out[18]:
```
Index([u'A', u'B', u'C', u'D'], dtype='object')
```
In [19]:
```python
df.values
```
Out[19]:
```
array([[ 1.31341924,  0.82645709, -1.57414606,  0.52500758],
       [ 0.02839742, -1.00934929,  0.32701362,  0.91824786],
       [-0.85700833, -1.68269525,  0.646229  , -0.18337746],
       [-1.11288513, -1.49166212, -1.11482404, -0.11561882],
       [-0.44871305, -0.16365107, -1.23029491,  1.10665563],
       [-0.26786722,  0.09231292, -0.48023763, -0.80992272]])
```
In [20]:
```python
df.describe()
```
Out[20]:
| | A | B | C | D |
|---|---|---|---|---|
| count | 6.000000 | 6.000000 | 6.000000 | 6.000000 |
| mean | -0.224110 | -0.571431 | -0.571043 | 0.240165 |
| std | 0.856808 | 0.983304 | 0.898112 | 0.734900 |
| min | -1.112885 | -1.682695 | -1.574146 | -0.809923 |
| 25% | -0.754935 | -1.371084 | -1.201427 | -0.166438 |
| 50% | -0.358290 | -0.586500 | -0.797531 | 0.204694 |
| 75% | -0.045669 | 0.028322 | 0.125201 | 0.819938 |
| max | 1.313419 | 0.826457 | 0.646229 | 1.106656 |
In [21]:
```python
df.T
```
Out[21]:
| | 2016-03-01 00:00:00 | 2016-03-02 00:00:00 | 2016-03-03 00:00:00 | 2016-03-04 00:00:00 | 2016-03-05 00:00:00 | 2016-03-06 00:00:00 |
|---|---|---|---|---|---|---|
| A | 1.313419 | 0.028397 | -0.857008 | -1.112885 | -0.448713 | -0.267867 |
| B | 0.826457 | -1.009349 | -1.682695 | -1.491662 | -0.163651 | 0.092313 |
| C | -1.574146 | 0.327014 | 0.646229 | -1.114824 | -1.230295 | -0.480238 |
| D | 0.525008 | 0.918248 | -0.183377 | -0.115619 | 1.106656 | -0.809923 |
In [22]:
```python
df.T.shape
```
Out[22]:
```
(4, 6)
```
In [23]:
```python
df.sort_index(axis=1, ascending=False)
# sort_index()按照索引排序
#df.sort_index() #按照rowID进行排序,默认升序
#df.sort_index(axis=1,ascending=False) #按照columnID进行排序,设定为降序
```
Out[23]:
| | D | C | B | A |
|---|---|---|---|---|
| 2016-03-01 | 0.525008 | -1.574146 | 0.826457 | 1.313419 |
| 2016-03-02 | 0.918248 | 0.327014 | -1.009349 | 0.028397 |
| 2016-03-03 | -0.183377 | 0.646229 | -1.682695 | -0.857008 |
| 2016-03-04 | -0.115619 | -1.114824 | -1.491662 | -1.112885 |
| 2016-03-05 | 1.106656 | -1.230295 | -0.163651 | -0.448713 |
| 2016-03-06 | -0.809923 | -0.480238 | 0.092313 | -0.267867 |
In [24]:
```python
df.sort_values(by='C')
# df.sort_values('mpg',ascending=False)
# Order rows by values of a column (high to low). 其中 'mpg' 只是示例列名,本例的 df 中没有这一列
# 按指定列的值排序:ascending 默认为 True(升序);ascending=False 为降序
```
Out[24]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
数据选择
In [25]:
```python
df['A']
# df[['A','B']] 取出两列
```
Out[25]:
```
2016-03-01    1.313419
2016-03-02    0.028397
2016-03-03   -0.857008
2016-03-04   -1.112885
2016-03-05   -0.448713
2016-03-06   -0.267867
Freq: D, Name: A, dtype: float64
```
In [26]:
```python
df[2:4]
```
Out[26]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
In [27]:
```python
df['20160302':'20160305']
```
Out[27]:
| | A | B | C | D |
|---|---|---|---|---|
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
通过标签选择
In [28]:
```python
df.loc['20160301']
```
Out[28]:
```
A    1.313419
B    0.826457
C   -1.574146
D    0.525008
Name: 2016-03-01 00:00:00, dtype: float64
```
In [29]:
```python
type(df.loc['20160301'])
```
Out[29]:
```
pandas.core.series.Series
```
In [30]:
```python
df.loc[:, ['A', 'B']]
# 取出 AB两列
```
Out[30]:
| | A | B |
|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 |
| 2016-03-02 | 0.028397 | -1.009349 |
| 2016-03-03 | -0.857008 | -1.682695 |
| 2016-03-04 | -1.112885 | -1.491662 |
| 2016-03-05 | -0.448713 | -0.163651 |
| 2016-03-06 | -0.267867 | 0.092313 |
In [31]:
```python
df.loc['20160301':'20160305', ['A', 'B']]
# 取出某几行 几列
```
Out[31]:
| | A | B |
|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 |
| 2016-03-02 | 0.028397 | -1.009349 |
| 2016-03-03 | -0.857008 | -1.682695 |
| 2016-03-04 | -1.112885 | -1.491662 |
| 2016-03-05 | -0.448713 | -0.163651 |
In [32]:
```python
df.loc['2016-03-01', 'A']
```
Out[32]:
```
1.3134192362700037
```
In [33]:
```python
df.at[pd.Timestamp('2016-03-01'), 'A']
# df.at['2016-03-01', 'A'] will raise error
```
Out[33]:
```
1.3134192362700037
```
通过位置选择
In [34]:
```python
df.iloc[1]
```
Out[34]:
```
A    0.028397
B   -1.009349
C    0.327014
D    0.918248
Name: 2016-03-02 00:00:00, dtype: float64
```
In [35]:
<span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">5</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">]</span> <span class="c1"># 取出 2,3,4行,0,1,列</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
iloc
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"mi"
>
2
<
/
span
>
<
span
class
=
"p"
>
:
<
/
span
>
<
span
class
=
"mi"
>
5
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
:
<
/
span
>
<
span
class
=
"mi"
>
2
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"c1"
>
# 取出第 2、3、4 行,第 0、1 列(按位置,切片不含终点)</span>
|
Out[35]:
| A | B | |
|---|---|---|
| 2016-03-03 | -0.857008 | -1.682695 |
| 2016-03-04 | -1.112885 | -1.491662 |
| 2016-03-05 | -0.448713 | -0.163651 |
In [36]:
<span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">5</span><span class="p">,</span> <span class="p">:]</span> <span class="c1"># df.iloc[1:5] 这个也可以</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
iloc
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"mi"
>
1
<
/
span
>
<
span
class
=
"p"
>
:
<
/
span
>
<
span
class
=
"mi"
>
5
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"p"
>
:
]
<
/
span
>
<
span
class
=
"c1"
>
# df.iloc[1:5] 这个也可以</span>
|
Out[36]:
| A | B | C | D | |
|---|---|---|---|---|
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
In [37]:
<span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="c1"># 取出位置 (1, 1) 处的单个值,即第 2 行第 2 列</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
iloc
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"mi"
>
1
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"mi"
>
1
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"c1"
>
# 取出位置 (1, 1) 处的单个值,即第 2 行第 2 列</span>
|
Out[37]:
-1.009349292057921
|
1
|
-
1.009349292057921
|
In [38]:
<span class="n">df</span><span class="o">.</span><span class="n">iat</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="c1"># 也可以达到同样的效果</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
iat
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"mi"
>
1
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"mi"
>
1
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"c1"
>
# 也可以达到同样的效果</span>
|
Out[38]:
-1.009349292057921
|
1
|
-
1.009349292057921
|
布尔索引
In [39]:
<span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">A</span> <span class="o"><</span> <span class="mi">0</span><span class="p">]</span> <span class="c1"># 筛选出 A 列小于 0 的行</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
A
<
/
span
>
<
span
class
=
"o"
>
<<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"c1"
>
# 筛选出 A 列小于 0 的行</span>
|
Out[39]:
| A | B | C | D | |
|---|---|---|---|---|
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 |
In [40]:
<span class="n">df</span><span class="p">[</span><span class="n">df</span> <span class="o">></span> <span class="mi">0</span><span class="p">]</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>>
<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
|
Out[40]:
| A | B | C | D | |
|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | NaN | 0.525008 |
| 2016-03-02 | 0.028397 | NaN | 0.327014 | 0.918248 |
| 2016-03-03 | NaN | NaN | 0.646229 | NaN |
| 2016-03-04 | NaN | NaN | NaN | NaN |
| 2016-03-05 | NaN | NaN | NaN | 1.106656 |
| 2016-03-06 | NaN | 0.092313 | NaN | NaN |
In [41]:
<span class="n">df</span><span class="p">[</span><span class="s1">'tag'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'a'</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">+</span> <span class="p">[</span><span class="s1">'b'</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">+</span> <span class="p">[</span><span class="s1">'c'</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span> <span class="c1">#添加一列</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'tag'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'a'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>
*
<
/
span
>
<
span
class
=
"mi"
>
2
<
/
span
>
<
span
class
=
"o"
>
+
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'b'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>
*
<
/
span
>
<
span
class
=
"mi"
>
2
<
/
span
>
<
span
class
=
"o"
>
+
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'c'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>
*
<
/
span
>
<
span
class
=
"mi"
>
2
<
/
span
>
<
span
class
=
"c1"
>
#添加一列</span>
|
In [42]:
<span class="n">df</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[42]:
| A | B | C | D | tag | |
|---|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 | a |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c |
In [43]:
<span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">tag</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="s1">'a'</span><span class="p">,</span> <span class="s1">'c'</span><span class="p">])]</span> <span class="c1"># 筛选 通过 isin </span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
tag
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
isin
<
/
span
>
<
span
class
=
"p"
>
(
[
<
/
span
>
<
span
class
=
"s1"
>
'a'
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"s1"
>
'c'
<
/
span
>
<
span
class
=
"p"
>
]
)
]
<
/
span
>
<
span
class
=
"c1"
>
# 筛选 通过 isin </span>
|
Out[43]:
| A | B | C | D | tag | |
|---|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 | a |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c |
修改数据
In [44]:
<span class="n">df</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[44]:
| A | B | C | D | tag | |
|---|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 | a |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c |
In [45]:
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">),</span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">'20160301'</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">6</span><span class="p">))</span> <span class="n">s</span>
|
1
2
|
<
span
class
=
"n"
>
s
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"n"
>
pd
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
Series
<
/
span
>
<
span
class
=
"p"
>
(
<
/
span
>
<
span
class
=
"n"
>
np
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
arange
<
/
span
>
<
span
class
=
"p"
>
(
<
/
span
>
<
span
class
=
"mi"
>
6
<
/
span
>
<
span
class
=
"p"
>
)
,
<
/
span
>
<
span
class
=
"n"
>
index
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"n"
>
pd
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
date_range
<
/
span
>
<
span
class
=
"p"
>
(
<
/
span
>
<
span
class
=
"s1"
>
'20160301'
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"n"
>
periods
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"mi"
>
6
<
/
span
>
<
span
class
=
"p"
>
)
)
<
/
span
>
<
span
class
=
"n"
>
s
<
/
span
>
|
Out[45]:
2016-03-01 0 2016-03-02 1 2016-03-03 2 2016-03-04 3 2016-03-05 4 2016-03-06 5 Freq: D, dtype: int64
|
1
2
3
4
5
6
7
|
2016
-
03
-
01
0
2016
-
03
-
02
1
2016
-
03
-
03
2
2016
-
03
-
04
3
2016
-
03
-
05
4
2016
-
03
-
06
5
Freq
:
D
,
dtype
:
int64
|
In [46]:
<span class="n">df</span><span class="p">[</span><span class="s1">'E'</span><span class="p">]</span> <span class="o">=</span> <span class="n">s</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'E'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"n"
>
s
<
/
span
>
|
In [47]:
<span class="n">df</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[47]:
| A | B | C | D | tag | E | |
|---|---|---|---|---|---|---|
| 2016-03-01 | 1.313419 | 0.826457 | -1.574146 | 0.525008 | a | 0 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a | 1 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b | 2 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b | 3 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c | 4 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c | 5 |
In [48]:
<span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">'20160301'</span><span class="p">,</span> <span class="s1">'A'</span><span class="p">]</span> <span class="o">=</span> <span class="mf">0.2</span> <span class="c1"># df['20160301', 'A'] = 0.2 would not update this cell</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
loc
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'20160301'
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"s1"
>
'A'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"mf"
>
0.2
<
/
span
>
<
span
class
=
"c1"
>
# df['20160301', 'A'] = 0.2 would not update this cell</span>
|
In [49]:
<span class="n">df</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[49]:
| A | B | C | D | tag | E | |
|---|---|---|---|---|---|---|
| 2016-03-01 | 0.200000 | 0.826457 | -1.574146 | 0.525008 | a | 0 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a | 1 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b | 2 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b | 3 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c | 4 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c | 5 |
In [50]:
<span class="n">df</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">'20160301'</span><span class="p">),</span> <span class="s1">'A'</span><span class="p">]</span> <span class="o">=</span> <span class="mf">0.4</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
at
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"n"
>
pd
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
Timestamp
<
/
span
>
<
span
class
=
"p"
>
(
<
/
span
>
<
span
class
=
"s1"
>
'20160301'
<
/
span
>
<
span
class
=
"p"
>
)
,
<
/
span
>
<
span
class
=
"s1"
>
'A'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"mf"
>
0.4
<
/
span
>
|
In [51]:
<span class="n">df</span>
|
1
|
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[51]:
| A | B | C | D | tag | E | |
|---|---|---|---|---|---|---|
| 2016-03-01 | 0.400000 | 0.826457 | -1.574146 | 0.525008 | a | 0 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a | 1 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b | 2 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b | 3 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c | 4 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c | 5 |
In [52]:
<span class="n">df</span><span class="o">.</span><span class="n">iat</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mf">0.6</span> <span class="n">df</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
iat
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"mf"
>
0.6
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[52]:
| A | B | C | D | tag | E | |
|---|---|---|---|---|---|---|
| 2016-03-01 | 0.600000 | 0.826457 | -1.574146 | 0.525008 | a | 0 |
| 2016-03-02 | 0.028397 | -1.009349 | 0.327014 | 0.918248 | a | 1 |
| 2016-03-03 | -0.857008 | -1.682695 | 0.646229 | -0.183377 | b | 2 |
| 2016-03-04 | -1.112885 | -1.491662 | -1.114824 | -0.115619 | b | 3 |
| 2016-03-05 | -0.448713 | -0.163651 | -1.230295 | 1.106656 | c | 4 |
| 2016-03-06 | -0.267867 | 0.092313 | -0.480238 | -0.809923 | c | 5 |
In [53]:
<span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="s1">'A'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">16</span><span class="p">)</span> <span class="n">df</span>
|
1
2
|
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
loc
<
/
span
>
<
span
class
=
"p"
>
[
:
,
<
/
span
>
<
span
class
=
"s1"
>
'A'
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"n"
>
np
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
arange
<
/
span
>
<
span
class
=
"p"
>
(
<
/
span
>
<
span
class
=
"mi"
>
10
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"mi"
>
16
<
/
span
>
<
span
class
=
"p"
>
)
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
|
Out[53]:
| A | B | C | D | tag | E | |
|---|---|---|---|---|---|---|
| 2016-03-01 | 10 | 0.826457 | -1.574146 | 0.525008 | a | 0 |
| 2016-03-02 | 11 | -1.009349 | 0.327014 | 0.918248 | a | 1 |
| 2016-03-03 | 12 | -1.682695 | 0.646229 | -0.183377 | b | 2 |
| 2016-03-04 | 13 | -1.491662 | -1.114824 | -0.115619 | b | 3 |
| 2016-03-05 | 14 | -0.163651 | -1.230295 | 1.106656 | c | 4 |
| 2016-03-06 | 15 | 0.092313 | -0.480238 | -0.809923 | c | 5 |
In [54]:
<span class="n">df2</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="p">[</span><span class="s1">'B'</span><span class="p">,</span> <span class="s1">'C'</span><span class="p">]]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="n">df2</span><span class="p">[</span><span class="n">df2</span> <span class="o">></span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="n">df2</span> <span class="n">df2</span>
|
1
2
3
|
<
span
class
=
"n"
>
df2
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"n"
>
df
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
loc
<
/
span
>
<
span
class
=
"p"
>
[
:
,
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"s1"
>
'B'
<
/
span
>
<
span
class
=
"p"
>
,
<
/
span
>
<
span
class
=
"s1"
>
'C'
<
/
span
>
<
span
class
=
"p"
>
]
]
<
/
span
>
<
span
class
=
"o"
>
.
<
/
span
>
<
span
class
=
"n"
>
copy
<
/
span
>
<
span
class
=
"p"
>
(
)
<
/
span
>
<
span
class
=
"n"
>
df2
<
/
span
>
<
span
class
=
"p"
>
[
<
/
span
>
<
span
class
=
"n"
>
df2
<
/
span
>
<
span
class
=
"o"
>>
<
/
span
>
<
span
class
=
"mi"
>
0
<
/
span
>
<
span
class
=
"p"
>
]
<
/
span
>
<
span
class
=
"o"
>=
<
/
span
>
<
span
class
=
"o"
>
-
<
/
span
>
<
span
class
=
"n"
>
df2
<
/
span
>
<
span
class
=
"n"
>
df2
<
/
span
>
|
Out[54]:
| B | C | |
|---|---|---|
| 2016-03-01 | -0.826457 | -1.574146 |
| 2016-03-02 | -1.009349 | -0.327014 |
| 2016-03-03 | -1.682695 | -0.646229 |
| 2016-03-04 | -1.491662 | -1.114824 |
| 2016-03-05 | -0.163651 | -1.230295 |
| 2016-03-06 | -0.092313 | -0.480238 |
1027

被折叠的 条评论
为什么被折叠?



