pandas

In [3]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [7]:

s = pd.Series([1,2,3,np.nan,6,9])
s

Out[7]:

0    1.0

1    2.0

2    3.0

3    NaN

4    6.0

5    9.0

dtype: float64

In [ ]:

dates = pd.date_range('20170101', periods=6)
dates

Out[ ]:

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03','2017-01-04',

               '2017-01-05','2017-01-06'],

             dtype='datetime64[ns]', freq='D')

In [16]:

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Out[16]:

A

B

C

D

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

2017-01-02

0.922306

0.817418

0.152669

-0.009996

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

2017-01-06

1.033851

0.355536

0.326386

-0.477157

In [19]:

df2 = pd.DataFrame({'A':1,
                    'B':pd.Timestamp('20130102'),
                    'C':pd.Series(1, index=list(range(4)),dtype='float32'),
                    'D':np.array([3]*4,dtype='int32'),
                    'E':pd.Categorical(['test', 'train', 'test', 'train']),
                    'F':'foo'})
df2

Out[19]:

A

B

C

D

E

F

0

1

2013-01-02

1.0

3

test

foo

1

1

2013-01-02

1.0

3

train

foo

2

1

2013-01-02

1.0

3

test

foo

3

1

2013-01-02

1.0

3

train

foo

In [20]:

df2.dtypes

Out[20]:

A             int64

B    datetime64[ns]

C           float32

D             int32

E          category

F            object

dtype: object

In [22]:

df.head()

Out[22]:

A

B

C

D

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

2017-01-02

0.922306

0.817418

0.152669

-0.009996

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

In [24]:

df.tail(3)

Out[24]:

A

B

C

D

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

2017-01-06

1.033851

0.355536

0.326386

-0.477157

In [25]:

df.index

Out[25]:

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03','2017-01-04',

               '2017-01-05','2017-01-06'],

             dtype='datetime64[ns]', freq='D')

In [27]:

df.columns

Out[27]:

Index(['A', 'B', 'C', 'D'], dtype='object')

In [28]:

df.values

Out[28]:

array([[-0.1077083 , -1.02418217, -0.67825098, -2.22939769],

       [ 0.92230625,  0.81741758, 0.15266857, -0.00999563],

       [-2.15156187,-0.39679118, -1.03003536, -0.56338102],

       [-0.6566828 ,-0.8778064 ,  0.56461162,  0.84630068],

       [-0.12957523,-0.70444375, -0.69742558, -0.32805333],

       [ 1.03385071,  0.35553587, 0.3263864 , -0.47715656]])

In [30]:

df.describe()

Out[30]:

A

B

C

D

count

6.000000

6.000000

6.000000

6.000000

mean

-0.181562

-0.305045

-0.227008

-0.460281

std

1.167095

0.736130

0.655260

1.006785

min

-2.151562

-1.024182

-1.030035

-2.229398

25%

-0.524906

-0.834466

-0.692632

-0.541825

50%

-0.118642

-0.550617

-0.262791

-0.402605

75%

0.664803

0.167454

0.282957

-0.089510

max

1.033851

0.817418

0.564612

0.846301

In [31]:

df.T

Out[31]:

2017-01-01 00:00:00

2017-01-02 00:00:00

2017-01-03 00:00:00

2017-01-04 00:00:00

2017-01-05 00:00:00

2017-01-06 00:00:00

A

-0.107708

0.922306

-2.151562

-0.656683

-0.129575

1.033851

B

-1.024182

0.817418

-0.396791

-0.877806

-0.704444

0.355536

C

-0.678251

0.152669

-1.030035

0.564612

-0.697426

0.326386

D

-2.229398

-0.009996

-0.563381

0.846301

-0.328053

-0.477157

In [46]:

df.sort_index(axis=1, ascending=False)

Out[46]:

D

C

B

A

2017-01-01

-2.229398

-0.678251

-1.024182

-0.107708

2017-01-02

-0.009996

0.152669

0.817418

0.922306

2017-01-03

-0.563381

-1.030035

-0.396791

-2.151562

2017-01-04

0.846301

0.564612

-0.877806

-0.656683

2017-01-05

-0.328053

-0.697426

-0.704444

-0.129575

2017-01-06

-0.477157

0.326386

0.355536

1.033851

In [64]:

df.sort_values(by='B')

Out[64]:

A

B

C

D

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2017-01-06

1.033851

0.355536

0.326386

-0.477157

2017-01-02

0.922306

0.817418

0.152669

-0.009996

In [65]:

df['A']

Out[65]:

2017-01-01   -0.107708

2017-01-02    0.922306

2017-01-03   -2.151562

2017-01-04   -0.656683

2017-01-05   -0.129575

2017-01-06    1.033851

Freq: D, Name: A, dtype: float64

In [66]:

df[0:3]

Out[66]:

A

B

C

D

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

2017-01-02

0.922306

0.817418

0.152669

-0.009996

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

In [69]:

df.loc[dates[0]]

Out[69]:

A   -0.107708

B   -1.024182

C   -0.678251

D   -2.229398

Name: 2017-01-01 00:00:00, dtype: float64

In [70]:

df.loc[:, 'A':'B']

Out[70]:

A

B

2017-01-01

-0.107708

-1.024182

2017-01-02

0.922306

0.817418

2017-01-03

-2.151562

-0.396791

2017-01-04

-0.656683

-0.877806

2017-01-05

-0.129575

-0.704444

2017-01-06

1.033851

0.355536

In [71]:

df.loc['20170101':'20170103', 'A':'B']

Out[71]:

A

B

2017-01-01

-0.107708

-1.024182

2017-01-02

0.922306

0.817418

2017-01-03

-2.151562

-0.396791

In [72]:

df.loc['20170104', 'A':'B']

Out[72]:

A   -0.656683

B   -0.877806

Name: 2017-01-04 00:00:00, dtype: float64

In [73]:

df.loc['20170105','C']

Out[73]:

-0.69742558177888991

In [76]:

df.iloc[3]

Out[76]:

A   -0.656683

B   -0.877806

C    0.564612

D    0.846301

Name: 2017-01-04 00:00:00, dtype: float64

In [77]:

df.iloc[3:5, 0:2]

Out[77]:

A

B

2017-01-04

-0.656683

-0.877806

2017-01-05

-0.129575

-0.704444

In [79]:

df.iloc[[1,3,5],[0,3]]

Out[79]:

A

D

2017-01-02

0.922306

-0.009996

2017-01-04

-0.656683

0.846301

2017-01-06

1.033851

-0.477157

In [80]:

df.iloc[:,1:3]

Out[80]:

B

C

2017-01-01

-1.024182

-0.678251

2017-01-02

0.817418

0.152669

2017-01-03

-0.396791

-1.030035

2017-01-04

-0.877806

0.564612

2017-01-05

-0.704444

-0.697426

2017-01-06

0.355536

0.326386

In [81]:

df.iloc[1,1]

Out[81]:

0.81741758295028633

In [82]:

df.iat[1,1]

Out[82]:

0.81741758295028633

In [83]:

df[df.A>0]

Out[83]:

A

B

C

D

2017-01-02

0.922306

0.817418

0.152669

-0.009996

2017-01-06

1.033851

0.355536

0.326386

-0.477157

In [84]:

df[df>0]

Out[84]:

A

B

C

D

2017-01-01

NaN

NaN

NaN

NaN

2017-01-02

0.922306

0.817418

0.152669

NaN

2017-01-03

NaN

NaN

NaN

NaN

2017-01-04

NaN

NaN

0.564612

0.846301

2017-01-05

NaN

NaN

NaN

NaN

2017-01-06

1.033851

0.355536

0.326386

NaN

In [86]:

df3=df.copy()

df3['E']=['one','one', 'two','three','four','five']

df3

Out[86]:

A

B

C

D

E

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

one

2017-01-02

0.922306

0.817418

0.152669

-0.009996

one

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

two

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

three

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

four

2017-01-06

1.033851

0.355536

0.326386

-0.477157

five

In [88]:

df3[df3['E'].isin(['two', 'four'])]

Out[88]:

A

B

C

D

E

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

two

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

four

  • 设置 #### 1、设置一个新的列:

In [93]:

s1 = pd.Series([1,2,3,4,5,6],index=pd.date_range('20170102',periods=6))

s1

Out[93]:

2017-01-02    1

2017-01-03    2

2017-01-04    3

2017-01-05    4

2017-01-06    5

2017-01-07    6

Freq: D, dtype: int64

2、通过标签设置新的值:

In [94]:

df['F'] = s1

df

Out[94]:

A

B

C

D

F

2017-01-01

-0.107708

-1.024182

-0.678251

-2.229398

NaN

2017-01-02

0.922306

0.817418

0.152669

-0.009996

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2.0

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

3.0

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

4.0

2017-01-06

1.033851

0.355536

0.326386

-0.477157

5.0

In [95]:

df.at[dates[0], 'A'] = 0

df

Out[95]:

A

B

C

D

F

2017-01-01

0.000000

-1.024182

-0.678251

-2.229398

NaN

2017-01-02

0.922306

0.817418

0.152669

-0.009996

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2.0

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

3.0

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

4.0

2017-01-06

1.033851

0.355536

0.326386

-0.477157

5.0

In [97]:

df.iat[0,1] = 0

df

Out[97]:

A

B

C

D

F

2017-01-01

0.000000

0.000000

-0.678251

-2.229398

NaN

2017-01-02

0.922306

0.817418

0.152669

-0.009996

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

-0.563381

2.0

2017-01-04

-0.656683

-0.877806

0.564612

0.846301

3.0

2017-01-05

-0.129575

-0.704444

-0.697426

-0.328053

4.0

2017-01-06

1.033851

0.355536

0.326386

-0.477157

5.0

4、通过numpy数组设置一组新值:

In [99]:

df.loc[:,'D'] = np.array([5]*len(df))

df

Out[99]:

A

B

C

D

F

2017-01-01

0.000000

0.000000

-0.678251

5

NaN

2017-01-02

0.922306

0.817418

0.152669

5

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

5

2.0

2017-01-04

-0.656683

-0.877806

0.564612

5

3.0

2017-01-05

-0.129575

-0.704444

-0.697426

5

4.0

2017-01-06

1.033851

0.355536

0.326386

5

5.0

5、通过where操作来设置新的值:

In [101]:

df4 = df.copy()

df4[df4 >0] = -df4

df4

Out[101]:

A

B

C

D

F

2017-01-01

0.000000

0.000000

-0.678251

-5

NaN

2017-01-02

-0.922306

-0.817418

-0.152669

-5

-1.0

2017-01-03

-2.151562

-0.396791

-1.030035

-5

-2.0

2017-01-04

-0.656683

-0.877806

-0.564612

-5

-3.0

2017-01-05

-0.129575

-0.704444

-0.697426

-5

-4.0

2017-01-06

-1.033851

-0.355536

-0.326386

-5

-5.0

四、缺失值处理

在pandas中,使用np.nan来代替缺失值,这些值将默认不会包含在计算中,详细请参阅:MissingData Section

1、reindex()方法可以对指定轴上的索引进行改变/增加/删除操作,这将返回原始数据的一个拷贝:

In [105]:

df1 = df.reindex(index=dates[0:4], columns=list(df.columns) +['E'])

df1

Out[105]:

A

B

C

D

F

E

2017-01-01

0.000000

0.000000

-0.678251

5

NaN

NaN

2017-01-02

0.922306

0.817418

0.152669

5

1.0

NaN

2017-01-03

-2.151562

-0.396791

-1.030035

5

2.0

NaN

2017-01-04

-0.656683

-0.877806

0.564612

5

3.0

NaN

In [106]:

df1.loc[dates[0]:dates[1], 'E'] = 1

df1

Out[106]:

A

B

C

D

F

E

2017-01-01

0.000000

0.000000

-0.678251

5

NaN

1.0

2017-01-02

0.922306

0.817418

0.152669

5

1.0

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

5

2.0

NaN

2017-01-04

-0.656683

-0.877806

0.564612

5

3.0

NaN

2、去掉包含缺失值的行:

In [107]:

df1.dropna(how='any')

Out[107]:

A

B

C

D

F

E

2017-01-02

0.922306

0.817418

0.152669

5

1.0

1.0

3、对缺失值进行填充:

In [108]:

df1.fillna(value=5)

Out[108]:

A

B

C

D

F

E

2017-01-01

0.000000

0.000000

-0.678251

5

5.0

1.0

2017-01-02

0.922306

0.817418

0.152669

5

1.0

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

5

2.0

5.0

2017-01-04

-0.656683

-0.877806

0.564612

5

3.0

5.0

4、对数据进行布尔填充:

In [109]:

pd.isnull(df)

Out[109]:

A

B

C

D

F

2017-01-01

False

False

False

False

True

2017-01-02

False

False

False

False

False

2017-01-03

False

False

False

False

False

2017-01-04

False

False

False

False

False

2017-01-05

False

False

False

False

False

2017-01-06

False

False

False

False

False

五、 相关操作

详情请参阅BasicSection OnBinary Ops

  • 统计(相关操作通常情况不包括缺失值)

1、执行描述性统计:

In [112]:

df.mean()

Out[112]:

A   -0.163610

B   -0.134348

C   -0.227008

D    5.000000

F    3.000000

dtype: float64

2、在其它轴上进行相同操作:

In [113]:

df.mean(1)

Out[113]:

2017-01-01    1.080437

2017-01-02    1.578478

2017-01-03    0.684322

2017-01-04    1.406024

2017-01-05    1.493711

2017-01-06    2.343155

Freq: D, dtype: float64

3、对于拥有不同维度,需要对齐的对象进行操作。Pandas会自动的沿着指定的维度进行广播:

In [120]:

 s3 =pd.Series([1,3,4,np.nan,6,8], index=dates).shift(1)

 s3

                                                    

Out[120]:

2017-01-01    NaN

2017-01-02    1.0

2017-01-03    3.0

2017-01-04    4.0

2017-01-05    NaN

2017-01-06    6.0

Freq: D, dtype: float64

In [123]:

df.sub(s3, axis='index')

Out[123]:

A

B

C

D

F

2017-01-01

NaN

NaN

NaN

NaN

NaN

2017-01-02

-0.077694

-0.182582

-0.847331

4.0

0.0

2017-01-03

-5.151562

-3.396791

-4.030035

2.0

-1.0

2017-01-04

-4.656683

-4.877806

-3.435388

1.0

-1.0

2017-01-05

NaN

NaN

NaN

NaN

NaN

2017-01-06

-4.966149

-5.644464

-5.673614

-1.0

-1.0

In [122]:

df

Out[122]:

A

B

C

D

F

2017-01-01

0.000000

0.000000

-0.678251

5

NaN

2017-01-02

0.922306

0.817418

0.152669

5

1.0

2017-01-03

-2.151562

-0.396791

-1.030035

5

2.0

2017-01-04

-0.656683

-0.877806

0.564612

5

3.0

2017-01-05

-0.129575

-0.704444

-0.697426

5

4.0

2017-01-06

1.033851

0.355536

0.326386

5

5.0

In [126]:

df.apply(np.cumsum)

Out[126]:

A

B

C

D

F

2017-01-01

0.000000

0.000000

-0.678251

5

NaN

2017-01-02

0.922306

0.817418

-0.525582

10

1.0

2017-01-03

-1.229256

0.420626

-1.555618

15

3.0

2017-01-04

-1.885938

-0.457180

-0.991006

20

6.0

2017-01-05

-2.015514

-1.161624

-1.688432

25

10.0

2017-01-06

-0.981663

-0.806088

-1.362045

30

15.0

In [127]:

df.apply(lambda x:x.max() - x.min())

Out[127]:

A    3.185413

B    1.695224

C    1.594647

D    0.000000

F    4.000000

dtype: float64

In [128]:

s = pd.Series(np.random.randint(0, 7, size=10))

s

Out[128]:

0    6

1    1

2    3

3    6

4    5

5    3

6    3

7    3

8    5

9    4

dtype: int32

In [129]:

s.value_counts()

Out[129]:

3    4

6    2

5    2

4    1

1    1

dtype: int64

In [130]:

s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA','dog', 'cat'])

s.str.lower()

Out[130]:

0       a

1       b

2       c

3    aaba

4    baca

5     NaN

6    caba

7     dog

8     cat

dtype: object

In [131]:

df = pd.DataFrame(np.random.randn(10, 4))

df

Out[131]:

0

1

2

3

0

0.112665

0.048688

0.202798

0.025471

1

-1.390846

-0.538326

-0.078290

-0.995754

2

2.089888

0.442472

0.038945

-1.785647

3

-0.063596

-0.104295

1.330084

0.673432

4

-1.090966

-1.106314

1.203585

0.752954

5

0.650855

-0.551656

-0.476231

0.874108

6

0.515420

-0.099656

-1.028804

0.163295

7

-1.587624

-1.178458

0.869725

0.553387

8

-1.547900

-0.387638

0.458020

-0.131574

9

0.696191

-0.826334

0.482097

-1.577040

In [135]:

pieces = [df[:3], df[3:7], df[7:]]

pieces

Out[135]:

[          0         1         2         3

 0  0.112665 0.048688  0.202798  0.025471

 1 -1.390846 -0.538326-0.078290 -0.995754

 2  2.089888 0.442472  0.038945 -1.785647,

           0         1         2         3

 3 -0.063596 -0.104295  1.330084 0.673432

 4 -1.090966 -1.106314  1.203585 0.752954

 5  0.650855 -0.551656 -0.476231  0.874108

 6  0.515420 -0.099656 -1.028804  0.163295,

           0         1         2         3

 7 -1.587624 -1.178458  0.869725 0.553387

 8 -1.547900 -0.387638  0.458020 -0.131574

 9  0.696191 -0.826334  0.482097 -1.577040]

In [136]:

pd.concat(pieces)

Out[136]:

0

1

2

3

0

0.112665

0.048688

0.202798

0.025471

1

-1.390846

-0.538326

-0.078290

-0.995754

2

2.089888

0.442472

0.038945

-1.785647

3

-0.063596

-0.104295

1.330084

0.673432

4

-1.090966

-1.106314

1.203585

0.752954

5

0.650855

-0.551656

-0.476231

0.874108

6

0.515420

-0.099656

-1.028804

0.163295

7

-1.587624

-1.178458

0.869725

0.553387

8

-1.547900

-0.387638

0.458020

-0.131574

9

0.696191

-0.826334

0.482097

-1.577040

In [137]:

left = pd.DataFrame({'key':['foo', 'foo'], 'lval':[1,2]})

right = pd.DataFrame({'key':['foo', 'foo'], 'rval':[4,5]})

left

Out[137]:

key

lval

0

foo

1

1

foo

2

In [138]:

right

Out[138]:

key

rval

0

foo

4

1

foo

5

In [140]:

pd.merge(left, right, on='key')

Out[140]:

key

lval

rval

0

foo

1

4

1

foo

1

5

2

foo

2

4

3

foo

2

5

In [141]:

df = pd.DataFrame(np.random.randn(8,4),columns=['A','B','C','D'])

df

Out[141]:

A

B

C

D

0

-0.277390

-0.004491

0.328250

-0.752631

1

1.246361

-0.560859

0.140553

-2.784473

2

-0.777135

-1.778617

1.686014

-0.227608

3

-0.016098

0.530940

-1.249833

1.977533

4

-1.018847

0.540397

-0.241101

-1.287702

5

1.836941

1.422687

0.376302

-0.804313

6

0.279718

0.694741

0.363781

1.292481

7

0.575892

-2.357896

0.781150

0.789141

In [142]:

s = df.loc[3]

df.append(s, ignore_index=True)

Out[142]:

A

B

C

D

0

-0.277390

-0.004491

0.328250

-0.752631

1

1.246361

-0.560859

0.140553

-2.784473

2

-0.777135

-1.778617

1.686014

-0.227608

3

-0.016098

0.530940

-1.249833

1.977533

4

-1.018847

0.540397

-0.241101

-1.287702

5

1.836941

1.422687

0.376302

-0.804313

6

0.279718

0.694741

0.363781

1.292481

7

0.575892

-2.357896

0.781150

0.789141

8

-0.016098

0.530940

-1.249833

1.977533

In [143]:

df = pd.DataFrame({'A':['foo', 'bar', 'foo', 'bar',

                       'foo', 'bar', 'foo', 'bar'],

                  'B':['one', 'one', 'two', 'three',

                       'two', 'two', 'one', 'three'],

                  'C':np.random.randn(8),

                   'D':np.random.randn(8)})

df

Out[143]:

A

B

C

D

0

foo

one

-0.200720

-2.172299

1

bar

one

0.701414

1.142280

2

foo

two

-0.384221

-0.842057

3

bar

three

-0.883506

-0.683085

4

foo

two

1.438267

0.485135

5

bar

two

0.252307

1.145261

6

foo

one

0.572104

0.869513

7

bar

three

0.760147

0.056735

In [146]:

df.groupby('A').sum()

Out[146]:

C

D

A

bar

0.830361

1.661191

foo

1.425430

-1.659709

In [147]:

df.groupby(['A', 'B']).sum()

Out[147]:

C

D

A

B

bar

one

0.701414

1.142280

three

-0.123359

-0.626350

two

0.252307

1.145261

foo

one

0.371384

-1.302786

two

1.054046

-0.356923

In [158]:

tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',

                    'foo', 'foo', 'qux', 'qux'],

                   ['one', 'two', 'one', 'two',

                     'one', 'two', 'one', 'two']]))

tuples

Out[158]:

[('bar', 'one'),

 ('bar', 'two'),

 ('baz', 'one'),

 ('baz', 'two'),

 ('foo', 'one'),

 ('foo', 'two'),

 ('qux', 'one'),

 ('qux', 'two')]

In [159]:

index = pd.MultiIndex.from_tuples(tuples, names=['first','second'])

index

Out[159]:

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],

           labels=[[0, 0, 1,1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],

           names=['first','second'])

In [163]:

df = pd.DataFrame(np.random.randn(8, 2), index=index,columns=['A', 'B'])

df

Out[163]:

A

B

first

second

bar

one

2.496134

-0.893512

two

2.286660

-0.248709

baz

one

0.147845

0.985195

two

-0.529439

-0.095119

foo

one

0.618096

-0.364225

two

-0.090616

0.032860

qux

one

-0.912772

-0.715831

two

-1.164991

-1.050507

In [164]:

stacked = df.stack()

stacked

Out[164]:

first  second  

bar    one     A   2.496134

               B   -0.893512

       two     A   2.286660

               B   -0.248709

baz    one     A   0.147845

               B   0.985195

       two     A  -0.529439

               B   -0.095119

foo    one     A   0.618096

               B   -0.364225

       two     A  -0.090616

               B    0.032860

qux    one     A  -0.912772

               B   -0.715831

       two    A   -1.164991

               B   -1.050507

dtype: float64

In [166]:

stacked.unstack()

Out[166]:

A

B

first

second

bar

one

2.496134

-0.893512

two

2.286660

-0.248709

baz

one

0.147845

0.985195

two

-0.529439

-0.095119

foo

one

0.618096

-0.364225

two

-0.090616

0.032860

qux

one

-0.912772

-0.715831

two

-1.164991

-1.050507

In [167]:

stacked.unstack(1)

Out[167]:

second

one

two

first

bar

A

2.496134

2.286660

B

-0.893512

-0.248709

baz

A

0.147845

-0.529439

B

0.985195

-0.095119

foo

A

0.618096

-0.090616

B

-0.364225

0.032860

qux

A

-0.912772

-1.164991

B

-0.715831

-1.050507

In [168]:

stacked.unstack(0)

Out[168]:

first

bar

baz

foo

qux

second

one

A

2.496134

0.147845

0.618096

-0.912772

B

-0.893512

0.985195

-0.364225

-0.715831

two

A

2.286660

-0.529439

-0.090616

-1.164991

B

-0.248709

-0.095119

0.032860

-1.050507

In [ ]:

 


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值