import pandas as pd
import numpy as np
df = pd.DataFrame({'key1': ['a', 'a', 'b', 'b', 'a'],
                   'key2': ['one', 'two', 'one', 'two', 'one'],
                   'data1': np.random.randn(5),
                   'data2': np.random.randn(5)})
df
  key1 key2     data1     data2
0    a  one  1.364596  0.352792
1    a  two  1.685626  0.236429
2    b  one -0.537077 -0.018004
3    b  two  1.389866  0.826195
4    a  one  0.849733  1.619383
grouped = df['data1'].groupby(df['key1'])
grouped
<pandas.core.groupby.groupby.SeriesGroupBy object at 0x00000261B3E2CEB8>
grouped.mean()
key1
a 1.299985
b 0.426394
Name: data1, dtype: float64
means = df['data1'].groupby([df['key1'], df['key2']]).mean()
means
key1 key2
a one 1.107165
two 1.685626
b one -0.537077
two 1.389866
Name: data1, dtype: float64
means.unstack()
key2       one       two
key1
a     1.107165  1.685626
b    -0.537077  1.389866
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
df['data1'].groupby([states, years]).mean()
California 2005 1.685626
2006 -0.537077
Ohio 2005 1.377231
2006 0.849733
Name: data1, dtype: float64
df.groupby('key1').mean()
         data1     data2
key1
a     1.299985  0.736201
b     0.426394  0.404096
df.groupby(['key1', 'key2']).mean()
              data1     data2
key1 key2
a    one   1.107165  0.986087
     two   1.685626  0.236429
b    one  -0.537077 -0.018004
     two   1.389866  0.826195
df.groupby(['key1', 'key2']).size()
key1 key2
a one 2
two 1
b one 1
two 1
dtype: int64
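size() counts the rows in each group; by contrast, count() tallies non-null values per column. A minimal sketch using the same df (which has no missing values, so each column simply shows the group sizes):
df.groupby('key1').count()   # per-column counts of non-null values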
for name, group in df.groupby('key1'):
    print(name)
    print(group)
a
key1 key2 data1 data2
0 a one 1.364596 0.352792
1 a two 1.685626 0.236429
4 a one 0.849733 1.619383
b
key1 key2 data1 data2
2 b one -0.537077 -0.018004
3 b two 1.389866 0.826195
for (k1, k2), group in df.groupby(['key1', 'key2']):
    print((k1, k2))
    print(group)
('a', 'one')
key1 key2 data1 data2
0 a one 1.364596 0.352792
4 a one 0.849733 1.619383
('a', 'two')
key1 key2 data1 data2
1 a two 1.685626 0.236429
('b', 'one')
key1 key2 data1 data2
2 b one -0.537077 -0.018004
('b', 'two')
key1 key2 data1 data2
3 b two 1.389866 0.826195
pieces = dict(list(df.groupby('key1')))
pieces['b']
  key1 key2     data1     data2
2    b  one -0.537077 -0.018004
3    b  two  1.389866  0.826195
df.dtypes
key1 object
key2 object
data1 float64
data2 float64
dtype: object
grouped = df.groupby(df.dtypes, axis=1)
for dtype, group in grouped:
    print(dtype)
    print(group)
float64
data1 data2
0 1.364596 0.352792
1 1.685626 0.236429
2 -0.537077 -0.018004
3 1.389866 0.826195
4 0.849733 1.619383
object
key1 key2
0 a one
1 a two
2 b one
3 b two
4 a one
df['data1'].groupby(df['key1'])
<pandas.core.groupby.groupby.SeriesGroupBy object at 0x00000261B3E74FD0>
df.groupby(['key1', 'key2'])[['data2']].mean()
              data2
key1 key2
a    one   0.986087
     two   0.236429
b    one  -0.018004
     two   0.826195
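Indexing the grouped object with a single column name (rather than a one-element list) returns a grouped Series instead of a grouped DataFrame. A minimal sketch, assuming the same df as above:
s_grouped = df.groupby(['key1', 'key2'])['data2']
s_grouped.mean()   # same numbers as above, but returned as a Series named 'data2'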
people = pd.DataFrame(np.random.randn(5, 5),
                      columns=['a', 'b', 'c', 'd', 'e'],
                      index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
people.iloc[2:3, [1, 2]] = np.nan
people
               a         b         c         d         e
Joe     0.037796  1.631083  0.455609 -1.405327  0.495940
Steve   0.304393  0.326005  0.146350  0.075903 -0.263559
Wes    -0.055827       NaN       NaN  1.268622 -0.541199
Jim     2.034394  0.818811  0.333991  0.158734  1.187207
Travis  2.719235 -0.459516 -0.292250  0.158169  1.102169
mapping = {'a': 'red', 'b': 'red', 'c': 'blue',
           'd': 'blue', 'e': 'red', 'f': 'orange'}
by_column = people.groupby(mapping, axis=1)
by_column.sum()
            blue       red
Joe    -0.949718  2.164819
Steve   0.222253  0.366838
Wes     1.268622 -0.597026
Jim     0.492726  4.040412
Travis -0.134082  3.361889
map_series = pd.Series(mapping)
map_series
a red
b red
c blue
d blue
e red
f orange
dtype: object
people.groupby(map_series, axis=1).count()
        blue  red
Joe        2    3
Steve      2    3
Wes        1    2
Jim        2    3
Travis     2    3
people.groupby(len).sum()
          a         b         c         d         e
3  2.016363  2.449894  0.789601  0.022029  1.141948
5  0.304393  0.326005  0.146350  0.075903 -0.263559
6  2.719235 -0.459516 -0.292250  0.158169  1.102169
key_list = ['one', 'one', 'one', 'two', 'two']
people.groupby([len, key_list]).min()
              a         b         c         d         e
3 one -0.055827  1.631083  0.455609 -1.405327 -0.541199
  two  2.034394  0.818811  0.333991  0.158734  1.187207
5 one  0.304393  0.326005  0.146350  0.075903 -0.263559
6 two  2.719235 -0.459516 -0.292250  0.158169  1.102169
columns = pd.MultiIndex.from_arrays([['US', 'US', 'US', 'JP', 'JP'],
                                     [1, 3, 5, 1, 3]],
                                    names=['cty', 'tenor'])
hier_df = pd.DataFrame(np.random.randn(4, 5), columns=columns)
hier_df
cty          US                            JP
tenor         1         3         5         1         3
0     -1.865861  0.145454  1.476689  0.798411  0.546048
1      1.887149  0.218613  0.951165  2.790860 -2.419909
2     -0.681806 -0.490238 -2.215909  0.375548  0.145144
3      0.501322  1.358101 -1.069453  1.916614 -0.018305
hier_df.groupby(level='cty', axis=1).count()
df
  key1 key2     data1     data2
0    a  one  1.364596  0.352792
1    a  two  1.685626  0.236429
2    b  one -0.537077 -0.018004
3    b  two  1.389866  0.826195
4    a  one  0.849733  1.619383
grouped = df.groupby('key1')
grouped['data1'].quantile(0.9)
key1
a 1.621420
b 1.197171
Name: data1, dtype: float64
def peak_to_peak(arr):
    return arr.max() - arr.min()
grouped.agg(peak_to_peak)
         data1     data2
key1
a     0.835893  1.382954
b     1.926943  0.844199
grouped.describe()
     data1
     count      mean       std       min       25%       50%       75%       max
key1
a      3.0  1.299985  0.421675  0.849733  1.107165  1.364596  1.525111  1.685626
b      2.0  0.426394  1.362554 -0.537077 -0.055342  0.426394  0.908130  1.389866

     data2
     count      mean       std       min       25%       50%       75%       max
key1
a      3.0  0.736201  0.767068  0.236429  0.294610  0.352792  0.986087  1.619383
b      2.0  0.404096  0.596939 -0.018004  0.193046  0.404096  0.615145  0.826195
tips = pd.read_csv('examples/tips.csv')
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips[:6]
   total_bill   tip smoker  day    time  size   tip_pct
0       16.99  1.01     No  Sun  Dinner     2  0.059447
1       10.34  1.66     No  Sun  Dinner     3  0.160542
2       21.01  3.50     No  Sun  Dinner     3  0.166587
3       23.68  3.31     No  Sun  Dinner     2  0.139780
4       24.59  3.61     No  Sun  Dinner     4  0.146808
5       25.29  4.71     No  Sun  Dinner     4  0.186240
grouped = tips.groupby(['day', 'smoker'])
grouped_pct = grouped['tip_pct']
grouped_pct.agg('mean')
day smoker
Fri No 0.151650
Yes 0.174783
Sat No 0.158048
Yes 0.147906
Sun No 0.160113
Yes 0.187250
Thur No 0.160298
Yes 0.163863
Name: tip_pct, dtype: float64
grouped_pct.agg(['mean', 'std', peak_to_peak])
                 mean       std  peak_to_peak
day  smoker
Fri  No      0.151650  0.028123      0.067349
     Yes     0.174783  0.051293      0.159925
Sat  No      0.158048  0.039767      0.235193
     Yes     0.147906  0.061375      0.290095
Sun  No      0.160113  0.042347      0.193226
     Yes     0.187250  0.154134      0.644685
Thur No      0.160298  0.038774      0.193350
     Yes     0.163863  0.039389      0.151240
grouped_pct.agg([('foo', 'mean'), ('bar', np.std)])
                  foo       bar
day  smoker
Fri  No      0.151650  0.028123
     Yes     0.174783  0.051293
Sat  No      0.158048  0.039767
     Yes     0.147906  0.061375
Sun  No      0.160113  0.042347
     Yes     0.187250  0.154134
Thur No      0.160298  0.038774
     Yes     0.163863  0.039389
functions = ['count', 'mean', 'max']
result = grouped[['tip_pct', 'total_bill']].agg(functions)
result
            tip_pct                     total_bill
              count      mean       max      count       mean    max
day  smoker
Fri  No           4  0.151650  0.187735          4  18.420000  22.75
     Yes         15  0.174783  0.263480         15  16.813333  40.17
Sat  No          45  0.158048  0.291990         45  19.661778  48.33
     Yes         42  0.147906  0.325733         42  21.276667  50.81
Sun  No          57  0.160113  0.252672         57  20.506667  48.17
     Yes         19  0.187250  0.710345         19  24.120000  45.35
Thur No          45  0.160298  0.266312         45  17.113111  41.19
     Yes         17  0.163863  0.241255         17  19.190588  43.11
ftuples = [('Durchschnitt', 'mean'), ('Abweichung', np.var)]
grouped[['tip_pct', 'total_bill']].agg(ftuples)
                  tip_pct              total_bill
             Durchschnitt Abweichung Durchschnitt  Abweichung
day  smoker
Fri  No          0.151650   0.000791    18.420000   25.596333
     Yes         0.174783   0.002631    16.813333   82.562438
Sat  No          0.158048   0.001581    19.661778   79.908965
     Yes         0.147906   0.003767    21.276667  101.387535
Sun  No          0.160113   0.001793    20.506667   66.099980
     Yes         0.187250   0.023757    24.120000  109.046044
Thur No          0.160298   0.001503    17.113111   59.625081
     Yes         0.163863   0.001551    19.190588   69.808518
grouped.agg({'tip': np.max, 'size': 'sum'})
grouped.agg({'tip_pct': ['min', 'max', 'mean', 'std'],
             'size': 'sum'})
              tip_pct                               size
                  min       max      mean       std  sum
day  smoker
Fri  No      0.120385  0.187735  0.151650  0.028123    9
     Yes     0.103555  0.263480  0.174783  0.051293   31
Sat  No      0.056797  0.291990  0.158048  0.039767  115
     Yes     0.035638  0.325733  0.147906  0.061375  104
Sun  No      0.059447  0.252672  0.160113  0.042347  167
     Yes     0.065660  0.710345  0.187250  0.154134   49
Thur No      0.072961  0.266312  0.160298  0.038774  112
     Yes     0.090014  0.241255  0.163863  0.039389   40
def top(df, n=5, column='tip_pct'):
    # select the n rows with the largest values in the given column
    return df.sort_values(by=column)[-n:]
top(tips, n=6)
     total_bill   tip smoker  day    time  size   tip_pct
109       14.31  4.00    Yes  Sat  Dinner     2  0.279525
183       23.17  6.50    Yes  Sun  Dinner     4  0.280535
232       11.61  3.39     No  Sat  Dinner     2  0.291990
67         3.07  1.00    Yes  Sat  Dinner     1  0.325733
178        9.60  4.00    Yes  Sun  Dinner     2  0.416667
172        7.25  5.15    Yes  Sun  Dinner     2  0.710345
tips.groupby('smoker').apply(top)
            total_bill   tip smoker   day    time  size   tip_pct
smoker
No     88        24.71  5.85     No  Thur   Lunch     2  0.236746
       185       20.69  5.00     No   Sun  Dinner     5  0.241663
       51        10.29  2.60     No   Sun  Dinner     2  0.252672
       149        7.51  2.00     No  Thur   Lunch     2  0.266312
       232       11.61  3.39     No   Sat  Dinner     2  0.291990
Yes    109       14.31  4.00    Yes   Sat  Dinner     2  0.279525
       183       23.17  6.50    Yes   Sun  Dinner     4  0.280535
       67         3.07  1.00    Yes   Sat  Dinner     1  0.325733
       178        9.60  4.00    Yes   Sun  Dinner     2  0.416667
       172        7.25  5.15    Yes   Sun  Dinner     2  0.710345
tips.groupby(['smoker', 'day']).apply(top, n=1, column='total_bill')
                 total_bill    tip smoker   day    time  size   tip_pct
smoker day
No     Fri  94        22.75   3.25     No   Fri  Dinner     2  0.142857
       Sat  212       48.33   9.00     No   Sat  Dinner     4  0.186220
       Sun  156       48.17   5.00     No   Sun  Dinner     6  0.103799
       Thur 142       41.19   5.00     No  Thur   Lunch     5  0.121389
Yes    Fri  95        40.17   4.73    Yes   Fri  Dinner     4  0.117750
       Sat  170       50.81  10.00    Yes   Sat  Dinner     3  0.196812
       Sun  182       45.35   3.50    Yes   Sun  Dinner     3  0.077178
       Thur 197       43.11   5.00    Yes  Thur   Lunch     4  0.115982
result = tips.groupby('smoker')['tip_pct'].describe()
result
        count      mean       std       min       25%       50%       75%       max
smoker
No      151.0  0.159328  0.039910  0.056797  0.136906  0.155625  0.185014  0.291990
Yes      93.0  0.163196  0.085119  0.035638  0.106771  0.153846  0.195059  0.710345
result.unstack('smoker')
smoker
count No 151.000000
Yes 93.000000
mean No 0.159328
Yes 0.163196
std No 0.039910
Yes 0.085119
min No 0.056797
Yes 0.035638
25% No 0.136906
Yes 0.106771
50% No 0.155625
Yes 0.153846
75% No 0.185014
Yes 0.195059
max No 0.291990
Yes 0.710345
dtype: float64
tips.groupby('smoker', group_keys=False).apply(top)
     total_bill   tip smoker   day    time  size   tip_pct
88        24.71  5.85     No  Thur   Lunch     2  0.236746
185       20.69  5.00     No   Sun  Dinner     5  0.241663
51        10.29  2.60     No   Sun  Dinner     2  0.252672
149        7.51  2.00     No  Thur   Lunch     2  0.266312
232       11.61  3.39     No   Sat  Dinner     2  0.291990
109       14.31  4.00    Yes   Sat  Dinner     2  0.279525
183       23.17  6.50    Yes   Sun  Dinner     4  0.280535
67         3.07  1.00    Yes   Sat  Dinner     1  0.325733
178        9.60  4.00    Yes   Sun  Dinner     2  0.416667
172        7.25  5.15    Yes   Sun  Dinner     2  0.710345
frame = pd.DataFrame({'data1': np.random.randn(1000),
                      'data2': np.random.randn(1000)})
quartiles = pd.cut(frame.data1, 4)
quartiles[:10]
0 (-1.415, 0.195]
1 (0.195, 1.805]
2 (-1.415, 0.195]
3 (0.195, 1.805]
4 (-1.415, 0.195]
5 (-1.415, 0.195]
6 (0.195, 1.805]
7 (-1.415, 0.195]
8 (-1.415, 0.195]
9 (-1.415, 0.195]
Name: data1, dtype: category
Categories (4, interval[float64]): [(-3.032, -1.415] < (-1.415, 0.195] < (0.195, 1.805] < (1.805, 3.414]]
def get_stats(group):
    return {'min': group.min(), 'max': group.max(),
            'count': group.count(), 'mean': group.mean()}
grouped = frame.data2.groupby(quartiles)
grouped.apply(get_stats).unstack()
                   count       max      mean       min
data1
(-3.032, -1.415]    75.0  2.055051  0.217488 -2.419102
(-1.415, 0.195]    511.0  3.134327  0.002096 -2.709974
(0.195, 1.805]     379.0  3.146306 -0.014438 -2.561375
(1.805, 3.414]      35.0  2.209609  0.293813 -1.865174
grouping = pd.qcut(frame.data1, 10, labels=False)
grouped = frame.data2.groupby(grouping)
grouped.apply(get_stats).unstack()
       count       max      mean       min
data1
0      100.0  2.055051  0.128959 -2.419102
1      100.0  3.134327 -0.106525 -2.131247
2      100.0  2.178958  0.041848 -1.964788
3      100.0  2.443715  0.176331 -2.587572
4      100.0  2.231973  0.016023 -2.709974
5      100.0  2.432079 -0.082424 -2.189209
6      100.0  3.146306  0.072272 -2.282556
7      100.0  1.831667 -0.011231 -2.538154
8      100.0  2.060099 -0.060095 -2.561375
9      100.0  2.209609  0.046783 -2.049609
s = pd.Series(np.random.randn(6))
s[::2] = np.nan
s
0 NaN
1 -0.162668
2 NaN
3 0.273627
4 NaN
5 0.345890
dtype: float64
s.fillna(s.mean())
0 0.152283
1 -0.162668
2 0.152283
3 0.273627
4 0.152283
5 0.345890
dtype: float64
states = ['Ohio', 'New York', 'Vermont', 'Florida',
          'Oregon', 'Nevada', 'California', 'Idaho']
group_key = ['East'] * 4 + ['West'] * 4
data = pd.Series(np.random.randn(8), index=states)
data
Ohio -1.141165
New York 0.077290
Vermont 1.178992
Florida 1.413521
Oregon -0.840100
Nevada -0.262705
California 0.410535
Idaho -2.736216
dtype: float64
data[['Vermont', 'Nevada', 'Idaho']] = np.nan
data
Ohio -1.141165
New York 0.077290
Vermont NaN
Florida 1.413521
Oregon -0.840100
Nevada NaN
California 0.410535
Idaho NaN
dtype: float64
data.groupby(group_key).mean()
East 0.116549
West -0.214783
dtype: float64
fill_mean = lambda g: g.fillna(g.mean())   # fill NAs within each group using that group's mean
data.groupby(group_key).apply(fill_mean)
Ohio -1.141165
New York 0.077290
Vermont 0.116549
Florida 1.413521
Oregon -0.840100
Nevada -0.214783
California 0.410535
Idaho -0.214783
dtype: float64
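A related sketch (same data and group_key as above): each group could instead be filled with a predefined constant by looking up the group's name attribute inside apply; the fill_values mapping here is purely illustrative.
fill_values = {'East': 0.5, 'West': -1}   # hypothetical constants chosen for illustration
fill_func = lambda g: g.fillna(fill_values[g.name])
data.groupby(group_key).apply(fill_func)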
suits = ['H', 'S', 'C', 'D']
card_val = (list(range(1, 11)) + [10] * 3) * 4
base_names = ['A'] + list(range(2, 11)) + ['J', 'K', 'Q']
cards = []
for suit in suits:
    cards.extend(str(num) + suit for num in base_names)
deck = pd.Series(card_val, index=cards)
deck[:13]
AH 1
2H 2
3H 3
4H 4
5H 5
6H 6
7H 7
8H 8
9H 9
10H 10
JH 10
KH 10
QH 10
dtype: int64
def draw(deck, n=5):
    return deck.sample(n)
draw(deck)
QH 10
AS 1
2S 2
4C 4
5C 5
dtype: int64
get_suit = lambda card: card[-1]  # the last character of each card label is its suit
deck.groupby(get_suit).apply(draw, n=2)
C 9C 9
2C 2
D QD 10
10D 10
H 5H 5
KH 10
S 3S 3
2S 2
dtype: int64
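Passing group_keys=False should drop the suit level from the result's index, leaving only the sampled card labels; a minimal sketch assuming the same deck, get_suit, and draw as above:
deck.groupby(get_suit, group_keys=False).apply(draw, n=2)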
df = pd.DataFrame({'category': ['a', 'a', 'a', 'a',
                                'b', 'b', 'b', 'b'],
                   'data': np.random.randn(8),
                   'weights': np.random.rand(8)})
df
  category      data   weights
0        a -0.326019  0.107540
1        a  1.333957  0.220552
2        a -0.055586  0.189389
3        a -0.947162  0.598072
4        b -0.764437  0.505189
5        b  1.204008  0.921015
6        b -1.048168  0.660169
7        b -1.495828  0.525462
grouped = df.groupby('category')
get_wavg = lambda g: np.average(g['data'], weights=g['weights'])
grouped.apply(get_wavg)
category
a -0.284928
b -0.289163
dtype: float64
close_px = pd.read_csv('examples/stock_px_2.csv', parse_dates=True,
                       index_col=0)
close_px.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14
Data columns (total 4 columns):
AAPL 2214 non-null float64
MSFT 2214 non-null float64
XOM 2214 non-null float64
SPX 2214 non-null float64
dtypes: float64(4)
memory usage: 86.5 KB
close_px[-4:]
              AAPL   MSFT    XOM      SPX
2011-10-11  400.29  27.00  76.27  1195.54
2011-10-12  402.19  26.96  77.16  1207.25
2011-10-13  408.43  27.18  76.37  1203.66
2011-10-14  422.00  27.27  78.11  1224.58
spx_corr = lambda x: x.corrwith(x['SPX'])   # correlation of each column with SPX
rets = close_px.pct_change().dropna()       # daily percent returns
get_year = lambda x: x.year
by_year = rets.groupby(get_year)
by_year.apply(spx_corr)
          AAPL      MSFT       XOM  SPX
2003  0.541124  0.745174  0.661265  1.0
2004  0.374283  0.588531  0.557742  1.0
2005  0.467540  0.562374  0.631010  1.0
2006  0.428267  0.406126  0.518514  1.0
2007  0.508118  0.658770  0.786264  1.0
2008  0.681434  0.804626  0.828303  1.0
2009  0.707103  0.654902  0.797921  1.0
2010  0.710105  0.730118  0.839057  1.0
2011  0.691931  0.800996  0.859975  1.0
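The same per-year pattern works for correlations between any pair of columns; a small sketch, assuming the by_year grouping above, that computes the annual correlation of AAPL with MSFT returns:
by_year.apply(lambda g: g['AAPL'].corr(g['MSFT']))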
tips.pivot_table(index=['day', 'smoker'])
                 size       tip   tip_pct  total_bill
day  smoker
Fri  No      2.250000  2.812500  0.151650   18.420000
     Yes     2.066667  2.714000  0.174783   16.813333
Sat  No      2.555556  3.102889  0.158048   19.661778
     Yes     2.476190  2.875476  0.147906   21.276667
Sun  No      2.929825  3.167895  0.160113   20.506667
     Yes     2.578947  3.516842  0.187250   24.120000
Thur No      2.488889  2.673778  0.160298   17.113111
     Yes     2.352941  3.030000  0.163863   19.190588
tips.pivot_table(['tip_pct', 'size'], index=['time', 'day'],
                 columns='smoker')
                 size             tip_pct
smoker             No       Yes        No       Yes
time   day
Dinner Fri   2.000000  2.222222  0.139622  0.165347
       Sat   2.555556  2.476190  0.158048  0.147906
       Sun   2.929825  2.578947  0.160113  0.187250
       Thur  2.000000       NaN  0.159744       NaN
Lunch  Fri   3.000000  1.833333  0.187735  0.188937
       Thur  2.500000  2.352941  0.160311  0.163863
tips.pivot_table(['tip_pct', 'size'], index=['time', 'day'],
                 columns='smoker', margins=True)
                 size                       tip_pct
smoker             No       Yes       All        No       Yes       All
time   day
Dinner Fri   2.000000  2.222222  2.166667  0.139622  0.165347  0.158916
       Sat   2.555556  2.476190  2.517241  0.158048  0.147906  0.153152
       Sun   2.929825  2.578947  2.842105  0.160113  0.187250  0.166897
       Thur  2.000000       NaN  2.000000  0.159744       NaN  0.159744
Lunch  Fri   3.000000  1.833333  2.000000  0.187735  0.188937  0.188765
       Thur  2.500000  2.352941  2.459016  0.160311  0.163863  0.161301
All          2.668874  2.408602  2.569672  0.159328  0.163196  0.160803
tips.pivot_table('tip_pct', index=['time', 'smoker'], columns='day',
                 aggfunc=len, margins=True)
day             Fri   Sat   Sun  Thur    All
time   smoker
Dinner No       3.0  45.0  57.0   1.0  106.0
       Yes      9.0  42.0  19.0   NaN   70.0
Lunch  No       1.0   NaN   NaN  44.0   45.0
       Yes      6.0   NaN   NaN  17.0   23.0
All            19.0  87.0  76.0  62.0  244.0
tips.pivot_table('tip_pct', index=['time', 'size', 'smoker'],
                 columns='day', aggfunc='mean', fill_value=0)
day                      Fri       Sat       Sun      Thur
time   size smoker
Dinner 1    No      0.000000  0.137931  0.000000  0.000000
            Yes     0.000000  0.325733  0.000000  0.000000
       2    No      0.139622  0.162705  0.168859  0.159744
            Yes     0.171297  0.148668  0.207893  0.000000
       3    No      0.000000  0.154661  0.152663  0.000000
            Yes     0.000000  0.144995  0.152660  0.000000
       4    No      0.000000  0.150096  0.148143  0.000000
            Yes     0.117750  0.124515  0.193370  0.000000
       5    No      0.000000  0.000000  0.206928  0.000000
            Yes     0.000000  0.106572  0.065660  0.000000
       6    No      0.000000  0.000000  0.103799  0.000000
Lunch  1    No      0.000000  0.000000  0.000000  0.181728
            Yes     0.223776  0.000000  0.000000  0.000000
       2    No      0.000000  0.000000  0.000000  0.166005
            Yes     0.181969  0.000000  0.000000  0.158843
       3    No      0.187735  0.000000  0.000000  0.084246
            Yes     0.000000  0.000000  0.000000  0.204952
       4    No      0.000000  0.000000  0.000000  0.138919
            Yes     0.000000  0.000000  0.000000  0.155410
       5    No      0.000000  0.000000  0.000000  0.121389
       6    No      0.000000  0.000000  0.000000  0.173706