目录
1.Sorting dataframes
2.Merging dataframes-concat()
1.Sorting dataframes
import pandas as pd
import numpy as np
# Load the BigMart sales data and drop every row that has at least one
# missing value, then preview the first rows.
data_BM = pd.read_csv("bigmart_data.csv").dropna(how="any")
data_BM.head()
Item_Identifier Item_Weight Item_Fat_Content Item_Visibility Item_Type Item_MRP Outlet_Identifier Outlet_Establishment_Year Outlet_Size Outlet_Location_Type Outlet_Type Item_Outlet_Sales
0 FDA15 9.300 Low Fat 0.016047 Dairy 249.8092 OUT049 1999 Medium Tier 1 Supermarket Type1 3735.1380
1 DRC01 5.920 Regular 0.019278 Soft Drinks 48.2692 OUT018 2009 Medium Tier 3 Supermarket Type2 443.4228
2 FDN15 17.500 Low Fat 0.016760 Meat 141.6180 OUT049 1999 Medium Tier 1 Supermarket Type1 2097.2700
4 NCD19 8.930 Low Fat 0.000000 Household 53.8614 OUT013 1987 High Tier 3 Supermarket Type1 994.7052
5 FDP36 10.395 Regular 0.000000 Baking Goods 51.4008 OUT018 2009 Medium Tier 3 Supermarket Type2 556.6088
# Return a new frame ordered by establishment year (ascending is the default);
# data_BM itself is left unchanged.
sorted_data = data_BM.sort_values("Outlet_Establishment_Year")
sorted_data.head()
Item_Identifier Item_Weight Item_Fat_Content Item_Visibility Item_Type Item_MRP Outlet_Identifier Outlet_Establishment_Year Outlet_Size Outlet_Location_Type Outlet_Type Item_Outlet_Sales
2812 FDR60 14.30 Low Fat 0.130307 Baking Goods 75.7328 OUT013 1987 High Tier 3 Supermarket Type1 617.8624
5938 NCJ06 20.10 Low Fat 0.034624 Household 118.9782 OUT013 1987 High Tier 3 Supermarket Type1 1549.3166
3867 FDY38 13.60 Regular 0.119077 Dairy 231.2300 OUT013 1987 High Tier 3 Supermarket Type1 2330.3000
1307 FDB37 20.25 Regular 0.022922 Baking Goods 240.7538 OUT013 1987 High Tier 3 Supermarket Type1 3364.9532
5930 NCA18 10.10 Low Fat 0.056031 Household 115.1492 OUT013 1987 High Tier 3 Supermarket Type1 1737.7380
# Sort data_BM in place by establishment year, newest outlets first.
# The keyword is `ascending`; a misspelled keyword makes sort_values raise
# TypeError.
data_BM.sort_values(
    by="Outlet_Establishment_Year",
    ascending=False,
    inplace=True,
)
data_BM.head()
Item_Identifier Item_Weight Item_Fat_Content Item_Visibility Item_Type Item_MRP Outlet_Identifier Outlet_Establishment_Year Outlet_Size Outlet_Location_Type Outlet_Type Item_Outlet_Sales
2825 FDL16 12.850 Low Fat 0.169139 Frozen Foods 46.4060 OUT018 2009 Medium Tier 3 Supermarket Type2 186.4240
6319 FDP58 11.100 Low Fat 0.135693 Snack Foods 220.0482 OUT018 2009 Medium Tier 3 Supermarket Type2 2628.5784
6347 FDR12 12.600 Regular 0.031663 Baking Goods 173.2764 OUT018 2009 Medium Tier 3 Supermarket Type2 1030.6584
4274 FDR47 17.850 Low Fat 0.087825 Breads 196.0794 OUT018 2009 Medium Tier 3 Supermarket Type2 1755.7146
4275 FDJ10 5.095 Regular 0.130031 Snack Foods 142.4838 OUT018 2009 Medium Tier 3 Supermarket Type2 561.9352
# Sort data_BM in place by its index labels, which puts the rows back in
# index order after the earlier sort by year.
data_BM. sort_index( inplace = True )
data_BM. head( )
Item_Identifier Item_Weight Item_Fat_Content Item_Visibility Item_Type Item_MRP Outlet_Identifier Outlet_Establishment_Year Outlet_Size Outlet_Location_Type Outlet_Type Item_Outlet_Sales
0 FDA15 9.300 Low Fat 0.016047 Dairy 249.8092 OUT049 1999 Medium Tier 1 Supermarket Type1 3735.1380
1 DRC01 5.920 Regular 0.019278 Soft Drinks 48.2692 OUT018 2009 Medium Tier 3 Supermarket Type2 443.4228
2 FDN15 17.500 Low Fat 0.016760 Meat 141.6180 OUT049 1999 Medium Tier 1 Supermarket Type1 2097.2700
4 NCD19 8.930 Low Fat 0.000000 Household 53.8614 OUT013 1987 High Tier 3 Supermarket Type1 994.7052
5 FDP36 10.395 Regular 0.000000 Baking Goods 51.4008 OUT018 2009 Medium Tier 3 Supermarket Type2 556.6088
2.Merging dataframes-concat()
import pandas as pd
# Three frames with identical columns (A-D) and consecutive, non-overlapping
# index ranges: 0-3, 4-7 and 8-11.
df1 = pd.DataFrame(
    {
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    },
    index=[0, 1, 2, 3],
)
df2 = pd.DataFrame(
    {
        "A": ["A4", "A5", "A6", "A7"],
        "B": ["B4", "B5", "B6", "B7"],
        "C": ["C4", "C5", "C6", "C7"],
        "D": ["D4", "D5", "D6", "D7"],
    },
    index=[4, 5, 6, 7],
)
df3 = pd.DataFrame(
    {
        "A": ["A8", "A9", "A10", "A11"],
        "B": ["B8", "B9", "B10", "B11"],
        "C": ["C8", "C9", "C10", "C11"],
        "D": ["D8", "D9", "D10", "D11"],
    },
    index=[8, 9, 10, 11],
)

# Stack the three frames vertically. The list must start with df1 (not df2
# twice) so the result holds rows 0-11 exactly once each.
result = pd.concat([df1, df2, df3])
result
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
4 A4 B4 C4 D4
5 A5 B5 C5 D5
6 A6 B6 C6 D6
7 A7 B7 C7 D7
8 A8 B8 C8 D8
9 A9 B9 C9 D9
10 A10 B10 C10 D10
11 A11 B11 C11 D11
# Concatenate with `keys` so each source frame is labelled by an outer index
# level ('x' -> df1, 'y' -> df2, 'z' -> df3). The result must be bound to
# `result1`, the name the following lines read.
result1 = pd.concat([df1, df2, df3], keys=["x", "y", "z"])
result1
A B C D
x 0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
y 4 A4 B4 C4 D4
5 A5 B5 C5 D5
6 A6 B6 C6 D6
7 A7 B7 C7 D7
z 8 A8 B8 C8 D8
9 A9 B9 C9 D9
10 A10 B10 C10 D10
11 A11 B11 C11 D11
# Select the rows stored under the outer key 'x' (the rows taken from df1).
result1. loc[ 'x' ]
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
# A frame that only partly overlaps df1: it shares columns B and D, adds a
# new column F, and uses index labels 2, 3, 6 and 7.
df4 = pd.DataFrame(
    {
        "B": ["B2", "B3", "B6", "B7"],
        "D": ["D2", "D3", "D6", "D7"],
        "F": ["F2", "F3", "F6", "F7"],
    },
    index=[2, 3, 6, 7],
)
# Place df1 and df4 side by side, aligning rows on the index; labels missing
# from one frame are filled with NaN.
result = pd.concat([df1, df4], axis="columns", sort=False)
result
A B C D B D F
0 A0 B0 C0 D0 NaN NaN NaN
1 A1 B1 C1 D1 NaN NaN NaN
2 A2 B2 C2 D2 B2 D2 F2
3 A3 B3 C3 D3 B3 D3 F3
6 NaN NaN NaN NaN B6 D6 F6
7 NaN NaN NaN NaN B7 D7 F7
# Stack df4 below df1; columns present in only one frame are filled with NaN
# and the original index labels are kept (so 2 and 3 appear twice).
result = pd.concat([df1, df4], axis="index", sort=False)
result
A B C D F
0 A0 B0 C0 D0 NaN
1 A1 B1 C1 D1 NaN
2 A2 B2 C2 D2 NaN
3 A3 B3 C3 D3 NaN
2 NaN B2 NaN D2 F2
3 NaN B3 NaN D3 F3
6 NaN B6 NaN D6 F6
7 NaN B7 NaN D7 F7
# Same side-by-side concatenation, this time asking pandas to sort the
# non-concatenation axis (the row index).
result = pd.concat([df1, df4], axis="columns", sort=True)
result
A B C D B D F
0 A0 B0 C0 D0 NaN NaN NaN
1 A1 B1 C1 D1 NaN NaN NaN
2 A2 B2 C2 D2 B2 D2 F2
3 A3 B3 C3 D3 B3 D3 F3
6 NaN NaN NaN NaN B6 D6 F6
7 NaN NaN NaN NaN B7 D7 F7
# Inner join on the index: keep only the row labels present in both frames.
result = pd.concat([df1, df4], join="inner", axis="columns")
result
A B C D B D F
2 A2 B2 C2 D2 B2 D2 F2
3 A3 B3 C3 D3 B3 D3 F3
# Keep exactly df1's row labels in the side-by-side result. The `join_axes`
# argument of pd.concat was removed in pandas 1.0, so concatenate first and
# then reindex the result on df1's index.
result = pd.concat([df1, df4], axis=1).reindex(df1.index)
result
A B C D B D F
0 A0 B0 C0 D0 NaN NaN NaN
1 A1 B1 C1 D1 NaN NaN NaN
2 A2 B2 C2 D2 B2 D2 F2
3 A3 B3 C3 D3 B3 D3 F3