1、concat合并数据
import numpy as np
import pandas as pd
df1 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df2 = pd.DataFrame(np.ones((3,4))*2, columns=['A','B','C','D'])
df3 = pd.DataFrame(np.ones((3,4))*3, columns=['A','B','C','D'])
print(df1)
print(df2)
print(df3)
A B C D
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
A B C D
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
A B C D
0 3.0 3.0 3.0 3.0
1 3.0 3.0 3.0 3.0
2 3.0 3.0 3.0 3.0
pd.concat([df1,df2,df3], axis=0, ignore_index=True) #注意添加[],axis=0表示竖向,忽略原索引,会产生新索引
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | C | D | |
---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | 2.0 | 2.0 | 2.0 | 2.0 |
4 | 2.0 | 2.0 | 2.0 | 2.0 |
5 | 2.0 | 2.0 | 2.0 | 2.0 |
6 | 3.0 | 3.0 | 3.0 | 3.0 |
7 | 3.0 | 3.0 | 3.0 | 3.0 |
8 | 3.0 | 3.0 | 3.0 | 3.0 |
df4 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df5 = pd.DataFrame(np.ones((3,4))*2, columns=['B','C','D','E'])
print(df4)
print(df5)
A B C D
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
B C D E
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
pd.concat([df4,df5], join='outer', ignore_index=True) #join的inner参数取交集,outer取并集
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | C | D | E | |
---|---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 | NaN |
1 | 1.0 | 1.0 | 1.0 | 1.0 | NaN |
2 | 1.0 | 1.0 | 1.0 | 1.0 | NaN |
3 | NaN | 2.0 | 2.0 | 2.0 | 2.0 |
4 | NaN | 2.0 | 2.0 | 2.0 | 2.0 |
5 | NaN | 2.0 | 2.0 | 2.0 | 2.0 |
df6 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'], index=[1,2,3])
df7 = pd.DataFrame(np.ones((3,4))*2, columns=['B','C','D','E'], index=[2,3,4])
pd.concat([df6,df7], axis=1, join_axes=[df6.index]) #若不加join_axes,行索引会有1,2,3,4
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | C | D | B | C | D | E | |
---|---|---|---|---|---|---|---|---|
1 | 1.0 | 1.0 | 1.0 | 1.0 | NaN | NaN | NaN | NaN |
2 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | 2.0 | 2.0 |
3 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | 2.0 | 2.0 |
2、append合并数据
df8 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
s1 = pd.Series([1,2,3,4], index=['A','B','C','D'])
df8.append(s1, ignore_index=True) #相当于在一个表里添加一条记录
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | C | D | |
---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | 1.0 | 2.0 | 3.0 | 4.0 |
df9 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df10 = pd.DataFrame(np.ones((3,4))*2, columns=['A','B','C','D'])
df9.append(df10, ignore_index=True) #合并两个表结构相同的表
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | C | D | |
---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | 2.0 | 2.0 | 2.0 | 2.0 |
4 | 2.0 | 2.0 | 2.0 | 2.0 |
5 | 2.0 | 2.0 | 2.0 | 2.0 |
3、merge合并数据(更复杂)
left = pd.DataFrame({'key1':['K0','K0','K1','K2'],
'key2':['K0','K1','K0','K1'],
'A':['A0','A1','A2','A3'],
'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key1':['K0','K1','K1','K2'],
'key2':['K0','K0','K0','K0'],
'C':['C0','C1','C2','C3'],
'D':['D0','D1','D2','D3']})
print(left)
print(right)
A B key1 key2
0 A0 B0 K0 K0
1 A1 B1 K0 K1
2 A2 B2 K1 K0
3 A3 B3 K2 K1
C D key1 key2
0 C0 D0 K0 K0
1 C1 D1 K1 K0
2 C2 D2 K1 K0
3 C3 D3 K2 K0
pd.merge(left, right, on=['key1','key2'], how='inner') #取key1, key2组合的交集,outer取并集,left基于left数据的的key来合并,right同理
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
A | B | key1 | key2 | C | D | |
---|---|---|---|---|---|---|
0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | A2 | B2 | K1 | K0 | C1 | D1 |
2 | A2 | B2 | K1 | K0 | C2 | D2 |
#有时DataFrame中的连接键位于其索引中,可以传入left_index=True或right_index=True, 表明索引应该被用作连接键。
left1 = pd.DataFrame({'key':['a','a','b','c'],
'v1':range(4)})
right1 = pd.DataFrame({'v2':[9,8]}, index=['a','b'])
print(left1)
print(right1)
key v1
0 a 0
1 a 1
2 b 2
3 c 3
v2
a 9
b 8
pd.merge(left1, right1, left_on='key', right_index=True) #表示合并两个数据,连接键以left1的key为主;但right1的索引也应该被作为连接键。
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
key | v1 | v2 | |
---|---|---|---|
0 | a | 0 | 9 |
1 | a | 1 | 9 |
2 | b | 2 | 8 |
#连接键重名的处理方式
boys = pd.DataFrame({'k':['K0','K1','K2'],'age':[1,2,3]})
girls = pd.DataFrame({'k':['K0','K0','K3'],'age':[4,5,6]})
pd.merge(boys, girls, on='k', suffixes=['_boys','_girls'], how='outer')
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
age_boys | k | age_girls | |
---|---|---|---|
0 | 1.0 | K0 | 4.0 |
1 | 1.0 | K0 | 5.0 |
2 | 2.0 | K1 | NaN |
3 | 3.0 | K2 | NaN |
4 | NaN | K3 | 6.0 |