import pandas as pd
# Build a small two-column frame; the last row repeats the row before it.
data1 = pd.DataFrame(
    {
        "k1": ["one", "two"] * 3 + ["two"],
        "k2": [1, 1, 2, 3, 3, 4, 4],
    }
)
print(data1)

# duplicated() yields a boolean Series that is True for every row that
# repeats an earlier row (all columns are compared).
data2 = data1.duplicated()
print(data2)

# drop_duplicates() returns a new frame without the repeated rows;
# data1 itself is left unchanged.
data3 = data1.drop_duplicates()
print(data3)

# Add a column of distinct values so that whole rows no longer repeat,
# then deduplicate on a subset of the columns instead.
data1["k3"] = range(len(data1))

# Only "k1" is compared: the first row seen for each k1 value is kept.
data4 = data1.drop_duplicates(subset=["k1"])
print(data4)

# Compare on ("k1", "k2") and keep the last occurrence rather than the first.
data5 = data1.drop_duplicates(subset=["k1", "k2"], keep="last")
print(data5)
answer:
data1
k1 k2
0 one 1
1 two 1
2 one 2
3 two 3
4 one 3
5 two 4
6 two 4
data2
0 False
1 False
2 False
3 False
4 False
5 False
6 True
dtype: bool
data3
k1 k2
0 one 1
1 two 1
2 one 2
3 two 3
4 one 3
5 two 4
data4
k1 k2 k3
0 one 1 0
1 two 1 1
data5
k1 k2 k3
0 one 1 0
1 two 1 1
2 one 2 2
3 two 3 3
4 one 3 4
6 two 4 6
[/code]
利用函数或映射进行数据转换
