# -*- coding: utf-8 -*-
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
'''
移除重复数据
'''
# Demo: detecting and removing duplicate rows.
# Rows 0/1 and rows 5/6 of this frame are exact duplicates of each other.
data = DataFrame(
    {
        'k1': ['one'] * 4 + ['two'] * 3,
        'k2': [1, 1, 2, 3, 3, 4, 4],
    }
)
print(data)
# k1 k2
# 0 one 1
# 1 one 1
# 2 one 2
# 3 one 3
# 4 two 3
# 5 two 4
# 6 two 4

# duplicated() yields a boolean Series: True marks a row that repeats an
# earlier row, so the first occurrence of each row stays False.
repeat_mask = data.duplicated()
print(repeat_mask)
# 0 False
# 1 True
# 2 False
# 3 False
# 4 False
# 5 False
# 6 True
# dtype: bool

# drop_duplicates() returns the frame without the repeated rows.
unique_rows = data.drop_duplicates()
print(unique_rows)
# k1 k2
# 0 one 1
# 2 one 2
# 3 one 3
# 4 two 3
# 5 two 4

# Add a third column so that no two full rows are identical any more.
data['v1'] = range(7)
print(data)
# k1 k2 v1
# 0 one 1 0
# 1 one 1 1
# 2 one 2 2
# 3 one 3 3
# 4 two 3 4
# 5 two 4 5
# 6 two 4 6

# Restrict the duplicate check to column 'k1' only.
first_per_k1 = data.drop_duplicates(subset=['k1'])
print(first_per_k1)
# k1 k2 v1
# 0 one 1 0
# 4 two 3 4

# keep='last' retains the last row of each duplicate group instead of the
# first (the original note called this option by its older name, take_last).
last_per_pair = data.drop_duplicates(subset=['k1', 'k2'], keep='last')
print(last_per_pair)
# k1 k2 v1
# 1 one 1 1
# 2 one 2 2
# 3 one 3 3
# 4 two 3 4
# 6 two 4 6
'''
利用函数或者映射进行数据转换
'''
# Demo: deriving a new column by mapping the values of an existing one.
data = DataFrame(
    {
        'key': ['北京', '上海', '广州', '深圳', '上海', '广州', '深圳'],
        'value': [11, 22, 33, 44, 66, 77, 88],
    }
)

# Lookup table: value of the 'key' column -> value for the new 'logo' column.
ys = {
    '北京': '烤鸭',
    '上海': '娘娘腔',
    '广州': '叶问',
    '深圳': '腾讯',
}

# Variant 1: hand the dict straight to Series.map.
logo_from_dict = data['key'].map(ys)
data['logo'] = logo_from_dict
print(data)
# key value logo
# 0 北京 11 烤鸭
# 1 上海 22 娘娘腔
# 2 广州 33 叶问
# 3 深圳 44 腾讯
# 4 上海 66 娘娘腔
# 5 广州 77 叶问
# 6 深圳 88 腾讯

# Variant 2: pass a function that performs the same dictionary lookup.
logo_from_func = data['key'].map(lambda city: ys[city])
data['logo'] = logo_from_func
print(data)
# key value logo
# 0 北京 11 烤鸭
# 1 上海 22 娘娘腔
# 2 广州 33 叶问
# 3 深圳 44 腾讯
# 4 上海 66 娘娘腔
# 5 广州 77 叶问
# 6 深圳 88 腾讯
'''
替换值replace
'''
# Demo: Series.replace with the three ways of describing the substitution.
data2 = Series([1, 2, 3, 5, 4, 6, 7])
print(data2)

# A list of old values replaced by one single new value.
single_swap = data2.replace(to_replace=[1], value='换1')
print(single_swap)

# Two lists of equal length: old values and their new values, paired by position.
paired_swap = data2.replace(to_replace=[1, 2], value=['换1', '换2'])
print(paired_swap)

# A dict of {old value: new value}.
dict_swap = data2.replace({2: 'huan', 3: 0})
print(dict_swap)
'''
修改索引名称
'''
# Demo: transforming index and column labels.
data = DataFrame(
    np.arange(12).reshape((3, 4)),
    index=['Hao', 'Haong', 'Bo'],
    columns=['n', 'a', 'm', 'e'],
)
print(data)

# Index.map applies a function to every label. str.upper is passed as a
# function object, so it is written without parentheses.
upper_labels = data.index.map(str.upper)
print(upper_labels)
# Index(['HAO', 'HAONG', 'BO'], dtype='object')

# Assigning the result back changes the frame's index in place.
data.index = upper_labels
print(data)
# n a m e
# HAO 0 1 2 3
# HAONG 4 5 6 7
# BO 8 9 10 11

# rename() returns a relabelled copy; `data` itself keeps its upper-case index,
# as the last output below shows. Functions are applied to every label.
title_cased = data.rename(index=str.title, columns=str.upper)
print(title_cased)
# N A M E
# Hao 0 1 2 3
# Haong 4 5 6 7
# Bo 8 9 10 11

# Dicts rename only the labels they list; all others are left unchanged.
partly_renamed = data.rename(index={'HAO': '郝'}, columns={'n': 'NN'})
print(partly_renamed)
# NN a m e
# 郝 0 1 2 3
# HAONG 4 5 6 7
# BO 8 9 10 11
# Python data analysis, part 15: removing duplicate data and renaming labels in pandas (duplicated, rename)
# Latest recommended article published on 2024-12-18 14:34:44