"""Pandas usage notes: a walk-through of common DataFrame / Series operations.

The script loads a loan-statistics CSV from a hard-coded local path and prints
the result of each demonstrated operation: list <-> DataFrame conversion,
positional access, value counting, de-duplication, boolean filtering,
iteration and sorting.
"""
import pandas as pd
import numpy as np

# Read the file. A raw string keeps the Windows backslashes literal instead of
# relying on "\d" / "\L" being unrecognised escape sequences.
data = pd.read_csv(
    r"G:\data\LoanStats_2016Q2\LoanStats_2016Q2_3.csv", low_memory=True
)

# Convert Python lists into a DataFrame.
names = ['王家卫', '周星驰', '徐克']
ids = ['001', '002', '003']  # not called `id`, to avoid shadowing the builtin
c = {"name": names, "id": ids}
data2 = pd.DataFrame(c)
print(data2["name"])  # a single column is a Series, not a DataFrame
print(type(data2["name"]))  # <class 'pandas.core.series.Series'>

# Convert a DataFrame into a list (via a NumPy array).
data3 = np.array(data2)
print(type(data3))  # <class 'numpy.ndarray'>
print(type(data3.tolist()))  # <class 'list'>
print(type([1, 2, 3]))  # <class 'list'>

# Take data at specific positions of a DataFrame.
print(data.head(5)["total_acc"])  # first 5 rows of the total_acc column
print(data.tail(5)["total_acc"])  # last 5 rows of the total_acc column
print("---------------------------------------------------")

# Inspect the column labels of the DataFrame (only their container type is
# printed here).
print(type(data.columns))  # <class 'pandas.core.indexes.base.Index'>
# Frequency of every distinct value in total_acc.
print(data["total_acc"].value_counts())
print(type(data["total_acc"].value_counts()))  # <class 'pandas.core.series.Series'>

# Convert a Series into a DataFrame.
print(type(data["total_acc"].value_counts().to_frame()))  # <class 'pandas.core.frame.DataFrame'>
print(type(data2["name"].to_frame()))

# Number of rows and columns of the DataFrame.
print(data.shape)  # e.g. (45915, 286) for the author's file

# De-duplicated values of a Series.
print(data["total_acc"].unique())
print("----------------------------------------------------------")

# Keep only the total_acc values whose frequency is greater than 1600.
# The column name is given explicitly: pandas >= 2.0 names the value_counts()
# result "count", while older versions reuse the source column name.
ststic = data["total_acc"].value_counts().to_frame(name="total_acc")
print(ststic.columns)
aa = ststic["total_acc"] > 1600
print(ststic[aa])

# Iterate over the rows of a DataFrame.
for _, row in data2.iterrows():
    print(type(row))  # <class 'pandas.core.series.Series'>

# Iterate over a Series. Series.iteritems() was removed in pandas 2.0;
# items() yields the same (index, value) pairs.
print("-------------------------------------------------")
for _, v in data2["name"].items():
    print(v)

# Sorting a Series.
print("-------------------------------------------------")
print(data["total_acc"].sort_values())  # order by value
print("-------------------------------------------------")
# Order by index label; with the index read_csv assigns by default this is
# effectively a no-op.
print(data["total_acc"].sort_index())
print("-------------------------------------------------")
print(ststic["total_acc"].sort_values())
# On the frequency table the index holds the distinct total_acc values, so
# sort_index() is meaningful here.
print(ststic["total_acc"].sort_index())
python-pandas使用日记
最新推荐文章于 2023-01-02 22:29:24 发布