import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 1. pandas简介
# A Series is a labelled one-dimensional array; np.nan marks the missing entry.
s = pd.Series([1, 3, 6, np.nan, 44, 1])

# Six timestamps starting at 2016-01-01, used below as the row index.
dates = pd.date_range(start="20160101", periods=6)

# 6x4 frame of random normal samples, rows labelled by date, columns a-d.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("abcd"))

# Basic inspection. Each expression is only evaluated (notebook style);
# nothing is stored and df is not modified.
df.index       # row labels
df.columns     # column labels
df.values      # underlying NumPy array
df.describe()  # per-column summary statistics
df.T           # transpose

# Sorting returns a new frame and leaves df untouched.
df.sort_index(axis=1, ascending=False)          # columns, descending label order
df.sort_index(axis=0, ascending=False)          # rows, descending label order
df.sort_values(by=["d", "a"], ascending=True)   # by column d, ties broken by a
# 2. 选择数据
# Six dates (2016-01-01 .. 2016-01-06) indexing a 6x4 frame holding 0..23.
dates = pd.date_range(start="20160101", end="20160106")
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=list("ABCD"))

# Column selection: item access and attribute access give the same Series.
df["A"]
df.A

# Slicing with plain integers selects rows by position (the first three).
df[0:3]

# Label-based selection with .loc: one row by date, then two columns by name.
df.loc["20160101", :]
df.loc[:, ["A", "B"]]

# Position-based selection with .iloc: rows 1, 3, 5 and columns 1-2.
df.iloc[[1, 3, 5], 1:3]

# Boolean mask: keep only the rows whose A value is greater than 8.
df[df.A > 8]
# 3. 设置值
# Start again from a fresh 6x4 integer frame holding 0..23, indexed by date.
dates = pd.date_range("20160101", "20160106")
df = pd.DataFrame(data=np.arange(24).reshape((6, 4)), index=dates, columns=["A", "B", "C", "D"])

# Overwrite single cells, first by position and then by label.
df.iloc[1, 1] = 111
df.loc["20160101", "A"] = 222

# Conditional assignment: zero every A value greater than 4.
# This must be ONE .loc call. The chained form `df.A[mask] = 0` assigns into
# an intermediate Series, which triggers SettingWithCopyWarning and, with
# Copy-on-Write enabled, leaves df unchanged.
df.loc[df.A > 4, "A"] = 0

# New columns: a scalar is broadcast to every row; a Series is aligned on
# the index.
df["E"] = np.nan
df["F"] = pd.Series(data=[1, 2, 3, 4, 5, 6], index=dates)
# 4. 处理丢失数据
# Fresh 6x4 integer frame holding 0..23, indexed by date.
dates = pd.date_range("20160101", "20160106")
df = pd.DataFrame(data=np.arange(24).reshape((6, 4)), index=dates, columns=["A", "B", "C", "D"])

# NaN is a float, so it cannot be stored in an integer column. Cast the two
# columns that will receive NaN explicitly instead of relying on silent
# upcasting during assignment (deprecated by pandas, planned to become an
# error). A and D keep their integer dtype.
df = df.astype({"B": "float64", "C": "float64"})

# Punch two holes into the data: row 0 / column B and row 1 / column C.
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan

# The calls below return new objects; df itself is not modified.
df.dropna(axis=0, how="any")   # drop every row containing a NaN
df.dropna(axis=1, how="any")   # drop every column containing a NaN
df.fillna(value=0)             # replace NaN with 0
df.isnull().sum()              # number of NaN values per column
# 5. 导入导出
# Reading and writing CSV files.
#
# Frequently used pd.read_csv arguments:
#   sep        field delimiter
#   encoding   text encoding of the file, e.g. "utf-8"
#   index_col  column of the source file to use as the row index
df = pd.read_csv(filepath_or_buffer="student.csv", sep=",", encoding="utf-8")

# Column labels are replaced by assigning a list with exactly one entry per
# column. An empty list raises ValueError for any frame that has columns, so
# the existing labels are kept here and only stripped of stray whitespace.
df.columns = [str(label).strip() for label in df.columns]

# DataFrame.to_csv calls its target `path_or_buf` (it has no
# `filepath_or_buffer` keyword) and needs a non-empty path. A separate output
# file is used so the input file is not overwritten.
df.to_csv(path_or_buf="student_out.csv", encoding="utf-8")
# 6. df合并
# --- concat: stacking frames that share the same columns -------------------
# Three 3x4 frames filled with 0, 1 and 2 respectively.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=["a", "b", "c", "d"])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=["a", "b", "c", "d"])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=["a", "b", "c", "d"])

# Stack vertically; ignore_index=True renumbers the rows 0..8.
pd.concat((df1, df2, df3), axis=0, ignore_index=True)

# --- concat: frames whose columns only partly overlap ----------------------
df1 = pd.DataFrame(data=np.ones((3, 4)) * 0, columns=["a", "b", "c", "d"])
df2 = pd.DataFrame(data=np.ones((3, 4)) * 1, columns=["b", "c", "d", "e"])

# join="inner" keeps only the shared columns (b, c, d);
# join="outer" keeps all columns and fills the gaps with NaN.
pd.concat((df1, df2), join="inner", ignore_index=True)
pd.concat((df1, df2), join="outer", ignore_index=True)

# Appending the rows of df2 to df1. DataFrame.append was removed in
# pandas 2.0; pd.concat is the replacement.
pd.concat((df1, df2), ignore_index=True)

# --- merge: database-style joins -------------------------------------------
left = pd.DataFrame({
    "key": ["K0", "K1", "K2", "K3"],
    "A": ["A0", "A1", "A2", "A3"],
    "B": ["B0", "B1", "B2", "B3"],
})
right = pd.DataFrame({
    "key": ["K0", "K1", "K2", "K3"],
    "C": ["C0", "C1", "C2", "C3"],
    "D": ["D0", "D1", "D2", "D3"],
})

# Join on the shared "key" column.
pd.merge(left=left, right=right, on="key")

# Same join with the key column named separately for each side.
pd.merge(
    left=left,
    right=right,
    left_on="key",
    right_on="key",
    how="inner",
)

# Join on the row index instead of a column. Both frames also carry a "key"
# column, so the result holds key_x and key_y.
pd.merge(
    left=left,
    right=right,
    left_index=True,
    right_index=True,
)
# 7. plot画图
# 1000 rows of random normal samples spread over four columns A-D.
data = pd.DataFrame(np.random.randn(1000, 4), columns=list("ABCD"))

# Line plot of column A against the row index.
data["A"].plot()
plt.show()

# Scatter plot with column A on the x axis and column B on the y axis.
data.plot.scatter(x="A", y="B")
plt.show()