内容较多,可使用 Ctrl+F 搜索;常用操作大部分都有,均为本人实际使用过的。
# Approach 1: build a 5x3 frame from a matrix of random integers in [0, 150),
# stored as float32, with named columns and row labels A-E.
df = pd.DataFrame(
    np.random.randint(0, 150, size=(5, 3)),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['Python', 'En', 'Math'],
    dtype=np.float32,
)

# Approach 2: build the frame column by column from a dict of random arrays.
# This rebinds `df`, replacing the frame created by approach 1.
df = pd.DataFrame(
    {
        'Python': np.random.randint(100, 150, size=5),
        'En': np.random.randint(0, 150, size=5),
        'Math': np.random.randint(0, 150, size=5),
    },
    index=['A', 'B', 'C', 'D', 'E'],
)
# Quick inspection of a DataFrame (`df` is created elsewhere in these notes).
df.sort_index(axis=0, ascending=False)  # rows ordered by index label, descending
df.head(n=10)   # first 10 rows
df.tail(n=5)    # last 5 rows (5 is the default)
df.shape        # (number of rows, number of columns)
df.dtypes       # dtype of every column
df.index        # row labels
df.columns      # column labels
df.values       # the data as a NumPy array
df.describe()   # summary statistics of the numeric columns
df.info()       # prints a summary of the index, columns, dtypes and memory usage
# Column-wise statistics (`df2` is another frame defined elsewhere).
df2.mean()
df2.std()   # pandas default is the sample standard deviation (ddof=1)

# Missing-value handling; each call returns a new object, `df` is not modified.
df.isna()               # boolean mask of missing cells (isnull() is an alias)
df.dropna()             # drop rows containing any missing value
df.fillna(value=1024)   # replace missing values with 1024

# Select rows by integer position.
# NOTE(review): `index` is not defined in this snippet; presumably a sequence
# of integer positions -- confirm against where it is created.
df.take(indices=index)

df.median()
df.quantile(q=[0.25, 0.5, 0.75])   # quartiles of each column
df.cummin()                        # running minimum down each column
df.diff(periods=1)                 # difference from the previous row

# Remove rows by index label. The first call returns a new frame; the second
# modifies `df` in place and returns None.
df.drop(index=[0, 1, 5])
df.drop(index=[0, 1, 3, 5], inplace=True)
# Write `df` to a comma-separated file, keeping both the column header row
# and the row index.
df.to_csv(
    './salary.csv',
    index=True,
    header=True,
    sep=',',
)

# Read the file back: the first line supplies the column names and the first
# column is restored as the row index.
pd.read_csv(
    './salary.csv',
    index_col=0,
    header=[0],
    sep=',',
)
# Excel round trip (`df1` and `df2` are frames defined elsewhere).

# Write df1 to a single-sheet workbook: keep the column header row, omit the
# row index.
df1.to_excel(
    './salary.xlsx',
    sheet_name='salary',
    header=True,
    index=False,
)

# Read that sheet back. Row 0 of the sheet is treated as the header, `names`
# then overrides the column labels with A-E, and the column at position 3
# becomes the row index.
# NOTE(review): passing five names assumes the sheet has five columns -- confirm.
pd.read_excel(
    './salary.xlsx',
    sheet_name='salary',
    header=0,
    names=list('ABCDE'),
    index_col=3,
)

# Write several frames into one workbook, one sheet per frame. The two
# to_excel calls must be indented inside the `with` block: the writer saves
# and closes the file when the block exits.
with pd.ExcelWriter('./data.xlsx') as writer:
    df1.to_excel(writer, sheet_name='salary', index=False)
    df2.to_excel(writer, sheet_name='score', index=False)

# Read a single sheet of the multi-sheet workbook.
pd.read_excel('./data.xlsx', sheet_name='score')
from sqlalchemy import create_engine

# SQLAlchemy engine for the MySQL database `pandas` on localhost, using the
# pymysql driver.
# NOTE(review): the URL embeds a username and password (root:root); load real
# credentials from configuration or the environment instead of hard-coding them.
conn = create_engine(
    'mysql+pymysql://root:root@localhost/pandas?charset=UTF8MB4'
)

# Write the rows of `df` to table `data` without the row index;
# if_exists='append' adds to the table when it already exists.
df.to_sql(name='data', con=conn, if_exists='append', index=False)
# Column selection: a single label returns a Series, a list of labels returns
# a DataFrame.
df['Python']
df[['Python', 'Math']]

# Row selection by index label with .loc (`df2` is defined elsewhere): one
# label versus a list of labels.
df2.loc['A']
df2.loc[['A', 'D']]
# Combining frames (`df1`, `df2` and `df3` are defined elsewhere).

# Stack the rows of df2 underneath df1.
pd.concat([df1, df2], axis=0)

# Database-style join; with no key given, pandas joins on the column names
# the two frames have in common.
pd.merge(left=df1, right=df2)

# Join when the key column is named differently on each side.
pd.merge(left=df1, right=df3, left_on='name', right_on='名字')
# Relabelling and value substitution; each call returns a new object.

# Rename selected row labels and column labels via old -> new mappings.
df.rename(
    index={'A': 'AA', 'B': 'BB'},
    columns={'Python': '人工智能'},
)

# In column 'Python' only, replace the value 6 with -1024.
df.replace({'Python': 6}, -1024)

# Map the values of column 'Keras' through a lookup dict; missing values are
# mapped through the NaN key. The key is spelled np.nan because the np.NaN
# alias was removed in NumPy 2.0 and raises AttributeError there.
df['Keras'].map({9: 'Hello', 5: 'World', np.nan: 'AI'})

# Transpose: rows become columns and columns become rows.
df.T
# Reshaping and aggregating frames that carry a MultiIndex (`df`, `df2` and
# `df3` are defined elsewhere).

# Pivot the innermost row-index level into the columns.
df.unstack(level=-1)
# Pivot column level 1 into the row index (requires multi-level columns).
df.stack(level=1)

# Aggregate the rows that share a label on one index level. The `level=`
# argument of DataFrame.sum()/mean() was deprecated in pandas 1.3 and removed
# in pandas 2.0, so the grouping is spelled out with groupby. sort=False keeps
# the groups in order of first appearance, as the removed form did.
df.groupby(level=-1, sort=False).sum()
df.groupby(level=1, sort=False).mean()

# With two-level columns, a (top, sub) pair selects a single column; the
# trailing ['A'] then picks label 'A' from the resulting Series.
df3['Python', '期中']['A']
df2['Python', '期中']
# Integer position of the smallest / largest value in a column.
df['Tensorflow'].argmin()
df['Keras'].argmax()

# Index label of the largest / smallest value in each column.
df.idxmax()
df.idxmin()

# Frequency of each distinct value, and the distinct values themselves.
df['Python'].value_counts()
df['Python'].unique()

# Covariance: the full matrix, then a single pair of columns.
df.cov()
df['Python'].cov(other=df['Keras'])

# Correlation: the full matrix, then every column against one Series.
df.corr()
df.corrwith(other=df['Tensorflow'])

# Rows holding the 10 largest 'Keras' values / the 5 smallest 'Python' values.
df.nlargest(n=10, columns='Keras')
df.nsmallest(n=5, columns='Python')
# Sort by labels, descending: axis=0 orders the rows by index label, axis=1
# orders the columns by column name.
df.sort_index(ascending=False, axis=0)
df.sort_index(ascending=False, axis=1)

# Sort rows by values: by one column, then by 'Python' with ties broken by
# 'Keras'.
df.sort_values(by=['Python'])
df.sort_values(by=['Python', 'Keras'])
# Quantile-based binning: split column 'Python' into 4 bins holding roughly
# the same number of rows, labelled from lowest to highest.
pd.qcut(x=df['Python'], q=4, labels=['差', '中', '良', '优'])

# Width-based binning: 3 intervals of equal width over the column's range.
pd.cut(x=df['Python'], bins=3)
# Group by class and sex, then compute max, min and the non-null count for
# the two score columns.
# NOTE(review): recent pandas releases emit a FutureWarning when NumPy
# callables such as np.max are passed to agg; the string names ('max', 'min')
# are the suggested spelling. Left unchanged here because the resulting column
# labels can differ across NumPy versions -- confirm before switching.
df.groupby(by=['class', 'sex'])[['Tensorflow', 'Keras']].agg(
    [np.max, np.min, pd.Series.count]
)

# The same kind of summary as a pivot table, with a different set of
# aggregations per column. Each (label, func) pair names the output column.
# The count aggregation is given as the string 'count': the original passed
# the bare name `count`, which is not defined and raises NameError.
df.pivot_table(
    values=['Python', 'Keras', 'Tensorflow'],
    index=['class', 'sex'],
    aggfunc={
        'Python': [('最大值', np.max)],
        'Keras': [('最小值', np.min), ('中位数', np.median)],
        'Tensorflow': [
            ('最小值', np.min),
            ('平均值', np.mean),
            ('计数', 'count'),
        ],
    },
)
# Capture the first run of digits found in each value of column '姓名'. The
# lazy `.*?` prefix skips any leading non-digit text, and the single capture
# group becomes the one column of the returned DataFrame.
df['姓名'].str.extract(pat=r'.*?(\d+)', expand=True)
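map、apply、applymap
- map() 是 pandas.Series.map() 方法,对 Series 中的每个值进行映射操作。
- apply() 是 DataFrame 的方法,对 DataFrame 中的数据按行/列应用 func 操作;Series 也有 apply(),可单独对 Series 应用 func 操作。
- applymap() 也是 DataFrame 的方法,对整个 DataFrame 的所有元素逐个应用 func 操作(pandas 2.1 起已弃用,改名为 DataFrame.map())。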
get_dummies

# One-hot encode column "color": it is replaced by one indicator column per
# distinct value, and `df` is rebound to the encoded frame.
df = pd.get_dummies(data=df, columns=["color"])
print(df)
