Matplotlib
1.基础操作
1.绘制基本图像
import matplotlib.pyplot as plt
import numpy as np
# Sample data: 50 evenly spaced x values on [-3, 3] and two curves over them.
x = np.linspace(-3,3,50)
y1 = 2*x+1
y2 = x**2
# First figure: the straight line y1 only.
plt.figure()
plt.plot(x,y1)
# Second figure (explicitly numbered 3, figsize 8x5): both curves on the same axes.
plt.figure(num=3,figsize=(8,5))
plt.plot(x,y2)
plt.plot(x,y1,color='red',linewidth=1.0,linestyle='--' ) # set the color, line width and line style of the y1 curve
plt.show()
2.设置坐标轴范围、标签及原点位置
# Configure the current axes: ranges, titles, ticks, and where the axes cross.
plt.xlim((-1,2)) # x-axis range
plt.ylim((-2,3)) # y-axis range (must be ylim; a second xlim call would just overwrite the x range)
plt.xlabel('xxx') # x-axis title
plt.ylabel('yyy') # y-axis title
new_ticks = np.linspace(-1,2,5)
plt.xticks(new_ticks) # replace the x-axis ticks
# Replace the y-axis ticks with text labels. Positions are listed in increasing
# order so the labels read from worst (bottom) to best (top).
# r'...' is a raw string, so the backslashes reach matplotlib unchanged;
# text between $...$ is rendered as math text, '\ ' is a space and
# \alpha is LaTeX syntax for the Greek letter.
plt.yticks([-3,-2,-1,1.5,3],
           ['really bad',r'$bad$',r'$normal\ \alpha$',r'$good$',r'$really\ good$'])
# Move the axes so that they cross at the data origin.
ax = plt.gca() # get the current axes
ax.spines['right'].set_color('none') # hide the right border
ax.spines['top'].set_color('none') # hide the top border
ax.xaxis.set_ticks_position('bottom') # x ticks are drawn on the bottom spine
ax.yaxis.set_ticks_position('left') # y ticks are drawn on the left spine
ax.spines['bottom'].set_position(('data',0)) # bottom spine placed at y = 0
ax.spines['left'].set_position(('data',0)) # left spine placed at x = 0
3.设置图例
# plt.plot returns a list of lines; the trailing comma unpacks the single
# line object so it can be passed to legend() as a handle.
l1, = plt.plot(x,y2,label='y2')
l2, = plt.plot(x,y1,color='red',linewidth=1.0,linestyle='--',label='y1')
# Build the legend from the two handles with explicitly supplied label texts;
# loc='best' leaves the placement to matplotlib.
plt.legend(handles=[l1,l2,],labels=['label1','label2'],loc='best')
4.标注
# Add text to the figure, choosing where it goes and how it looks.
# Point to annotate: it lies on the line y = 2x + 1 (x0 = 1 is just an example value).
x0 = 1
y0 = 2*x0 + 1
# Method 1: annotate() - text placed 30 points right / 30 points down from
# the data point (x0, y0), connected to it by a curved arrow.
plt.annotate(r'$2x+1=%s$' % y0,xy=(x0,y0),xycoords='data',
             xytext=(+30,-30),textcoords='offset points',
             fontsize=16,arrowprops=dict(arrowstyle='->',connectionstyle='arc3,rad=0.2'))
# Method 2: text() - free text anchored at data coordinates (0.5, -3).
plt.text(0.5,-3,r'$This\ is\ the\ text.\mu\ \sigma_i\ \alpha_t$',
         fontdict={'size':16,'color':'r'})
5.散点图
# Scatter plot of n random points drawn from a normal distribution (mean 0, std 1).
n = 1024
X = np.random.normal(0,1,n)
Y = np.random.normal(0,1,n)
T = np.arctan2(Y,X) # angle of each point; passed below as the color value
plt.scatter(X,Y,s=75,c=T,alpha=0.5)
# Restrict the view to the square [-1.5, 1.5] x [-1.5, 1.5].
plt.xlim((-1.5,1.5))
plt.ylim((-1.5,1.5))
plt.show()
6.绘制子图
# Four subplots arranged in a 2x2 grid, each showing the same diagonal line.
plt.figure()
plt.subplot(2,2,1) # 2 rows, 2 columns: draw into subplot 1
plt.plot([0,1],[0,1])
plt.subplot(2,2,2) # 2 rows, 2 columns: draw into subplot 2
plt.plot([0,1],[0,1])
plt.subplot(2,2,3) # 2 rows, 2 columns: draw into subplot 3
plt.plot([0,1],[0,1])
plt.subplot(2,2,4) # 2 rows, 2 columns: draw into subplot 4
plt.plot([0,1],[0,1])
plt.show()
分格显示
# Method 1: plt.subplot2grid() - uneven cells on a 3x3 grid.
# The second argument is the (row, column) of the cell's top-left corner.
ax1 = plt.subplot2grid((3,3),(0,0),colspan=3,rowspan=1) # row 0, spanning all 3 columns
ax1.plot([1,2],[1,2])
ax1.set_title('ax1_title')
ax2 = plt.subplot2grid((3,3),(1,0),colspan=2) # row 1, columns 0-1
ax3 = plt.subplot2grid((3,3),(1,2),rowspan=2) # column 2, rows 1-2
ax4 = plt.subplot2grid((3,3),(2,0)) # row 2, column 0
ax5 = plt.subplot2grid((3,3),(2,1)) # row 2, column 1
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
# Method 2: gridspec - cells are selected by slicing a 3x3 GridSpec.
gs = gridspec.GridSpec(3,3)
ax1 = plt.subplot(gs[0,:]) # row 0, all columns
ax2 = plt.subplot(gs[1,:2]) # row 1, columns 0-1
ax3 = plt.subplot(gs[1:,2]) # rows 1-2, column 2
ax4 = plt.subplot(gs[-1,0]) # last row, column 0
ax5 = plt.subplot(gs[-1,-2]) # last row, second-to-last column
# Method 3: plt.subplots() - create the figure and a 2x2 array of axes in one call.
import matplotlib.pyplot as plt
# sharex/sharey=True: all four axes share their x and y axes.
f,((ax11,ax12),(ax21,ax22)) = plt.subplots(2,2,sharex=True,sharey=True)
ax11.scatter([1,2],[1,2])
plt.tight_layout()
plt.show()
Numpy
1.基础操作
-
生成array
import numpy as np

# Creating arrays
a1 = np.array([2,3,4])
a2 = np.array([[2,3,4],
               [5,6,7]])            # 2x3 matrix
a3 = np.zeros((3,4))                # 3x4 matrix of zeros
a4 = np.ones((3,4))                 # 3x4 matrix of ones
a5 = np.arange(1,10,2)              # like range(): start, stop, step
a6 = np.arange(12).reshape((3,4))   # change the shape to 3x4
运算
import numpy as np

a = np.array([1,2,3,4])
b = np.arange(4)
# +, - and * between a and b all work element by element;
# so do np.sin(), np.cos(), ...
c = np.array([[2,3],
              [5,6]])
d = np.arange(4).reshape((2,2))
result1 = c*d           # element-wise product
result2 = np.dot(c,d)   # matrix product
result3 = c.dot(d)      # matrix product, method form
索引
c = np.array([[2,3],
              [5,6]])               # same matrix as in the arithmetic example
np.sum(c), np.min(c), np.max(c)     # reductions over all elements
np.transpose(c)                     # transpose, same as c.T
c[1,1],c[1][1],c[1,1:3]             # indexing
c.flatten()                         # flatten to one dimension
np.argmin(c)                        # index of the minimum in the flattened array
np.sort(c)                          # sort along the last axis
np.clip(c,3,5)                      # limit values to the range [3, 5]
np.mean(c,axis=0)                   # mean; axis=0 gives one value per column, axis=1 one per row
矩阵合并
A = np.array([1,1,1])
B = np.array([2,2,2])
C = np.vstack((A,B))    # vertical stack, C has shape (2, 3)
D = np.hstack((A,B))    # horizontal stack, D has shape (6,)
矩阵分割
A = np.arange(12).reshape((3,4))
np.split(A,2,axis=1)    # split along the columns into 2 equal pieces of shape (3, 2)
deep copy
b = a.copy() # b is a separate copy: operations on b do not affect a
Pandas
1.基础操作
-
选择数据
import pandas as pd
import numpy as np

# Sample objects: a Series, a pandas array and two DataFrames.
s = pd.Series([1,2,3])
a1 = pd.array((1,2,3,4,5,6))
data1 = pd.DataFrame(np.arange(12).reshape((3,4)))
range1 = pd.date_range('20210101',periods=6)
data2 = pd.DataFrame(np.arange(24).reshape((6,4)),
                     index=range1,columns=['A','B','C','D'])

# One column, by key or as an attribute.
print(data2['A'],data2.A)
# Row slices, by position or by index label.
print(data2[0:3],data2['20210101':'20210103'])
# Select by label with .loc
print(data2.loc['20210101':'20210103'])
print(data2.loc[:,['A','B']])
print(data2.loc['20210102',['A','B']])
# Select by integer position with .iloc
print(data2.iloc[3,1])
print(data2.iloc[3:5,1:3])
# print(data2.iloc[[1,3,4],1:3])
# Select rows with a boolean condition.
print(data2[data2.A>8])
设置值
data2.iloc[2,2] = 111               # one cell, by position
data2.loc['20210102','B'] = 222     # one cell, by label
data2[data2.B>17] = 1               # every column of the rows where B > 17
# Column C only, for the rows where A > 4. A single .loc assignment is used
# because chained indexing (data2.C[mask] = 0) may assign to a temporary copy.
data2.loc[data2.A>4,'C'] = 0
data2['F'] = np.nan                 # new column filled with NaN
data2['E'] = pd.Series([1,2,3,4,5,6],
                       index=pd.date_range('20210101',periods=6))  # new column aligned on the index
print(data2)
处理NaN数据
# None of these calls modifies data2; each one returns a new object.
print(data2.dropna(axis=0,how='any'))   # rows containing any NaN are dropped from the result
data2.fillna(value=0)                   # result has NaN replaced by 0
data2.isnull()                          # True/False for every cell: is it NaN?
数据的导入与导出
data = pd.read_csv('xxx.csv')   # import data from a CSV file
data.to_pickle('xxx.pickle')    # export data to a pickle file
合并DataFrame
# concat: three frames with identical columns, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1,columns=['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2,columns=['a','b','c','d'])
# Same columns: stack the frames on top of each other.
res1 = pd.concat([df1,df2,df3],axis=0)  # axis=0 vertical, axis=1 horizontal
res2 = pd.concat([df1,df2,df3],axis=0,ignore_index=True)  # renumber the rows 0..8

# Frames whose columns only partly overlap.
df4 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
df5 = pd.DataFrame(np.ones((3,4))*1,columns=['b','c','d','e'])
res3 = pd.concat([df4,df5],join='inner',ignore_index=True)  # keep only the shared columns
# DataFrame.append was removed in pandas 2.0; concat with the default
# join='outer' gives the same result (missing cells become NaN).
res4 = pd.concat([df4,df5],ignore_index=True)

# merge on a single key column
df1 = pd.DataFrame({'key':['k0','k1','k2','k3'],
                    'A':['a0','a1','a2','a3'],
                    'B':['b0','b1','b2','b3']})
df2 = pd.DataFrame({'key':['k0','k1','k2','k3'],
                    'C':['c0','c1','c2','c3'],
                    'D':['d0','d1','d2','d3']})
res1 = pd.merge(df1,df2,on='key')
print(res1)

# merge on two key columns: both frames must contain key1 and key2
df6 = pd.DataFrame({'key1':['k0','k0','k1','k2'],
                    'key2':['k0','k1','k0','k1'],
                    'A':['a0','a1','a2','a3'],
                    'B':['b0','b1','b2','b3']})
df7 = pd.DataFrame({'key1':['k0','k1','k1','k2'],
                    'key2':['k0','k0','k0','k0'],
                    'C':['c0','c1','c2','c3'],
                    'D':['d0','d1','d2','d3']})
res2 = pd.merge(df6,df7,on=['key1','key2'])  # default how='inner'
绘图
本文详细介绍使用Matplotlib绘制各种图表的方法,包括基础操作、坐标轴设置、图例及标注等,并通过实例演示如何利用Pandas进行数据处理,如数据选择、设置值、处理缺失值及数据导入导出。
2467

被折叠的 条评论
为什么被折叠?



