可复制直接食用 2023-9-26 莫烦Python可视化工具 numpy，pandas，matplotlib

博客围绕莫烦Python可视化工具展开，主要介绍了numpy、pandas和matplotlib的学习内容，这些工具在信息技术领域的数据处理与可视化方面有重要作用。

莫烦Python可视化工具 numpy，pandas，matplotlib

numpy 学习

import numpy
import numpy as np

# 2x3 integer matrix built from a nested list with an explicit dtype.
array = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ],
    dtype=np.int64,
)

# NOTE: the name `a` is rebound by every constructor below, so only the
# last value survives; each line just demonstrates one way to build an array.

# 3 rows x 4 columns, every element set to zero.
a = np.zeros((3, 4), dtype=np.int64)
# 3x4 buffer allocated without initialisation: its contents are arbitrary.
a = np.empty((3, 4), dtype=np.int64)
# The integers 0..11 laid out row by row as a 3x4 matrix.
a = np.arange(12).reshape((3, 4))
# 20 evenly spaced samples covering the closed interval [1, 10].
a = np.linspace(1, 10, 20)

# print(a)
# print(array)
# print(array.dtype)
# print('number of dim:', array.ndim) # 几维数组
# print('shape:', array.shape)
# print('size:', array.size)
# Basic element-wise arithmetic on 1-D arrays.
a1 = np.array([10, 20, 30, 40])
b1 = np.arange(4)

# Comparisons are applied per element and produce boolean arrays.
print(a1 > 19)
print(a1 == 30)

# Element-wise difference of the two arrays.
c1 = np.subtract(a1, b1)
print(a1, b1)
print(c1)

# Element-wise square of b1.
c2 = np.square(b1)
print(c2)

# Sine of every element of a1, scaled by 10.
c3 = np.sin(a1) * 10
print(c3)

a2 = np.array([[1, 1], [2, 2]])
b2 = np.arange(4).reshape((2, 2))

# Element-wise product: each entry is multiplied by its counterpart.
c = np.multiply(a2, b2)
print(c)

# Matrix product, once through the function and once through the method.
c_dot = np.dot(a2, b2)
c_dot2 = a2.dot(b2)
print(c_dot)

# 2x4 matrix of uniform random samples from [0, 1), then a few reductions.
a3 = np.random.random((2, 4))
print(a3)
print(a3.sum(axis=1))  # one total per row
print(a3.sum(axis=0))  # one total per column
print(a3.min())
print(a3.max())

# Index lookups and summary statistics on a 3x4 matrix holding 2..13.
A = np.arange(2, 14).reshape((3, 4))
print(A.argmin())  # flat index of the smallest element
print(A.argmax())  # flat index of the largest element

# Mean over the whole matrix, in function form and in method form.
# np.average is a function-only alternative: ndarray has no .average method.
print(np.mean(A))
print(A.mean())

# Median of all elements.
print(np.median(A))
print(A.cumsum())  # running total over the flattened matrix
print(np.diff(A))  # differences between neighbours within each row
print(np.nonzero(A))  # index arrays (rows, columns) of the non-zero entries

# Descending values 14..3; the third arange argument is the step.
A2 = np.arange(14, 2, -1).reshape((3, 4))
print(np.sort(A2))  # each row sorted in ascending order
print(A2.T)  # transpose; np.transpose(A2) is equivalent
print(A2.clip(5, 9))  # values below 5 become 5, values above 9 become 9
print(A2)
print(A2.mean(axis=0))  # axis=0 -> one mean per column, axis=1 -> per row


# Indexing into a 2-D array (3x4, values 3..14).
A3 = np.arange(3, 15).reshape((3, 4))
print(A3)
print(A3[2, 1])  # single element: row index 2, column index 1
print(A3[2, :])  # ':' selects every column of row index 2
print(A3[1, 1:3])  # columns 1 and 2 of row index 1 (the stop index is excluded)

# Iterating over a 2-D array yields one row at a time.
for row in A3:
    print(row)

# flatten() returns a 1-D copy; .flat is an iterator over the same elements.
print(A3.flatten())
for item in A3.flat:
    print(item)

# Joining arrays. Both operands are column vectors of shape (3, 1).
A4 = np.array([1, 1, 1]).reshape(-1, 1)
B4 = np.array([2, 2, 2]).reshape(-1, 1)

C = np.vstack([A4, B4])  # vertical stack: rows are appended -> (6, 1)
D = np.hstack([A4, B4])  # horizontal stack: columns are appended -> (3, 2)
# concatenate joins any number of arrays along the chosen axis.
E = np.concatenate([A4, A4, B4, B4], axis=1)
print(E)
print(D)
print(A4.shape, C.shape)

# A4 is already 2-D, so np.newaxis adds a third dimension here.
print(A4[np.newaxis, :])  # new leading axis -> shape (1, 3, 1)
print(A4[:, np.newaxis])  # new middle axis -> shape (3, 1, 1)
print(A4.reshape((3, 1)))  # same (3, 1) shape as A4 itself

# Splitting a 3x4 matrix into pieces.
A5 = np.arange(12).reshape((3, 4))

# Three equal parts along axis 0, i.e. one row per part.
print(np.split(A5, indices_or_sections=3, axis=0))
# A list of indices cuts the columns at those positions; the repeated 1
# produces an empty piece between the first and the second cut.
print(np.split(A5, indices_or_sections=[1, 1, 2], axis=1))
# array_split tolerates sections that do not divide evenly (4 columns into 3).
print(np.array_split(A5, indices_or_sections=3, axis=1))

# Shorthands: vsplit cuts along axis 0, hsplit along axis 1.
print(np.vsplit(A5, 3))
print(np.hsplit(A5, 2))

pandas 学习

import matplotlib
import pandas as pd
import numpy as np
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# A Series is a labelled 1-D array; np.nan marks a missing value.
s = pd.Series(data=[1, 3, 6, np.nan, 44, 1])
print(s)
print("==========================================================")

# Six consecutive daily timestamps starting on 2023-09-22.
dates = pd.date_range(start='20230922', periods=6)
print(dates)
print("==========================================================")

# 6x4 frame of uniform random numbers, indexed by date, with columns A-D.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
print(df)
print("==========================================================")
# Build a DataFrame from a dict: every key becomes a column, and scalar
# values are broadcast to the length of the longest column (4 rows here).
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20230922'),
        'C': pd.Series(2, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

# Print each view of the frame, followed by a separator line.
for _view in (
    df2,
    df2.dtypes,  # dtype of every column
    df2.columns,  # column labels
    df2.values,  # underlying data as a NumPy array
    df2.T,  # transpose
    df2.sort_index(axis=1, ascending=False),  # columns in descending label order
    df2.sort_index(axis=0, ascending=False),  # rows in descending label order
    df2.sort_values(by='E'),  # rows ordered by the values of column E
):
    print(_view)
    print("==========================================================")
# --- Selecting data ---
# Column access: item syntax and attribute syntax return the same Series.
print(df['A'], df.A)
print("==========================================================")
# Row slices: by position, and by date label (both end labels are included).
print(df[0:3], df['20230922':'20230923'])
print("==========================================================")
# Label-based selection with .loc: a single row ...
print(df.loc['20230922'])
print("==========================================================")
# ... every row restricted to columns A and B ...
print(df.loc[:, ['A', 'B']])
print("==========================================================")
# ... and one row restricted to columns A and B. The row and column
# selectors must both sit inside the same .loc[...]; previously the column
# list was passed to print() as a second argument and printed verbatim.
print(df.loc['20230922', ['A', 'B']])
print("==========================================================")
# Position-based selection with .iloc: a single cell ...
print(df.iloc[3, 1])
print("==========================================================")
# ... a contiguous block of rows and columns ...
print(df.iloc[3:5, 1:3])
print("==========================================================")
# ... and an arbitrary list of row positions.
print(df.iloc[[1, 3, 5], 1:3])
print("==========================================================")
# Mixed label/position selection via DataFrame.ix is no longer available:
# print(df.ix[:3, ['A', 'C']])
# Boolean filtering: keep the rows whose A value is below the threshold.
print(df[df.A < 0.854575])
print("==========================================================")
# --- Assigning values ---
dates = pd.date_range(start='20230922', periods=6)
df = pd.DataFrame(
    data=np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

df.iloc[2, 2] = 11  # by position: row 2, column 2
df.loc['20230922', 'B'] = 22  # by label: first date, column B
# Zero out every row whose A value exceeds 4. To touch only column A one
# could target that column alone instead of the whole row.
df[df['A'] > 4] = 0

# A new column filled with NaN.
df['E'] = np.nan
# A new column from a Series; its values are aligned on the date index.
df['F'] = pd.Series(range(1, 7), index=dates)
print(df)
print("==========================================================")
# --- Handling missing data (NaN) ---
dates = pd.date_range('20230922', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
# NaN is a float, so it cannot be stored in an integer column. Cast the two
# target columns explicitly instead of relying on the silent int -> float
# upcast on assignment, which pandas has deprecated.
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)
print("==========================================================")
# how='any' drops a row as soon as it holds one NaN; how='all' would drop
# it only when every value in the row is NaN.
print(df.dropna(axis=0, how='any'))
print("==========================================================")
# Replace every NaN with 0.
print(df.fillna(value=0))
print("==========================================================")
# Boolean mask marking the missing cells.
print(df.isnull())
print("==========================================================")
# True when the frame contains at least one NaN anywhere.
print(df.isnull().values.any())
print("==========================================================")

# --- Importing and exporting data ---
# The CSV location is machine-specific; point it at a real file to run the demo.
csv_path = "C:/Users/admin/temps.csv"
try:
    data = pd.read_csv(csv_path)
except FileNotFoundError:
    # Keep the rest of the tutorial runnable when the sample file is absent.
    data = None
    print(f"skipping CSV demo: {csv_path} not found")
else:
    print(data)
print("==========================================================")
# Persist the loaded frame as a pickle in the working directory.
if data is not None:
    data.to_pickle('temps.pickle')
# --- Concatenating frames ---
# Three 3x4 float frames filled with 0, 1 and 2 respectively.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)), columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=['a', 'b', 'c', 'd'])
# Stack them vertically; ignore_index renumbers the rows 0..8.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)
print("==========================================================")

# Two frames whose columns and row labels overlap only partially.
df4 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df5 = pd.DataFrame(np.ones((3, 4)), columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
# join='outer' (the default) keeps the union of the columns, padding with NaN.
# NOTE: this result is replaced by the next assignment before it is printed.
res2 = pd.concat([df4, df5], join='outer')
# join='inner' keeps only the columns present in both frames.
res2 = pd.concat([df4, df5], join='inner', ignore_index=True)
# Side-by-side concatenation after aligning df5 to the row labels of df4.
res3 = pd.concat([df4, df5.reindex(df4.index)], axis=1)
print(res2)
print("==========================================================")
print(res3)
print("==========================================================")
# --- Merging on a key column ---
# Both frames share the 'key' column with the values K0..K3.
left = pd.DataFrame({
    'key': [f'K{i}' for i in range(4)],
    'A': [f'A{i}' for i in range(4)],
    'B': [f'B{i}' for i in range(4)],
})
right = pd.DataFrame({
    'key': [f'K{i}' for i in range(4)],
    'C': [f'C{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
})
print(left)
print("==========================================================")
print(right)
print("==========================================================")
# Rows with equal 'key' values are combined into one row.
res4 = left.merge(right, on='key')
print(res4)
print("==========================================================")
# Line plot of a random walk: the cumulative sum of 1000 normal samples.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data.plot()
plt.show()

# Four random walks held in the columns A-D of one frame.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()
# Two scatter series drawn on the same axes: the second call reuses `ax`.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
# Line plot of all four columns.
data.plot()
plt.show()

matplotlib

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import animation
import matplotlib.gridspec as gridspec
# x = np.linspace(-3, 3, 50)
# y1 = 2*x+1
# y2 = x**2
# plt.figure()
# plt.plot(x, y1)
# plt.figure(num=2, figsize=(8, 5))# 设置里面的参数
# plt.plot(x, y2)
# plt.plot(x, y1, color='red', linewidth=1.0, linestyle='--')
# 设置坐标轴
# plt.xlim((-1, 2))
# plt.ylim((-2, 3))
# plt.xlabel('x轴')
# plt.ylabel('y轴')
#
# new_ticks = np.linspace(-1, 2, 5)
# print(new_ticks)
# plt.xticks(new_ticks)
# plt.yticks([-2, -1, 1.3],
#            [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$']) # r 前缀表示原始字符串（不是正则表达式），$...$ 中的内容按 matplotlib 的 mathtext（类 LaTeX）语法渲染
print('============================================================================')

# 挪动坐标轴
# gca = 'get current axis'
# ax = plt.gca()
# ax.spines['right'].set_color('none')
# ax.spines['top'].set_color('none')
# ax.xaxis.set_ticks_position('bottom')
# ax.yaxis.set_ticks_position('left')
# ax.spines['bottom'].set_position(('data', 0))
# ax.spines['left'].set_position(('data', 0))

# 打上图例  line1,2如果要传进handles 需要在后面加上逗号,
# line1, = plt.plot(x, y2, label='up')
# line2, = plt.plot(x, y1, color='red', linewidth=1.0, linestyle='--', label='down')
#plt.legend(handles=[line1, line2], labels=['aa', 'bb'], loc='best')# 如果只想显示一条线 在[]中的参数可以减少 逗号需要保存
print('============================================================================')
# 线条的标注 annotation 标注
# 添加点  scatter一堆散点
# x0 = 1
# y0 = 2*x0+1
# plt.scatter(x0, y0, s=50, color='blue') # s = size
# plt.plot([x0, x0], [y0, 0], 'k--', lw=2.5) # 连接到坐标轴的线 k=black  lw = linewidth
# # 两种方法 method1
# plt.annotate(r'$2x+1=%s$' % y0, xy=(x0, y0), xycoords='data',
#              xytext=(+30, -30), textcoords='offset points', fontsize = 16,
#              arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2')) #在x+30，y-30点开始打印我的这部分内容
# # method2
# plt.text(-3.7, 3, r'$new\ text\ \sigma_i\ \alpha_t$', fontdict={'size': 16, 'color': 'r'})
# 能见度 不影响线条穿过
# for label in ax.get_xticklabels()+ax.get_yticklabels():
#     label.set_fontsize(12)
#     label.set_bbox(dict(facecolor='black', edgecolor='None', alpha=0.7)) # alpha透明度

print('============================================================================')

# Scatter散点图
# n = 1024
# X = np.random.normal(0, 1, n)  # normal正态分布
# Y = np.random.normal(0, 1, n)
# T = np.arctan2(Y, X) # 生成不同的颜色数量值
# #plt.scatter(X, Y, s=75, c=T, alpha=0.5)
# # 一条线的散点图
# plt.scatter(np.arange(5), np.arange(5))
# # plt.xlim((-1.5, 1.5))
# # plt.ylim((-1.5, 1.5))
# plt.xticks(()) #隐藏所有的ticks
# plt.yticks(())
print('============================================================================')

# 柱状图
# m = 12
# X1 = np.arange(m)
# Y1 = (1-X1 / float(m))*np.random.uniform(0.5, 1.0, m) # uniform 产生一个0.5~1.0的数值
# Y2 = (1-X1 / float(m))*np.random.uniform(0.5, 1.0, m)
# plt.bar(X1, +Y1, facecolor='#9999ff', edgecolor='white') #+ 向上
# plt.bar(X1, -Y2, facecolor='#ff9999', edgecolor='white') #- 向下
# for x, y in zip(X1, Y1): # zip的作用是分别传值
#     plt.text(x + 0.1, y+0.05, '%.2f' % y, ha='center', va='bottom') # ha=horizontal alignment 对齐方式
# for x, y in zip(X1, Y2): # zip的作用是分别传值
#     plt.text(x + 0.1, -y-0.05, '%-.2f' % y, ha='center', va='top') # ha=horizontal alignment 对齐方式
#
# plt.xlim((-.5, m))
# plt.xticks(())
# plt.ylim((-1.25, 1.25))
# plt.yticks(())
print('============================================================================')

# 等高线的图
# def f(x, y):
#     return (1 - x/2 + x**5 + y**3)*np.exp(-x**2 - y**2)# 算高度的值
# n = 256
# x = np.linspace(-3, 3, n)
# y = np.linspace(-3, 3, n)
# X, Y = np.meshgrid(x, y)
# plt.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap=plt.cm.hot) # 8相当于分了10部分
# C = plt.contour(X, Y, f(X, Y), 8, colors='black', linewidths=.5)
# # 加上数值描述
# plt.clabel(C, inline=True, fontsize=10)
print('============================================================================')
# image图片  原代码失败的原因：reshape(3.3) 传入的是浮点数 3.3，应传入形状 (3, 3)
# a = np.array([np.random.random(9)]).reshape(3, 3)
# plt.imshow(a, interpolation='nearest', cmap='bone', origin='upper')
#
# plt.xticks(())
# plt.yticks(())
print('============================================================================')
# fig = plt.figure() # 定义一个图片的窗口
# #ax = Axes3D(fig)
# ax = fig.add_axes(Axes3D(fig))
# X = np.arange(-4, 4, 0.25)
# Y = np.arange(-4, 4, 0.25)
# X, Y = np.meshgrid(X, Y)
# R = np.sqrt(X**2+Y**2)
# #height value
# Z = np.cos(R)
# ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow')) # rstride 跨度
# ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
#
# ax.set_zlim(-2, 2)
print('============================================================================')
# subplot 多合一显示
# plt.figure()
# #plt.subplot(2, 2, 1) # 两行两列
# plt.subplot(2, 1, 1) # 将第一个图变成只有一行
# plt.plot([0, 1], [0, 1])
# plt.subplot(2, 3, 4) # 两行三列中的第4个位置,其中的逗号可以去掉
# plt.plot([0, 1], [0, 2])
# plt.subplot(2, 3, 5) # 两行三列中的第5个位置,其中的逗号可以去掉
# plt.plot([0, 1], [0, 2])
# plt.subplot(2, 3, 6) # 两行三列中的第6个位置,其中的逗号可以去掉
# plt.plot([0, 1], [0, 2])
print('============================================================================')

# subplot 分格显示
# method1
#method 1:subplot2grid
# plt.figure()
# # 先分为3行3列，从(0,0)开始，行跨度1，列跨度为3
# ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3, rowspan=1)
# ax1.plot([1, 2], [1, 2])
# # 因为是axis，而不是figure整个的改变坐标轴，所以要使用set_xlabel/set_title等等
# ax1.set_title('ax1_title')
# ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2)
# ax3 = plt.subplot2grid((3, 3), (1, 2), rowspan=2)
# ax4 = plt.subplot2grid((3, 3), (2, 0))
# ax5 = plt.subplot2grid((3, 3), (2, 1))
# plt.tight_layout()

# method2 gridspec需要导入 import matplotlib.gridspec as gridspec
# plt.figure()
# # 3行3列
# gs = gridspec.GridSpec(3, 3)
# ax1 = plt.subplot(gs[0, :])   # 第0行，所有列
# ax2 = plt.subplot(gs[1, :2])  # 第1行，前两列
# ax3 = plt.subplot(gs[1:, 2])  # 第1行之后，第2列
# ax4 = plt.subplot(gs[-1, 0])  # 第最后一行，第0列
# ax5 = plt.subplot(gs[-1, -2]) # 最后一行，倒数第二列
# plt.tight_layout()

# method 3:easy to define structure
# f：figure
# () 所有的2行2列的值，定义格式
# f, ((ax11, ax12), (ax21, ax22)) = plt.subplots(2, 2, sharex=True, sharey=True,)  #共享x、y轴 subplots多加了s
# ax11.scatter([1, 2], [1, 2])
#
# plt.tight_layout()
print('============================================================================')
# 图中图
# fig=plt.figure()
#
# x = [1, 2, 3, 4, 5, 6, 7]
# y = [1, 3, 4, 2, 5, 8, 6]
#
# # 首先画一个大图
# # 传入相对于整个figure的百分比
# left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
# # 加上一个axis，传入参数
# ax1 = fig.add_axes([left, bottom, width, height])
# ax1.plot(x, y, 'r')
# ax1.set_xlabel('x')
# ax1.set_ylabel('y')
# ax1.set_title('title')
# # 加入小图1
# left, bottom, width, height = 0.2, 0.6, 0.25, 0.25
# # 加上一个axis，传入参数
# ax2 = fig.add_axes([left, bottom, width, height])
#
# ax2.plot(y, x, 'b')
# ax2.set_xlabel('x')
# ax2.set_ylabel('y')
# ax2.set_title('title inside 1')
#
# # 也可以用plt的方法
# plt.axes([0.6, 0.2, 0.25, 0.25])
# plt.plot(y[::-1], x, 'g')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.title('title inside 2')
print('============================================================================')
# 主次坐标轴，共享一个x轴
# x = np.arange(0, 10, 0.1)
# y1 = 0.05*x**2
# y2 = -1*y1
#
# fig, ax1=plt.subplots()
# # 用镜面将x的数据对称到另外坐标轴  twinx共享x轴 twiny共享y轴
# ax2 = ax1.twinx()
# ax1.plot(x, y1, 'g-')
# ax2.plot(x, y2, 'b-')
#
# ax1.set_xlabel('X data')
# ax1.set_ylabel('Y1', color='g')
# ax2.set_ylabel('Y2', color='b')

print('============================================================================')
# Animation demo; requires `from matplotlib import animation`.
fig, ax = plt.subplots()

x = np.arange(0, 2 * np.pi, 0.01)
# ax.plot returns a list of line objects; the trailing comma unpacks the
# single element so that `line` is the line itself.
line, = ax.plot(x, np.sin(x))


def Animation(i):
    """Shift the sine wave for frame *i* and return the updated artists."""
    line.set_ydata(np.sin(x + i / 100))
    return line,


def init():
    """Reset the line to the unshifted sine wave and return the artists."""
    line.set_ydata(np.sin(x))
    return line,


# frames=100  -> the update function is called with i = 0..99
# interval=20 -> 20 ms between frames
# blit=False  -> redraw the whole figure on every frame; blit=True would
#                redraw only the artists returned by the callbacks
# The result is bound to `ani` so the animation object stays referenced
# while the window is open.
ani = animation.FuncAnimation(fig=fig, func=Animation, frames=100,
                              init_func=init, interval=20, blit=False)

# One show() call is enough; the duplicated second call was removed.
plt.show()