Python科学计算与数据可视化基础-CSDN博客

本文链接：https://blog.csdn.net/XiaoWang_csdn/article/details/130786166

一、导入库清单

import matplotlib.pyplot as plt
import matplotlib as mpl
import random
import numpy as np

二、科学计算库

注释部分代码按照实际需求，取消注释即可运行。

import numpy as np

"""
关于numpy：
1，一个科学计算库，底层使用C语言实现，计算效率非常高；
2，numpy比较重要的数据结构是数组，底层大部分算法基于数组实现；
3，numpy开源免费；
"""

# 创建一维数组
# a = np.array([1, 3, 5, 7], dtype=np.int64)  # dtype=np.int64把里面的每一个元素指定为64位
# print(a, a.dtype)  # a.dtype 获取数据的类型是多少位
#
# # 使用arange()函数创建数组
# a = np.arange(10)
# print(a)
#
# # 设定步长
# a = np.arange(start=1, stop=10, step=2)
# print(a)
#
# # 初始值为0，结束值为10，个数为9
# e = np.linspace(0, 10, 9)  # 等差数组
# print(e)
#
# # 初始值为0，结束值不为10（endpoint=False），个数为10
# e = np.linspace(0, 10, 10, endpoint=False)  # 等差数组
# print(e)
#
# # 初始值为0，结束值不为10，个数为10，retstep=True时同时返回公差
# e = np.linspace(0, 10, 10, endpoint=False, retstep=True)  # 等差数组
# print(e)  # (array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]), 1.0) 一个值为数组，一个值为公差
#
# # 等比数组
# log = np.logspace(1, 3, 3)  # 开始10**1，结束10**3，创建3个等比数列，公比默认为10
# print(log)
#
# # 等比数组
# log = np.logspace(0, 3, 4, base=2)  # 开始2**0，结束2**3，创建含4个元素的等比数列，底数（公比）设置为2
# print(log)
#
# # 等比数组: endpoint=False不包含2的三次方
# log = np.logspace(0, 3, 3, base=2, endpoint=False)  # 开始2**0，不包含2**3，创建含3个元素的等比数列，底数（公比）设置为2
# print(log)
#
# ar = [[1, 2, 3], [2, 4, 6]]
# # 创建二维数组
# b = np.array(ar)
# print(b)
#
# # 二维数组的轴: 0,1 ,二维数组的转置使用数组属性T
# b2 = b.T
# print(b2)
#
# # ones() 根据指定的形状和数据类型生成全为1的数组
# p = np.ones([2, 3])  # 2行3列值全为1的数组
# print(p)
#
# # zeros() 根据指定的形状和数据类型生成全为0的数组
# p = np.zeros([2, 3])  # 2行3列值全为0的数组
# print(p)
#
# # full() 根据指定的形状和数据类型生成全为指定数字的数组
# p = np.full([2, 3], 8)  # 2行3列值全为8的数组
# print(p)
#
# # identity() 创建单位矩阵： 对角线元素为1，其他元素为0的矩阵
# p = np.identity(6)  # 6行6列单位矩阵
# print(p)
#
# a = np.array([1, 3, 5, 7])
# # 一维数组访问
# print(a[0])
#
# ar = [[1, 2, 3], [2, 4, 6]]
# b = np.array(ar)
# # 二维数组访问
# print(b[0][0], b[1, 2])
#
# # 一维数组的切片访问
# print(a[0:2])  # 切片访问不包含结束位置
#
# print('===========')
# # 二维数组的切片访问
# print(b[0:2, 1:2])  # 切片访问不包含结束位置,相当于输出两个一维数组切片结果的交集,结果为二维数组

# Boolean indexing on a 1-D array.
# a1 is also the array indexed by the commented fancy-indexing examples below.
a1 = np.array([1, 2, 3, 4] + [1, 2, 3, 4, 5, 6])
# The mask needs one flag per element of a1 (10 here).
# b1 = np.array([True, True] + [False] * 8)
# print(a1[b1])  # only positions flagged True are kept -> [1 2]

# 二维数组布尔索引
# a2 = np.array([[1, 2], [3, 4], [5, 6]])
# b2 = np.array([[True, True], [False, False],[True,True]])
# print(a2[b2]) #当为true时才取出元素,输出结果[1 2 5 6]

# 一维数组花式索引
# a = np.array([1, 2, 3, 4])
# print(a1[a])  # 输出[2 3 4 1]
#
# a2 = np.array([[1, 2], [3, 4]])
# print(a1[a2])  # 输出: [[2 3][4 1]]

# 二维数组花式索引
# ar = np.array([[1, 2, 3],
#                [2, 4, 6],
#                [1, 2, 3]])
# # 两个一维数组作为索引
# m = np.array([1, 2])
# n = np.array([0, 1])
# print(ar[m, n])  # 输出[2 2]
#
# # 两个二维数组作为索引
# m2 = np.array([[1, 1], [2, 0]])
# n2 = np.array([[1, 0], [1, 0]])
# print(ar[m2, n2])  # 输出[[4 2][2 1]]


# a = np.array([[1, 2], [3, 4]])
# b = np.array([[5, 6]])
# # 连接数组: 需要两个数组维度相同,沿指定轴的索引连接
# ab = np.concatenate((a, b))
# print(ab)
# # 沿垂直堆叠多个数组，1轴上元素个数相同
# ab = np.vstack((a, b))
# print(ab)
# # 0轴上元素个数相同
# ab = np.hstack((a, b.T))
# print(ab)

# 分割数组
# a1 = np.array([0, 5, 4, 2, 6, 7, 98, 8, 9])
# b1 = np.split(a1, 3)  # 平均拆分为三个数组
# print(b1)
#
# a1 = np.arange(9)  # 生成0~8的连续整数数组（不是随机数）
# section = np.array([4, 7])
# b1 = np.split(a1, section)  # 在索引4和7处切分，得到[0:4]、[4:7]、[7:]三个数组
# print(b1)
# # 垂直分割数组
# a2 = np.array([[1, 2, 3], [4, 5, 6], [6, 7, 8], [8, 9, 11]])
# b3 = np.vsplit(a2, section)
# print(b3)
# # 水平分割数组
# b4 = np.hsplit(a2, section)


# a1 = np.array([0, 5, 4, 2, 6, 7, 98, 8, 9])
# a2 = np.array([0, 5, 4, 2, 6, 7, 98, 8, 9])
# a3 = np.array([[1, 2, 3], [4, 5, 6], [6, 7, 8], [8, 9, 11]])
# a4 = np.array([[1, 2, 3], [4, 5, 6], [6, 7, 8], [8, 9, 11]])
# # 数组运算
# print(a1 + a2)  # 输出[  0  10   8   4  12  14 196  16  18]
# print(a1 ** 2)  # 输出[   0   25   16    4   36   49 9604   64   81]
# print(a3 + a4)
# """
# 输出结果：
# [[ 2  4  6]
#  [ 8 10 12]
#  [12 14 16]
#  [16 18 22]]
#  """
# print(a3 + 100)  # 相当于每个数组均加上100
# 输出结果
# [[101 102 103]
#  [104 105 106]
#  [106 107 108]
#  [108 109 111]]


# a1 = np.array([0, 5, 4])
# a2 = np.array([5, 8, 9])
# a3 = np.array([[1, 2, 3], [4, 5, 6], [6, 7, 8], [8, 9, 11]])
# a4 = np.array([[1, 2, 3], [4, 5, 6], [6, 7, 8], [8, 9, 11]])
# # 数组的广播
# print(a1 + 2)  # 将标量2进行广播，即[2,2,2]+[0, 5, 4]
# print(a1 + a3)  # 将a1数组进行广播，分别与二维数组中的每个维度进行运算


# Random number helpers
a1 = np.random.random_sample(10)  # 1-D array of 10 floats drawn uniformly from [0, 1)
# print(a1)
# a2 = np.random.rand(3, 4)  # 3x4 array of uniform floats in [0, 1)
# print(a2)
# a3 = np.random.randint(1, 7, (5,))  # 1-D array of 5 integers in 1..6 (upper bound 7 is excluded)
# print(a3)
# a4 = np.random.randint(3, 8, (3, 5))  # 3x5 array of integers in 3..7 (upper bound 8 is excluded)
# print(a4)
# a5 = np.random.normal(10, 3, (3, 4))  # 3x4 array from a normal distribution with mean 10, std 3
# print(a5)
# a6 = np.random.randn(3, 4)  # 3x4 array from the standard normal distribution
# print(a6)


# 数组排序
# a2 = np.random.randint(0, 10, size=(3, 4))
# print(a2)
"""
随机输出结果：
[[5 0 4 2]
 [8 1 4 1]
 [8 9 3 8]]
"""

# a2_s = np.sort(a2, axis=-1)  # sort按水平轴（1轴）对数组进行升序排序
# print(a2_s)
# a2_s2 = np.sort(a2, axis=0)  # sort按垂直轴（0轴）对数组进行升序排序
# print(a2_s2)
"""
随机输出结果：
[[5 0 3 1]
 [8 1 4 2]
 [8 9 4 8]]
"""

# a2_s3 = np.argsort(a2)  # 按行排序，返回排序后的元素的对应元素的索引数组
# print(a2_s3)
"""
索引排序结果：
[[2 0 1 3]
 [1 0 2 3]
 [3 2 0 1]]
"""

# a2_s4 = np.argsort(a2, axis=0)  # 按列排序，返回排序后的元素的对应元素的索引数组
# print(a2_s4)
"""
索引排序结果：
[[0 1 2 1]
 [1 2 0 2]
 [2 0 1 0]]
"""

# 数组的聚合函数
# 求和
# a2 = np.array([[1, 2], [2, 3]])
# a3 = np.array([[1, 2], [2, np.nan]])
# print(np.sum(a2))  # 求和： 全部数组元素相加
# print(np.sum(a2, axis=1))  # 求和： 沿1轴（水平方向）把每一行的元素相加，返回一维数组
# print(a2.sum(axis=1))  # 求和： 沿1轴（水平方向）把每一行的元素相加，返回一维数组
# print(np.nansum(a3))  # 求和： 全部数组元素相加并且忽略空值nan，返回一个标量
#
# # 求最大值
# print(np.max(a2))
# print(np.max(a2, axis=1))
# print(a2.max(axis=1))
# print(np.nanmax(a3))
#
# # 求最小值
# print(np.min(a2))
# print(np.min(a2, axis=1))
# print(a2.min(axis=1))
# print(np.nanmin(a3))
#
# # 求平均值
# print(np.mean(a2))
# print(np.mean(a2, axis=1))
# print(a2.mean(axis=1))
# print(np.nanmean(a3))


# a2 = np.array([[1, 2], [3, 4]])
# # 加权平均值
# print(np.mean(a2))
# # print(np.average(a2, axis=1, weights=[[0.7, 0.1], [0.1, 0.1]]))  # 加权平均值：weights的形状需与a2一致


# a2 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# a3 = np.array([[1, 0, 3], [8, 8, 8], [7, 8, 22]])
# a4 = np.array([[1, 22, 3], [42, 5, 62], [72, 82, 92]])
#
# # 数组保存
# np.save('array_save', a2)  # 生成文件array_save.npy
# # 保存多个数组，其中需要指定每一个数组对应的映射名称，读取文件时按照该规则读取数组
# np.savez('arrays', array_a1=a1, array_a2=a2, array_a3=a3)
# # 保存多个数组，并且压缩保存后的文件
# np.savez_compressed('arrays_coms', array_a1=a1, array_a2=a2, array_a3=a3)


# 数组读取
# a2=np.load('array_save.npy')
# print(a2)

# 根据键读取多个数组：未压缩
# a3 = np.load('arrays.npz')
# print(a3['array_a1'])
# print(a3['array_a2'])
# print(a3['array_a3'])

# Read several arrays back by key from the compressed archive
# 'arrays_coms.npz' (the file the commented np.savez_compressed example
# above writes, with keys array_a1, array_a2 and array_a3).
# The object returned by np.load for an .npz file keeps the file open, so
# it is used as a context manager to make sure the handle is released.
with np.load('arrays_coms.npz') as a4:
    print(a4['array_a1'])
    print(a4['array_a2'])
    print(a4['array_a3'])  # was 'array_a2' a second time; array_a3 was never shown

三、数据可视化

注释部分代码按照实际需求，取消注释即可运行。

import matplotlib.pyplot as plt
import matplotlib as mpl
import random

"""
数据清洗： 剔除错误数据

数据分析可视化流程：
1，定义分析目标
2，数据采集及预处理
3，数据分析挖掘
4，数据可视化

常见的可视化形式：
1，统计图：直方图、折线图、饼图
2，分布图：热力图、散点图、气泡图

数据可视化工具：
1，分析工具：pandas，SciPy , numpy , sklearn
2，绘图工具：matplotlib, Pychart, reportlab
3，平台工具：Jupyter Notebook, PyCharm
"""

# x = [1, 2]
# y = [-3, 4]
# Select the SimHei font so Chinese labels render, and switch off the
# Unicode minus glyph so negative tick labels still display with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# plt.title('柱状图')
# plt.bar(x, y)
# plt.show()

# plt.rcParams['lines.linewidth'] = 10
# plt.rcParams['lines.linestyle'] = '--'
# plt.title('虚线图')
# plt.plot(x, y)
# plt.show()

# 身高数据
# height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170, 190]
# bins = range(110, 196, 5)  # 定义区间（上界需覆盖最大值193，否则超出区间的数据不会被统计）
# plt.title('直方图')
# # 绘制直方图
# plt.hist(height, bins=bins)
# plt.show()

# 数据
# classes = ['c1', 'c2', 'c3']
# score = [70, 90, 88]
# #图形配置
# plt.title('条形图') #标题
# plt.xlabel('班级')
# plt.ylabel('成绩')
# # 条形图
# plt.bar(classes, score)
# plt.show()


# 数据
# year = range(2005, 2020)
# height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
# plt.title('折线图')
# plt.plot(year, height)
# plt.show()

# 数据
# labels = ['房贷', '购车', '教育', '饮食']
# data = [4000, 2000, 6000, 1200]
# plt.title('饼图')
# plt.pie(data, labels=labels, autopct='%1.1f%%')  # autopct='%1.1f%%'为保留一位小数
# plt.show()

# 数据
# data = [[12.2, 23.4], [14.5, 11.4], [15.8, 22.9]]
# x = [item[0] for item in data]
# y = [item[1] for item in data]
# plt.title('散点图')
# plt.scatter(x, y)
# plt.xlabel('价格(元)')
# plt.ylabel('销售(件)')
# # 在指定的坐标嵌入文字（坐标与各数据点一致）
# plt.text(12.2, 23.4, '牙膏')
# plt.text(14.5, 11.4, '洗衣粉')
# plt.text(15.8, 22.9, '衣服')
# plt.show()

# 数据
# data = [88, 78, 68, 79, 90, 89, 67, 76, 98, 30, 30]
# plt.title('箱线图')
# plt.boxplot(data)
# plt.show()


# 极径和角度数据
# r = [1, 2, 3, 4, 5]  # 极径
# theta = [0.0, 1.57, 3.14, 4.71, 6.28]
#
# ax = plt.subplot(111, projection='polar')  # 指定坐标轴为极坐标轴
# plt.plot(theta, r)  # 绘制极线图


# 指定坐标轴为极坐标轴
# ax = plt.subplot(111, projection='polar')
# # 绘制极坐标轴的示例
# ax.plot([1, 2, 3, 4, 5])
# ax.scatter([0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.4, 0.2, 0.8, 0.3])
# plt.title('极线图')
# plt.show()  # 显示图形

# 数据
# year = range(2005, 2020)
# height = [168, 155, 160, 143, 170, 160, 193, 170, 190, 160, 143, 170, 160, 193, 170]
# plt.title('阶梯图')
# plt.step(year, height)
# plt.show()


# 图形配置
# x = [1, 2, 3]
# name = ['一班', '二班', '三班']
# y = [70, 90, 88]
# # 柱状图
# plt.bar(x, y)
#
# # 图形配置
# plt.title('成绩柱状图')  # 标题
# plt.xlabel('班级')
# plt.ylabel('成绩')
# plt.xticks(x, name)  # 设置X轴柱状图名称
# for i in range(1, 4):
#     plt.text(i, y[i - 1] + 1, y[i - 1])  # 纵坐标的具体分数
# plt.show()


# 数据: 三个学科的成绩
# ch = [72, 80, 66, 77, 92]
# math = [62, 92, 72, 75, 88]
# eng = [88, 76, 45, 80, 98]
# plt.title('堆积图')
# plt.bar(range(1, 6), ch, color='r', label='语文成绩')  # 绘制语文柱状图
# plt.bar(range(1, 6), math, bottom=ch, color='g', label='数学成绩')  # bottom=ch在语文柱状图的基础上绘制数学柱状图
# chmath = [ch[i] + math[i] for i in range(5)]  # 计算语文和数学成绩之和
# plt.bar(range(1, 6), eng, bottom=chmath, color='b', label='英语成绩')  # bottom=chmath在数学和语文之和柱状图的基础上英语柱状图
# plt.show()


# 数据: 三个学科的成绩
# c1 = [72, 80, 66]
# c2 = [62, 92, 72]
# c3 = [88, 76, 45]
# name_list = ['语文', '数学', '英语']
# width = 0.4  # 柱状图宽度
# x = [1, 3, 5]  # 柱状图之间的间隔
#
# plt.bar(x, c1, label='c1', fc='r', width=width)
# x = [1.4, 3.4, 5.4]
# plt.bar(x, c2, label='c2', fc='g', width=width)
# x = [1.8, 3.8, 5.8]
# plt.bar(x, c3, label='c3', fc='b', width=width)
# x = [1.4, 3.4, 5.4]
# # 设置横坐标的名称
# plt.xticks(x, name_list)
# # 设置班级颜色
# plt.legend()
# plt.title('分块图-三班级成绩图')
# plt.xlabel('科目')
# plt.ylabel('成绩')
# plt.show()

# Bubble chart: one marker per person, with age on the x-axis, height on
# the y-axis and weight mapped to the marker size through scatter's `s`.
# The sample values are fixed, not generated at random.
x = list(range(22, 31))  # ages 22..30
y = [155, 150, 175, 180, 179, 190, 189, 170, 168]  # heights
z = [60, 66, 58, 76, 90, 89, 77, 88, 98]  # weights, used as marker sizes

plt.title('气泡图')
plt.scatter(x, y, s=z)
plt.show()