Numpy基础

换一个不容易被看出来的名字

已于 2022-07-13 12:13:36 修改

阅读量288

点赞数

文章标签： python numpy

于 2022-07-13 12:09:48 首次发布

本文链接：https://blog.youkuaiyun.com/rice_balls_/article/details/125751703

版权

这篇博客深入探讨了NumPy库的优势，展示了如何利用其快速生成和操作大型数组。内容包括：数组的创建、求和比较、数据分布、形状变换、去重方法。此外，还详细讲解了数组的统计计算、逻辑运算、矩阵运算以及合并与分割等核心功能。通过实例演示，阐述了NumPy在数值计算中的高效性和灵活性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

~~一些lkx什么都不会的证据~~

Date 0713记录

numpy的优势

import random

import numpy as np
import time

# Advantage of ndarray: compare summation speed against a plain Python list.
# Build one large list of random floats and an ndarray holding the same values.
python_list = [random.random() for _ in range(10000000)]

nd_array = np.array(python_list)

# Built-in sum over the Python list.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can jump.
t1 = time.perf_counter()
a = sum(python_list)
t2 = time.perf_counter()
print(t2 - t1)

# np.sum over the ndarray.
t3 = time.perf_counter()
b = np.sum(nd_array)
t4 = time.perf_counter()
print(t4 - t3)

numpy生成数据

zeros
ones

分布：均匀分布uniform/正态分布normal(loc均值，scale标准差）

转置和变换形状：

reshape
resize
T

去重：unique

flatten( )转成一维再用set去重也可以

import numpy as np
import matplotlib.pyplot as plt

# An 8x5 integer array of scores. The 91000 at [3, 1] is a deliberate outlier
# that is overwritten further down to show which derived arrays follow the change.
score = np.array(
    [
        [80, 89, 86, 67, 79],
        [78, 97, 89, 67, 81],
        [90, 94, 78, 67, 74],
        [91, 91000, 90, 67, 69],
        [76, 87, 75, 67, 86],
        [70, 79, 84, 67, 84],
        [94, 92, 93, 67, 64],
        [86, 85, 83, 67, 80],
    ]
)
print(score.shape)
print(score.dtype)  # element type
print(score.itemsize)

# Shapes of 2-D, 1-D and 3-D arrays.
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([1, 2, 3, 4])
c = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
print(a.shape, b.shape, c.shape, sep="\n")
print('----------------')

# Arrays filled with zeros / ones.
a = np.zeros((5, 5), dtype=np.float32)
b = np.ones((2, 3), dtype="int32")
print(a)
print(b)

# Three ways of building an array from an existing one.
data1 = np.array(score)  # independent copy
data2 = np.asarray(score)  # no copy: score is already an ndarray, so this is the same object
data3 = np.copy(score)  # independent copy

# Overwrite the outlier in the source array; only data2 reflects the change.
score[3, 1] = 1000
print(data1)
print(data2)
print(data3)
print('------------------------------')

# Arrays over a fixed range.
a = np.linspace(0, 10, num=5)  # 5 evenly spaced points on the closed interval [0, 10]
b = np.arange(10)  # 0, 1, ..., 9
print(a, b)

# Random data generation.
# Uniform distribution: one million samples between -1 and 1.
x1 = np.random.uniform(low=-1, high=1, size=1000000)
# Inspect the result with a 1000-bin histogram.
plt.figure()
plt.hist(x1, bins=1000)
plt.show()

# Normal distribution: loc is the mean, scale is the standard deviation.
x2 = np.random.normal(loc=1.75, scale=0.1, size=1000000)
# Inspect the result with a 1000-bin histogram.
plt.figure()
plt.hist(x2, bins=1000)
plt.show()

# Example: simulated daily changes for 8 stocks over 2 weeks (10 trading days),
# drawn from a normal distribution with mean 0 and standard deviation 1.
stock_change = np.random.normal(loc=0, scale=1, size=(8, 10))
# Changes of the first stock over its first three trading days.
print(stock_change[0, :3])
print(stock_change.shape)

# Changing the shape.
# reshape returns the reshaped array and does not alter the shape of the
# array it is called on, so the result has to be assigned.
stock_change = stock_change.reshape(10, 8)
print(stock_change.shape)

# resize returns nothing and changes the array it is called on.
stock_change.resize(8, 10)
print(stock_change.shape)

# .T gives the transpose.
print(stock_change.T)
print(stock_change.T.shape)
print("-----------------------------------")

# Changing the element type.
stock_change1 = stock_change.astype(np.int32)
print(stock_change1)
print("-----------------------------------")

# Left disabled in the original notes:
# stock_change2 = stock_change.tostring()
# print(stock_change2)
# print("-----------------------------------")

# De-duplication: np.unique accepts the 2-D array directly.
temp = np.array([[1, 2, 3, 4], [3, 4, 5, 6]])
temp = np.unique(temp)
print(temp)

# A set works as well, but the array has to be flattened to 1-D first.
temp = np.array([[1, 2, 3, 4], [3, 4, 5, 6]])
temp = set(temp.flatten())
print(temp)

逻辑运算&通用判断函数

# 1. Logical operations with >
stock_change = np.random.normal(loc=0, scale=1, size=(8, 10))
# Element-wise comparison: True where the value is greater than 0.5, False otherwise.
above = stock_change > 0.5
print(above)
print("----------------------------------------------------------------")

# Boolean indexing pulls out the matching values as a 1-D array.
data1 = stock_change[above]
print(data1)
print("----------------------------------------------------------------")

# Boolean indexing on the left-hand side overwrites the matching values in place.
stock_change[above] = 1.1
print(stock_change)
print("----------------------------------------------------------------")

# 2. General predicates: all / any
# np.all(booleans): logical AND, False as soon as one element is False
# np.any(booleans): logical OR, True as soon as one element is True
window = stock_change[:2, :5]

# Did every value in the window go up?
print(np.all(window > 0))
# Did at least one value in the window go up?
print(np.any(window > 0))

三元运算符&统计运算

# 3. Ternary-style selection with np.where
# First four stocks, first four days: 1 where the change is positive, otherwise 0.
temp = stock_change[:4, :4]
print(np.where(temp > 0, 1, 0))

# Combine conditions with np.logical_and / np.logical_or.
in_band = np.logical_and(temp > 0.5, temp < 1)
print(in_band)
print(np.where(in_band, 1, 0))
print(np.where(np.logical_or(temp > 0.5, temp < -0.5), 1, 0))


# 3. Statistics
# Indicator functions: min, max, mean, median, var, std
# Available as np.max(ndarray) and as ndarray.max();
# pay attention to which axis is reduced.

# Examples
print(temp.max())
print(np.max(temp))
print(temp)
# Maximum of each column
print(temp.max(axis=0))
# Maximum of each row
print(temp.max(axis=1))
# Position of the maximum within each row
print(np.argmax(temp, axis=1))
# print(np.argmin(temp, axis=0))

数组运算

数组之间能否进行运算：

满足维度相等
shape存在1

广播机制

数组从右到左依次排开（右对齐）

要求对齐的每列（即两个ndarray在该位置都有维度）：要么两个维度大小相同，要么其中一个数组在该维度上的大小为1，才满足广播机制，可以进行数组运算；否则不可以进行。

以下为正面的例子：

从后开始往前看，以第一个为例子，最后一列均为3，满足

第二个例子，对齐的每一列均含1，满足

第三个例子，对齐的每列均含1，满足

第四个例子，后面每列均存在1，而且第一列相同，满足

矩阵运算

矩阵&二维数组：矩阵一定是二维数组，二维数组不一定是矩阵

matrix可以直接乘法*

ndarray乘法满足矩阵乘法规则后：

np.matmul
np.dot两种方式

# 5. Matrix operations
# Data stored as an ndarray (8 rows, 2 columns).
data_nd = np.array([
    [80, 86],
    [82, 80],
    [85, 78],
    [90, 90],
    [86, 82],
    [82, 90],
    [78, 80],
    [92, 94],
])
print(data_nd)
print('----------')
# The same data stored as np.matrix.
data_mat = np.matrix([
    [80, 86],
    [82, 80],
    [85, 78],
    [90, 90],
    [86, 82],
    [82, 90],
    [78, 80],
    [92, 94],
])
print(data_mat)
weights_arr = np.array([[0.3], [0.7]])
# np.mat was removed in NumPy 2.0 and raises AttributeError there;
# np.asmatrix is the supported name and also exists in older releases.
weights = np.asmatrix([[0.3], [0.7]])
# With np.matrix, * is matrix multiplication: (8, 2) x (2, 1) -> (8, 1) weighted sums.
print('*:\n', data_mat * weights)

# With ndarray there are two ways to do the same product:
# np.matmul
# np.dot
print('matmal:\n', np.matmul(data_nd, weights_arr))
print('dot:\n', np.dot(data_nd, weights_arr))

合并与分割

# 6. Joining and splitting
# Joining can be horizontal or vertical; the same two 1-D inputs are used throughout.
a = np.array([1, 3, 4])
b = np.array([1, 2, 3])

# Horizontal join: hstack
print(np.hstack((a, b)))

# Vertical join: vstack
print(np.vstack((a, b)))

# concatenate covers both; the axis argument selects the direction
print(np.concatenate((a, b), axis=0))

# Splitting
x = np.arange(9.0)
# Three equal parts
print(np.split(x, 3))
# Split at explicit indices; 10 is past the end of x, so the last piece is empty
print(np.split(x, [3, 5, 6, 10]))

汇总

import numpy as np

# 1. Logical operations with >
stock_change = np.random.normal(loc=0, scale=1, size=(8, 10))
# Element-wise comparison: True where the value is greater than 0.5, False otherwise.
above = stock_change > 0.5
print(above)
print("----------------------------------------------------------------")

# Boolean indexing pulls out the matching values as a 1-D array.
data1 = stock_change[above]
print(data1)
print("----------------------------------------------------------------")

# Boolean indexing on the left-hand side overwrites the matching values in place.
stock_change[above] = 1.1
print(stock_change)
print("----------------------------------------------------------------")

# 2. General predicates: all / any
# np.all(booleans): logical AND, False as soon as one element is False
# np.any(booleans): logical OR, True as soon as one element is True
window = stock_change[:2, :5]

# Did every value in the window go up?
print(np.all(window > 0))
# Did at least one value in the window go up?
print(np.any(window > 0))

# 3. Ternary-style selection with np.where
# First four stocks, first four days: 1 where the change is positive, otherwise 0.
temp = stock_change[:4, :4]
print(np.where(temp > 0, 1, 0))

# Combine conditions with np.logical_and / np.logical_or.
in_band = np.logical_and(temp > 0.5, temp < 1)
print(in_band)
print(np.where(in_band, 1, 0))
print(np.where(np.logical_or(temp > 0.5, temp < -0.5), 1, 0))


# 3. Statistics
# Indicator functions: min, max, mean, median, var, std
# Available as np.max(ndarray) and as ndarray.max();
# pay attention to which axis is reduced.

# Examples
print(temp.max())
print(np.max(temp))
print(temp)
# Maximum of each column
print(temp.max(axis=0))
# Maximum of each row
print(temp.max(axis=1))
# Position of the maximum within each row
print(np.argmax(temp, axis=1))
# print(np.argmin(temp, axis=0))

# 4. Arithmetic between arrays
# Array with a scalar: the operation is applied to every element.
arr = np.array([[1, 2, 3, 2, 1, 4], [5, 6, 1, 2, 3, 1]])
print(arr + 10)

# Array with array.
arr1 = np.array([[1, 2, 3, 2, 1, 4], [5, 6, 1, 2, 3, 1]])
arr2 = np.array(
    [
        [1, 2],
        [2, 4],
        [1, 2],
        [2, 4],
        [1, 2],
        [2, 4],
    ]
)
# Broadcasting rule: for each aligned pair of dimensions, the sizes must be
# equal or one of them must be 1.
# arr1 is (2, 6) and arr2 is (6, 2), so this pair does not satisfy the rule as it stands.


# 5. Matrix operations
# Data stored as an ndarray (8 rows, 2 columns).
data_nd = np.array([
    [80, 86],
    [82, 80],
    [85, 78],
    [90, 90],
    [86, 82],
    [82, 90],
    [78, 80],
    [92, 94],
])
print(data_nd)
print('----------')
# The same data stored as np.matrix.
data_mat = np.matrix([
    [80, 86],
    [82, 80],
    [85, 78],
    [90, 90],
    [86, 82],
    [82, 90],
    [78, 80],
    [92, 94],
])
print(data_mat)
weights_arr = np.array([[0.3], [0.7]])
# np.mat was removed in NumPy 2.0 and raises AttributeError there;
# np.asmatrix is the supported name and also exists in older releases.
weights = np.asmatrix([[0.3], [0.7]])
# With np.matrix, * is matrix multiplication: (8, 2) x (2, 1) -> (8, 1) weighted sums.
print('*:\n', data_mat * weights)

# With ndarray there are two ways to do the same product:
# np.matmul
# np.dot
print('matmal:\n', np.matmul(data_nd, weights_arr))
print('dot:\n', np.dot(data_nd, weights_arr))


# 6. Joining and splitting
# Joining can be horizontal or vertical; the same two 1-D inputs are used throughout.
a = np.array([1, 3, 4])
b = np.array([1, 2, 3])

# Horizontal join: hstack
print(np.hstack((a, b)))

# Vertical join: vstack
print(np.vstack((a, b)))

# concatenate covers both; the axis argument selects the direction
print(np.concatenate((a, b), axis=0))

# Splitting
x = np.arange(9.0)
# Three equal parts
print(np.split(x, 3))
# Split at explicit indices; 10 is past the end of x, so the last piece is empty
print(np.split(x, [3, 5, 6, 10]))

# 7. I/O and data handling -- pandas is the better tool for all of this.
print("IO-----------------------------------")
# Read a data file whose fields are separated by commas.
csv_path = "data/test.csv"
data = np.genfromtxt(csv_path, delimiter=",")
print(data)

# Ways of handling missing values:
#   - drop them
#   - replace them with the mean

# nan is a floating-point value.
# NOTE(review): presumably data[2][2] is a missing cell in test.csv -- confirm against the file.
cell = data[2][2]
print(type(cell))