数值运算-numpy基本操作-优快云博客

本文链接：https://blog.youkuaiyun.com/charlesAI/article/details/119211806

本文详细介绍了numpy库在Python中的使用，包括生成数组、数据类型操作、数组形状调整、数组计算、读取本地数据和索引、nan与inf处理、结合matplotlib作图以及数组的拼接和行列交换等核心功能。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

生成数组

生成整数

直接生成
t1 = np.array([1,2,3])
range生成
t2 = np.array(range(10))
arange生成（numpy独有）

使用方法类似range
t3 = np.arange(4,10,2)

生成小数

t7= np.array([random.random() for i in range(10)])  
print(t7)  
print(t7.dtype)  
  
t8= np.round(t7,2) #取近似两位 
print(t8)

---------------------
[0.10012212 0.95524231 0.25023686 0.65104295 0.29307086 0.18376068
 0.34930801 0.92896144 0.79899637 0.79465704]
float64

[0.1  0.96 0.25 0.65 0.29 0.18 0.35 0.93 0.8  0.79]

数据类型操作

查看数据类型

t3 = np.arange(4,10,2)
print(t3.dtype)
-------------------------
int64

改变数组存储方式

节省内存空间

t4 = np.array(range(1,4),dtype ="i1")  
print(t4.dtype)
---------------
int8

t5 = np.array([1,1,0,1,0,0],dtype=bool)  
print(t5)  
print(t5.dtype)
---------------
[ True  True False  True False False]
bool

t6= t5.astype("int8")
print(t6)
print(t6.dtype)
----------------
[1 1 0 1 0 0]
int8

数组的形状

查看形状

可以理解为：数组的维度

t8= np.array([[1,2,3],[4,5,6]])  
print(t8.shape)
------------------------
(2, 3)    #二维，两行三列


t8= np.array([[[1,2,3],[4,5,6]],[[3,4,5],[6,7,8]]])  
print(t8.shape)
------------------------
(2, 2, 3)  #三维，两块两行三列

修改形状

升维

t3 = np.arange(12)  
t4 = t3.reshape((3,4)) # 变为二维

print(t3.shape)  
print(t4.shape)
------------------------
(12,)
(3, 4)

降维

t5 = t4.reshape((12,))  # 变为一维
/
t5 = t4.reshape((t4.shape[0]*t4.shape[1],)) #shape[0]表示行数，shape[1]表示列数
/
t4.flatten() #二维按照行展开为一维

数组计算

标量加减乘除

数组内的所有数都计算

数组间运算

形状一样

#对应位置相加减乘除

t5 = np.arange(0,24).reshape((4,6))  
t6 = np.arange(100,124).reshape((4,6))  
print(t5+t6)
print(t5-t6)  
print(t5*t6)  
print(t5/t6)
---------------------------
[[100 102 104 106 108 110]
 [112 114 116 118 120 122]
 [124 126 128 130 132 134]
 [136 138 140 142 144 146]]
 
[[-100 -100 -100 -100 -100 -100]
 [-100 -100 -100 -100 -100 -100]
 [-100 -100 -100 -100 -100 -100]
 [-100 -100 -100 -100 -100 -100]]
 
[[   0  101  204  309  416  525]
 [ 636  749  864  981 1100 1221]
 [1344 1469 1596 1725 1856 1989]
 [2124 2261 2400 2541 2684 2829]]
 
[[0.         0.00990099 0.01960784 0.02912621 0.03846154 0.04761905]
 [0.05660377 0.06542056 0.07407407 0.08256881 0.09090909 0.0990991 ]
 [0.10714286 0.11504425 0.12280702 0.13043478 0.13793103 0.14529915]
 [0.15254237 0.15966387 0.16666667 0.17355372 0.18032787 0.18699187]]
 
 
 
 print(t5/0)  # 0/0=nan   非零数/0=inf
 --------------------------
 [[nan inf inf inf inf inf]
 [inf inf inf inf inf inf]
 [inf inf inf inf inf inf]
 [inf inf inf inf inf inf]]

不同形状

广播原则：行列不匹配报错

t5 = np.arange(0,24).reshape((4,6))  
t7 = np.arange(0,6)  
print(t5-t7)  #和维度相同的行计算
---------------------------
[[ 0  0  0  0  0  0]
 [ 6  6  6  6  6  6]
 [12 12 12 12 12 12]
 [18 18 18 18 18 18]]
 

t8 = np.arange(4).reshape((4,1))  
print(t5-t8)  #和维度相同的列计算
---------------------------
[[ 0  1  2  3  4  5]
 [ 5  6  7  8  9 10]
 [10 11 12 13 14 15]
 [15 16 17 18 19 20]]

读取本地数据和索引

轴

在numpy中可以理解为方向,使用0,1,2…数字表示,对于一个一维数组，只有一个0轴，对于2维数组(shape(2,2)),有0轴和1轴，对于三维数(shape(2,2,3)),有0,1,2轴。
二维数组的轴
三维数组的轴

读取本地数据

用得少，pandas读取数据更强大更常用

np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)

fname      文件、字符串或产生器，可以是.gz或bz2压缩文件
dtype     数据类型，可选，CSV的字符串以什么数据类型读入数组中，默认float（np.float别名已在NumPy 1.24中移除，请直接使用float）
delimiter  分隔字符串，默认是任何空格，改为逗号
skiprows   跳过前x行，一般跳过第一行表头
usecols    读取指定的列，索引，元组类型
unpack     True：返回结果会被转置，原文件的每一列成为数组的一行（便于按列拆分赋值） False：保持原文件的行列排布（默认）

import numpy as np  

file_path = r"gb_videos_data_numbers.csv"  
a = np.loadtxt(file_path, delimiter=",", dtype="int",unpack=True)
b = np.loadtxt(file_path, delimiter=",", dtype="int")

print(a)
print(b)
------------------------
[[1231231  494203  142819 ...  379570   17141   79278]
 [  78240    2651   13119 ...    7277     922    2137]
 [  13548    1309     151 ...      88       8      19]
 [    705       0    1141 ...     336      59     173]]
 
 [[1231231   78240   13548     705]
 [ 494203    2651    1309       0]
 [ 142819   13119     151    1141]
 ...
 [ 379570    7277      88     336]
 [  17141     922       8      59]
 [  79278    2137      19     173]]

数组转置

t2.transpose() 或 t2.T   （T是属性，不需要加括号）

索引

print(b[2])  #取行
print(b[2:]) #取连续多行
print(b[[2,8,10]]) #取不连续多行,需要两个[]中括号

#逗号前对行操作，逗号后对列操作

print(b[1,:])   # 取索引为1的行（即第2行）的所有列
print(b[2:,:])  # 取索引为2的行（即第3行）到最后一行的所有列
print(b[[2,10,3],:]) #取不连续多行和所有列

print(b[:,1])  #取索引为1的列（即第2列）
print(b[:,1:])  #取多列
print(b[:,[0,1,2]]) #取不连续多列

print(b[2,1])#取值
print(b[2:5,1:4]) # 取多行和多列相邻的点
print(b[[0,2],[0,1]]) # 取不相邻的点：(0,0)和(2,1)

修改

先索引再赋值

b[:,1:5] = 0 

b[b<10]=3 #布尔索引：检索出小于10的元素赋值3
b[b>10]=20

三元运算符

np.where(t<10,0,10) #将小于10的元素替换为0，否则替换为10

t.clip(10,18)     #将小于10的元素替换为10，大于18的元素替换为了18

nan 和 inf

nan

缺失值或不合适的计算

属性
（1）属性1：两个nan是不相等的（np.nan != np.nan）；统计nan的个数：np.count_nonzero(np.isnan(t))，或 np.count_nonzero(t != t)
（2）属性2：nan和任何值计算都为nan；求和：np.sum(t6,axis=0)

inf(无穷)

非零数/0（0/0的结果是nan）

将nan换成均值

import numpy as np  
  
def fill_ndarray(t1):
    """Replace NaN entries of a 2-D float array with their column mean.

    The array is modified in place and the same object is returned.

    Args:
        t1: 2-D NumPy array with a float dtype (NaN cannot be stored in an
            integer array).

    Returns:
        The same array ``t1``, with every NaN replaced by the mean of the
        non-NaN values in its column. A column made up entirely of NaN is
        left unchanged because it has no values to average.
    """
    for i in range(t1.shape[1]):  # walk over every column
        temp_col = t1[:, i]  # basic slice -> a view, so writes land in t1
        nan_mask = np.isnan(temp_col)

        # Skip columns with nothing to fill, and columns with no valid
        # values (the mean of an empty selection would be NaN anyway).
        if nan_mask.any() and not nan_mask.all():
            temp_col[nan_mask] = temp_col[~nan_mask].mean()

    return t1
  
if __name__ == "__main__":  # script entry point
    # Build a 3x4 float array; NaN can only be stored in a float dtype.
    t1 = np.arange(12).reshape((3, 4)).astype("float")
    # Blank out the last two values of row 1 to simulate missing data.
    t1[1, 2:] = np.nan
    print(t1)
    print("-" * 50)
    # Fill the missing values with their column means and show the result.
    t2 = fill_ndarray(t1)
    print(t2)
	
	
-------------------------------
[[ 0.  1.  2.  3.]
 [ 4.  5. nan nan]
 [ 8.  9. 10. 11.]]
--------------------------------------------------
[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]

结合matplotlib画图

画直方图

import numpy as np  
from matplotlib import pyplot as plt  
  
# Load the CSV as an integer matrix.
csv_path = r"gb_videos_data_numbers.csv"
videos = np.loadtxt(csv_path, delimiter=",", dtype="int")

# The last column is used as the comment counts; keep values <= 5000 only.
comments = videos[:, -1]
comments = comments[comments <= 5000]

highest, lowest = comments.max(), comments.min()
print(highest, lowest)

# With a bin width of 100, derive the number of bins from the value range.
bin_width = 100
bin_count = (highest - lowest) // bin_width

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(comments, bin_count)

plt.show()

画散点图

import numpy as np  
from matplotlib import pyplot as plt  
  
# Load the CSV as an integer matrix.
csv_path = r"gb_videos_data_numbers.csv"
records = np.loadtxt(csv_path, delimiter=",", dtype="int")

# Keep only the rows whose column-1 value is at most 50000.
keep_rows = records[:, 1] <= 50000
records = records[keep_rows]

# Column 1 goes on the x axis, the last column on the y axis.
likes = records[:, 1]
comments = records[:, -1]

plt.figure(figsize=(20, 8), dpi=80)
plt.scatter(likes, comments)

plt.show()