目录
print(help(numpy.genfromtxt)) 帮助文档
矩阵 # :冒号代表所有,这里指的是所有列 将有25的 行的所有列输出
ndarray.astype() 值类型的转换 dtype是 显示值的类型
随机获取一个 2行3列 的矩阵 np.random.random 返回的元素取值范围是 [0, 1)(不是 -1 到 1)
floor是向下取整 ravel() 方法 把矩阵拉平成向量 T 转置 a.reshape(3,-1) 指定3行,列数自动计算
拼接两个矩阵 np.hstack((a,b))横向拼接,np.vstack((a,b)) 纵向拼接
等号复制 id方法类似于 获取内存分配的地址值 这里是同一个内存地址 伪复制
data.argmax(axis=0) axis =0表示 比较列的值
排序 axis=1 对每一行内部的元素排序 axis=0 对每一列内部的元素排序 np.argsort(a) 返回由小到大排序后的元素索引
因公司业务需要,开始了解 python及其一些python库,案例基于python3.6.5
print(help(numpy.genfromtxt)) 帮助文档
numpy.genfromtxt("world_alcohol.txt", delimiter=",",dtype=str) 导入文件 以 逗号分隔 数据类型是 str
import numpy
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",",dtype=str)
print(type(world_alcohol))
print(world_alcohol)
print(help(numpy.genfromtxt)) 帮助文档
numpy 的 数据类型 ndarray
#The numpy.array() function can take a list or list of lists as input. When we input a list, we get a one-dimensional array as a result:
vector = numpy.array([5, 10, 15, 20])
#When we input a list of lists, we get a matrix as a result:
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print (vector)
print (matrix)
[ 5 10 15 20]
[[ 5 10 15]
[20 25 30]
[35 40 45]]
4个元素 2行3 列
#We can use the ndarray.shape property to figure out how many elements are in the array
vector = numpy.array([1, 2, 3, 4])
print(vector.shape)
#For matrices, the shape property contains a tuple with 2 elements.
#两行三列
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)
(4,)
(2, 3)
数据类型
#Each value in a NumPy array has to have the same data type
#NumPy will automatically figure out an appropriate data type when reading in data or converting lists to arrays.
#You can check the data type of a NumPy array using the dtype property.
# numpy 的 ndarray 内所有元素的数据类型必须一致
numbers = numpy.array([1, 2, 3, 4])
numbers.dtype
dtype('int32')
# 切片 这里冒号代表了所有行, 获取的是第三列
# 切片 这里冒号代表了所有行, 获取的是第三列
matrix = numpy.array([
[5, 10, 15],
[20, 25, 30],
[35, 40, 45]
])
print(matrix[:,2])
[15 30 45]
判断向量中是否包含 10(逐个元素与 10 比较,返回一个布尔数组)
import numpy
#it will compare the second value to each element in the vector
# If the values are equal, the Python interpreter returns True; otherwise, it returns False
vector = numpy.array([5, 10, 15, 20])
#判断 是否 包含10
vector == 10
array([False, True, False, False])
判断矩阵中是否包含25(逐个元素与 25 比较,返回一个布尔矩阵)
matrix = numpy.array([
[5, 10, 15],
[20, 25, 30],
[35, 40, 45]
])
matrix == 25
array([[False, False, False],
[False, True, False],
[False, False, False]])
布尔类型可以当成索引在 numpy中使用,返回真实值
#Compares vector to the value 10, which generates a new Boolean vector [False, True, False, False]. It assigns this result to equal_to_ten
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
print(equal_to_ten)
print(vector[equal_to_ten])
[False True False False]
[10]
矩阵 # :冒号代表所有,这里指的是所有列 将有25的 行的所有列输出
matrix = numpy.array([
[5, 25, 15],
[20, 25, 30],
[35, 40, 45]
])
#所有行的第二列里面有没有等于25的
second_column_25 = (matrix[:,1] == 25)
print (second_column_25)
# :冒号代表所有,这里指的是所有列 将有25的 行的所有列输出
print(matrix[second_column_25, :])
[ True True False] [[ 5 25 15] [20 25 30]]
同时满足两个条件 与操作
#We can also perform comparisons with multiple conditions
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print (equal_to_ten_and_five)
[False False False False]
两个条件 或 操作 包含10 或者 包含5
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
print (equal_to_ten_or_five)
[ True True False False]
ndarray.astype() 值类型的转换 dtype是 显示值的类型
#We can convert the data type of an array with the ndarray.astype() method.
vector = numpy.array(["1", "2", "3"])
print(vector.dtype)
print(vector)
vector = vector.astype(float)
print(vector.dtype)
print(vector)
<U1
['1' '2' '3']
float64
[1. 2. 3.]
计算总和
vector = numpy.array([5, 10, 15, 20])
vector.sum()
axis 维度 等于1 时,行相加
# The axis dictates which dimension we perform the operation on
#1 means that we want to perform the operation on each row, and 0 means on each column
matrix = numpy.array([
[5, 10, 15],
[20, 25, 30],
[35, 40, 45]
])
matrix.sum(axis=1)
array([ 30, 75, 120])
axis 维度 等于0 时,列相加
matrix = numpy.array([
[5, 10, 15],
[20, 25, 30],
[35, 40, 45]
])
matrix.sum(axis=0)
向量的形式 转到矩阵的形式 3行5列
import numpy as np
a = np.arange(15).reshape(3, 5)
a
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14] 这就是向量的形式
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) 这是矩阵的形式
显示 矩阵 行列数
a.shape
(3, 5)
显示 维度
#the number of axes (dimensions) of the array
a.ndim
2
显示矩阵数据类型的名称
a.dtype.name
'int32'
显示矩阵元素个数
#the total number of elements of the array
a.size
15
初始化元素全为0的矩阵 使用元组类型的数据指定形状
np.zeros ((3,4))
array([[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.]])
初始化矩阵元素为1 数据类型为整型
np.ones( (2,3,4), dtype=np.int32 )
array([[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]]])
起始值为10 每次增加5 包含头 不包尾
#To create sequences of numbers
np.arange( 10, 30, 5 )
array([10, 15, 20, 25])
权重参数的初始化需要 numpy 的 random 模块
随机获取一个 2行3列 的矩阵 np.random.random 返回的元素取值范围是 [0, 1)(不是 -1 到 1)
np.random.random((2,3))
array([[ 0.40130659, 0.45452825, 0.79776512],
[ 0.63220592, 0.74591134, 0.64130737]])
在一个区间内取100个数,这100个数是等间距的(默认包含区间的起点和终点)
from numpy import pi
np.linspace( 0, 2*pi, 100 )
array([ 0. , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235,
4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494,
5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753,
5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012,
5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272,
6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])
向量的一些运算
#the product operator * operates elementwise in NumPy arrays
a = np.array( [20,30,40,50] )
b = np.arange( 4 )
#print a
#print b
#b
c = a-b
#print c
b**2
print (b**2)
print (a<35)
[0 1 4 9]
[ True True False False]
乘法运算和矩阵的乘法运算
#The matrix product can be performed using the dot function or method
A = np.array( [[1,1],
[0,1]] )
B = np.array( [[2,0],
[3,4]] )
print (A)
print (B)
print ("A*B:",A*B)
print ("A.dot(B):",A.dot(B))
print ("np.dot(A, B):",np.dot(A, B) )
[[1 1]
[0 1]]
[[2 0]
[3 4]]
A*B: [[2 0]
[0 4]]
A.dot(B): [[5 4]
[3 4]]
np.dot(A, B): [[5 4]
[3 4]]
exp方法 算 e的多少次幂 sqrt方法求平方根
import numpy as np
B = np.arange(3)
print (B)
print (np.exp(B))
print (np.sqrt(B))
[0 1 2]
[1. 2.71828183 7.3890561 ]
[0. 1. 1.41421356]
floor是向下取整 ravel() 方法 把矩阵拉平成向量 T 转置 a.reshape(3,-1) 指定3行,列数自动计算
#Return the floor of the input
a = np.floor(10*np.random.random((3,4)))
print(a)
print("-----")
a.shape
#flatten the array
print (a.ravel())
print("-----")
a.shape = (6, 2)
print (a)
print("-----")
print (a.T)
print("-----")
print (a.resize((2,6)))
print("-----")
print (a)
#If a dimension is given as -1 in a reshaping operation, the other dimensions are automatically calculated:
#a.reshape(3,-1)
[[2. 6. 9. 2.]
[6. 7. 3. 5.]
[9. 7. 7. 4.]]
-----
[2. 6. 9. 2. 6. 7. 3. 5. 9. 7. 7. 4.]
-----
[[2. 6.]
[9. 2.]
[6. 7.]
[3. 5.]
[9. 7.]
[7. 4.]]
-----
[[2. 9. 6. 3. 9. 7.]
[6. 2. 7. 5. 7. 4.]]
-----
None
-----
[[2. 6. 9. 2. 6. 7.]
[3. 5. 9. 7. 7. 4.]]
Out[6]:
array([[2., 6., 9., 2.],
[6., 7., 3., 5.],
[9., 7., 7., 4.]])
拼接两个矩阵 np.hstack((a,b))横向拼接,np.vstack((a,b)) 纵向拼接
a = np.floor(10*np.random.random((2,2)))
b = np.floor(10*np.random.random((2,2)))
print (a)
print ('---')
print (b)
print ('---')
print (np.hstack((a,b)))
np.vstack((a,b))
[[3. 5.]
[9. 6.]]
---
[[5. 9.]
[4. 7.]]
---
[[3. 5. 5. 9.]
[9. 6. 4. 7.]]
Out[9]:
array([[3., 5.],
[9., 6.],
[5., 9.],
[4., 7.]])
矩阵的拆分 把一个矩阵拆成多个矩阵 hsplit 按列横向拆分 vsplit 按行纵向拆分
print (np.hsplit(a,3))切成三个矩阵
print (np.hsplit(a,(3,4))) 在 3 和4的位置 切数据
np.random.random((12,2)) 矩阵 12行2列 元素取值范围是 [0, 1) 乘以10再用 floor 向下取整后得到 0到9 的整数值
a = np.floor(10*np.random.random((2,12)))
print(a)
print ('---')
print (np.hsplit(a,3))
print ('---')
print (np.hsplit(a,(3,4))) # Split a after the third and the fourth column
print ('---')
a = np.floor(10*np.random.random((12,2)))
print (a)
np.vsplit(a,3)
[[9. 1. 0. 8. 1. 7. 9. 3. 3. 3. 8. 7.]
[4. 8. 2. 2. 7. 8. 6. 4. 1. 3. 3. 1.]]
---
[array([[9., 1., 0., 8.],
[4., 8., 2., 2.]]), array([[1., 7., 9., 3.],
[7., 8., 6., 4.]]), array([[3., 3., 8., 7.],
[1., 3., 3., 1.]])]
---
[array([[9., 1., 0.],
[4., 8., 2.]]), array([[8.],
[2.]]), array([[1., 7., 9., 3., 3., 3., 8., 7.],
[7., 8., 6., 4., 1., 3., 3., 1.]])]
---
[[8. 5.]
[6. 9.]
[4. 7.]
[6. 8.]
[9. 5.]
[7. 0.]
[5. 0.]
[8. 6.]
[6. 5.]
[7. 6.]
[7. 9.]
[7. 9.]]
Out[10]:
[array([[8., 5.],
[6., 9.],
[4., 7.],
[6., 8.]]), array([[9., 5.],
[7., 0.],
[5., 0.],
[8., 6.]]), array([[6., 5.],
[7., 6.],
[7., 9.],
[7., 9.]])]
复制操作的对比 很重要 有坑
等号复制 id方法类似于 获取内存分配的地址值 这里是同一个内存地址 伪复制
#Simple assignments make no copy of array objects or of their data.
a = np.arange(12)
b = a
# a and b are two names for the same ndarray object
b is a
b.shape = 3,4
print (a.shape)
print("----------")
print (id(a))
print("----------")
print (id(b))
(3, 4)
----------
93224192
----------
93224192
a.view() 浅复制 元素值是共用的,修改一个两个都变
#The view method creates a new array object that looks at the same data.
c = a.view()
print(c is a)
print("-------------")
c.shape = 2,6
print(a.shape)
print("-------------")
print(id(a))
print("-------------")
print(id(c))
c[0,4] = 1234
a
False ------------- (3, 4) ------------- 93224192 ------------- 93328928
Out[5]:
array([[ 0, 1, 2, 3],
[1234, 5, 6, 7],
[ 8, 9, 10, 11]])
copy方法 深复制 两个矩阵没有内存上的关系 互不干扰
#The copy method makes a complete copy of the array and its data.
d = a.copy()
print (d is a)
d[0,0] = 9999
print (d)
print (a)
False
[[9999 1 2 3]
[1234 5 6 7]
[ 8 9 10 11]]
[[ 0 1 2 3]
[1234 5 6 7]
[ 8 9 10 11]]
获取每列中数值最大的数的索引
data.argmax(axis=0) axis =0表示 比较列的值
data.shape[0] 0代表 几行 1 代表几列
import numpy as np
data = np.sin(np.arange(20)).reshape(5,4)
print (data)
ind = data.argmax(axis=0)
print (ind)
#data_max = data[ind, xrange(data.shape[1])]
print(data.shape[0])
print("---------")
print(data.shape[1])
print("---------")
data_max = data[ind, range(data.shape[1])]
print (data_max)
all(data_max == data.max(axis=0))
[[ 0. 0.84147098 0.90929743 0.14112001]
[-0.7568025 -0.95892427 -0.2794155 0.6569866 ]
[ 0.98935825 0.41211849 -0.54402111 -0.99999021]
[-0.53657292 0.42016704 0.99060736 0.65028784]
[-0.28790332 -0.96139749 -0.75098725 0.14987721]]
[2 0 3 1]
5
---------
4
---------
[0.98935825 0.84147098 0.99060736 0.6569866 ]
Out[12]: True
np.tile(a, (3, 5)) 对 向量做扩展 行方向重复3次 列方向重复5次
a = np.arange(0, 40, 10)
print(a)
b = np.tile(a, (3, 5))
print (b)
[ 0 10 20 30]
[[ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]
[ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]
[ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]]
排序 axis=1 对每一行内部的元素排序 axis=0 对每一列内部的元素排序 np.argsort(a) 返回由小到大排序后的元素索引
a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)
b = np.sort(a, axis=1)
c= np.sort(a, axis=0)
print (b)
#b
a.sort(axis=1)
print("-------")
print (a)
print("-------")
print(c)
print("-------")
a = np.array([4, 3, 1, 2])
j = np.argsort(a)
print(j)
print("-------")
print (a[j])
[[4 3 5]
[1 2 1]]
[[3 4 5]
[1 1 2]]
-------
[[3 4 5]
[1 1 2]]
-------
[[1 2 1]
[4 3 5]]
-------
[2 3 1 0]
-------
[1 2 3 4]