numpy-数组
- numpy中的array运算更快
import numpy as np
# Benchmark setup: the same one million integers as an ndarray and as a plain list.
my_arr = np.arange(1000000)
my_list = list(range(1000000))
# %time is an IPython magic command (not plain Python); it reports the time taken by the statement.
%time for _ in range(10): my_arr2 = my_arr * 2
%time for _ in range(10): my_list2 = [x*2 for x in my_list]
CPU times: user 13 ms, sys: 8.12 ms, total: 21.1 ms
Wall time: 22 ms
CPU times: user 520 ms, sys: 147 ms, total: 667 ms
Wall time: 669 ms
- 随机生成二维数组
# Draw a 2x3 array of samples from the standard normal distribution.
data = np.random.standard_normal(size=(2, 3))
- 调用astype函数时会产生一个新的数组
# astype returns a new array; the source array keeps its own dtype.
# np.bytes_ is the supported name of the fixed-width byte-string dtype
# (the np.string_ alias was removed in NumPy 2.0).
numeric_strings = np.array(['1.25', '-9.2', '42'], dtype=np.bytes_)
numeric_strings_2 = numeric_strings.astype(np.float64)
print(numeric_strings.dtype)
print(numeric_strings_2.dtype)
|S4
float64
- Any arithmetic operation between equal-size arrays is applied element-wise
- 切片是原数组的视图(view),不是copy,修改切片会改动原数组;需要副本时要显式地调用copy()
arr = np.arange(10)
arr[:] = 12  # assigning through a full slice overwrites every element in place
arr
Out[17]: array([12, 12, 12, 12, 12, 12, 12, 12, 12, 12])
a = arr[5:8].copy()  # explicit copy of the slice
a[:] = 10  # writing to the copy does not affect arr
- 选取二维数组的前两行
# Build the 3x3 grid holding 1..9, then take its first two rows.
arr2d = np.arange(1, 10).reshape(3, 3)
arr2d[:2]  # slicing the first axis selects rows 0 and 1
- boolean indexing
# One name per row of data.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(names.size, 4)
# Rows whose name is 'Bob'.
data[names == 'Bob']
# Everything but 'Bob': compare with != or, equivalently, negate the == mask with ~.
data[names != 'Bob']
# The Python keywords `and` / `or` do not work on boolean arrays; combine masks with & and |.
# Selecting data with a boolean mask always creates a copy of the data.
mask = (names == 'Will') | (names == 'Bob')
data[mask]
- 将二维数组里小于0的元素都变为0
# Set every negative entry of data to zero in place, using a boolean mask.
negative = data < 0
data[negative] = 0
numpy-矩阵
- 对np.matrix来说,这里*是矩阵乘法(对普通ndarray,*是逐元素相乘),multiply是对应元素相乘
# Matrices.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0;
# both produce an np.matrix, for which * means matrix multiplication.
ss = np.asmatrix([1, 2, 3])
mm = np.asmatrix([1, 2, 3])
mm * ss.T  # matrix product: (1x3) times (3x1) gives a 1x1 matrix
Out[26]: matrix([[14]])
np.shape(mm)  # shape of the matrix as a tuple
Out[27]: (1, 3)
np.multiply(mm,ss)  # element-wise product of the two matrices
Out[28]: matrix([[1, 4, 9]])
- 排序
# Indices that would sort dd (along the last axis by default) -- not each element's rank.
# NOTE(review): dd is not defined in the visible notes; presumably a NumPy array/matrix.
dd.argsort()
- 取均值
# Mean of dd; with no axis argument NumPy averages over all elements.
# NOTE(review): dd is not defined in the visible notes; presumably a NumPy array/matrix.
dd.mean()