代码来自于《Python数据科学手册》的代码复现。
来自和鲸科技(科赛)的K-lab项目
import numpy as np
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b
array([5, 6, 7])
a + 5
array([5, 6, 7])
M = np.ones((3, 3))
print(M)
[[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]]
M + a
array([[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.]])
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]
print(a)
print(b)
[0 1 2]
[[0]
[1]
[2]]
a + b
array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4]])
M = np.ones((2, 3))
a = np.arange(3)
M + a
array([[1., 2., 3.],
[1., 2., 3.]])
a = np.arange(3).reshape((3, 1))
b = np.arange(3)
a + b
array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4]])
a
array([[0],
[1],
[2]])
print(M)
print(a)
[[1. 1.]
[1. 1.]
[1. 1.]]
[0 1 2]
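注意:此处的 M 已重新定义为 np.ones((3, 2)),a 为 np.arange(3)。两者形状分别为 (3, 2) 和 (3,),不满足广播规则,直接计算 M + a 会报错;需要先给 a 增加一个维度,变成 (3, 1):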
a[:, np.newaxis].shape
(3, 1)
M + a[:, np.newaxis]
array([[1., 1.],
[2., 2.],
[3., 3.]])
广播规则对于任意二元通用函数都是适用的
np.logaddexp(M, a[:, np.newaxis])
array([[1.31326169, 1.31326169],
[1.69314718, 1.69314718],
[2.31326169, 2.31326169]])
X = np.random.random((10, 3))
Xmean = X.mean(0)
Xmean
array([0.44962305, 0.53916304, 0.40801379])
X_centered = X -Xmean
X_centered.mean(0)
array([-1.11022302e-17, 6.66133815e-17, 5.55111512e-17])
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 50)[:, np.newaxis]
z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(z, origin='lower', extent=[0, 5, 0, 5], cmap='viridis')
plt.colorbar();
比较、掩码和布尔逻辑
示例:统计下雨天数
import numpy as np
import pandas as pd

# Use pandas to pull the precipitation column out into a NumPy array.
rainfall = pd.read_csv('/home/kesci/input/Handbook6828/Seattle2014.csv')['PRCP'].values
# NOTE(review): dividing by 254 presumably converts tenths of a millimetre
# to inches (25.4 mm per inch) -- confirm the PRCP unit against the dataset.
inches = rainfall / 254
inches.shape
(365,)
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set()
plt.hist(inches, 40);
比较操作
x = np.array([1, 2, 3, 4, 5])
x < 3
array([ True, True, False, False, False])
x > 3
array([False, False, False, True, True])
(2 * x) == (x ** 2)
array([False, True, False, False, False])
np.equal(x, 2)
array([False, True, False, False, False])
rng = np.random.RandomState(0)
x = rng.randint(10, size=(3, 4))
x
array([[5, 0, 3, 3],
[7, 9, 3, 5],
[2, 4, 7, 6]])
x < 6
array([[ True, True, True, True],
[False, False, True, True],
[ True, True, False, False]])
np.less(x, 6)
array([[ True, True, True, True],
[False, False, True, True],
[ True, True, False, False]])
操作布尔数组
print(x)
[[5 0 3 3]
[7 9 3 5]
[2 4 7 6]]
统计记录个数
# 有多少个值小于6?
np.count_nonzero(x < 6)
8
np.sum(x < 6)
8
# 每行有多少值小于6
np.sum(x < 6, axis=1)
array([4, 2, 2])
有没有值大于8
np.any(x > 8)
True
有没有值小于0
np.any(x < 0)
False
是否所有的值都小于10?
np.all(x < 10)
True
是否所有的值都等于6
np.all(x == 6)
False
是否每一行的所有值都小于8?
np.all(x < 8, axis=1)
array([ True, False, True])
统计降水量在0.5英寸和1英寸之间的天数
np.sum((inches > 0.5) & (inches < 1))
29
np.sum(~((inches <= 0.5) | (inches >= 1)))
29
print("Number days without rain: ", np.sum(inches == 0))
print("Number days with rain: ", np.sum(inches != 0))
print("Days with more than 0.5 inches: ", np.sum(inches > 0.5))
print("Rainy day with < 0.2 inches: ", np.sum((inches > 0) & (inches < 0.2)))
Number days without rain: 215
Number days with rain: 150
Days with more than 0.5 inches: 37
Rainy day with < 0.2 inches: 75
将布尔数组作为掩码
x < 5
array([[False, True, True, True],
[False, False, True, False],
[ True, True, False, False]])
x[x < 5]
array([0, 3, 3, 3, 2, 4])
为所有下雨天创建一个掩码
rainy = (inches > 0)
构建一个包含整个夏季日期的掩码(6月21日是一年中的第172天)
summer = (np.arange(365) - 172 < 90) & (np.arange(365) - 172 > 0)
print("Median precip on rainy days in 2014: ", np.median(inches[rainy]))
print("Median precip on summer days in 2014: ", np.median(inches[summer]))
print("Maximum precip on summer days in 2014: ", np.max(inches[summer]))
print("Median precip non-summer rainy days: ", np.median(inches[rainy & ~summer]))
Median precip on rainy days in 2014: 0.19488188976377951
Median precip on summer days in 2014: 0.0
Maximum precip on summer days in 2014: 0.8503937007874016
Median precip non-summer rainy days: 0.20078740157480315
索引
import numpy as np
rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
print(x)
[51 92 14 71 60 20 82 86 74 74]
[x[3], x[7], x[2]]
[71, 86, 14]
ind = [3, 7, 4]
x[ind]
array([71, 86, 60])
ind = np.array([[3, 7], [4, 5]])
x[ind]
array([[71, 86],
[60, 20]])
X = np.arange(12).reshape((3, 4))
X
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
X[row, col]
array([ 2, 5, 11])
X[row[:, np.newaxis], col]
array([[ 2, 1, 3],
[ 6, 5, 7],
[10, 9, 11]])
row[:, np.newaxis] * col
array([[0, 0, 0],
[2, 1, 3],
[4, 2, 6]])
组合索引
print(X)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
X[2, [2, 0, 1]]
array([10, 8, 9])
X[1:, [2, 0, 1]]
array([[ 6, 4, 5],
[10, 8, 9]])
mask = np.array([1, 0, 1, 0], dtype=bool)
X[row[:, np.newaxis], mask]
array([[ 0, 2],
[ 4, 6],
[ 8, 10]])
用索引修改值
x = np.arange(10)
i = np.array([2, 1, 8, 4])
x[i] = 99
print(x)
[ 0 99 99 3 99 5 6 7 99 9]
x[i] -= 10
print(x)
[ 0 89 89 3 89 5 6 7 89 9]
x = np.zeros(10)
x[[0, 0]] = [4, 6]
print(x)
[6. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
i = [2, 3, 3, 4, 4, 4, 4]
x[i] += 1
print(x)
[6. 0. 1. 1. 1. 0. 0. 0. 0. 0.]
累加
x = np.zeros(10)
np.add.at(x, i, 1)
print(x)
[0. 0. 1. 2. 4. 0. 0. 0. 0. 0.]
数组的排序
选择排序
import numpy as np
def selection_sort(x):
    """Sort the array ``x`` in place with selection sort and return it.

    On each pass the smallest element of the not-yet-sorted tail ``x[i:]``
    is located with ``np.argmin`` and swapped into position ``i``.

    Parameters
    ----------
    x : array-like supporting slicing and item assignment
        Sequence to sort; the example in this file passes a 1-D NumPy array.
        It is modified in place.

    Returns
    -------
    The same object ``x``, now in ascending order.
    """
    for i in range(len(x)):
        # np.argmin is relative to the slice x[i:], so offset it by i to get
        # the position in the full array.
        swap = i + np.argmin(x[i:])
        x[i], x[swap] = x[swap], x[i]
    return x
x = np.array([2, 1, 4, 3, 5])
selection_sort(x)
array([1, 2, 3, 4, 5])
def bogosort(x):
    """Sort the array ``x`` in place by repeated random shuffling.

    The array is shuffled with ``np.random.shuffle`` until no element is
    greater than its right-hand neighbour. The number of shuffles is random,
    so this is only practical for very small inputs.

    Parameters
    ----------
    x : numpy.ndarray
        Array to sort; the example in this file passes a 1-D array.
        It is modified in place.

    Returns
    -------
    The same object ``x``, now in ascending order.
    """
    # x[:-1] > x[1:] compares every element with its successor; any True
    # entry means the array is not yet in ascending order.
    while np.any(x[:-1] > x[1:]):
        np.random.shuffle(x)
    return x
x = np.array([2, 1, 4, 3, 5])
bogosort(x)
array([1, 2, 3, 4, 5])
NumPy中的快速排序
x = np.array([2, 1, 4, 3, 5])
np.sort(x)
array([1, 2, 3, 4, 5])
x.sort()
print(x)
[1 2 3 4 5]
返回索引值
x = np.array([2, 1, 4, 3, 5])
i = np.argsort(x)
print(i)
[1 0 3 2 4]
x[i]
array([1, 2, 3, 4, 5])
沿着行或列排序
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (4, 6))
print(X)
[[6 3 7 4 6 9]
[2 6 7 4 3 7]
[7 2 5 4 1 7]
[5 1 4 0 9 5]]
对X的每一列排序
np.sort(X, axis=0)
array([[2, 1, 4, 0, 1, 5],
[5, 2, 5, 4, 3, 7],
[6, 3, 7, 4, 6, 7],
[7, 6, 7, 4, 9, 9]])
对X的每一行排序
np.sort(X, axis=1)
array([[3, 4, 6, 6, 7, 9],
[2, 3, 4, 6, 7, 7],
[1, 2, 4, 5, 7, 7],
[0, 1, 4, 5, 5, 9]])
部分排序
找出最小的3个值并放在数组最左边,其余的值放在右边;两个分区内部的元素顺序都是任意的
x = np.array([7, 2, 3, 1, 6, 5, 4])
np.partition(x, 3)
array([2, 1, 3, 4, 6, 5, 7])
np.partition(X, 2, axis=1)
array([[3, 4, 6, 7, 6, 9],
[2, 3, 4, 7, 6, 7],
[1, 2, 4, 5, 7, 7],
[0, 1, 4, 5, 9, 5]])

本文深入探讨了NumPy库在Python数据科学中的高级应用,包括数组运算、广播规则、比较和掩码操作、布尔逻辑、索引技巧、数组排序以及部分排序方法。通过实例展示了如何高效处理和分析数据。
634

被折叠的 条评论
为什么被折叠?



