Numpy
import numpy as np
1、创建数组
a = np.array([2])
a
array([2])
a = np.array([2,2])
a
array([2, 2])
b = np.array([[1,2],[3,4]])
b
array([[1, 2],
[3, 4]])
2、数组属性
a.ndim
1
a.shape
(2,)
a.dtype
dtype('int32')
a.size
2
b.size
4
3、numpy函数
np.arange(0,1,0.1)
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
np.linspace(1,5,4)
array([1. , 2.33333333, 3.66666667, 5. ])
np.logspace(1,5,4)
array([1.00000000e+01, 2.15443469e+02, 4.64158883e+03, 1.00000000e+05])
np.zeros((2,3))
array([[0., 0., 0.],
[0., 0., 0.]])
np.ones((2,3))
array([[1., 1., 1.],
[1., 1., 1.]])
np.eye(4)
array([[1., 0., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 1., 0.],
[0., 0., 0., 1.]])
np.diag([3,4,5])
array([[3, 0, 0],
[0, 4, 0],
[0, 0, 5]])
4、随机random
np.random.random(20)
array([5.19883509e-02, 2.87681413e-01, 4.96544076e-01, 2.32602258e-01,
4.48493146e-01, 7.42736891e-01, 4.28938352e-01, 6.64962985e-01,
5.55651140e-01, 2.53305944e-01, 9.24284587e-04, 7.68401309e-01,
9.22384046e-02, 8.58196238e-01, 1.24775337e-01, 9.63342219e-01,
6.86374634e-01, 4.50290470e-01, 3.89865050e-01, 9.49314497e-01])
np.random.rand(10,5)
array([[0.5712833 , 0.10389104, 0.27510421, 0.3776921 , 0.94843083],
[0.3031132 , 0.93306734, 0.84758866, 0.59395116, 0.40025089],
[0.39892501, 0.34362252, 0.36609281, 0.66723175, 0.54943704],
[0.90772769, 0.49961675, 0.39484495, 0.16780487, 0.45405735],
[0.93441868, 0.09611159, 0.77011245, 0.6082336 , 0.52785896],
[0.70769216, 0.96499182, 0.06946402, 0.74354479, 0.51895451],
[0.69492187, 0.89395789, 0.36932701, 0.51750774, 0.47772985],
[0.5991736 , 0.73576445, 0.58542434, 0.90572711, 0.63510628],
[0.99082072, 0.40778598, 0.88699359, 0.86848041, 0.22322259],
[0.45608093, 0.96157002, 0.91763542, 0.82673006, 0.33652886]])
np.random.randn(15)
array([ 1.31892466, -1.12436439, -0.53485163, -0.72648748, -0.70885778,
0.72722358, -0.85759594, 0.55040513, 0.51432475, -1.29170637,
-0.25737761, 1.04657207, -1.15773391, -0.71393983, 0.71314956])
np.random.randint(1,5,size=[3,5])
array([[2, 3, 2, 4, 2],
[3, 1, 1, 3, 1],
[4, 3, 1, 3, 4]])
5、数组的索引
5.1 一维
a = [1,2,3,4,5]
a[3]
4
5.2 二维
a = np.array([[1,2,3],[4,5,6]])
a
array([[1, 2, 3],
[4, 5, 6]])
a[:,1]
array([2, 5])
a[(0,1),(0,1)]
array([1, 5])
5.3 布尔
a = np.array([1,-1,2,-3,3,-2])
a[a>0]
array([1, 2, 3])
b = np.array([[1,2,3],[4,5,6],[7,8,9]])
b > 5
array([[False, False, False],
[False, False, True],
[ True, True, True]])
b[b>5]
array([6, 7, 8, 9])
6、数组变化
6.1、一维变二维
a = np.array([1,2,3,4,5,6])
a.reshape(2,3)
array([[1, 2, 3],
[4, 5, 6]])
6.2、二维变一维
a = np.array([[1,2],[3,4],[5,6]])
a.flatten()
array([1, 2, 3, 4, 5, 6])
a.flatten('F')
array([1, 3, 5, 2, 4, 6])
a.ravel()
array([1, 2, 3, 4, 5, 6])
6.3、拼接
a = np.array([1,2])
b = np.array([3,4])
np.hstack((a,b))
array([1, 2, 3, 4])
np.vstack((a,b))
array([[1, 2],
[3, 4]])
6.4、分割
a = np.array([[1,2],[3,4]])
np.split(a,2,axis=1)
[array([[1],
[3]]), array([[2],
[4]])]
np.hsplit(a,2)
[array([[1],
[3]]), array([[2],
[4]])]
np.vsplit(a,2)
[array([[1, 2]]), array([[3, 4]])]
7、矩阵
7.1、矩阵的创建
np.mat('1 2 3;4 5 6')
matrix([[1, 2, 3],
[4, 5, 6]])
np.matrix([[1,2],[3,4],[5,6]])
matrix([[1, 2],
[3, 4],
[5, 6]])
a = np.mat('1 2 3;4 5 6;7 8 9')
b = np.matrix([[1,2,3],[4,5,6],[7,8,9]])
np.bmat('a b;b a')
matrix([[1, 2, 3, 1, 2, 3],
[4, 5, 6, 4, 5, 6],
[7, 8, 9, 7, 8, 9],
[1, 2, 3, 1, 2, 3],
[4, 5, 6, 4, 5, 6],
[7, 8, 9, 7, 8, 9]])
7.2、矩阵运算
a = np.array([[1,2,3],[4,5,6]])
a.T
array([[1, 4],
[2, 5],
[3, 6]])
7.3、numpy运算
7.3.1 + - * / 比较运算
a = np.array([[1,2],[3,4]])
b = np.array([[1,2],[3,4]])
np.all(a == b)
True
a = np.array([[1,2],[3,4]])
b = np.array([[1,2],[4,5]])
np.any(a == b)
True
7.3.2 广播机制,二维与一维
a = np.array([[1,2],[3,4]])
b = np.array([10,10])
a + b
array([[11, 12],
[13, 14]])
a - b
array([[-9, -8],
[-7, -6]])
a * b
array([[10, 20],
[30, 40]])
8、numpy文件读写
np.save('filename',file)
np.savez('filesname',file1,file2)
np.load('filename.npy')
np.load('filesname.npz')['arr_0']/['arr_1']
9、统计分析
sort(axis=0)/sort(axis=1)
argsort()
a = np.array([[1,2,3],[1,2,3],[1,2,3]])
np.unique(a)
array([1, 2, 3])
a = np.array([[1,2],[3,4],[5,6]])
a
array([[1, 2],
[3, 4],
[5, 6]])
np.tile(a,2)
array([[1, 2, 1, 2],
[3, 4, 3, 4],
[5, 6, 5, 6]])
a.repeat(2,axis=1)
array([[1, 1, 2, 2],
[3, 3, 4, 4],
[5, 5, 6, 6]])
a.repeat(2,axis=0)
array([[1, 2],
[1, 2],
[3, 4],
[3, 4],
[5, 6],
[5, 6]])
np.sum
np.mean
np.var
np.std
np.max
np.min
np.argmin
np.argmax
np.cumsum
np.cumprod
a = np.array([1,2,3,4,5])
np.cumsum(a)
array([ 1, 3, 6, 10, 15], dtype=int32)
np.cumprod(a)
array([ 1, 2, 6, 24, 120], dtype=int32)
a = np.array([1,2,3,4,5])
b = np.array([6,7,8,9,10])
c = np.array([9,7,5,3,1])
d = np.lexsort((a,b,c))
d
array([4, 3, 2, 1, 0], dtype=int64)
list(zip(a[d],b[d],c[d]))
[(5, 10, 1), (4, 9, 3), (3, 8, 5), (2, 7, 7), (1, 6, 9)]
Matplotlib
import matplotlib.pyplot as plt
plt.figure()
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
x = np.arange(1,10,0.1)
y = np.arange(1,10,0.1)
plt.plot(x,y)

x = [4,5,3,7,1]
y = [0,0,0,0,0.05]
plt.pie(x,y,autopct='%1.1f%%')

x = [1,2,3,4,5]
y = [4,5,3,7,1]
plt.bar(x,y)

x = [1,2,3,4,5]
y = [4,5,3,7,1]
plt.scatter(x,y)

Pandas
import pandas as pd
1、pandas存取文件
pd.read_csv()
pd.read_excel()
pd.read_table()
dataframe.to_csv()
2、dataframe属性
values
columns
index
dtypes
shape
size
ndim
3、dataframe 增删改查
df = pd.DataFrame({'小写':['a','b','c','d'],'大写':['A','B','C','D']},index=['一','二','三','四'])
df
dataframe['age'].replace(240,30)
dataframe['age'].replace(np.nan,30)
dataframe.sort_values(by = ['age'],ascending = False)
dataframe.drop(labels='行名',axis=0,inplace=True)
dataframe.drop(labels='列名',axis=1)
4、查找
dataframe.loc['行名','列名']
dataframe.loc[条件,'列名']
dataframe.iloc[行索引,列索引]
5、统计函数
mode()
median()
count()
value_counts()
describe()
[count,max,min,25%,50%,75%,mean,std]
[count,unique,top,freq]
6、时间处理
a = pd.to_datetime(dataframe)
[i.year for i in a]
[i.month for i in a]
[i.day for i in a]
a + pd.Timedelta(days=1)
7、分组聚合方式
data.groupby(by='1')
sum()
mean()
data[['a','b']].agg([np.sum,np.mean])
data.agg({'a':np.sum,'b':np.mean})
data[['a','b']].transform(func)
8、透视表交叉表
pd.pivot_table()
pd.crosstab()
9、Pandas数据预处理
9.1 堆叠
concat([a,b],axis,join)
9.2 merge(主键链接)
pd.merge(a,b,how='inner/outer/left/right',on=['k1','k2'])
9.3 重叠合并
data1.combine_first(data2)
9.4清洗数据
data['a'].drop_duplicates()
data.drop_duplicates(subset=['a','b'])
data[['a','b']].corr(method = 'pearson')
'''
3σ原则
如果一个数值与平均值之间的偏差超过3倍的标准差,那么我们可以认为这个值是异常值。
'''
from scipy.interpolate import interp1d
from scipy.interpolate import lagrange
from scipy.interpolate import make_interp_spline
9.5 标准化
9.6 数据转换
get_dummies()
9.7 数据离散化
人工智能
监督
KNN(k-最近邻算法)
朴素贝叶斯(概率)
回归,岭回归
决策树(ID3,C4.5,CART)
非监督
聚类:K-means
关联算法:Apriori,FP-growth
深度学习
cnn(tensorflow,paddlepaddle)
跟踪算法
KM,EKF