前言:DEAP是目前Python领域最流行的GP框架。因此,本系列教程将详细介绍基于DEAP实现GP的各种技巧。如有不足,也欢迎大家提出意见,以便对教程进行更新。
PyTorch加速
DEAP除了可以使用NumPy进行CPU加速,还可以基于PyTorch进行GPU加速。值得一提的是,在评估时,数据需要以Tensor的格式加载,因为传统的NumPy数组不能直接被GPU处理。当处理大规模数据时,基于PyTorch进行GPU加速可以显著提高计算速度。
import time
import torch
from deap import base, creator, tools, gp
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# 符号回归
def evalSymbReg(individual, pset):
    """Score a GP individual by its mean squared error against y = x**2.

    Args:
        individual: GP expression tree to evaluate.
        pset: Primitive set handed to ``gp.compile`` to compile the tree.

    Returns:
        A one-element tuple ``(mse,)`` holding the error as a Python float.
    """
    # Compile the GP tree into a callable.
    func = gp.compile(expr=individual, pset=pset)
    # 101 evenly spaced samples on [-10, 10] (step 0.2), created directly on
    # `device`. This is the same grid evalSymbRegSlow iterates over, so both
    # evaluators are benchmarked on the same workload (the previous 100-step
    # linspace produced a slightly different grid).
    xs = torch.linspace(-10, 10, 101, device=device)
    # Target values of the regression problem.
    ys = torch.pow(xs, 2)
    # Mean squared error between the individual's output and the target.
    squared_errors = torch.pow(func(xs) - ys, 2)
    # .item() turns the resulting 0-dim tensor into a plain Python float.
    mse = torch.mean(squared_errors).item()
    return (mse,)
# Create the fitness and individual classes: FitnessMin is a base.Fitness with
# a single objective weighted -1.0, and Individual is a gp.PrimitiveTree that
# carries a FitnessMin as its `fitness` attribute.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
对应的算子一定要替换成PyTorch函数。
# Function set and terminal set: one input argument (renamed to `x`) and
# PyTorch operators as primitives, so compiled trees operate on tensors.
pset = gp.PrimitiveSet("MAIN", arity=1)
for _torch_op, _n_args in (
    (torch.add, 2),
    (torch.sub, 2),
    (torch.mul, 2),
    (torch.neg, 1),
):
    pset.addPrimitive(_torch_op, _n_args)
pset.renameArguments(ARG0='x')
# Genetic-programming toolbox for the PyTorch-based run.
toolbox = base.Toolbox()

# Construction chain: expression generator -> individual -> population.
# Each entry references the one registered before it, so this order matters.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register(
    "individual", tools.initIterate, creator.Individual, toolbox.expr
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Variation and selection operators.
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)
toolbox.register("select", tools.selTournament, tournsize=3)

# Compilation and fitness evaluation, both bound to the torch primitive set.
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evalSymbReg, pset=pset)
现在,我们可以开始基于PyTorch进行GPU加速了。
import numpy
from deap import algorithms
# Statistics to log each generation: one tracker keyed on the individuals'
# fitness values and one keyed on their length (len), reported side by side.
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
# Register the same four reducers, in the same order as before.
for _label, _reducer in (
    ("avg", numpy.mean),
    ("std", numpy.std),
    ("min", numpy.min),
    ("max", numpy.max),
):
    mstats.register(_label, _reducer)
# Run the default evolutionary algorithm three times with the PyTorch
# evaluator and record each run's wall-clock duration in torch_time.
torch_time = []
for _run in range(3):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump when the system clock is adjusted.
    start = time.perf_counter()
    # The timed region covers population creation as well as evolution.
    population = toolbox.population(n=50)
    hof = tools.HallOfFame(1)
    pop, log = algorithms.eaSimple(
        population=population,
        toolbox=toolbox,
        cxpb=0.9,
        mutpb=0.1,
        ngen=10,
        stats=mstats,
        halloffame=hof,
        verbose=True,
    )
    end = time.perf_counter()
    print('time:', end - start)
    torch_time.append(end - start)
    # Show the individual stored in the hall of fame for this run.
    print(str(hof[0]))
fitness size
----------------------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 477687 0 1.17257e+07 0 50 2.29619e+06 4.68 0 7 2 50 1.84868
1 47 1354.51 1 8359.39 0 47 1438.73 4.62 1 9 2 47 1.9989
2 48 13450.8 2 153712 0 48 41448.7 5.22 2 13 2 48 2.44369
3 46 6779 3 153712 0 46 29901 4.5 3 13 3 46 2.26495
4 43 6690.99 4 153712 0 43 30128 3.96 4 11 3 43 1.84347
5 45 12718 5 157909 0 45 41896.2 4.48 5 13 3 45 2.32585
6 48 3996.83 6 153712 0 48 21606.1 3.54 6 9 3 48 1.25236
7 48 3826.92 7 153712 0 48 21604.5 3.3 7 9 2 48 1.04403
8 47 6731.25 8 153712 0 47 30137.6 3.38 8 9 3 47 1.19817
9 47 6400.96 9 153712 0 47 30094.7 3.22 9 8 3 47 0.807217
10 44 3240.75 10 153712 0 44 21503.3 3.28 10 7 3 44 0.825591
time: 0.038022756576538086
sub(mul(x, x), sub(x, x))
fitness size
----------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 2766.28 0 18732.1 0 50 3573.52 4.12 0 7 2 50 1.77358
1 42 1560.6 1 2217.37 0 42 908.891 4.04 1 9 2 42 1.69658
2 44 4293.66 2 153712 0 44 21418.1 4.2 2 9 3 44 1.6
3 50 10572.5 3 157909 0 50 36647.1 4.18 3 11 3 50 1.77415
4 44 6534 4 153712 0 44 30053.6 3.84 4 9 3 44 1.55383
5 46 12299 5 608604 0 46 85187.8 3.76 5 7 3 46 1.37928
6 47 3783.26 6 159956 0 47 22376.3 3.92 6 9 3 47 1.56
7 45 9641.7 7 170363 0 45 37852 4.04 7 10 3 45 1.77719
8 44 83.2539 8 2081.35 0 44 407.859 3.58 8 10 3 44 1.62592
9 42 3115.87 9 153712 0 42 21515.7 3.32 9 7 3 42 1.00876
10 46 542.51 10 8325.39 0 46 1707.12 3.56 10 8 2 46 1.29861
time: 0.12821197509765625
mul(x, x)
fitness size
----------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 8194.86 0 153712 0 50 29842.5 4.34 0 7 2 50 1.88266
1 50 241765 1 1.17257e+07 0 50 1.64084e+06 4.22 1 9 2 50 1.85785
2 44 2.00741e+07 2 1.00337e+09 0 44 1.4047e+08 4.3 2 11 2 44 1.88944
3 46 15462.6 3 608604 0 46 87345.6 3.58 3 9 3 46 1.32801
4 43 6607.05 4 153712 0 43 30143.5 3.52 4 9 3 43 1.37463
5 41 6772.88 5 153712 0 41 30129.7 3.64 5 9 3 41 1.50678
6 46 666.031 6 18732.1 0 46 2867.13 3.72 6 9 3 46 1.63756
7 44 2.03085e+07 7 1.00337e+09 0 44 1.40447e+08 3.64 7 9 3 44 1.60947
8 44 51862.8 8 2.42817e+06 0 44 340152 3.62 8 9 3 44 1.70751
9 46 9222.71 9 153712 0 46 36504.6 3.68 9 9 3 46 1.76
10 47 15371.2 10 608604 0 47 87360.6 3.76 10 9 3 47 1.78393
time: 0.028471946716308594
mul(x, x)
import numpy as np
# Function set and terminal set for the baseline run: one input argument and
# NumPy operators as primitives.
pset = gp.PrimitiveSet("MAIN", arity=1)
for _np_op, _n_args in (
    (np.add, 2),
    (np.subtract, 2),
    (np.multiply, 2),
    (np.negative, 1),
):
    pset.addPrimitive(_np_op, _n_args)
# Genetic-programming toolbox for the baseline run (evaluate is registered
# separately, once the evaluation function has been defined).
toolbox = base.Toolbox()

# Construction chain: expression generator -> individual -> population.
# Each entry references the one registered before it, so this order matters.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register(
    "individual", tools.initIterate, creator.Individual, toolbox.expr
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Variation and selection operators.
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)
toolbox.register("select", tools.selTournament, tournsize=3)

# Tree compilation bound to the NumPy primitive set.
toolbox.register("compile", gp.compile, pset=pset)
# 慢速评估
def evalSymbRegSlow(individual, pset):
    """Score a GP individual point by point against y = x**2.

    Args:
        individual: GP expression tree to evaluate.
        pset: Primitive set handed to ``gp.compile`` to compile the tree.

    Returns:
        A one-element tuple ``(mse,)`` with the mean squared error over the
        sample points.
    """
    # Turn the GP tree into a callable.
    compiled = gp.compile(expr=individual, pset=pset)
    # Sample points -10.0, -9.8, ..., 10.0 (101 values).
    sample_points = [k / 5.0 for k in range(-50, 51)]
    # One Python-level call of the compiled function per sample point.
    squared_errors = [
        (compiled(point) - point**2) ** 2 for point in sample_points
    ]
    return (sum(squared_errors) / len(sample_points),)
# Use the point-by-point evaluator for the baseline benchmark.
toolbox.register("evaluate", evalSymbRegSlow, pset=pset)

# Run the default evolutionary algorithm three times and record each run's
# wall-clock duration in py_time.
py_time = []
for _run in range(3):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump when the system clock is adjusted.
    start = time.perf_counter()
    # The timed region covers population creation as well as evolution.
    population = toolbox.population(n=50)
    hof = tools.HallOfFame(1)
    pop, log = algorithms.eaSimple(
        population=population,
        toolbox=toolbox,
        cxpb=0.9,
        mutpb=0.1,
        ngen=10,
        stats=mstats,
        halloffame=hof,
        verbose=True,
    )
    end = time.perf_counter()
    print('time:', end - start)
    py_time.append(end - start)
fitness size
----------------------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 474278 0 1.17166e+07 0 50 2.29493e+06 4.2 0 7 2 50 1.72047
1 45 1731.19 1 8458.11 0 45 1660.56 4.6 1 12 2 45 2.22711
2 48 4292.07 2 153622 0 48 21381.6 4.54 2 9 2 48 2.09962
3 48 7275.17 3 159864 0 48 30871.6 4.36 3 10 3 48 2.05679
4 44 13372.3 4 159864 0 44 41948 3.98 4 10 3 44 1.87072
5 40 16029.4 5 608247 0 40 87893.1 3.92 5 9 3 40 1.78706
6 50 12798.6 6 153622 0 50 41306.8 3.9 6 9 3 50 1.66433
7 50 958755 7 4.74663e+07 0 50 6.64403e+06 3.92 7 11 3 50 1.79822
8 44 6687.19 8 153622 0 44 30041.3 3.64 8 9 3 44 1.41082
9 46 55513.4 9 2.42675e+06 0 46 340080 3.46 9 9 3 46 1.09927
10 49 3571.77 10 153622 0 49 21526.6 3.1 10 5 3 49 0.360555
time: 0.09351873397827148
fitness size
----------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 1787.76 0 8322.11 0 50 1632.29 3.94 0 7 2 50 1.80455
1 47 7380.16 1 153622 0 47 29903.7 5.02 1 11 2 47 2.13063
2 47 259596 2 1.24743e+07 0 47 1.74534e+06 4.16 2 9 3 47 1.61691
3 43 237865 3 1.17166e+07 0 43 1.63997e+06 4.06 3 12 2 43 2.12988
4 40 6312.01 4 153622 0 40 30074.9 3.72 4 12 3 40 1.7893
5 38 3365.08 5 153622 0 38 21501.6 3.52 5 12 3 38 1.47296
6 40 16152.8 6 608247 0 40 87901.5 4.2 6 12 3 40 2.63059
7 44 42.2906 7 2114.53 0 44 296.034 3.42 7 12 2 44 1.61357
8 46 961702 8 4.74663e+07 0 46 6.64406e+06 4.12 8 12 3 46 2.19672
9 43 955804 9 4.74663e+07 0 43 6.64442e+06 4.3 9 12 3 43 2.34307
10 46 10091.8 10 184830 0 46 39557.3 3.92 10 11 3 46 1.84217
time: 0.0885322093963623
fitness size
----------------------------------------------- -----------------------------------------------
gen nevals avg gen max min nevals std avg gen max min nevals std
0 50 24284.7 0 608247 0 50 90848.5 4.04 0 7 2 50 1.68476
1 41 1058.62 1 2216.53 0 41 1057.79 4.02 1 13 2 41 2.05417
2 50 2.10053e+07 2 1.0024e+09 0 50 1.40357e+08 4.48 2 11 2 50 2.10941
3 46 234756 3 1.17166e+07 0 46 1.64026e+06 3.74 3 7 3 46 1.36836
4 45 3238.89 4 153622 0 45 21490.7 3.5 4 9 3 45 1.36015
5 48 416.786 5 8322.11 0 48 1665.11 3.24 5 6 3 48 0.708802
6 40 1042.98 6 8322.11 0 40 2703.71 3.32 6 7 3 40 0.76
7 47 175.962 7 2386.53 0 47 597.665 3.12 7 6 2 47 0.587878
8 44 3155.67 8 153622 0 44 21499.1 3.14 8 6 3 44 0.566039
9 44 421.546 9 8458.11 0 44 1680.2 3.3 9 9 3 44 1.13578
10 44 6186.5 10 153622 0 44 30096.6 3.18 10 7 3 44 0.74
time: 0.08585286140441895
最后,我们可以使用plotly绘制一个箱线图来比较PyTorch和Python的性能。可以看出,即便不使用GPU,PyTorch也可以提高速度。
from IPython.display import Image
import pandas as pd
import plotly.express as px
# Long-format table: one row per timed run, labelled by the benchmark it
# belongs to.
data = pd.DataFrame({
    'Category': ['PyTorch'] * len(torch_time) + ['Python'] * len(py_time),
    'Time': np.concatenate([torch_time, py_time]),
})
# Box plot of the run times, one box per benchmark.
fig = px.box(data, x='Category', y='Time', color='Category', width=400, height=400)
fig.update_layout(
    xaxis_title='',
    yaxis_title='Time',
    title='Comparison of PyTorch and Python'
)
# Render explicitly. Without this the figure only appears when the
# update_layout(...) call happens to be the last expression of a notebook
# cell, and never when the code runs as a script.
fig.show()
速度对比
感兴趣的小伙伴,赠送全套Python学习资料,包含面试题、简历资料等具体看下方。
一、Python所有方向的学习路线
Python所有方向的技术点做的整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照下面的知识点去找对应的学习资源,保证自己学得较为全面。
二、Python必备开发工具
工具都帮大家整理好了,安装就可直接上手!
三、最新Python学习笔记
当我学到一定基础,有自己的理解能力的时候,会去阅读一些前辈整理的书籍或者手写的笔记资料,这些笔记详细记载了他们对一些技术点的理解,这些理解是比较独到,可以学到不一样的思路。
四、Python视频合集
观看全面零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。
五、实战案例
纸上得来终觉浅,要学会跟着视频一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
六、面试宝典
简历模板
