【入门教程】基于Python DEAP框架实现遗传编程，基于PyTorch GPU加速

认真写程序的强哥

已于 2024-01-20 10:32:26 修改

阅读量1.1k

点赞数

CC 4.0 BY-SA版权

文章标签： python pytorch 开发语言 Python编程 Python学习人工智能

于 2023-11-16 10:50:33 首次发布

本文链接：https://blog.youkuaiyun.com/qq_41314882/article/details/134436840

前言：DEAP是目前Python领域最流行的GP框架。因此，本系列教程将详细介绍基于DEAP实现GP的各种技巧。如有不足，也欢迎大家提出意见，以便对教程进行更新。

PyTorch加速

DEAP除了可以借助NumPy在CPU上进行向量化加速，还可以基于PyTorch进行GPU加速。值得一提的是，在评估时，数据需要以Tensor的格式创建在目标设备上，因为NumPy数组无法直接在GPU上参与计算。当处理大规模数据时，基于PyTorch进行GPU加速可以显著提高计算速度。

import time

import torch
from deap import base, creator, tools, gp

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# Symbolic regression fitness
def evalSymbReg(individual, pset):
    """Score a GP individual by its mean squared error against ``x ** 2``.

    The tree is compiled into a callable and applied in one call to a tensor
    of 100 evenly spaced points in [-10, 10] created on the module-level
    ``device``.

    Args:
        individual: GP expression tree to evaluate.
        pset: Primitive set used to compile ``individual``.

    Returns:
        A one-element tuple ``(mse,)``, the shape DEAP expects for a
        single-objective fitness.
    """
    # Turn the GP tree into a callable.
    compiled = gp.compile(expr=individual, pset=pset)

    # Build the sample points directly on the target device.
    inputs = torch.linspace(-10, 10, 100, device=device)
    targets = inputs ** 2

    # Mean squared error between the tree's output and the target curve.
    residuals = compiled(inputs) - targets
    # .item() converts the scalar result back into a plain Python number.
    mse = (residuals ** 2).mean().item()

    return (mse,)

# Create the fitness and individual classes.
# creator.create() registers the new class on the global deap.creator module,
# so re-running this cell/script would redefine it (DEAP emits a
# RuntimeWarning in that case). The hasattr guards make the setup idempotent.
if not hasattr(creator, "FitnessMin"):
    # Single objective with a negative weight, i.e. the value is minimised.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
if not hasattr(creator, "Individual"):
    # Individuals are GP primitive trees that carry a FitnessMin fitness.
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

由于评估时的输入数据是Tensor，函数集合中对应的算子也一定要替换成PyTorch函数（例如torch.add、torch.mul）。

# Function set and terminal set. Every primitive is a PyTorch operator,
# registered together with its arity.
pset = gp.PrimitiveSet("MAIN", arity=1)
for _op, _n_args in ((torch.add, 2), (torch.sub, 2), (torch.mul, 2), (torch.neg, 1)):
    pset.addPrimitive(_op, _n_args)
# Expose the single input as "x" instead of the default "ARG0".
pset.renameArguments(ARG0='x')

# Genetic programming operators.
toolbox = base.Toolbox()

# Initialisation: random expressions (min_=1, max_=2), wrapped into
# individuals, which are in turn collected into a list-based population.
# "expr" must be registered first because later registrations read toolbox.expr.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Selection and variation: size-3 tournaments, one-point crossover and
# uniform mutation that draws replacement subtrees from "expr".
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)

# Compilation and fitness evaluation, both bound to this primitive set.
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evalSymbReg, pset=pset)

现在，我们可以开始基于PyTorch进行GPU加速了。

import numpy
from deap import algorithms

# Statistics: one collector reads each individual's fitness values, the other
# its len(); both are reported under the same four reducers.
stats_size = tools.Statistics(len)
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
for _label, _reducer in (
    ("avg", numpy.mean),
    ("std", numpy.std),
    ("min", numpy.min),
    ("max", numpy.max),
):
    mstats.register(_label, _reducer)

# Run the default evolutionary algorithm (eaSimple) three times and record
# how long each run takes.
torch_time = []
for i in range(3):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and may be adjusted
    # while a run is in progress.
    start = time.perf_counter()
    population = toolbox.population(n=50)
    hof = tools.HallOfFame(1)  # hall of fame of size 1
    pop, log = algorithms.eaSimple(
        population=population,
        toolbox=toolbox,
        cxpb=0.9,
        mutpb=0.1,
        ngen=10,
        stats=mstats,
        halloffame=hof,
        verbose=True,
    )
    end = time.perf_counter()
    print('time:', end - start)
    torch_time.append(end - start)
    # Print the expression stored in the hall of fame.
    print(str(hof[0]))

   	      	                          fitness                          	                      size                       
   	      	-----------------------------------------------------------	-----------------------------------------------  
gen	nevals	avg   	gen	max        	min	nevals	std        	avg 	gen	max	min	nevals	std      
0  	50    	477687	0  	1.17257e+07	0  	50    	2.29619e+06	4.68	0  	7  	2  	50    	1.84868  
1  	47    	1354.51	1  	8359.39    	0  	47    	1438.73    	4.62	1  	9  	2  	47    	1.9989   
2  	48    	13450.8	2  	153712     	0  	48    	41448.7    	5.22	2  	13 	2  	48    	2.44369  
3  	46    	6779   	3  	153712     	0  	46    	29901      	4.5 	3  	13 	3  	46    	2.26495  
4  	43    	6690.99	4  	153712     	0  	43    	30128      	3.96	4  	11 	3  	43    	1.84347  
5  	45    	12718  	5  	157909     	0  	45    	41896.2    	4.48	5  	13 	3  	45    	2.32585  
6  	48    	3996.83	6  	153712     	0  	48    	21606.1    	3.54	6  	9  	3  	48    	1.25236  
7  	48    	3826.92	7  	153712     	0  	48    	21604.5    	3.3 	7  	9  	2  	48    	1.04403  
8  	47    	6731.25	8  	153712     	0  	47    	30137.6    	3.38	8  	9  	3  	47    	1.19817  
9  	47    	6400.96	9  	153712     	0  	47    	30094.7    	3.22	9  	8  	3  	47    	0.807217  
10 	44    	3240.75	10 	153712     	0  	44    	21503.3    	3.28	10 	7  	3  	44    	0.825591  
time: 0.038022756576538086  
sub(mul(x, x), sub(x, x))  
   	      	                    fitness                    	                      size                       
   	      	-----------------------------------------------	-----------------------------------------------  
gen	nevals	avg    	gen	max    	min	nevals	std    	avg 	gen	max	min	nevals	std      
0  	50    	2766.28	0  	18732.1	0  	50    	3573.52	4.12	0  	7  	2  	50    	1.77358  
1  	42    	1560.6 	1  	2217.37	0  	42    	908.891	4.04	1  	9  	2  	42    	1.69658  
2  	44    	4293.66	2  	153712 	0  	44    	21418.1	4.2 	2  	9  	3  	44    	1.6      
3  	50    	10572.5	3  	157909 	0  	50    	36647.1	4.18	3  	11 	3  	50    	1.77415  
4  	44    	6534   	4  	153712 	0  	44    	30053.6	3.84	4  	9  	3  	44    	1.55383  
5  	46    	12299  	5  	608604 	0  	46    	85187.8	3.76	5  	7  	3  	46    	1.37928  
6  	47    	3783.26	6  	159956 	0  	47    	22376.3	3.92	6  	9  	3  	47    	1.56     
7  	45    	9641.7 	7  	170363 	0  	45    	37852  	4.04	7  	10 	3  	45    	1.77719  
8  	44    	83.2539	8  	2081.35	0  	44    	407.859	3.58	8  	10 	3  	44    	1.62592  
9  	42    	3115.87	9  	153712 	0  	42    	21515.7	3.32	9  	7  	3  	42    	1.00876  
10 	46    	542.51 	10 	8325.39	0  	46    	1707.12	3.56	10 	8  	2  	46    	1.29861  
time: 0.12821197509765625  
mul(x, x)  
   	      	                    fitness                    	                      size                       
   	      	-----------------------------------------------	-----------------------------------------------  
gen	nevals	avg    	gen	max   	min	nevals	std    	avg 	gen	max	min	nevals	std      
0  	50    	8194.86	0  	153712	0  	50    	29842.5	4.34	0  	7  	2  	50    	1.88266  
1  	50    	241765 	1  	1.17257e+07	0  	50    	1.64084e+06	4.22	1  	9  	2  	50    	1.85785  
2  	44    	2.00741e+07	2  	1.00337e+09	0  	44    	1.4047e+08 	4.3 	2  	11 	2  	44    	1.88944  
3  	46    	15462.6    	3  	608604     	0  	46    	87345.6    	3.58	3  	9  	3  	46    	1.32801  
4  	43    	6607.05    	4  	153712     	0  	43    	30143.5    	3.52	4  	9  	3  	43    	1.37463  
5  	41    	6772.88    	5  	153712     	0  	41    	30129.7    	3.64	5  	9  	3  	41    	1.50678  
6  	46    	666.031    	6  	18732.1    	0  	46    	2867.13    	3.72	6  	9  	3  	46    	1.63756  
7  	44    	2.03085e+07	7  	1.00337e+09	0  	44    	1.40447e+08	3.64	7  	9  	3  	44    	1.60947  
8  	44    	51862.8    	8  	2.42817e+06	0  	44    	340152     	3.62	8  	9  	3  	44    	1.70751  
9  	46    	9222.71    	9  	153712     	0  	46    	36504.6    	3.68	9  	9  	3  	46    	1.76     
10 	47    	15371.2    	10 	608604     	0  	47    	87360.6    	3.76	10 	9  	3  	47    	1.78393  
time: 0.028471946716308594  
mul(x, x)

import numpy as np

# Function set and terminal set for the baseline run: the same four
# operators as before, provided by NumPy.
pset = gp.PrimitiveSet("MAIN", arity=1)
pset.addPrimitive(np.add, 2)
pset.addPrimitive(np.subtract, 2)
pset.addPrimitive(np.multiply, 2)
pset.addPrimitive(np.negative, 1)
# Name the input "x", as the PyTorch primitive set does, so expressions from
# both runs print the same way instead of showing the default "ARG0".
pset.renameArguments(ARG0='x')

# Genetic programming operators for the baseline run.
toolbox = base.Toolbox()

# Initialisation: random expressions (min_=1, max_=2), wrapped into
# individuals, which are in turn collected into a list-based population.
# "expr" must be registered first because later registrations read toolbox.expr.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Selection and variation: size-3 tournaments, one-point crossover and
# uniform mutation that draws replacement subtrees from "expr".
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)

# Compilation bound to this primitive set.
toolbox.register("compile", gp.compile, pset=pset)

# Slow evaluation
def evalSymbRegSlow(individual, pset):
    """Score a GP individual against ``x ** 2`` one sample at a time.

    The compiled tree is called once per sample inside a Python-level loop.

    Args:
        individual: GP expression tree to evaluate.
        pset: Primitive set used to compile ``individual``.

    Returns:
        A one-element tuple ``(mse,)`` holding the mean squared error.
    """
    # Turn the GP tree into a callable.
    compiled = gp.compile(expr=individual, pset=pset)

    # Evaluation data: 101 points from -10.0 to 10.0 in steps of 0.2.
    samples = [step / 5.0 for step in range(-50, 51)]

    # Squared error of the tree's output at every sample, then the mean.
    squared_errors = ((compiled(x) - x ** 2) ** 2 for x in samples)
    mse = sum(squared_errors) / len(samples)

    return (mse,)


# Use the slow, per-sample fitness function for the baseline run.
toolbox.register("evaluate", evalSymbRegSlow, pset=pset)

# Run the default evolutionary algorithm (eaSimple) three times and record
# how long each run takes.
py_time = []
for i in range(3):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and may be adjusted
    # while a run is in progress.
    start = time.perf_counter()
    population = toolbox.population(n=50)
    hof = tools.HallOfFame(1)  # hall of fame of size 1
    pop, log = algorithms.eaSimple(
        population=population,
        toolbox=toolbox,
        cxpb=0.9,
        mutpb=0.1,
        ngen=10,
        stats=mstats,
        halloffame=hof,
        verbose=True,
    )
    end = time.perf_counter()
    print('time:', end - start)
    py_time.append(end - start)

   	      	                          fitness                          	                      size                       
   	      	-----------------------------------------------------------	-----------------------------------------------  
gen	nevals	avg   	gen	max        	min	nevals	std        	avg	gen	max	min	nevals	std      
0  	50    	474278	0  	1.17166e+07	0  	50    	2.29493e+06	4.2	0  	7  	2  	50    	1.72047  
1  	45    	1731.19	1  	8458.11    	0  	45    	1660.56    	4.6	1  	12 	2  	45    	2.22711  
2  	48    	4292.07	2  	153622     	0  	48    	21381.6    	4.54	2  	9  	2  	48    	2.09962  
3  	48    	7275.17	3  	159864     	0  	48    	30871.6    	4.36	3  	10 	3  	48    	2.05679  
4  	44    	13372.3	4  	159864     	0  	44    	41948      	3.98	4  	10 	3  	44    	1.87072  
5  	40    	16029.4	5  	608247     	0  	40    	87893.1    	3.92	5  	9  	3  	40    	1.78706  
6  	50    	12798.6	6  	153622     	0  	50    	41306.8    	3.9 	6  	9  	3  	50    	1.66433  
7  	50    	958755 	7  	4.74663e+07	0  	50    	6.64403e+06	3.92	7  	11 	3  	50    	1.79822  
8  	44    	6687.19	8  	153622     	0  	44    	30041.3    	3.64	8  	9  	3  	44    	1.41082  
9  	46    	55513.4	9  	2.42675e+06	0  	46    	340080     	3.46	9  	9  	3  	46    	1.09927  
10 	49    	3571.77	10 	153622     	0  	49    	21526.6    	3.1 	10 	5  	3  	49    	0.360555  
time: 0.09351873397827148  
   	      	                    fitness                    	                      size                       
   	      	-----------------------------------------------	-----------------------------------------------  
gen	nevals	avg    	gen	max    	min	nevals	std    	avg 	gen	max	min	nevals	std      
0  	50    	1787.76	0  	8322.11	0  	50    	1632.29	3.94	0  	7  	2  	50    	1.80455  
1  	47    	7380.16	1  	153622 	0  	47    	29903.7	5.02	1  	11 	2  	47    	2.13063  
2  	47    	259596 	2  	1.24743e+07	0  	47    	1.74534e+06	4.16	2  	9  	3  	47    	1.61691  
3  	43    	237865 	3  	1.17166e+07	0  	43    	1.63997e+06	4.06	3  	12 	2  	43    	2.12988  
4  	40    	6312.01	4  	153622     	0  	40    	30074.9    	3.72	4  	12 	3  	40    	1.7893   
5  	38    	3365.08	5  	153622     	0  	38    	21501.6    	3.52	5  	12 	3  	38    	1.47296  
6  	40    	16152.8	6  	608247     	0  	40    	87901.5    	4.2 	6  	12 	3  	40    	2.63059  
7  	44    	42.2906	7  	2114.53    	0  	44    	296.034    	3.42	7  	12 	2  	44    	1.61357  
8  	46    	961702 	8  	4.74663e+07	0  	46    	6.64406e+06	4.12	8  	12 	3  	46    	2.19672  
9  	43    	955804 	9  	4.74663e+07	0  	43    	6.64442e+06	4.3 	9  	12 	3  	43    	2.34307  
10 	46    	10091.8	10 	184830     	0  	46    	39557.3    	3.92	10 	11 	3  	46    	1.84217  
time: 0.0885322093963623  
   	      	                    fitness                    	                      size                       
   	      	-----------------------------------------------	-----------------------------------------------  
gen	nevals	avg    	gen	max   	min	nevals	std    	avg 	gen	max	min	nevals	std      
0  	50    	24284.7	0  	608247	0  	50    	90848.5	4.04	0  	7  	2  	50    	1.68476  
1  	41    	1058.62	1  	2216.53	0  	41    	1057.79	4.02	1  	13 	2  	41    	2.05417  
2  	50    	2.10053e+07	2  	1.0024e+09	0  	50    	1.40357e+08	4.48	2  	11 	2  	50    	2.10941  
3  	46    	234756     	3  	1.17166e+07	0  	46    	1.64026e+06	3.74	3  	7  	3  	46    	1.36836  
4  	45    	3238.89    	4  	153622     	0  	45    	21490.7    	3.5 	4  	9  	3  	45    	1.36015  
5  	48    	416.786    	5  	8322.11    	0  	48    	1665.11    	3.24	5  	6  	3  	48    	0.708802  
6  	40    	1042.98    	6  	8322.11    	0  	40    	2703.71    	3.32	6  	7  	3  	40    	0.76      
7  	47    	175.962    	7  	2386.53    	0  	47    	597.665    	3.12	7  	6  	2  	47    	0.587878  
8  	44    	3155.67    	8  	153622     	0  	44    	21499.1    	3.14	8  	6  	3  	44    	0.566039  
9  	44    	421.546    	9  	8458.11    	0  	44    	1680.2     	3.3 	9  	9  	3  	44    	1.13578   
10 	44    	6186.5     	10 	153622     	0  	44    	30096.6    	3.18	10 	7  	3  	44    	0.74      
time: 0.08585286140441895

最后，我们可以使用plotly绘制一个箱线图来比较PyTorch和Python的运行时间。可以看出，即便不使用GPU，PyTorch也可以提高速度。

from IPython.display import Image
import pandas as pd
import plotly.express as px

# One row per timed run, labelled with the backend that produced it.
data = pd.DataFrame({
    'Category': ['PyTorch' for _ in torch_time] + ['Python' for _ in py_time],
    'Time': np.concatenate((torch_time, py_time)),
})

# Box plot of run times, one box per backend.
fig = px.box(data, x='Category', y='Time', color='Category', width=400, height=400)
fig.update_layout(
    title='Comparison of PyTorch and Python',
    xaxis_title='',
    yaxis_title='Time',
)