事先声明:本人是在读大学生,并非专业人士,因此对白鲸优化算法的解读更接近个人理解,而不是学术上的严谨解读。至于文中的优化,是本人在一次竞赛中想到的点子,主要用于解决约束条件太多、时间复杂度极大、导致迟迟无法输出结果的问题。
项目已经在github开源,包括核心代码,比赛论文(仅供参考)与竞赛数据,各位reader可以试一试算法效果,如果觉得有点帮助的话麻烦点个收藏,灰常感谢doge。
下面是我在一次竞赛中使用的多目标白鲸优化算法的代码:
import numpy as np
import pandas as pd
import random
class WhaleOptimizer:
    """Population-based search for an integer daily menu.

    Every whale is a vector holding one integer serving count (0..3) per row
    of the food table. The fitness to maximise is the mean of three per-meal
    amino-acid scores minus the total price of the menu.

    The per-meal row ranges are hard-coded, so the food table is expected to
    contain 141 rows ordered breakfast, lunch, dinner.
    """

    # Row ranges of the food table belonging to breakfast, lunch and dinner.
    _MEAL_SLICES = (slice(0, 33), slice(33, 92), slice(92, 141))
    # Allowed share of the daily energy for each meal, in the same order.
    _MEAL_ENERGY_SHARE = ((0.25, 0.35), (0.30, 0.40), (0.30, 0.40))
    # Amino-acid column -> reference amount used as the score denominator
    # (presumably mg per g of protein -- confirm against the data source).
    _AMINO_ACID_REFERENCE = (
        ('异亮氨酸', 40),
        ('亮氨酸', 70),
        ('赖氨酸', 55),
        ('含硫氨基酸', 35),
        ('芳香族氨基酸', 60),
        ('苏氨酸', 40),
        ('色氨酸', 10),
        ('缬氨酸', 50),
    )
    # Daily targets for the seven columns from '钙mg' through '维生素Cmg'.
    _NUTRIENT_STANDARD = (800, 12, 12.5, 800, 1.4, 1.4, 100)

    def __init__(self, data_file, max_iter=3, whale_num=2):
        """Load the food table and create a random integer population.

        Args:
            data_file: Path of the Excel workbook (sheet '修改后数据'), or an
                already loaded ``pandas.DataFrame`` with the same columns.
            max_iter: Number of iterations performed by ``opt``.
            whale_num: Number of whales in the population.
        """
        if isinstance(data_file, pd.DataFrame):
            self.data = data_file
        else:
            self.data = pd.read_excel(data_file, sheet_name='修改后数据')
        self.max_iter = max_iter
        self.whale_num = whale_num
        # One decision variable per food row. This must be an int: a tuple
        # here breaks both the population shape and np.random.choice.
        self.dim = self.data.shape[0]
        self.LB = np.zeros(self.dim)  # lower bound of every variable
        self.UB = np.full(self.dim, 3)  # upper bound of every variable
        self.X = np.random.uniform(0, 1, (whale_num, self.dim)) * (self.UB - self.LB) + self.LB
        self.X = np.round(self.X).astype(int)  # the problem needs integer servings
        self.V = np.zeros((whale_num, self.dim))
        # The fitness is usually negative (score minus price), so the
        # "nothing found yet" value has to be -inf rather than 0.
        self.gBest_score = -np.inf
        self.gBest_X = self.X[0, :].copy()

    def fitFunc(self, input):
        """Return the scalar fitness of one menu (higher is better)."""
        total_score, save_economics = self.seperate_score(input)
        return total_score + save_economics

    def seperate_score(self, input):
        """Return ``(mean meal score, negated total price)`` for one menu."""
        edible_quantity = np.array(input)
        protein = np.array(self.data['蛋白质g'])
        cash = np.array(self.data['价格'])
        meal_scores = [
            self.calculate_score(edible_quantity, protein, meal)
            for meal in self._MEAL_SLICES
        ]
        total_score = sum(meal_scores) / 3
        save_economics = -np.sum(cash * edible_quantity)
        return total_score, save_economics

    def calculate_score(self, edible_quantity, protein, indices):
        """Return the limiting amino-acid ratio of one meal.

        Args:
            edible_quantity: Serving count per food row.
            protein: Protein amount per food row.
            indices: Rows belonging to the meal (slice, range or index array).

        Returns:
            The smallest of the eight amino-acid ratios, or ``-np.inf`` when
            the meal contains no protein (avoids a division by zero).
        """
        quantity = np.asarray(edible_quantity)[indices]
        meal_protein = np.sum(quantity * np.asarray(protein)[indices])
        if meal_protein == 0:
            return -np.inf
        ratios = [
            np.sum(np.array(self.data[column])[indices] * quantity)
            / (meal_protein * reference * 0.01)
            for column, reference in self._AMINO_ACID_REFERENCE
        ]
        return np.min(ratios)

    def apply_constraints(self,):
        """Collect ``whale_num`` feasible whales, one at a time.

        Each round keeps 9-12 random variables of every whale, repairs the
        total energy towards the 2160-2640 band and stores every whale that
        passes ``check_constraints``. Rounds repeat until enough whales are
        stored, so this never returns when the constraints cannot be met.

        Returns:
            Array of shape ``(whale_num, dim)``; ``self.X`` is not modified.
        """
        energy = np.array(self.data['能量'])
        all_indices = np.arange(self.dim)
        # Work on a copy so stored whales cannot be altered by later rounds.
        X = self.X.copy()
        perfect_matrix = []
        while len(perfect_matrix) < self.whale_num:
            for i in range(self.whale_num):
                num_active_vars = np.random.randint(9, 13)
                active_indices = np.random.choice(self.dim, num_active_vars, replace=False)
                inactive_indices = np.setdiff1d(all_indices, active_indices)
                X[i, inactive_indices] = 0
            for i in range(self.whale_num):
                if len(perfect_matrix) >= self.whale_num:
                    break  # never return more whales than requested
                row = X[i]  # view: the repair below edits X in place
                total_energy = np.sum(row * energy)
                while total_energy > 2640:
                    non_zero_indices = np.flatnonzero(row > 0)
                    if non_zero_indices.size == 0:
                        break
                    row[np.random.choice(non_zero_indices)] -= 1
                    total_energy = np.sum(row * energy)
                while total_energy <= 2160:
                    # Only pick entries that can still grow; stop when none
                    # is left instead of looping forever.
                    raisable_indices = np.flatnonzero(row < self.UB)
                    if raisable_indices.size == 0:
                        break
                    row[np.random.choice(raisable_indices)] += 1
                    total_energy = np.sum(row * energy)
                # Energy band, macro-nutrient shares, micro-nutrient targets
                # and meal energy shares are all judged for this whale only.
                if self.check_constraints(row):
                    perfect_matrix.append(row.copy())
                    print(len(perfect_matrix))
        return np.array(perfect_matrix)

    def _update_best(self, position):
        """Store ``position`` as the global best if it is feasible and better."""
        fit = self.fitFunc(position)
        if fit > self.gBest_score and self.check_constraints(position):
            self.gBest_score = fit
            self.gBest_X = position.copy()
            print(self.gBest_X)
            print(self.gBest_score)

    def opt(self):
        """Run ``max_iter`` iterations and return ``(best score, best menu)``.

        The score stays ``-np.inf`` when no feasible menu was ever evaluated.
        """
        for t in range(self.max_iter):
            self.X = self.apply_constraints()  # start every iteration from feasible whales
            for i in range(self.whale_num):
                self.X[i, :] = np.clip(self.X[i, :], self.LB, self.UB)
                self._update_best(self.X[i, :])
            a = 2 * (self.max_iter - t) / self.max_iter
            for i in range(self.whale_num):
                p = np.random.uniform()
                R1 = np.random.uniform()
                R2 = np.random.uniform()
                A = 2 * a * R1 - a
                C = 2 * R2
                if p >= 0.5:
                    self.V[i, :] += A * np.abs(self.gBest_X - self.X[i, :])
                elif abs(A) < 1:
                    self.V[i, :] += np.abs(C * self.gBest_X - self.X[i, :])
                else:
                    rand_index = np.random.randint(low=0, high=self.whale_num)
                    self.V[i, :] += np.abs(self.X[rand_index, :] - self.X[i, :])
                # Velocities and positions are kept integer valued.
                self.V[i, :] = np.round(self.V[i, :])
                self.X[i, :] = np.clip(self.X[i, :] + self.V[i, :], self.LB, self.UB)
                self._update_best(self.X[i, :])
            if t % 10 == 0:
                print(f'At iteration: {t}, Best Score: {self.gBest_score}')
        return self.gBest_score, self.gBest_X

    def check_constraints(self, gBest_X):
        """Return True when one menu satisfies every constraint.

        Checked in order: total energy within 2160-2640, energy shares of
        protein / fat / carbohydrate, relative deviation of the seven
        micro-nutrients below 0.5, and the energy share of each meal.
        """
        position = np.asarray(gBest_X)
        energy = np.array(self.data['能量'])
        total_energy = np.sum(position * energy)
        # Rejecting early also keeps the divisions below away from zero.
        if not 2160 <= total_energy <= 2640:
            return False
        macro_rules = (
            ('蛋白质g', 4, 0.10, 0.15),
            ('脂肪g', 9, 0.20, 0.30),
            ('碳水化合物g', 4, 0.50, 0.65),
        )
        for column, factor, low, high in macro_rules:
            share = np.sum(position * np.array(self.data[column]) * factor) / total_energy
            if not low <= share <= high:
                return False
        nutrients = self.data.loc[:, '钙mg':'维生素Cmg'].values
        standard = np.array(self._NUTRIENT_STANDARD)
        nutrient_totals = np.sum(position[:, np.newaxis] * nutrients, axis=0)
        uniform_deviation = (nutrient_totals - standard) / standard
        if not np.all(np.abs(uniform_deviation) < 0.5):
            return False
        for meal, (low, high) in zip(self._MEAL_SLICES, self._MEAL_ENERGY_SHARE):
            share = np.sum(position[meal] * energy[meal]) / total_energy
            if not low <= share <= high:
                return False
        return True
# Adjust these parameters to fit a new scenario.
data_file = "优化模型数据.xlsx"
optimizer = WhaleOptimizer(data_file)
# opt() returns the best fitness together with the menu that achieved it.
gBest_score, gBest_X = optimizer.opt()
# Split the best menu's fitness into its two components for reporting.
comment_twin = optimizer.seperate_score(gBest_X)
for label, value in (
    ("最大总分评分", gBest_score),
    ("最优解", gBest_X),
    ("AAS评分, 经济评分", comment_twin),
):
    print(label, value)
白鲸优化算法解读
白鲸优化算法,顾名思义,是一种以白鲸的习性为参考的启发式优化模型。
启发式优化模型,简单理解就是在一个三维立体空间内,x,y为变量,z是值,然后根据x,y的不同,得到一个关于z的曲面,种群数就是随机生成n个点在这个曲面上,然后让他们去找最优解;同时为避免陷入局部最优解,各个算法通过不同的办法,比如遗传算法的变异和交叉,退火算法的有概率跳出,又或者是像白鲸算法的白鲸搜寻操作,本质都是一样的。
总结就是:先给出一些初始点,让它们各自去找自己认为的最优解,有更好的就更新,没有就不更新,再辅以跳出局部最优的手段,尽可能搜寻到全局最优。
class WhaleOptimizer:
    """Excerpt: constructor of the menu optimiser discussed in the article."""

    def __init__(self, data_file, max_iter=3, whale_num=2):
        """Load the food table and create a random integer population.

        Args:
            data_file: Path of the Excel workbook (sheet '修改后数据'), or an
                already loaded ``pandas.DataFrame``.
            max_iter: Number of iterations of the search loop.
            whale_num: Number of whales (candidate solutions) per iteration.
        """
        if isinstance(data_file, pd.DataFrame):
            self.data = data_file
        else:
            self.data = pd.read_excel(data_file, sheet_name='修改后数据')
        self.max_iter = max_iter
        self.whale_num = whale_num
        # One decision variable per food row. This must be an int: a tuple
        # here makes the population shape below invalid.
        self.dim = self.data.shape[0]
        self.LB = np.zeros(self.dim)  # lower bound of every variable
        self.UB = np.full(self.dim, 3)  # upper bound of every variable
        # Random start positions spread uniformly between the bounds.
        self.X = np.random.uniform(0, 1, (whale_num, self.dim)) * (self.UB - self.LB) + self.LB
        self.X = np.round(self.X).astype(int)  # drop this line if integers are not required
        self.V = np.zeros((whale_num, self.dim))
        # Worst possible value for a maximisation problem; 0 would block every
        # update whenever the fitness is negative.
        self.gBest_score = -np.inf
        self.gBest_X = self.X[0, :].copy()  # placeholder: the first random whale
首先,先定义个类WhaleOptimizer,关于类的用法有很多文章,可以自行学习。
max_iter 是迭代次数,就是循环进行多少次
whale_num 是白鲸数,就是一次循环中有多少个初始种群,有多少个初始点。
dim 是变量/特征数,即最优函数中有多少个变量。
LB 是变量下界 , UB是变量上界(是持续的界限,而不是仅仅最开始的界限)
self.X = np.random.uniform(0, 1, (whale_num, self.dim)) * (self.UB - self.LB) + self.LB在进行初始点随机化
self.X = np.round(self.X).astype(int) :我打的竞赛需要整数,如果不需要整数限制,可删掉
self.gBest_score = 0 是全局最优评分的初始值,应取目标函数可能取到的最差值,这里取的是0(注意:求最大值时,如果评分可能为负,应改用 -np.inf,否则最优解永远不会被更新)
def fitFunc(self, input):
    """Return the fitness of one menu (higher is better).

    The fitness is the mean of the breakfast, lunch and dinner scores from
    ``self.calculate_score`` minus the total price of the menu.

    Args:
        input: Serving count per food row (141 rows are expected).
    """
    edible_quantity = np.array(input)
    protein = np.array(self.data['蛋白质g'])
    cash = np.array(self.data['价格'])
    # Food rows of breakfast, lunch and dinner.
    meal_ranges = (range(33), range(33, 92), range(92, 141))
    meal_scores = [
        self.calculate_score(edible_quantity, protein, meal)
        for meal in meal_ranges
    ]
    total_score = sum(meal_scores) / 3
    # Spending counts against the menu, hence the negative sign.
    save_economics = -np.sum(cash * edible_quantity)
    return total_score + save_economics
这个地方是优化函数,最核心的就是你要得到一个final_score 评分函数,然后返回这个值final_score,默认是让final_score评分最大,如果需要最小就加个负号。
def calculate_score(self, edible_quantity, protein, indices):
    """Return the limiting amino-acid ratio of one meal.

    Args:
        edible_quantity: Serving count per food row.
        protein: Protein amount per food row.
        indices: Rows belonging to the meal (range, slice or index array).

    Returns:
        The smallest of the eight amino-acid ratios, or ``-np.inf`` when the
        meal contains no protein (avoids a division by zero).
    """
    # Amino-acid column -> reference amount used as the denominator
    # (presumably mg per g of protein -- confirm against the data source).
    reference_pattern = (
        ('异亮氨酸', 40),
        ('亮氨酸', 70),
        ('赖氨酸', 55),
        ('含硫氨基酸', 35),
        ('芳香族氨基酸', 60),
        ('苏氨酸', 40),
        ('色氨酸', 10),
        ('缬氨酸', 50),
    )
    quantity = np.asarray(edible_quantity)[indices]
    meal_protein = np.sum(quantity * np.asarray(protein)[indices])
    if meal_protein == 0:
        return -np.inf
    ratios = [
        np.sum(np.array(self.data[column])[indices] * quantity)
        / (meal_protein * reference * 0.01)
        for column, reference in reference_pattern
    ]
    # The meal is only as good as its scarcest amino acid.
    return np.min(ratios)
这个地方就是一个计算函数,让代码看起来更加清晰。
def apply_constraints(self,):
    """Collect ``whale_num`` feasible whales, one at a time ("elite" variant).

    Each round keeps 9-12 random variables of every whale, repairs the total
    energy towards the 2160-2640 band and stores every whale that passes the
    four constraint groups on its own. Rounds repeat until enough whales are
    stored, so this never returns when the constraints cannot be met.

    Returns:
        Array of shape ``(whale_num, dim)``; ``self.X`` is not modified.
    """
    energy = np.array(self.data['能量'])
    protein = np.array(self.data['蛋白质g'])
    fat = np.array(self.data['脂肪g'])
    carbohydrate = np.array(self.data['碳水化合物g'])
    boy_nonproductive_nutrient = self.data.loc[:, '钙mg':'维生素Cmg'].values
    standard_boy_nonproductive_nutrient = np.array([800, 12, 12.5, 800, 1.4, 1.4, 100])
    macro_rules = ((protein, 4, 0.10, 0.15), (fat, 9, 0.20, 0.30), (carbohydrate, 4, 0.50, 0.65))
    meal_rules = ((slice(0, 33), 0.25, 0.35), (slice(33, 92), 0.30, 0.40), (slice(92, None), 0.30, 0.40))

    def is_feasible(row):
        """Check the four constraint groups for a single whale."""
        total_energy = np.sum(row * energy)
        # 1. daily energy band (also keeps the divisions away from zero)
        if not 2160 <= total_energy <= 2640:
            return False
        # 2. energy share of protein, fat and carbohydrate
        for grams, factor, low, high in macro_rules:
            if not low <= np.sum(row * grams * factor) / total_energy <= high:
                return False
        # 3. micro-nutrients within 50% of their targets
        nutrient_totals = np.sum(row[:, np.newaxis] * boy_nonproductive_nutrient, axis=0)
        deviation = nutrient_totals - standard_boy_nonproductive_nutrient
        uniform_deviation = deviation / standard_boy_nonproductive_nutrient
        if not np.all(np.abs(uniform_deviation) < 0.5):
            return False
        # 4. energy share of breakfast, lunch and dinner
        for meal, low, high in meal_rules:
            if not low <= np.sum(row[meal] * energy[meal]) / total_energy <= high:
                return False
        return True

    all_indices = np.arange(self.dim)
    # Work on a copy so stored whales cannot be altered by later rounds.
    X = self.X.copy()
    perfect_matrix = []
    while len(perfect_matrix) < self.whale_num:
        for i in range(self.whale_num):
            num_active_vars = np.random.randint(9, 13)
            active_indices = np.random.choice(self.dim, num_active_vars, replace=False)
            inactive_indices = np.setdiff1d(all_indices, active_indices)
            X[i, inactive_indices] = 0
        for i in range(self.whale_num):
            if len(perfect_matrix) >= self.whale_num:
                break  # never return more whales than requested
            row = X[i]  # view: the repair below edits X in place
            total_energy = np.sum(row * energy)
            while total_energy > 2640:
                non_zero_indices = np.flatnonzero(row > 0)
                if non_zero_indices.size == 0:
                    break
                row[np.random.choice(non_zero_indices)] -= 1
                total_energy = np.sum(row * energy)
            while total_energy <= 2160:
                # Only pick entries that can still grow; stop when none is
                # left instead of looping forever.
                raisable_indices = np.flatnonzero(row < self.UB)
                if raisable_indices.size == 0:
                    break
                row[np.random.choice(raisable_indices)] += 1
                total_energy = np.sum(row * energy)
            if is_feasible(row):
                perfect_matrix.append(row.copy())
                print(len(perfect_matrix))
    return np.array(perfect_matrix)
这是约束条件函数,可以很清晰的看出,约束变量很多,足足有四个
因为白鲸模型的性质,比如设置的是3个白鲸,初始随机化出三个点[(1,5),(2,5),(9,1)],那么只有当这三个点都通过约束条件的检验,才能进入下一步的优化函数部分。但在如此严格的约束条件下,想要所有白鲸同时满足4个约束条件几乎不可能,因此很难输出解。(如果约束条件较少,是有可能出解的,本质无非是迭代检验)
但面临这个问题,我采取“精英主义”思想,每次迭代的过程中,把符合条件的白鲸存储起来,比如200次迭代,只有一个白鲸通过检验,那么就把这个白鲸储存到perfect_matrix = []列表中,直到perfect_matrix 达到正常一次需要的白鲸数3,把这个perfect_matrix 列表中的三个白鲸作为一个整体再传输到下一步中。
通俗理解就是,一个班里30个人,现在要求得到一个精英班,要求每个人主科110+,小科85+。原来的白鲸优化算法是无限迭代,直到找到一个班的人全都符合条件,那么这个班就是精英班。
更改之后变成,把每个班符合条件的人单拎出来,放到精英班预备,直到精英班满员,那么就得到一个精英班。
这种方式可大大降低时间复杂度,有兴趣的朋友可以试一下两种方式的差距。
def apply_constraints(self,):
    """Repeat random repairs until ALL whales are feasible in the same round.

    This is the baseline ("original") variant: a round only counts when every
    whale passes the four constraint groups at once, which is why it needs far
    more rounds than collecting feasible whales one by one. It never returns
    when the constraints cannot be met.

    Returns:
        Array of shape ``(whale_num, dim)``; ``self.X`` is not modified.
    """
    energy = np.array(self.data['能量'])
    protein = np.array(self.data['蛋白质g'])
    fat = np.array(self.data['脂肪g'])
    carbohydrate = np.array(self.data['碳水化合物g'])
    boy_nonproductive_nutrient = self.data.loc[:, '钙mg':'维生素Cmg'].values
    standard_boy_nonproductive_nutrient = np.array([800, 12, 12.5, 800, 1.4, 1.4, 100])
    macro_rules = ((protein, 4, 0.10, 0.15), (fat, 9, 0.20, 0.30), (carbohydrate, 4, 0.50, 0.65))
    meal_rules = ((slice(0, 33), 0.25, 0.35), (slice(33, 92), 0.30, 0.40), (slice(92, None), 0.30, 0.40))

    def is_feasible(row):
        """Check the four constraint groups for a single whale."""
        total_energy = np.sum(row * energy)
        # 1. daily energy band (also keeps the divisions away from zero)
        if not 2160 <= total_energy <= 2640:
            return False
        # 2. energy share of protein, fat and carbohydrate
        for grams, factor, low, high in macro_rules:
            if not low <= np.sum(row * grams * factor) / total_energy <= high:
                return False
        # 3. micro-nutrients within 50% of their targets
        nutrient_totals = np.sum(row[:, np.newaxis] * boy_nonproductive_nutrient, axis=0)
        deviation = nutrient_totals - standard_boy_nonproductive_nutrient
        uniform_deviation = deviation / standard_boy_nonproductive_nutrient
        if not np.all(np.abs(uniform_deviation) < 0.5):
            return False
        # 4. energy share of breakfast, lunch and dinner
        for meal, low, high in meal_rules:
            if not low <= np.sum(row[meal] * energy[meal]) / total_energy <= high:
                return False
        return True

    all_indices = np.arange(self.dim)
    X = self.X.copy()
    while True:
        for i in range(self.whale_num):
            num_active_vars = np.random.randint(9, 13)
            active_indices = np.random.choice(self.dim, num_active_vars, replace=False)
            inactive_indices = np.setdiff1d(all_indices, active_indices)
            X[i, inactive_indices] = 0
        for i in range(self.whale_num):
            row = X[i]  # view: the repair below edits X in place
            total_energy = np.sum(row * energy)
            while total_energy > 2640:
                non_zero_indices = np.flatnonzero(row > 0)
                if non_zero_indices.size == 0:
                    break
                row[np.random.choice(non_zero_indices)] -= 1
                total_energy = np.sum(row * energy)
            while total_energy <= 2160:
                # Only pick entries that can still grow; stop when none is
                # left instead of looping forever.
                raisable_indices = np.flatnonzero(row < self.UB)
                if raisable_indices.size == 0:
                    break
                row[np.random.choice(raisable_indices)] += 1
                total_energy = np.sum(row * energy)
        # The whole population is accepted only if every whale passes.
        if all(is_feasible(X[i]) for i in range(self.whale_num)):
            return X
上述代码就是原方法,可用它替换apply_constraints函数进行对比测试。
(吐槽一句,当时竞赛代码都完事了,但输出数据需要时间,导致最后的论文写的不怎么好,哎~)