# NOTE(review): stray chat-prompt text from the paste source ("Show me a piece of code; don't modify anything, just display it") — commented out so the file parses.
import os
import gurobipy as gp
from optvpy import *
from parameter import Para_910C, Model_Type, TASK_Type
import pickle
import utils
import copy
import random
import itertools
import multiprocessing
def model_construct_switch_node(para:Para_910C):
    """Build and solve the switch-node MIP for chunk scheduling.

    Decision variables:
      b[i, k, c]    -- binary: rank i holds chunk c at epoch k.
      f[i, j, k, c] -- binary: chunk c is sent on link i->j during epoch k
                       (rank->rank links plus switch->rank links).

    Returns:
      (f_res, runtime): dict of the nonzero f values keyed by (i, j, k, c),
      and the Gurobi solve time in seconds.
    """
    f = {}
    b = {}
    model = gp.Model()
    print('var f and var b generating for each rank')
    for k in range(para.K):
        for i in range(para.N):
            for c in para.chunk_set:
                b[i, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'b_[{i},{k},{c}]')
                # only the links this rank is allowed to send on
                for j in para.out_direct_links.get(i, []):
                    f[i, j, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_[{i},{j},{k},{c}]')
    # instantiate the f variables for switch -> rank sends
    for k in range(para.K):
        for server_idx in range(para.server_num):
            rank_lst = para.server_rank[server_idx]
            for switch_idx in para.server_switch[server_idx]:
                for n in rank_lst:
                    for c in para.chunk_set:
                        f[switch_idx, n, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY,
                                                              name=f'f_[{switch_idx},{n},{k},{c}]')
    print('cap constrain')
    # capacity constraints on rank-to-rank links first
    for k in range(para.K):
        for i in range(para.N):
            for j in para.out_direct_links[i]:
                cap = para.T[i, j]
                model.addConstr(gp.quicksum(f[i, j, k, c] for c in para.chunk_set) <= cap, name=f'cap_[{k},{i},{j}]')
    # then capacity constraints on switch -> rank links
    for k in range(para.K):
        for server_idx in range(para.server_num):
            rank_lst = para.server_rank[server_idx]
            for switch_idx in para.server_switch[server_idx]:
                for rank_id in rank_lst:
                    cap = para.T[switch_idx, rank_id]
                    model.addConstr(gp.quicksum(f[switch_idx, rank_id, k, c] for c in para.chunk_set) <= cap,
                                    name=f'cap_[{k},{switch_idx},{rank_id}]')
    print('status constrain flow constrain')
    # state constraints: only outbound sends from a node are gated by its state
    for k in range(para.K):
        for i in range(para.N):
            init_chunk_set = para.rank_chunk_dict[i]
            for c in para.chunk_set:
                # decide where the previous epoch's holding state comes from
                tmp_var = None
                if k == 0 and c in init_chunk_set:
                    # at epoch 0 a rank may send its initial chunks: rhs is the
                    # constant 1; for everything else read the b variable
                    tmp_var = 1
                else:
                    tmp_var = b.get((i, k - 1, c), 0)
                # send constraint: can only forward what was held last epoch
                for target_j in para.out_direct_links[i]:
                    model.addConstr(f[i, target_j, k, c] <= tmp_var, name=f'status_[{k},{i},{target_j},{c}]')
                # state balance (flow conservation with link delays)
                in_expr = gp.quicksum(
                    f.get((source_j, i, k - para.delta[source_j, i], c), 0) for source_j in para.in_direct_links[i])
                model.addConstr(b[i, k, c] == tmp_var + in_expr, name=f'flow_[{k},{i},{c}]')
    # switch cons
    # todo: could add a constraint so that, for chunk c, whether rank1 can
    # receive c from the switch depends on whether ranks 2-7 have c
    for server_idx in range(para.server_num):
        switch_lst = para.server_switch[server_idx]
        rank_lst = para.server_rank[server_idx]
        for k in range(para.K):
            for c in para.chunk_set:
                # each switch relays: what it receives equals what it sends
                for switch_idx in switch_lst:
                    send_expr = gp.quicksum([f.get((rank_id, switch_idx, k, c), 0) for rank_id in rank_lst])
                    recv_expr = gp.quicksum([f.get((switch_idx, rank_id, k, c), 0) for rank_id in rank_lst])
                    model.addConstr(send_expr == recv_expr, name = f'switch_[{switch_idx},{k},{c}]')
                # pairwise gating: a pair of ranks may receive c from a switch
                # only if some OTHER rank on the server sent c to that switch
                for rank_lst_idx in range(0, len(rank_lst), 2):
                    rank_0 = rank_lst[rank_lst_idx]
                    rank_1 = rank_lst[rank_lst_idx + 1]
                    other_rank_lst = [rank_id for rank_id in rank_lst if rank_id != rank_0 and rank_id!= rank_1]
                    # whether rank_0/rank_1 can receive c from each switch depends
                    # on whether the other ranks sent c to it
                    for switch_idx in switch_lst:
                        other_rank_send_expr = gp.quicksum([f.get((other_rank, switch_idx, k, c), 0) for other_rank in other_rank_lst])
                        model.addConstr(f[switch_idx, rank_0, k, c] <= other_rank_send_expr, name =f'control_[{k},{c},{switch_idx},{rank_0}]')
                        model.addConstr(f[switch_idx, rank_1, k, c] <= other_rank_send_expr, name =f'control_[{k},{c},{switch_idx},{rank_1}]')
    # objective
    # for all2all the objective should split the chunks evenly and assign each
    # chunk its target position; reward earlier arrivals with larger weights
    expr = gp.LinExpr()
    for rank_id, cur_chunk_lst in para.final_obj_dict.items():
        for c in cur_chunk_lst:
            for k in range(para.K):
                for source_j in para.in_direct_links[rank_id]:
                    if k + para.delta[source_j, rank_id] >= para.K:
                        continue
                    expr += (para.K - k + 1) * f[source_j, rank_id, k, c]
    model.setObjective(expr, gp.GRB.MAXIMIZE)
    model.optimize()
    # model.write('tmp.lp')
    # env = OPTVEnv()
    # model_optv = OPTVModel(env)
    # model_optv.Read('tmp.lp')
    # model_optv.ComputeIIS()
    f_res = {key: val.X for key, val in f.items() if val.X}
    return f_res, model.Runtime
def model_construct_hyper_edge_no_sio_switch_linear(para:Para_910C, K = None, initial_rank_dict = None, final_obj_dict = None):
    """Build (but do not solve) the linearized hyper-edge MIP without sio switches.

    Linearized variant of ``model_construct_hyper_edge_no_sio_switch``: the
    bilinear concurrency-slowdown constraint is replaced by big-M constraints
    over indicator variables (flag / flag_in / o_var / z_var).

    Suitable for the rolling-horizon setting: the caller may override the
    horizon K, the initial chunk placement and the target placement.

    Args:
      para: topology/instance parameters.
      K: horizon length; defaults to para.K.
      initial_rank_dict: rank -> initial chunk set; defaults to para's copy.
      final_obj_dict: rank -> target chunk list; defaults to para's copy.

    Returns:
      The constructed (unsolved) gurobipy Model.
    """
    print("linear")
    # NOTE(review): falsy checks mean an intentionally-empty dict argument is
    # silently replaced by the para defaults — confirm that is desired.
    if not initial_rank_dict:
        initial_rank_dict = copy.deepcopy(para.rank_chunk_dict)
    if not final_obj_dict:
        final_obj_dict = copy.deepcopy(para.final_obj_dict)
    if not K:
        K = para.K
    f = {}
    b = {}
    model = gp.Model()
    print('var f and var b generating for each rank')
    for k in range(K):
        for i in para.all_rank_set:  # e.g. para.all_rank_set={0,3,16,19}
            for c in para.chunk_set:  # every chunk that is sent or received, e.g. {0,1,2,3,12,...}
                b[i, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'b_({i},{k},{c})')
                # only the links this rank can send on, e.g.
                # out_direct_links={0: [16, 3, 19], 3: [0, 16, 19], ...}
                for j in para.out_direct_links.get(i, []):
                    f[i, j, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_({i},{j},{k},{c})')
    print('hyper edge band upper bound')
    # per epoch, each rank's total hyper-edge (hccs) traffic is budget-limited
    for k in range(K):
        for i in para.all_rank_set:
            # intrinsic hccs send cap
            # NOTE(review): pair_dict is empty in current runs, possibly a
            # leftover — out_direct_links never contains the sio neighbour anyway
            hyper_links = [hyper_link for hyper_link in para.out_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            model.addConstr(gp.quicksum(f[i, j, k, c] for c in para.chunk_set for j in hyper_links) <= para.hyper_bound, name=f'f_jc_hccs_({i},{k})')  # hccs cap constraint
            # intrinsic sio send cap; there may be no sio link at all
            # (currently everything goes over the hyper edge)
            nei_rank = para.pair_dict.get(i)
            if nei_rank:
                model.addConstr(gp.quicksum(f[i, nei_rank, k, c] for c in para.chunk_set) <= para.T[i, nei_rank], name=f'f_jc_sio_({i},{nei_rank},{k})')  # T currently holds zeros
    print('status constrain flow constrain')
    # state constraints: only outbound sends are gated by state here
    for k in range(K):
        for i in para.all_rank_set:
            init_chunk_set = initial_rank_dict[i]  # chunks rank i starts with
            for c in para.chunk_set:
                # decide where the previous epoch's holding state comes from
                tmp_var = None
                if k == 0 and c in init_chunk_set:
                    # at epoch 0 initial chunks are sendable: rhs constant 1
                    tmp_var = 1
                else:
                    # held at k-1 => sendable at k; the default 0 is a bit risky
                    tmp_var = b.get((i, k - 1, c), 0)
                # send constraint: target ranks come straight from out_direct_links[i]
                for target_j in para.out_direct_links[i]:
                    model.addConstr(f[i, target_j, k, c] <= tmp_var, name=f'status_({i},{k},{target_j},{c})')
                # state balance; NOTE(review): c is not filtered here, so a source
                # rank could send i its own chunk c — harmless but worth confirming
                in_expr = gp.quicksum(
                    f.get((source_j, i, k - para.delta[source_j, i], c), 0) for source_j in para.in_direct_links[i])
                model.addConstr(b[i, k, c] == tmp_var + in_expr, name=f'flow_({i},{k},{c})')
    # build a flag variable y per link
    # todo: also cap the receive side — flag_in is derived from inbound f
    flag = {}
    flag_in = {}
    cap_var = {}
    z_var = {}
    o_var = {}
    bigM = 1000
    for k in range(K):
        for i in para.all_rank_set:
            cap_var[k,i] = model.addVar(lb = 0, ub = para.hyper_bound, vtype = gp.GRB.CONTINUOUS, name=f'cap_({k},{i})')
            hyper_links = [hyper_link for hyper_link in para.out_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            hyper_in_links = [hyper_link for hyper_link in para.in_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            sio_link = para.pair_dict.get(i)
            # outbound hccs: flag says whether each hyper link carries any send
            for hyper_link in hyper_links:
                flag[k, i, hyper_link] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'flag_({k},{i},{hyper_link})')  # the y variable
                sum_expr = gp.quicksum(f[i,hyper_link, k, c] for c in para.chunk_set)
                # y <= sum, sum <= y*M
                model.addConstr(flag[k,i, hyper_link] <= sum_expr, name=f'y_l_out_({k},{i},{hyper_link})')
                model.addConstr(sum_expr <= 100 * flag[k, i, hyper_link], name=f'y_r_out_({k},{i},{hyper_link})')
            # inbound hccs: flag_in says whether each hyper link carries any receive
            for hyper_in_link in hyper_in_links:
                flag_in[k, i, hyper_in_link] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'flag_in_({k},{i},{hyper_in_link})')
                sum_in_expr = gp.quicksum(f[hyper_in_link, i, k - para.delta[hyper_in_link, i], c] for c in para.chunk_set)
                model.addConstr(flag_in[k, i, hyper_in_link] <= sum_in_expr, name=f'y_l_in_({k},{i},{hyper_in_link})')
                model.addConstr(sum_in_expr <= 100 * flag_in[k, i, hyper_in_link], name=f'y_r_in_({k},{i},{hyper_in_link})')
            # the above bounds (1) what i sends per epoch and (2) what reaches i
            # new linearized cap computation: two-sided big-M constraints tie
            # cap_var to the max per-link send via the o_var selector
            for hyper_link in hyper_links:
                o_var[k,i,hyper_link] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'o_({k},{i},{hyper_link})')
                sum_f_expr = gp.quicksum(f[i,hyper_link,k,c] for c in para.chunk_set)
                model.addConstr(sum_f_expr <= cap_var[k,i], name=f'cap1_({i},{hyper_link},{k})')
                model.addConstr(cap_var[k, i] <= sum_f_expr + (1 - o_var[k,i,hyper_link])*bigM, name=f'cap2_({i},{hyper_link},{k})')
            # at least one o_var is active so cap_var is pinned to some link
            sum_o_expr = gp.quicksum(o_var[k,i,hyper_link] for hyper_link in hyper_links)
            model.addConstr(1 <= sum_o_expr, name=f'o_({i},{k})')
            # non-linear cons (hccs concurrency slowdown) is intentionally disabled
            sum_flag_expr = gp.quicksum(flag[k, i, hyper_link] for hyper_link in hyper_links)
            # model.addConstr(sum_flag_expr * cap_var[k,i] <= para.hyper_bound)  # no longer needed
            # totals: at most 6/7 concurrent switch links
            model.addConstr(sum_flag_expr <= 7, name=f'up_out_({i},{k})')
            # inbound concurrency limit
            model.addConstr(gp.quicksum(flag_in[k,i, hyper_in_link] for hyper_in_link in hyper_in_links) <= 7, name=f'up_in_({i},{k})')
            # per-link bandwidth bounded by the (slowdown-adjusted) cap
            # NOTE(review): duplicates the cap1 constraints above — confirm intent
            for hyper_link in hyper_links:
                c_sum_expr = gp.quicksum(f[i, hyper_link, k, c] for c in para.chunk_set)
                model.addConstr(c_sum_expr <= cap_var[k,i], name=f'hccsup_({i},{hyper_link},{k})')
            if sio_link is not None:
                # also bound the sio bandwidth
                # sum < z * 7 where 7 is the switch-count upper bound
                z_var[k, i] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'z_({k},{i})')
                model.addConstr(sum_flag_expr <= z_var[k,i] * 7, name=f'z_({i},{k})')  # z indicates whether any hccs send happens
                # sum_f <= sio cap
                rank_to_switch_expr = gp.quicksum(f[i, sio_link, k, c] for c in para.chunk_set)
                model.addConstr(rank_to_switch_expr <= para.T[i, sio_link], name=f'sioup1_({i},{sio_link},{k})')
                # sum_f <= cap + (1-z)M
                model.addConstr(rank_to_switch_expr <= cap_var[k, i] + (1-z_var[k,i])*bigM, name=f'sioup2_({i},{sio_link},{k})')
                # original non-linear form: sio_cap = sio_cap - z(sio_cap - cap)
                # rank_to_switch_expr = gp.quicksum(f[i, sio_link, k, c] for c in para.chunk_set)
                # model.addConstr(rank_to_switch_expr <= para.T[i, sio_link] - z_var[k,i]*(para.T[i, sio_link] - cap_var[k,i]))
    # objective
    # for all2all the objective should split chunks evenly and assign each
    # chunk a target position; earlier arrivals earn larger weights
    expr = gp.LinExpr()
    for rank_id, cur_chunk_lst in final_obj_dict.items():
        for c in cur_chunk_lst:
            for k in range(K):
                for source_j in para.in_direct_links[rank_id]:
                    if k + para.delta[source_j, rank_id] >= K:
                        continue
                    expr += (K - k + 1) * f[source_j, rank_id, k, c]
    model.setObjective(expr, gp.GRB.MAXIMIZE)
    # disabled solve/extraction path kept for reference:
    """
    # model.Params.MIPGap = 0.05
    model.optimize()
    # model.write('tmp.lp')
    # env = OPTVEnv()
    # model_optv = OPTVModel(env)
    # model_optv.Read('tmp.lp')
    # model_optv.ComputeIIS()
    f_res = {key: val.X for key, val in f.items() if val.X}
    cap_res = {key: val.X for key, val in cap_var.items() if val.X}
    flag_res = {key: val.X for key, val in flag.items() if val.X}
    z_res = {key: val.X for key, val in z_var.items() if val.X}
    """
    # return f_res, model.Runtime
    return model
def model_construct_hyper_edge_no_sio_switch(para:Para_910C, K = None, initial_rank_dict = None, final_obj_dict = None):
    """Build (but do not solve) the hyper-edge MIP without sio switches.

    Non-linear variant: keeps the bilinear concurrency-slowdown constraint
    ``sum_flag * cap <= hyper_bound``. Suitable for the rolling-horizon
    setting: the caller may override the horizon K, the initial chunk
    placement and the target placement.

    Args:
      para: topology/instance parameters.
      K: horizon length; defaults to para.K.
      initial_rank_dict: rank -> initial chunk set; defaults to para's copy.
      final_obj_dict: rank -> target chunk list; defaults to para's copy.

    Returns:
      The constructed (unsolved) gurobipy Model.
      NOTE(review): the solve + result-extraction path (including the
      ``return f_res, model.Runtime`` that rolling_horizon_solve expects)
      is currently inside a dead string literal — confirm which return is
      intended before calling this from rolling_horizon_solve.
    """
    if not initial_rank_dict:
        initial_rank_dict = copy.deepcopy(para.rank_chunk_dict)
    if not final_obj_dict:
        final_obj_dict = copy.deepcopy(para.final_obj_dict)
    if not K:
        K = para.K
    f = {}
    b = {}
    model = gp.Model()
    print('var f and var b generating for each rank')
    for k in range(K):
        for i in para.all_rank_set:  # e.g. para.all_rank_set={0,3,16,19}
            for c in para.chunk_set:  # every chunk that is sent or received
                b[i, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'b_({i},{k},{c})')
                # only the links this rank can send on (out_direct_links[i])
                for j in para.out_direct_links.get(i, []):
                    f[i, j, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_({i},{j},{k},{c})')
    print('hyper edge band upper bound')
    # per epoch, each rank's total hyper-edge (hccs) traffic is budget-limited
    for k in range(K):
        for i in para.all_rank_set:
            # intrinsic hccs send cap; pair_dict is empty in current runs
            hyper_links = [hyper_link for hyper_link in para.out_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            model.addConstr(gp.quicksum(f[i, j, k, c] for c in para.chunk_set for j in hyper_links) <= para.hyper_bound)  # hccs cap constraint
            # intrinsic sio send cap; there may be no sio link at all
            nei_rank = para.pair_dict.get(i)
            if nei_rank:
                model.addConstr(gp.quicksum(f[i, nei_rank, k, c] for c in para.chunk_set) <= para.T[i, nei_rank])  # T currently holds zeros
    print('status constrain flow constrain')
    # state constraints: only outbound sends are gated by state here
    for k in range(K):
        for i in para.all_rank_set:
            init_chunk_set = initial_rank_dict[i]
            for c in para.chunk_set:
                # decide where the previous epoch's holding state comes from
                tmp_var = None
                if k == 0 and c in init_chunk_set:
                    # at epoch 0 initial chunks are sendable: rhs constant 1
                    tmp_var = 1
                else:
                    # held at k-1 => sendable at k; the default 0 is a bit risky
                    tmp_var = b.get((i, k - 1, c), 0)
                # send constraint
                for target_j in para.out_direct_links[i]:
                    model.addConstr(f[i, target_j, k, c] <= tmp_var, name=f'status_({k},{i},{target_j},{c})')
                # state balance; NOTE(review): c is not filtered, so a source rank
                # could send i its own chunk c — harmless but worth confirming
                in_expr = gp.quicksum(
                    f.get((source_j, i, k - para.delta[source_j, i], c), 0) for source_j in para.in_direct_links[i])
                model.addConstr(b[i, k, c] == tmp_var + in_expr, name=f'flow_({k},{i},{c})')
    # build a flag variable y per link
    # todo: also cap the receive side — flag_in is derived from inbound f
    flag = {}
    flag_in = {}
    cap_var = {}
    z_var = {}
    for k in range(K):
        for i in para.all_rank_set:
            cap_var[k,i] = model.addVar(lb = 0, ub = para.hyper_bound, vtype = gp.GRB.CONTINUOUS)
            hyper_links = [hyper_link for hyper_link in para.out_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            hyper_in_links = [hyper_link for hyper_link in para.in_direct_links[i] if hyper_link != para.pair_dict.get(i)]
            sio_link = para.pair_dict.get(i)
            # outbound hccs: flag says whether each hyper link carries any send
            for hyper_link in hyper_links:
                flag[k, i, hyper_link] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'flag_({k},{i},{hyper_link})')  # the y variable
                sum_expr = gp.quicksum(f[i,hyper_link, k, c] for c in para.chunk_set)
                # y <= sum, sum <= y*M
                model.addConstr(flag[k,i, hyper_link] <= sum_expr)
                model.addConstr(sum_expr <= 100 * flag[k, i, hyper_link])
            # inbound hccs: flag_in says whether each hyper link carries any receive
            for hyper_in_link in hyper_in_links:
                flag_in[k, i, hyper_in_link] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'flag_in_({k},{i},{hyper_in_link})')
                sum_in_expr = gp.quicksum(f[hyper_in_link, i, k - para.delta[hyper_in_link, i], c] for c in para.chunk_set)
                model.addConstr(flag_in[k, i, hyper_in_link] <= sum_in_expr)
                model.addConstr(sum_in_expr <= 100 * flag_in[k, i, hyper_in_link])
            # non-linear cons: hccs concurrency slowdown (bilinear)
            sum_flag_expr = gp.quicksum(flag[k, i, hyper_link] for hyper_link in hyper_links)
            model.addConstr(sum_flag_expr * cap_var[k,i] <= para.hyper_bound)
            # totals: at most 6/7 concurrent switch links outbound
            model.addConstr(sum_flag_expr <= 7)
            # inbound concurrency limit
            model.addConstr(gp.quicksum(flag_in[k,i, hyper_in_link] for hyper_in_link in hyper_in_links) <= 7)
            # per-link bandwidth bounded by the slowdown-adjusted cap
            for hyper_link in hyper_links:
                c_sum_expr = gp.quicksum(f[i, hyper_link, k, c] for c in para.chunk_set)
                model.addConstr(c_sum_expr <= cap_var[k,i])
            if sio_link is not None:
                # also bound the sio bandwidth
                # sum < z * 7 where 7 is the switch-count upper bound
                z_var[k, i] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'z_({k},{i})')
                model.addConstr(sum_flag_expr <= z_var[k,i] * 7)  # z indicates whether any hccs send happens
                # sio_cap = sio_cap - z(sio_cap - cap)
                rank_to_switch_expr = gp.quicksum(f[i, sio_link, k, c] for c in para.chunk_set)
                model.addConstr(rank_to_switch_expr <= para.T[i, sio_link] - z_var[k,i]*(para.T[i, sio_link] - cap_var[k,i]))
    # objective
    # for all2all the objective should split chunks evenly and assign each
    # chunk a target position; earlier arrivals earn larger weights
    expr = gp.LinExpr()
    for rank_id, cur_chunk_lst in final_obj_dict.items():
        for c in cur_chunk_lst:
            for k in range(K):
                for source_j in para.in_direct_links[rank_id]:
                    if k + para.delta[source_j, rank_id] >= K:
                        continue
                    expr += (K - k + 1) * f[source_j, rank_id, k, c]
    model.setObjective(expr, gp.GRB.MAXIMIZE)
    # disabled solve/extraction path kept for reference:
    """
    # model.Params.MIPGap = 0.05
    model.optimize()
    # model.write('tmp.lp')
    # env = OPTVEnv()
    # model_optv = OPTVModel(env)
    # model_optv.Read('tmp.lp')
    # model_optv.ComputeIIS()
    f_res = {key: val.X for key, val in f.items() if val.X}
    cap_res = {key: val.X for key, val in cap_var.items() if val.X}
    flag_res = {key: val.X for key, val in flag.items() if val.X}
    z_res = {key: val.X for key, val in z_var.items() if val.X}
    return f_res, model.Runtime
    """
    return model
def model_construct_hyper_edge(para:Para_910C):
    """Build and solve the hyper-edge MIP with virtual sio/rdma switch nodes.

    Decision variables:
      b[i, k, c]    -- binary: rank i holds chunk c at epoch k.
      f[i, j, k, c] -- binary: chunk c travels on link i->j during epoch k
                       (rank->rank, virtual sio switch->rank, rdma switch->rank).

    Returns:
      (f_res, runtime): dict of the nonzero f values keyed by (i, j, k, c),
      and the Gurobi solve time in seconds. The nonzero f values are also
      pickled to 'ttt.pkl' for offline inspection.

    Bug fixes vs. the previous revision:
      * the debug dump wrote ``(f_res, cap_res, flag_res)`` but ``cap_res`` and
        ``flag_res`` were never defined (their assignments were commented out),
        so the function always crashed with NameError before returning;
      * the dump's file handle shadowed the ``f`` variable dict;
      * a stray ``exit()`` made the declared return unreachable.
    """
    f = {}
    b = {}
    model = gp.Model()
    print('var f and var b generating for each rank')
    for k in range(para.K):
        for i in range(para.N):
            for c in para.chunk_set:
                b[i, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'b_[{i},{k},{c}]')
                # only the links this rank can send on
                for j in para.out_direct_links.get(i, []):
                    f[i, j, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_[{i},{j},{k},{c}]')
    print('virtual switch node gene')
    for k in range(para.K):
        for c in para.chunk_set:
            for switch_idx in para.all_switch_set:
                for out_rank in para.switch_out_links.get(switch_idx, []):
                    f[switch_idx, out_rank, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_[{switch_idx},{out_rank},{k},{c}]')
    print('rdma switch')
    if para.rdma_switch:
        # multi-server setting: model the rdma switch's sends to every rank
        for k in range(para.K):
            for c in para.chunk_set:
                for rank_id in range(para.N):
                    f[para.rdma_switch, rank_id, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'f_[{para.rdma_switch},{rank_id},{k},{c}]')
    # NOTE: the per-link cap constraints for rank / sio-switch / rdma-switch
    # out-links were previously present here but commented out — superseded
    # by the aggregate hyper-edge budget below.
    print('hyper edge band upper bound')
    # per epoch, each rank's total hyper-edge traffic is budget-limited;
    # an int-typed neighbour marks a hyper link (switch ids are non-int)
    for k in range(para.K):
        for i in range(para.N):
            hyper_links = [hyper_link for hyper_link in para.out_direct_links[i] if isinstance(hyper_link, int)]
            model.addConstr(gp.quicksum(f[i, j, k, c] for c in para.chunk_set for j in hyper_links) <= para.hyper_bound)
    print('status constrain flow constrain')
    # state constraints: only outbound sends are gated by state here
    for k in range(para.K):
        for i in range(para.N):
            init_chunk_set = para.rank_chunk_dict[i]
            for c in para.chunk_set:
                # decide where the previous epoch's holding state comes from:
                # at epoch 0 a rank may send its initial chunks (constant 1),
                # otherwise read the previous epoch's b variable
                if k == 0 and c in init_chunk_set:
                    tmp_var = 1
                else:
                    tmp_var = b.get((i, k - 1, c), 0)
                # send constraint
                for target_j in para.out_direct_links[i]:
                    model.addConstr(f[i, target_j, k, c] <= tmp_var, name=f'status_[{k},{i},{target_j},{c}]')
                # state balance (flow conservation with link delays)
                in_expr = gp.quicksum(
                    f.get((source_j, i, k - para.delta[source_j, i], c), 0) for source_j in para.in_direct_links[i])
                model.addConstr(b[i, k, c] == tmp_var + in_expr, name=f'flow_[{k},{i},{c}]')
    print('switch balance')
    # keep inflow == outflow on every virtual (sio) switch per (k, c)
    for switch_idx in para.all_switch_set:
        out_links = para.switch_out_links[switch_idx]
        in_links = para.switch_in_links[switch_idx]
        for k in range(para.K):
            for c in para.chunk_set:
                in_expr = gp.quicksum(f[i, switch_idx, k, c] for i in in_links)
                out_expr = gp.quicksum(f[switch_idx, j, k, c] for j in out_links)
                model.addConstr(in_expr == out_expr, name='balance')
    # forbid direct rank0 <-> rank1 traffic inside each switch pair
    for switch_idx in para.all_switch_set:
        rank0, rank1 = para.switch_out_links[switch_idx]
        for k in range(para.K):
            for c in para.chunk_set:
                model.addConstr(f[rank0, rank1, k, c] == 0)
                model.addConstr(f[rank1, rank0, k, c] == 0)
    # objective
    # for all2all the objective should split chunks evenly and assign each
    # chunk a target position; earlier arrivals earn larger weights
    expr = gp.LinExpr()
    for rank_id, cur_chunk_lst in para.final_obj_dict.items():
        for c in cur_chunk_lst:
            for k in range(para.K):
                for source_j in para.in_direct_links[rank_id]:
                    if k + para.delta[source_j, rank_id] >= para.K:
                        continue
                    expr += (para.K - k + 1) * f[source_j, rank_id, k, c]
    model.setObjective(expr, gp.GRB.MAXIMIZE)
    model.optimize()
    f_res = {key: val.X for key, val in f.items() if val.X}
    # debug dump of the nonzero flow assignments (use a distinct handle name
    # so the `f` variable dict is not shadowed)
    with open('ttt.pkl', 'wb') as dump_fp:
        pickle.dump(f_res, dump_fp)
    return f_res, model.Runtime
def model_construct(para:Para_910C):
    """Build and solve the switch-balance MIP variant.

    Uses per-switch relay-balance constraints instead of the pairwise gating
    used by ``model_construct_switch_node``. Returns only the dict of nonzero
    f values keyed by (i, j, k, c).
    """
    # model that uses switch balance
    f = {}
    b = {}
    model = gp.Model()
    print('var f and var b generating for each rank')
    for k in range(para.K):
        for i in range(para.N):
            for c in para.chunk_set:
                b[i, k, c] = model.addVar(lb=0, ub=1, vtype=gp.GRB.BINARY, name=f'b_[{i},{k},{c}]')
                # only the links this rank can send on
                for j in para.out_direct_links.get(i, []):
                    f[i,j,k,c] = model.addVar(lb = 0, ub = 1, vtype = gp.GRB.BINARY, name = f'f_[{i},{j},{k},{c}]')
    # instantiate the f variables for switch -> rank sends
    for k in range(para.K):
        for server_idx in range(para.server_num):
            rank_lst = para.server_rank[server_idx]
            for switch_idx in para.server_switch[server_idx]:
                for n in rank_lst:
                    for c in para.chunk_set:
                        f[switch_idx, n, k, c] = model.addVar(lb = 0, ub = 1, vtype = gp.GRB.BINARY, name =f'f_[{switch_idx},{n},{k},{c}]')
    print('cap constrain')
    # capacity constraints on rank-to-rank links first
    for k in range(para.K):
        for i in range(para.N):
            for j in para.out_direct_links[i]:
                cap = para.T[i,j]
                model.addConstr(gp.quicksum(f[i,j,k,c] for c in para.chunk_set) <= cap, name = f'cap_[{k},{i},{j}]')
    # then capacity constraints on switch -> rank links
    for k in range(para.K):
        for server_idx in range(para.server_num):
            rank_lst = para.server_rank[server_idx]
            for switch_idx in para.server_switch[server_idx]:
                for rank_id in rank_lst:
                    cap = para.T[switch_idx, rank_id]
                    model.addConstr(gp.quicksum(f[switch_idx, rank_id, k, c] for c in para.chunk_set) <= cap, name=f'cap_[{k},{switch_idx},{rank_id}]')
    print('status constrain flow constrain')
    # state constraints: only outbound sends are gated by state here
    for k in range(para.K):
        for i in range(para.N):
            init_chunk_set = para.rank_chunk_dict[i]
            for c in para.chunk_set:
                # decide where the previous epoch's holding state comes from
                tmp_var = None
                if k == 0 and c in init_chunk_set:
                    # at epoch 0 initial chunks are sendable: rhs constant 1
                    tmp_var = 1
                else:
                    tmp_var = b.get((i, k - 1, c), 0)
                # send constraint
                for target_j in para.out_direct_links[i]:
                    model.addConstr(f[i,target_j,k,c] <= tmp_var, name = f'status_[{k},{i},{target_j},{c}]')
                # state balance (flow conservation with link delays)
                in_expr = gp.quicksum(f.get((source_j, i, k-para.delta[source_j, i], c), 0) for source_j in para.in_direct_links[i])
                model.addConstr(b[i, k, c] == tmp_var + in_expr, name = f'flow_[{k},{i},{c}]')
    # switch cons: what a rank sends into a switch must equal what the other
    # ranks on the same server receive from that switch
    for server_idx in range(para.server_num):
        switch_lst = para.server_switch[server_idx]
        rank_lst = para.server_rank[server_idx]
        for switch_idx in switch_lst:
            for rank_id in rank_lst:
                other_rank_lst = [other_rank for other_rank in rank_lst if other_rank != rank_id]
                for k in range(para.K):
                    for c in para.chunk_set:
                        recv_expr = gp.quicksum([f.get((switch_idx, other_rank, k, c), 0) for other_rank in other_rank_lst])
                        model.addConstr(f[rank_id, switch_idx, k, c] == recv_expr, name = f'switch_[{switch_idx},{rank_id},{k},{c}]')
    # model.addConstr(f[0, 's0', 0, 0] == 1, name = 'tmp')
    # model.addConstr(f['s0',3, 0, 0] == 1, name='tmp')
    # todo (author's note): using the example above — once s0 sends something to
    # rank 3, every other rank (except 1) would also have to send chunk 0 to s0
    # to keep all the switch constraints consistent, so this formulation has a
    # known bug.
    # objective: reward early arrivals at any rank other than the chunk's owner
    expr = gp.LinExpr()
    for rank_id in range(para.N):
        initial_chunk = para.rank_chunk_dict[rank_id]
        for k in range(para.K):
            for j in range(para.N):
                if rank_id == j:
                    continue
                for source_j in para.in_direct_links[j]:
                    if k + para.delta[source_j, j] >= para.K:
                        continue
                    for c in initial_chunk:
                        expr += (para.K - k + 1) * f[source_j, j, k, c]
    model.setObjective(expr, gp.GRB.MAXIMIZE)
    model.optimize()
    # model.write('tmp.lp')
    # env = OPTVEnv()
    # model_optv = OPTVModel(env)
    # model_optv.Read('tmp.lp')
    # model_optv.ComputeIIS()
    f_res = {key:val.X for key, val in f.items() if val.X}
    return f_res
def rolling_horizon_solve(para:Para_910C, iter_K):
    """Solve the scheduling problem in rolling-horizon windows of iter_K epochs.

    Repeatedly solves a short-horizon model, feeds the delivered chunks back as
    the next window's initial placement, and stitches the per-window flows
    together with a time offset of ``rolling_iter * iter_K``.

    Returns:
      (f_final_res, final_solving_time): dict mapping (i, j, global_k, c) -> 1
      for every flow used, and the accumulated solver runtime.

    NOTE(review): model_construct_hyper_edge_no_sio_switch currently returns
    just the Model (its ``return f_res, model.Runtime`` sits inside a dead
    string literal), so the tuple-unpack below would fail — confirm which
    revision of the callee is intended.
    """
    # todo (author): formally it should be enough to add chunks that already
    # reached their final position to each rank's initial chunks?
    # todo (author): should the objective be changed? yes, it could be!
    final_obj_dict = copy.deepcopy(para.final_obj_dict)
    initial_rank_chunks = copy.deepcopy(para.rank_chunk_dict)
    rolling_iter = 0
    f_final_res = {}
    final_solving_time = 0
    while final_obj_dict:
        f_res, solving_time = model_construct_hyper_edge_no_sio_switch(para, iter_K, initial_rank_chunks, final_obj_dict)
        f_res_filter = utils.res_filter(f_res, initial_rank_chunks, final_obj_dict)
        initial_rank_chunks, final_obj_dict = utils.iter_update(f_res_filter, initial_rank_chunks, final_obj_dict)
        # shift window-local epochs to the global timeline
        for i,j,k,c in f_res.keys():
            f_final_res[i,j,k + rolling_iter * iter_K, c] = 1
        final_solving_time += solving_time
        rolling_iter += 1
    print(f_final_res)
    return f_final_res, final_solving_time
def script():
    """Post-process a solved result from 'tmp.pkl' and sanity-check bandwidth use.

    Loads (res, para) from the pickle, filters/normalizes the flows via utils,
    prints each flow with the chunk's source rank, verifies per-epoch bandwidth
    splitting assumptions via asserts, and returns the cleaned result plus the
    makespan (last used epoch + 1).
    """
    with open('tmp.pkl', 'rb') as f:
        data = pickle.load(f)
    res = data['res']
    para = data['para']
    new_res = utils.switch_prepare(res)
    new_res = utils.res_filter(new_res, para.rank_chunk_dict, para.final_obj_dict)
    if para.task_mode == TASK_Type.REDUCE_SCATTER:
        # further improve the result specifically for reduce-scatter!
        # todo (author): something is slightly off here, starting from 0
        new_res = utils.reduce_scatter_inverse(new_res, para.rank_chunk_dict, para.final_obj_dict)
    tmp = list(new_res.keys())
    tmp.sort(key = lambda x:x)
    for i,j,k,c in tmp:
        # work out which rank chunk c originated from
        source = None
        for rank in range(para.N):
            chunks = para.rank_chunk_dict.get(rank, [])
            if c in chunks:
                source = rank
                break
        print(i,j,k,c,"----", source)
        if source != i:
            print('attention')
    # group flows by (epoch, sender)
    ki_dict = {}
    for (i, j, k, c) in tmp:
        ki_dict[k, i] = ki_dict.get((k, i), []) + [(i, j, k, c)]
    for (k, i), f_lst in ki_dict.items():
        nei_rank = para.pair_dict.get(i)
        send_dict = {}
        for _, j, _, c in f_lst:
            send_dict[j] = send_dict.get(j, []) + [c]
        # hyper links are all receivers except the sio pair neighbour
        hyper_link_num = sum([1 for j in send_dict.keys() if j != nei_rank])
        sio_send_num = len(send_dict.get(nei_rank, []))
        max_sum = max([len(vals) for vals in send_dict.values()])
        if hyper_link_num:
            # hyper bandwidth is split evenly across concurrent hyper links
            split_bound = para.hyper_bound / hyper_link_num
            for hyper_rank, chunks in send_dict.items():
                assert split_bound >= len(chunks)
            assert sio_send_num <= split_bound
        else:
            split_bound = para.T[0,1]
            assert sio_send_num <= para.T[0,1]
        print(f'epoch:{k}, rank:{i}, hyper_send:{hyper_link_num}, split_bound:{split_bound}, sio_send_num:{sio_send_num}, max_sum:{max_sum}')
    # makespan: last used epoch index + 1
    obj = max([key[2] for key in new_res.keys()]) + 1
    return new_res, obj
def find_factors(n):
    """Return every positive divisor of *n* in ascending order."""
    divisors = []
    for candidate in range(1, n + 1):
        if n % candidate == 0:
            divisors.append(candidate)
    return divisors
def process_instance(i):
    """Generate one random training instance and write it as an LP file.

    Samples server count, participating ranks P, task mode, input size and
    chunk count C, derives the HCCS capacity, builds the linearized model and
    writes it to ./instance/train/910C/.

    Reads module-level globals: all_lists, tau, K, switch, topo_name,
    model_mode (set in the __main__ block).
    """
    # sample server_num
    server_num = random.randint(1, 4)
    # NOTE(review): the final `else` is unreachable — the first two branches
    # cover every integer value of server_num
    if server_num != 1:
        P = random.choice(all_lists)
    elif server_num == 1:
        P = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    else:
        raise Exception("unknown server num")
    # task_mode = TASK_Type.ALL_GATHER
    task_mode = random.choice([TASK_Type.REDUCE_SCATTER, TASK_Type.ALL_GATHER])
    # sample Input_size
    num = random.choice([32,64,128])
    Input_size = num * 1024 * 1024  # Bytes
    # sample C (chunk count)
    if task_mode == TASK_Type.REDUCE_SCATTER or task_mode == TASK_Type.ALL2ALL:
        C = random.choice([len(P) * server_num,len(P) * server_num *2,len(P) * server_num*3])
    elif task_mode == TASK_Type.ALL_GATHER:
        C = random.randint(1, 10)
    else:
        raise Exception("unknown task mode")
    # derive HCCSCAP (chunks sendable per epoch over HCCS); grow C until a
    # chunk fits within one epoch
    HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    while HCCSCAP <= 0:
        print("chunk太大了,一个epoch发不完")
        C += 1
        HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    # no upper bound is enforced on HCCSCAP:
    # while HCCSCAP > 10:
    #     print("chunk太小,HCCSCAP太大")
    #     Input_size *= 2
    #     HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    print("HCCSCAP:", HCCSCAP)
    # build para and the instance
    para = Para_910C()
    file_name = f'{topo_name}_{server_num}_{P}_{HCCSCAP}_{Input_size / 1024 / 1024 / 1024}_{C}_{model_mode.value}_{task_mode.value}'
    if model_mode == Model_Type.HYPER_EDGE:
        para.init_hyper_edge_no_sio(topo_name, tau, C, K, Input_size, server_num, switch, task_mode, P)
        m = model_construct_hyper_edge_no_sio_switch_linear(para)
    else:
        raise Exception("wrong model_mode")
    # save
    m.write(filename=f"./instance/train/910C/instance_{i}_{file_name}.lp")
    # with open(f'./para/train/910c_para_{i}_{file_name}.pkl', 'wb') as f:
    #     pickle.dump({'file_name': file_name,
    #                  'para': para}, f)
def process_test_instance(i):
    """Generate one random test instance (reduce-scatter) and write it as an LP file.

    Same sampling pipeline as process_instance, but the task mode is fixed to
    REDUCE_SCATTER, the C choices are slightly smaller, and output goes to
    ./instance/test_zhongqi_reduce_scatter/910C/.

    Reads module-level globals: all_lists, tau, K, switch, topo_name,
    model_mode (set in the __main__ block).
    """
    # sample server_num
    server_num = random.randint(1, 4)
    # NOTE(review): the final `else` is unreachable — the first two branches
    # cover every integer value of server_num
    if server_num != 1:
        P = random.choice(all_lists)
    elif server_num == 1:
        P = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
    else:
        raise Exception("unknown server num")
    # task_mode = TASK_Type.ALL_GATHER
    task_mode = TASK_Type.REDUCE_SCATTER
    # task_mode = TASK_Type.ALL2ALL
    # task_mode = random.choice([TASK_Type.REDUCE_SCATTER, TASK_Type.ALL_GATHER, TASK_Type.ALL2ALL])
    # sample Input_size
    num = random.choice([32, 64, 128])
    Input_size = num * 1024 * 1024  # Bytes
    # sample C (chunk count)
    if task_mode == TASK_Type.REDUCE_SCATTER or task_mode == TASK_Type.ALL2ALL:
        C = random.choice([len(P) * server_num, len(P) * server_num * 2])
    elif task_mode == TASK_Type.ALL_GATHER:
        C = random.randint(1, 10)
    else:
        raise Exception("unknown task mode")
    # derive HCCSCAP (chunks sendable per epoch over HCCS); grow C until a
    # chunk fits within one epoch
    HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    while HCCSCAP <= 0:
        print("chunk太大了,一个epoch发不完")
        C += 1
        HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    # no upper bound is enforced on HCCSCAP:
    # while HCCSCAP > 10:
    #     print("chunk太小,HCCSCAP太大")
    #     Input_size *= 2
    #     HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    print("HCCSCAP:", HCCSCAP)
    # build para and the instance
    para = Para_910C()
    file_name = f'{topo_name}_{server_num}_{P}_{HCCSCAP}_{Input_size / 1024 / 1024 / 1024}_{C}_{model_mode.value}_{task_mode.value}'
    if model_mode == Model_Type.HYPER_EDGE:
        para.init_hyper_edge_no_sio(topo_name, tau, C, K, Input_size, server_num, switch, task_mode, P)
        m = model_construct_hyper_edge_no_sio_switch_linear(para)
        # m = model_construct_hyper_edge_no_sio_switch(para)
    else:
        raise Exception("wrong model_mode")
    # save
    m.write(filename=f"./instance/test_zhongqi_reduce_scatter/910C/instance_{i}_{file_name}.lp")
    # m.write(filename=f"./instance/test/910C/instance_{i}_{file_name}.lp")
    # with open(f'./para/test/910c_para_{i}_{file_name}.pkl', 'wb') as f:
    #     pickle.dump({'file_name': file_name,
    #                  'para': para}, f)
if __name__ == '__main__':
    # ---- experiment configuration (read as globals by the worker funcs) ----
    instanceNum = 1
    tau = 4.4e2 / 1e6  # Epoch duration 1e-3 for 910A
    K = 8  # An upperbound of the total epoch number
    switch = 7  # fixed, do not change
    model_mode = Model_Type.HYPER_EDGE
    topo_name = "910C_2"  # switch_node
    random.seed(1145)
    # ---- output directories ----
    para_folder = "./para"
    if not os.path.exists(para_folder):
        os.makedirs(para_folder, exist_ok=True)
    if not os.path.exists(os.path.join(para_folder, "train")):
        os.makedirs(os.path.join(para_folder, "train"), exist_ok=True)
    if not os.path.exists(os.path.join(para_folder, "test")):
        os.makedirs(os.path.join(para_folder, "test"), exist_ok=True)
    instance_folder = "./instance"
    if not os.path.exists(instance_folder):
        os.makedirs(instance_folder, exist_ok=True)
    if not os.path.exists(os.path.join(instance_folder, "train/910C")):
        os.makedirs(os.path.join(instance_folder, "train/910C"), exist_ok=True)
    if not os.path.exists(os.path.join(instance_folder, "test/910C")):
        os.makedirs(os.path.join(instance_folder, "test/910C"), exist_ok=True)
    # ---- sample the candidate rank subsets P (all subsets of size <= 3) ----
    all_lists = []
    for r in range(1, 17):
        for p in itertools.combinations(range(16), r):
            if len(list(p)) <= 3:
                all_lists.append(list(p))
    # ---- instance counts ----
    # todo (author): the test set is still all-gather for now
    trainNum = 1
    # trainNum = 200
    testNum = 10
    processes = []
    max_processes = 32  # max number of concurrently running worker processes
    # train-instance generation is currently disabled (if False)
    if False:
        with multiprocessing.Pool(processes=max_processes) as pool:
            pool.map(process_instance, range(trainNum))
    # test-instance generation
    if True:
        with multiprocessing.Pool(processes=max_processes) as pool:
            pool.map(process_test_instance, range(testNum))
    # for i in range(trainNum):
    #     p = multiprocessing.Process(target=process_instance, args=(i,))
    #     p.start()
    #     processes.append(p)
    # older sequential generation loop, kept disabled for reference:
    """
    for i in range(trainNum):
    server_num = random.randint(1, 4)
    P = random.choice(all_lists)
    # task_mode = TASK_Type.REDUCE_SCATTER
    task_mode = TASK_Type.ALL_GATHER
    # C采样
    if task_mode == TASK_Type.REDUCE_SCATTER or task_mode == TASK_Type.ALL2ALL:
    C = len(P) * server_num # todo:待修改
    # factors = find_factors(len(P)* server_num) # 找到len(P)* server_num的所有因子(todo:我感觉需要排除1,最大因子,以及一些过大的C)
    # C = random.choice(factors)
    elif task_mode == TASK_Type.ALL_GATHER:
    C = random.randint(1,20) # todo:待修改
    else:
    raise Exception("unknown task mode")
    # HCCSCAP采样
    num = random.randint(16, 512)
    Input_size = num * 1024 * 1024 # Bytes
    HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    while HCCSCAP <= 0:
    print("chunk太大了,一个epoch发不完")
    Input_size /= 2
    HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    while HCCSCAP >= 10:
    print("chunk太小,HCCSCAP太大")
    Input_size *= 2
    HCCSCAP = round(140 * 1024 ** 3 / (Input_size / C) * tau)
    print("HCCSCAP:", HCCSCAP)
    # para与instance生成
    para = Para_910C()
    file_name = f'{topo_name}_{server_num}_{P}_{Input_size / 1024 / 1024 / 1024}_{C}_{model_mode.value}_{task_mode.value}'
    if model_mode == Model_Type.HYPER_EDGE:
    para.init_hyper_edge_no_sio(topo_name, tau, C, K, Input_size, server_num, switch, task_mode, P)
    m = model_construct_hyper_edge_no_sio_switch_linear(para)
    else:
    # model_mode == 'switch'
    raise Exception("这里没改好,而且似乎也不需要")
    para.init(topo_name, tau, C, K, Input_size, server_num, 6, task_mode)
    f_res, solving_time = model_construct_switch_node(para)
    ## 保存
    m.write(filename=f"./instance/train/910c_instance_{i}_{file_name}.lp")
    with open(f'./para/train/910c_para_{i}_{file_name}.pkl', 'wb') as f:
    pickle.dump({'file_name': file_name,
    'para': para}, f)
    """
# NOTE(review): stray page-footer text from the paste source ("最新发布" / "Latest release") — commented out so the file parses.