#225. Lining Up

This post presents a solution to the problem of Farmer John picking a group of consecutively positioned cows for a frisbee game: by preprocessing range minima and maxima (a sparse table), the maximum height difference within each chosen group can be answered efficiently.

【Problem Description】:

Every day, Farmer John's N cows line up in the same order. One day, John decides to let some of them play a game of Ultimate Frisbee. He will pick a group of cows occupying consecutive positions in the line, and to keep the match from being too one-sided, the heights of the chosen cows should not differ too much.

John has prepared Q selections and tells you the height Hi of every cow. For each selection, he wants to know the height difference between the tallest and the shortest cow in that group.

Note: on the largest test cases, input and output account for most of the running time.

【Input Format】:

Line 1: two space-separated integers N and Q.

Lines 2 through N+1: one integer per line; line i+1 gives the height Hi of the i-th cow.

Lines N+2 through N+Q+1: two space-separated integers A and B per line, meaning all cows from position A to position B are selected (1<=A<=B<=N).

【Output Format】:

Q lines, one integer per line: the answer to the corresponding query.

【Sample Input】:

6 3
1
7
3
4
2
5
1 5
4 6
2 2

【Sample Output】:

6
3
0

【Limits and Constraints】:

Time limit: 1 s; memory limit: 128 MB

1<=N<=50,000; 1<=Q<=200,000; 1<=Hi<=10^6
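
Why a sparse table: with N up to 50,000 and Q up to 200,000, scanning each queried range directly can cost on the order of N*Q ≈ 10^10 operations in the worst case, far too slow for 1 second. The code below instead precomputes, for every position i and every power of two 2^j, the minimum f1[i][j] and the maximum f2[i][j] of the 2^j heights starting at i:

f1[i][j] = min(f1[i][j-1], f1[i + 2^(j-1)][j-1])
f2[i][j] = max(f2[i][j-1], f2[i + 2^(j-1)][j-1])

Preprocessing takes O(N log N) time and memory. A query [s, t] is then answered in O(1): let k = floor(log2(t - s + 1)) and combine the two blocks [s, s + 2^k - 1] and [t - 2^k + 1, t], which together cover [s, t]; their overlap is harmless because min and max are idempotent. For the sample query "1 5" (heights 1 7 3 4 2), k = 2, so the blocks [1, 4] and [2, 5] give a maximum of 7 and a minimum of 1, hence the answer 6.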

The solution code is as follows:
#include<stdio.h>
#include<algorithm>

using namespace std;
const int N=50005;
int n,q;
int a[N];        //cow heights
int f1[N][20];   //f1[i][j]: minimum of the 2^j heights starting at position i
int f2[N][20];   //f2[i][j]: maximum of the 2^j heights starting at position i
int lg[N];       //lg[x]=floor(log2(x)), precomputed to avoid floating-point rounding

int read()//fast input: on the largest cases I/O dominates the running time
{
	int f=1,ans=0;
	char c=getchar();
	while(c<'0'||c>'9')
	{
		if(c=='-')
			f=-1;
		c=getchar();
	}
	while('0'<=c&&c<='9')
	{
		ans=ans*10+c-'0';
		c=getchar();
	}
	return ans*f;
}

void Init()//sparse-table preprocessing: DP over interval lengths that double at each level
{
	for(int i=2;i<=n;i++)
		lg[i]=lg[i/2]+1;
	for(int i=1;i<=n;i++)
		f1[i][0]=f2[i][0]=a[i];
	for(int j=1;(1<<j)<=n;j++)
	{
		for(int i=1;i+(1<<j)-1<=n;i++)
		{
			f1[i][j]=min(f1[i][j-1],f1[i+(1<<(j-1))][j-1]);
			f2[i][j]=max(f2[i][j-1],f2[i+(1<<(j-1))][j-1]);
		}
	}
}

int work1(int s,int t)//range-minimum query on [s,t]
{
	int k=lg[t-s+1];
	return min(f1[s][k],f1[t-(1<<k)+1][k]);
}
int work2(int s,int t)//range-maximum query on [s,t]
{
	int k=lg[t-s+1];
	return max(f2[s][k],f2[t-(1<<k)+1][k]);
}
int main()
{
//	freopen("51.in","r",stdin);
//	freopen("51.out","w",stdout);
	n=read();
	q=read();
	for(int i=1;i<=n;i++)
		a[i]=read();
	Init();
	for(int i=1;i<=q;i++)
	{
		int x=read();
		int y=read();
		printf("%d\n",work2(x,y)-work1(x,y));//tallest minus shortest in [x,y]
	}
	return 0;
}
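
For local testing it helps to have a brute-force reference to compare against. The short program below is a minimal sketch (not part of the original solution): it reads the same input format and answers each query by scanning the range directly, which is only fast enough for small hand-made cases, but its output can be diffed against the sparse-table program's output.

#include<stdio.h>
#include<algorithm>

using namespace std;
const int N=50005;
int h[N];//cow heights

int main()
{
	//Brute-force reference: O(length) per query, for validating the fast solution on small inputs.
	int n,q;
	scanf("%d%d",&n,&q);
	for(int i=1;i<=n;i++)
		scanf("%d",&h[i]);
	while(q--)
	{
		int a,b;
		scanf("%d%d",&a,&b);
		int lo=h[a],hi=h[a];
		for(int i=a+1;i<=b;i++)
		{
			lo=min(lo,h[i]);
			hi=max(hi,h[i]);
		}
		printf("%d\n",hi-lo);
	}
	return 0;
}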
