346. Moving Average from Data Stream

This post describes a sliding-window approach to computing a moving average over a stream. A queue holds the values currently inside the window and a running sum is updated as values arrive, which suits data-stream processing. A detailed walkthrough of the algorithm and a code example follow.


Problem Description

Given a stream of integers and a window size, calculate the moving average of all integers in the sliding window.

For example,

MovingAverage m = new MovingAverage(3);
m.next(1) = 1
m.next(10) = (1 + 10) / 2
m.next(3) = (1 + 10 + 3) / 3
m.next(5) = (10 + 3 + 5) / 3

Problem link:


Approach

Compute the average over a sliding window, restricted to the most recent size values.

Store every incoming value in a queue and keep a running sum of the elements currently in the queue. While the queue holds fewer than size elements, push the new value and then compute the average; note the ++n, because the element count has just grown by one. Once the queue reaches size, pop the oldest element before pushing the new one, apply the same subtraction/addition to sum, and then compute the average.

Code
#include <queue>
using namespace std;

class MovingAverage {
public:
    /** Initialize your data structure here. */
    queue<int> window;   // values currently inside the window
    double sum = 0;      // running sum of the values in the queue
    int cap;             // maximum window size
    MovingAverage(int size) {
        cap = size;
    }

    double next(int val) {
        int n = window.size();
        if (n < cap) {
            // Window not yet full: append and average over n + 1 values.
            sum = sum + val;
            window.push(val);
            return sum / (++n);
        }
        else {
            // Window full: evict the oldest value, then append the new one.
            sum = sum - window.front() + val;
            window.pop();
            window.push(val);
            return sum / cap;
        }
    }
};

/**
 * Your MovingAverage object will be instantiated and called as such:
 * MovingAverage* obj = new MovingAverage(size);
 * double param_1 = obj->next(val);
 */
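
To sanity-check the class, a small driver replaying the example from the problem statement could look like the sketch below (main and the expected-value comments are my own additions, not part of the original post):

#include <iostream>
using namespace std;

// Assumes the MovingAverage class above is defined in the same file.
int main() {
    MovingAverage m(3);
    cout << m.next(1)  << endl;  // 1
    cout << m.next(10) << endl;  // (1 + 10) / 2 = 5.5
    cout << m.next(3)  << endl;  // (1 + 10 + 3) / 3 ≈ 4.67
    cout << m.next(5)  << endl;  // (10 + 3 + 5) / 3 = 6
    return 0;
}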

Time complexity: O(1) per call to next.
Space complexity: O(size), since the queue never holds more than size elements.


Reflection

Good practice for working with a queue.
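
The same O(1) update can also be done without a queue, using a fixed-size circular buffer; the variant below is only an illustrative sketch (the class and member names are my own, not from the original solution):

#include <vector>
using namespace std;

// Illustrative circular-buffer variant; not part of the original solution.
class MovingAverageRing {
public:
    vector<double> buf;          // fixed-size ring buffer
    int cap, count = 0, head = 0;
    double sum = 0;

    MovingAverageRing(int size) : buf(size), cap(size) {}

    double next(int val) {
        if (count < cap) {
            ++count;             // window is still filling up
        } else {
            sum -= buf[head];    // evict the value about to be overwritten
        }
        buf[head] = val;
        sum += val;
        head = (head + 1) % cap; // advance the write position
        return sum / count;
    }
};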
