6-7 Deque(25 point(s))

This article presents an implementation of a double-ended queue (deque) backed by a doubly linked list, supporting insertion and deletion at both the front and the rear, with every operation running in O(1) time.

A "deque" is a data structure consisting of a list of items, on which the following operations are possible:

  • Push(X,D): Insert item X on the front end of deque D.
  • Pop(D): Remove the front item from deque D and return it.
  • Inject(X,D): Insert item X on the rear end of deque D.
  • Eject(D): Remove the rear item from deque D and return it.

Write routines to support the deque that take O(1) time per operation.

Format of functions:

Deque CreateDeque();
int Push( ElementType X, Deque D );
ElementType Pop( Deque D );
int Inject( ElementType X, Deque D );
ElementType Eject( Deque D );

where Deque is defined as the following:

typedef struct Node *PtrToNode;
struct Node {
    ElementType Element;
    PtrToNode Next, Last;
};
typedef struct DequeRecord *Deque;
struct DequeRecord {
    PtrToNode Front, Rear;
};

Here the deque is implemented by a doubly linked list with a header. Front and Rear point to the two ends of the deque respectively. Front always points to the header. The deque is empty when Front and Rear both point to the same dummy header.

Note: Push and Inject are supposed to return 1 if the operation can be done successfully, or 0 if it fails. If the deque is empty, Pop and Eject must return ERROR, which is defined by the judge program.
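As a small sketch of that invariant (the helper name IsEmpty is my own and not part of the required interface), the emptiness test is just a pointer comparison:

int IsEmpty( Deque D )
{   /* Hypothetical helper: the deque is empty exactly when Front and Rear
       both point at the dummy header node created by CreateDeque. */
    return D->Front == D->Rear;
}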

Sample program of judge:

#include <stdio.h>
#include <stdlib.h>

#define ElementType int
#define ERROR 1e5
typedef enum { push, pop, inject, eject, end } Operation;

typedef struct Node *PtrToNode;
struct Node {
    ElementType Element;
    PtrToNode Next, Last;
};
typedef struct DequeRecord *Deque;
struct DequeRecord {
    PtrToNode Front, Rear;
};
Deque CreateDeque();
int Push( ElementType X, Deque D );
ElementType Pop( Deque D );
int Inject( ElementType X, Deque D );
ElementType Eject( Deque D );

Operation GetOp();          /* details omitted */
void PrintDeque( Deque D ); /* details omitted */

int main()
{
    ElementType X;
    Deque D;
    int done = 0;

    D = CreateDeque();
    while (!done) {
        switch(GetOp()) {
        case push: 
            scanf("%d", &X);
            if (!Push(X, D)) printf("Memory is Full!\n");
            break;
        case pop:
            X = Pop(D);
            if ( X==ERROR ) printf("Deque is Empty!\n");
            break;
        case inject: 
            scanf("%d", &X);
            if (!Inject(X, D)) printf("Memory is Full!\n");
            break;
        case eject:
            X = Eject(D);
            if ( X==ERROR ) printf("Deque is Empty!\n");
            break;
        case end:
            PrintDeque(D);
            done = 1;
            break;
        }
    }
    return 0;
}

/* Your function will be put here */

Sample Input:

Pop
Inject 1
Pop
Eject
Push 1
Push 2
Eject
Inject 3
End

Sample Output:

Deque is Empty!
Deque is Empty!
Inside Deque: 2 3

This problem is easy once you understand how a double-ended queue actually works.
// Model: Front never moves; each Push inserts a new node between Front (the header) and its
// current successor. Rear does move: each Inject appends a node at the back, and Rear is then
// advanced so that it always points at the last real node.
Deque CreateDeque(){
    Deque p;
    p = (Deque)malloc(sizeof(struct DequeRecord));// create the record holding the Front/Rear pointers
    p->Front = (PtrToNode)malloc(sizeof(struct Node));// allocate the dummy header node
    p->Front->Last = NULL;// nothing lies to the left of the header
    p->Rear = p->Front;// Front and Rear both point at the header: the deque is empty
    p->Rear->Next = NULL;// and nothing lies to the right either
    return p;
}
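/* A sketch of the pointer layout, for intuition (doubly linked list with a header):
 *
 *   freshly created:      Front -> [header] <- Rear            (Front == Rear, empty)
 *   after Push(1, D):     Front -> [header] <-> [1] <- Rear
 *   then Inject(3, D):    Front -> [header] <-> [1] <-> [3] <- Rear
 *
 * Front always stays on the header; Rear tracks the last real node.
 */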
int Push( ElementType X, Deque D ){// insert between Front and its current successor
    struct Node* temp;
    temp = (struct Node*)malloc(sizeof(struct Node));
    if(!temp)return 0;// allocation failed: memory is full
    temp->Element = X;// store the value
    if(D->Front==D->Rear){// the deque is empty
        D->Front->Next = temp;// the new node becomes the first real node
        temp->Last = D->Front;// and links back to the header
        D->Rear = temp;// Rear now points at this single element
        temp->Next = NULL;// it is also the last node, so its Next is NULL
        return 1;
    }
    // general case: the deque already has elements
    temp->Next = D->Front->Next;// new node points forward to the old first node
    temp->Last = D->Front;// and backward to the header
    D->Front->Next->Last = temp;// the old first node points back to the new node
    D->Front->Next = temp;// the header now points to the new node
    return 1;
}
ElementType Pop( Deque D ){
    if(D->Front==D->Rear)
        return ERROR;// empty deque: nothing to pop
    ElementType temp = D->Front->Next->Element;// save the value to return
    struct Node* t = D->Front->Next;// remember the node so it can be freed
    if(D->Front->Next==D->Rear){// the deque holds exactly one element
        D->Rear = D->Front;// after removal Rear falls back onto the header
        D->Rear->Next = NULL;// the header is the last node again
        free(t);
        return temp;
    }
    // general case: two or more elements
    D->Front->Next->Next->Last = D->Front;// the second real node now points back to the header
    D->Front->Next = D->Front->Next->Next;// and the header skips over the removed node
    free(t);
    return temp;
}
int Inject( ElementType X, Deque D ){// rear insertion: append after Rear, then advance Rear
    struct Node* temp = (struct Node*)malloc(sizeof(struct Node));
    if(!temp)return 0;// allocation failed: memory is full
    temp->Element = X;
    if(D->Front==D->Rear){// the deque is empty
        D->Front->Next = temp;
        temp->Last = D->Front;
        temp->Next = NULL;// the new node is also the last one, so its Next must be NULL
        D->Rear = temp;
        return 1;// same shape as the empty case of Push
    }
    // general case
    D->Rear->Next = temp;// the old last node points forward to the new node
    temp->Last = D->Rear;// which points back to it
    temp->Next = NULL;// and has no successor
    D->Rear = temp;// Rear advances to the new last node
    return 1;
}
ElementType Eject( Deque D ){
    if(D->Front==D->Rear){// empty deque: nothing to eject
        return ERROR;
    }
    ElementType temp = D->Rear->Element;// save the value to return
    struct Node* t = D->Rear;
    D->Rear = D->Rear->Last;// Rear steps back to the previous node
    D->Rear->Next = NULL;// which is now the last node, so its Next is NULL
    free(t);
    return temp;
}
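
For local testing, one can stub in the two routines whose details the judge omits. The versions below are only a guess, reconstructed from the command names in the sample input and the "Inside Deque: 2 3" line in the sample output; the real judge implementations may differ:

#include <string.h>

Operation GetOp()
{   // Read one command word ("Push", "Pop", "Inject", "Eject", "End") and map it
    // onto the Operation enum; assumes well-formed input.
    char cmd[8];
    scanf("%7s", cmd);
    if ( strcmp(cmd, "Push") == 0 )   return push;
    if ( strcmp(cmd, "Pop") == 0 )    return pop;
    if ( strcmp(cmd, "Inject") == 0 ) return inject;
    if ( strcmp(cmd, "Eject") == 0 )  return eject;
    return end;
}

void PrintDeque( Deque D )
{   // Walk the list from the node after the header to the end and print each element,
    // matching the "Inside Deque: 2 3" format of the sample output.
    PtrToNode p;
    printf("Inside Deque:");
    for ( p = D->Front->Next; p != NULL; p = p->Next )
        printf(" %d", p->Element);
    printf("\n");
}

Compiled together with the solution above, these stubs should reproduce the sample output for the sample input.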