from tensorflow.keras import optimizers, layers, models, losses
from collections import deque
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import random
import gym
import copy
# Configure GPU memory: enable memory growth so TensorFlow allocates GPU
# memory on demand instead of grabbing the whole card at startup.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Guard against CPU-only hosts: the original indexed [0] unconditionally,
# which raises IndexError when no GPU is present.
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
def build_model():
    """Build the DDPG actor and critic networks for LunarLanderContinuous-v2.

    Returns:
        A tuple ``(actor_model, critic_model)`` of uncompiled Keras models:
        - actor:  state (8,) -> action (2,), tanh-bounded to [-1, 1]
        - critic: (state (8,), action (2,)) -> scalar Q-value
    """
    # Actor network: 8-dim observation -> two hidden ReLU layers -> 2 actions.
    # tanh keeps both action components in [-1, 1], matching the env's bounds.
    actor_input = [layers.Input(shape=[8])]
    a_h1 = layers.Dense(units=1024, activation='relu')(actor_input[0])
    a_h2 = layers.Dense(units=1024, activation='relu')(a_h1)
    a_out = layers.Dense(units=2, activation='tanh')(a_h2)
    actor_model = models.Model(inputs=actor_input, outputs=a_out)

    # Critic network: Q(s, a). State and action inputs are concatenated,
    # then passed through the same hidden sizes; the head is a linear scalar.
    critic_input = [layers.Input(shape=[8]), layers.Input(shape=[2])]
    c_concat = layers.concatenate(critic_input)
    c_h1 = layers.Dense(units=1024, activation='relu')(c_concat)
    c_h2 = layers.Dense(units=1024, activation='relu')(c_h1)
    c_out = layers.Dense(units=1)(c_h2)
    # NOTE(review): the scraped source was truncated after
    # "critic_model = models.Model(inputs"; the completion below mirrors the
    # actor_model construction — confirm against the original article.
    critic_model = models.Model(inputs=critic_input, outputs=c_out)

    return actor_model, critic_model
# --- Blog-scrape residue below (originally bare prose; commented out so the
# --- module remains valid Python). Translated from Chinese:
#
# [Reinforcement Learning] Building DDPG with TensorFlow 2.0 to train
# LunarLanderContinuous-v2
# First published 2022-02-08 11:49:33.
#
# Abstract: This article presents a reinforcement-learning algorithm based on
# Deep Deterministic Policy Gradient (DDPG) for the continuous-control
# LunarLander environment. By building an actor-critic model, it implements
# the agent's replay memory, action selection, and model training, showing
# how the algorithm interacts with the environment and optimizes learning.
#
# (The remainder was site boilerplate: "From 0.47 CNY/day — unlock the
# article", "folded comments", "why were they folded?".)