import os
import time
import numpy as np
import gym
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input
from keras.layers.merge import Add, Concatenate
from keras.optimizers import Adam
import keras.backend as K
import random
from collections import deque
def stack_samples(samples):
    """Split a batch of transitions into five 2-D arrays, one per field.

    Each element of ``samples`` is indexed positionally as
    ``(s_t, action, reward, s_t1, done)``. For every field, the values of
    all samples are stacked along a new leading axis and then flattened to
    shape ``(len(samples), -1)``, so scalar fields become column vectors
    and array fields become one row per sample.

    Args:
        samples: Non-empty sequence of transitions with at least five
            positional fields each. Within one field, all samples must
            share the same shape (a requirement of ``np.stack``).

    Returns:
        Tuple ``(s_ts, actions, rewards, s_ts1, dones)`` of 2-D arrays whose
        first dimension equals ``len(samples)``.

    Raises:
        ValueError: If ``samples`` is empty, or if the values of a field do
            not share a common shape.
    """
    batch_size = len(samples)
    if batch_size == 0:
        raise ValueError("stack_samples() requires at least one sample")

    # Transpose the batch field-by-field instead of calling
    # np.array(samples): the fields of a transition have different shapes,
    # and building an array from such a ragged nested sequence raises
    # ValueError on NumPy >= 1.24.
    columns = list(zip(*samples))

    def _as_matrix(column):
        # Stack the per-sample values, then flatten everything after the
        # batch axis.
        return np.stack(column).reshape((batch_size, -1))

    s_ts = _as_matrix(columns[0])
    actions = _as_matrix(columns[1])
    rewards = _as_matrix(columns[2])
    s_ts1 = _as_matrix(columns[3])
    dones = _as_matrix(columns[4])
    return s_ts, actions, rewards, s_ts1, dones
class Agent(object):
def __init__(self,sess):
self.sess = sess
self.epsilon = 0.9
self.gamma = 0.99
self.epsilon_decay = 0.99995
self.tau = 0.01
self.memory = deque(maxlen=4000)
self.actor_state_input, self.actor_model = self.create_actor_model()
_, self.target_actor_model = self.create_actor_model()
self.actor_critic_grad = tf.placeholder(tf.float32,[None,1])
actor_model_w
# keras-ddpg
# (Web-page footer: "Latest recommended article published on 2023-04-19 10:26:09")