REINFORCE 算法实现
REINFORCE 算法是策略梯度(Policy Gradient)算法最原始的一种实现,这里采用 TensorFlow 2.0 进行实现。
import tensorflow as tf
import gym
from matplotlib import pyplot as plt
import numpy as np
def PGReinforce_run(PGReinforce_agent=None, episode=1000):
PGReinforce_agent = PGReinforce_agent.PGReinforce(n_actions=2, n_features=4)
PGReinforce_agent.net_init()
score = []
env = gym.make('CartPole-v1')
bias = 5
for i_episode in range(episode):
# 初始化,
observation = env.reset()
done = False
t = 0
while not done:
env.render()
action = PGReinforce_agent.choose_action(observation)
PGReinforce_agent.traj_store(observation, action)
observation_, reward, done, info = env.step(action)
x, x_dot, theta, theta_dot = observation
r2 = - abs(theta)*5
# r1 = - abs(x)
PGReinforce_agent.r_calculate(reward + r2)
observation = observation_
t += 1
# PGReinforce_agent.loss_calculate()
print("Episode finished after {} time steps".format(t + 1))
score.append(t + 1)
PGReinforce_agent.learn(5)
if (i_episode + 1) % 100 == 0:
plt.plot(score) # 绘制波形
# plt.draw()
plt.savefig(f"RL_algorithm_package/img/pic_