def update():
    """Run one episode of the agent/environment interaction loop.

    On every step the environment is rendered, the agent picks an action
    for the current observation, the environment applies that action, and
    the resulting transition is handed to the agent's ``learn`` method.
    The loop stops as soon as the environment reports ``done``; afterwards
    "game over" is printed and the environment is destroyed.

    Relies on the module-level names ``env`` and ``RL`` being defined
    elsewhere in the file.
    """
    # The loop reads `observation` before it is ever reassigned, so it must
    # be seeded first; without this the first iteration raises
    # UnboundLocalError.
    # NOTE(review): assumes `env` exposes a Gym-style reset() that returns
    # the initial observation -- confirm against the environment class.
    observation = env.reset()

    while True:
        # Refresh the environment display.
        env.render()

        # The observation is stringified before being handed to the agent.
        action = RL.choose_action(str(observation))

        # Apply the action; get the next observation, reward and done flag.
        observation_, reward, done = env.step(action)

        # Learn from this transition. Uses the same `RL` object as
        # choose_action above (the name was previously misspelled `Rl`).
        RL.learn(str(observation), action, reward, str(observation_))

        # The next observation becomes the current one for the next step.
        observation = observation_

        # Leave the loop once the episode has ended.
        if done:
            break

    # End of game: report and tear the environment down.
    print("game over")
    env.destroy()
强化学习的代码格式
最新推荐文章于 2025-06-17 23:55:43 发布