import numpy as np
import matplotlib.pyplot as plt
# Gridworld configuration.
# Cells are addressed as [i, j]; the environment loop below maps 'U' to
# [i - 1, j], so i is the index that an 'U' move decreases.
grid_size = 5  # the grid has grid_size x grid_size cells
# Special cells and their primed counterparts. Presumably A -> A' and
# B -> B' are jump pairs; the code that uses them is outside this view.
posA = [0, 1]
primeA = [4, 1]
posB = [0, 3]
primeB = [2, 3]
discount = 0.9  # presumably the discount factor for returns -- confirm at use site
actions = ['L', 'U', 'R', 'D']
# Per-cell policy table: actionProb[i][j][action] is the probability of
# taking `action` in cell [i, j], initialised to a uniform distribution.
# Built with nested comprehensions so every cell owns its own dict and every
# row its own list; `[[d] * n] * n` would alias one dict across all cells,
# and updating a single cell would then change the whole grid.
actionProb = [
    [{action: 1.0 / len(actions) for action in actions} for _ in range(grid_size)]
    for _ in range(grid_size)
]
#environment
NextState = []
actionReward = []
for i in range(grid_size):
NextState.append([])
actionReward.append([])
for j in range(grid_size):
next = dict()
reward = dict()
if i == 0:
next['U'] = [i, j]
reward['U'] = -1.0
else:
next['U'] = [i - 1, j]
reward['U'] = 0.0
if i == grid_size - 1:
next['D'] = [i, j]
reward['D'] = -1.0
else:
next['D&#
强化学习圣经-GridWorld实现
最新推荐文章于 2024-04-10 13:20:06 发布