RL Policy Evaluation Python实现
根据 UCL 课程 Lecture 3 中针对 4x4 网格(grid)进行策略评估(policy evaluation)的方法,可以写成如下 Python 实现:
# The 16 cells of the 4x4 grid, numbered 0..15 in row-major order
# (row = s // 4, column = s % 4), so state 0 is the top-left corner.
states = list(range(16))

# One numeric entry per state, all starting at zero
# (presumably the value estimates being evaluated -- confirm against the caller).
values = [0] * 16

# The four moves: north, east, south, west.
actions = ["n", "e", "s", "w"]

# Change in state index produced by each move. Because the grid is stored
# row-major with 4 columns, a vertical move shifts the index by a whole row
# (+/-4) while a horizontal move shifts it by one cell (+/-1).
# NOTE: "n" must be -4; using -1 would make north behave like west.
ds_actions = {"n": -4, "e": 1, "s": 4, "w": -1}

# Presumably the discount factor; 1.00 would mean no discounting.
gamma = 1.00
def nextState(s, a):
    """Return the state reached by taking action ``a`` from state ``s``.

    States are indices into a 4-column grid stored row by row. A move that
    would step off the grid leaves the state unchanged; any other move adds
    the offset for ``a`` taken from the module-level ``ds_actions`` table.

    Args:
        s: Current state index.
        a: Action key: "n", "e", "s" or "w".

    Returns:
        The resulting state index.
    """
    # Each guard below is one wall of the grid: bumping into it is a no-op.
    if s % 4 == 0 and a == "w":  # leftmost column, heading west
        return s
    if s < 4 and a == "n":  # top row, heading north
        return s
    if (s + 1) % 4 == 0 and a == "e":  # rightmost column, heading east
        return s
    if s > 11 and a == "s":  # bottom row, heading south
        return s
    return s + ds_actions[a]
def rewardOf(s):
    """Return the reward associated with state ``s``.

    States 0 and 15 yield 0; every other state yields -1.
    """
    if s in [0, 15]:
        return 0
    return -1
def isTerminateState(s):
return s in [