首次访问蒙特卡洛策略评估
每次访问蒙特卡洛策略评估
时序差分(TD)策略评估
实例:
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : RandomWork.py
@Time : 2021/12/12 16:57:13
@Author : zhangzhe
@Version : 1.0
@Contact : 841121040@qq.com
@License : (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
@Desc : Temporal-difference (TD) policy evaluation on the random-walk example
'''
# Import libraries
# Algorithm: Temporal Difference (TD) policy evaluation
# Project  : Random Walk
# Author : XD_MaoHai
# Reference: Jabes
# Date : 2021/11/18
# 导入库函数
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# 设置进度条
from tqdm import tqdm
# 初始化A-E的状态价值为0.5,右侧终点值为1,左侧终点值为0
# VALUES = [left terminal, A, B, C, D, E, right terminal]
VALUES = np.zeros