import random
import time
import numpy as np
import sys
if sys.version_info.major == 2:
import Tkinter as tk
else:
import tkinter as tk
# Ball sprite: built from a canvas and a fill color.
class Ball:
    """The ball sprite: an oval canvas item that bounces off three walls.

    The ball bounces off the top, left and right edges of the playing
    field.  The bottom edge is deliberately not handled here; the caller
    decides what happens when the ball falls past the paddle.

    Args:
        canvas: canvas-like object providing ``create_oval``, ``move`` and
            ``coords``.
        color: fill color of the oval.
        canvas_width: x coordinate of the right wall, in pixels.
        canvas_height: height of the playing field, in pixels (stored for
            callers; not used by the ball itself).
    """

    # Speed (pixels per frame) applied after any bounce, and the initial
    # upward speed.
    SPEED = 3

    def __init__(self, canvas, color, canvas_width=500, canvas_height=400):
        self.canvas = canvas
        # Canvas item id of the oval; every later canvas call addresses it.
        self.id = canvas.create_oval(10, 10, 25, 25, fill=color)
        # Shift the freshly created oval to its start position.
        self.canvas.move(self.id, 245, 100)
        # Random horizontal velocity so each episode starts differently.
        self.x = random.choice([-3, -2, -1, 1, 2, 3])
        # The ball always starts moving upwards.
        self.y = -self.SPEED
        # The field size is passed in explicitly: winfo_width()/winfo_height()
        # report 1 until the geometry manager has laid the window out, which
        # has not happened yet when the sprite is constructed.
        self.canvas_width = canvas_width
        self.canvas_height = canvas_height

    def draw(self):
        """Bounce off the top/left/right walls, then advance one frame."""
        left, top, right = self.canvas.coords(self.id)[:3]
        if top <= 0:  # hit the ceiling: head down
            self.y = self.SPEED
        if left <= 0:  # hit the left wall: head right
            self.x = self.SPEED
        if right >= self.canvas_width:  # hit the right wall: head left
            self.x = -self.SPEED
        self.canvas.move(self.id, self.x, self.y)
class Paddle:
    """The paddle sprite: a rectangle canvas item that slides horizontally.

    Args:
        canvas: canvas-like object providing ``create_rectangle``, ``move``
            and ``winfo_width``.
        color: fill color of the rectangle.
    """

    def __init__(self, canvas, color):
        self.canvas = canvas
        # Create the 100x10 rectangle at the origin, then shift it to its
        # start position; the item id is kept for later canvas calls.
        paddle_item = canvas.create_rectangle(0, 0, 100, 10, fill=color)
        self.id = paddle_item
        canvas.move(paddle_item, 200, 300)
        # Horizontal velocity in pixels per frame; 0 means standing still.
        self.x = 0
        # Width reported by the canvas at construction time.
        self.canvas_width = canvas.winfo_width()

    def draw(self):
        """Slide the paddle horizontally by its current velocity."""
        dx = self.x
        self.canvas.move(self.id, dx, 0)
class Ball_Game(tk.Tk, object):
    """Tk window hosting the ball-and-paddle game as an RL-style environment.

    The interface is ``reset()`` -> state and ``step(action)`` /
    ``simulate_step(action)`` -> ``(state, reward, done)``.

    The state is a 7-element numpy vector:
    ``[paddle_cx, paddle_cy, paddle_vx, ball_cx, ball_cy, ball_vx, ball_vy]``.

    Actions: ``0`` moves the paddle right, ``1`` moves it left.
    """

    # Size of the playing field.  The configured size is used everywhere
    # instead of canvas.winfo_height(): that call returns 1 until the window
    # has been laid out by an update(), which never happens during headless
    # training with simulate_step().
    CANVAS_WIDTH = 500
    CANVAS_HEIGHT = 400
    # Paddle displacement per step, in pixels.
    PADDLE_SPEED = 5

    def __init__(self):
        super(Ball_Game, self).__init__()
        # When True, reset() refreshes the window.  Meant to be switched on
        # from outside once training has run long enough to be worth watching.
        self.ok = False
        self.actions = [0, 1]  # 0 = right, 1 = left
        self.n_actions = len(self.actions)
        # paddle centre (2) + paddle velocity (1) + ball centre (2)
        # + ball velocity (2)
        self.n_features = 7
        self.title('Ball_Game')
        self.resizable(0, 0)  # fixed-size window
        self.wm_attributes('-topmost', 1)
        self.canvas = tk.Canvas(self, width=self.CANVAS_WIDTH,
                                height=self.CANVAS_HEIGHT, bd=0)
        self.canvas.pack()
        self.ball = Ball(self.canvas, 'green')
        self.paddle = Paddle(self.canvas, 'blue')

    def coords2xy(self, pos):
        """Return the centre ``[x, y]`` of a bounding box ``(x1, y1, x2, y2)``."""
        return [(pos[0] + pos[2]) / 2.0, (pos[1] + pos[3]) / 2.0]

    def reset(self):
        """Recreate ball and paddle at their start positions.

        Returns:
            The initial 7-element state vector.
        """
        if self.ok:
            self.update()
            time.sleep(0.01)
        # Replace the ball item and give it a fresh random horizontal speed.
        self.canvas.delete(self.ball.id)
        self.ball.id = self.canvas.create_oval(10, 10, 25, 25, fill='green')
        self.canvas.move(self.ball.id, 245, 100)
        self.ball.x = random.choice([-3, -2, -1, 1, 2, 3])
        self.ball.y = -3  # the ball always starts moving upwards
        # Replace the paddle item, standing still.
        self.canvas.delete(self.paddle.id)
        self.paddle.id = self.canvas.create_rectangle(0, 0, 100, 10,
                                                      fill='blue')
        self.canvas.move(self.paddle.id, 200, 300)
        self.paddle.x = 0
        return self._observe()[0]

    def hit_paddle(self, paddle_pos, ball_pos):
        """Return True when the ball's bottom edge touches the paddle.

        Args:
            paddle_pos: paddle bounding box ``(x1, y1, x2, y2)``.
            ball_pos: ball bounding box ``(x1, y1, x2, y2)``.
        """
        overlaps_x = (ball_pos[2] >= paddle_pos[0]
                      and ball_pos[0] <= paddle_pos[2])
        bottom_in_paddle = paddle_pos[1] <= ball_pos[3] <= paddle_pos[3]
        return overlaps_x and bottom_in_paddle

    def get_reward(self, paddle_pos, ball_pos):
        """Return minus the Euclidean distance between paddle and ball centres.

        Args:
            paddle_pos: paddle bounding box ``(x1, y1, x2, y2)``.
            ball_pos: ball bounding box ``(x1, y1, x2, y2)``.
        """
        det_x = (paddle_pos[0] + paddle_pos[2]) / 2.0 \
            - (ball_pos[0] + ball_pos[2]) / 2.0
        det_y = (paddle_pos[1] + paddle_pos[3]) / 2.0 \
            - (ball_pos[1] + ball_pos[3]) / 2.0
        return -np.sqrt(det_x ** 2 + det_y ** 2)

    def _advance(self, action):
        """Apply ``action`` to the paddle, then move ball and paddle one frame."""
        paddle_box = self.canvas.coords(self.paddle.id)
        # The paddle only moves while a full step still fits in the field.
        if action == 0 and paddle_box[2] <= self.CANVAS_WIDTH - self.PADDLE_SPEED:
            self.paddle.x = self.PADDLE_SPEED
        elif action == 1 and paddle_box[0] >= self.PADDLE_SPEED:
            self.paddle.x = -self.PADDLE_SPEED
        else:
            self.paddle.x = 0
        self.ball.draw()
        self.paddle.draw()

    def _observe(self):
        """Return ``(state, paddle_box, ball_box)`` for the current frame."""
        paddle_box = self.canvas.coords(self.paddle.id)
        ball_box = self.canvas.coords(self.ball.id)
        state = np.hstack((self.coords2xy(paddle_box), self.paddle.x,
                           self.coords2xy(ball_box), self.ball.x, self.ball.y))
        return state, paddle_box, ball_box

    def _settle(self, paddle_box, ball_box, end_on_hit):
        """Resolve paddle hit / ball lost and return ``(reward, done)``."""
        done = False
        if self.hit_paddle(paddle_pos=paddle_box, ball_pos=ball_box):
            self.ball.y = -3  # bounce the ball back up
            done = end_on_hit
        elif ball_box[3] > self.CANVAS_HEIGHT:
            done = True  # the ball fell below the playing field
        # NOTE(review): the earlier code assigned +10 on a hit and -10 on a
        # miss, then unconditionally overwrote both with the distance reward.
        # The effective behaviour (distance only) is kept; confirm whether
        # terminal bonuses were intended.
        reward = self.get_reward(paddle_box, ball_box)
        return reward, done

    def simulate_step(self, action):
        """Advance one frame without refreshing the window (for training).

        A paddle hit ends the episode here.

        Returns:
            ``(state, reward, done)``; the state is captured before a hit
            reverses the ball's vertical velocity.
        """
        self._advance(action)
        s_, paddle_box, ball_box = self._observe()
        reward, done = self._settle(paddle_box, ball_box, end_on_hit=True)
        return s_, reward, done

    def step(self, action):
        """Advance one frame and refresh the window (for watching the agent).

        Unlike ``simulate_step``, a paddle hit does not end the episode;
        only losing the ball does.

        Returns:
            ``(state, reward, done)``.
        """
        self._advance(action)
        self.update()
        time.sleep(0.01)
        s_, paddle_box, ball_box = self._observe()
        reward, done = self._settle(paddle_box, ball_box, end_on_hit=False)
        return s_, reward, done

    def render(self):
        """Pause briefly, then refresh the window."""
        time.sleep(0.01)
        self.update()
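# Here is a video (hosted on Baidu Cloud) of the result after I trained it with DQN:
# https://pan.baidu.com/s/1ibpB-EwC1OoasKl6tv2dEQ
# I have only just started learning, so the code is a bit messy, hehe.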