Deep Q Network是DeepMind最早(2013年)提出来的,是深度强化学习方法。
最开始AI什么也不会,通过给它提供游戏界面像素和分数,慢慢把它训练成游戏高手。
Github上有不少DQN实现,在本帖中,我使用TensorFlow训练一个简单的游戏AI。
- 使用pygame写一个简单的小游戏
- 使用强化学习训练游戏AI
pygame小游戏
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
import
pygame
from
pygame
.
locals
import
*
import
sys
BLACK
=
(
0
,
0
,
0
)
WHITE
=
(
255
,
255
,
255
)
SCREEN_SIZE
=
[
320
,
400
]
BAR_SIZE
=
[
20
,
5
]
BALL_SIZE
=
[
15
,
15
]
class
Game
(
object
)
:
def
__init__
(
self
)
:
pygame
.
init
(
)
self
.
clock
=
pygame
.
time
.
Clock
(
)
self
.
screen
=
pygame
.
display
.
set_mode
(
SCREEN_SIZE
)
pygame
.
display
.
set_caption
(
'Simple Game'
)
self
.
ball_pos_x
=
SCREEN_SIZE
[
0
]
/
/
2
-
BALL_SIZE
[
0
]
/
2
self
.
ball_pos_y
=
SCREEN_SIZE
[
1
]
/
/
2
-
BALL_SIZE
[
1
]
/
2
# ball移动方向
self
.
ball_dir_x
=
-
1
# -1 = left 1 = right
self
.
ball_dir_y
=
-
1
# -1 = up 1 = down
self
.
ball_pos
=
pygame
.
Rect
(
self
.
ball_pos_x
,
self
.
ball_pos_y
,
BALL_SIZE
[
0
]
,
BALL_SIZE
[
1
]
)
self
.
score
=
0
self
.
bar_pos_x
=
SCREEN_SIZE
[
0
]
/
/
2
-
BAR_SIZE
[
0
]
/
/
2
self
.
bar_pos
=
pygame
.
Rect
(
self
.
bar_pos_x
,
SCREEN_SIZE
[
1
]
-
BAR_SIZE
[
1
]
,
BAR_SIZE
[
0
]
,
BAR_SIZE
[
1
]
)
def
bar_move_left
(
self
)
:
self
.
bar_pos_x
=
self
.
bar_pos_x
-
2
def
bar_move_right
(
self
)
:
self
.
bar_pos_x
=
self
.
bar_pos_x
+
2
def
run
(
self
)
:
pygame
.
mouse
.
set_visible
(
0
)
# make cursor invisible
bar_move_left
=
False
bar_move_right
=
False
while
True
:
for
event
in
pygame
.
event
.
get
(
)
:
if
event
.
type
==
QUIT
:
pygame
.
quit
(
)
sys
.
exit
(
)
elif
event
.
type
==
pygame
.
MOUSEBUTTONDOWN
and
event
.
button
==
1
:
# 鼠标左键按下(左移)
bar_move_left
=
True
elif
event
.
type
==
pygame
.
MOUSEBUTTONUP
and
event
.
button
==
1
:
# 鼠标左键释放
bar_move_left
=
False
elif
event
.
type
==
pygame
.
MOUSEBUTTONDOWN
and
event
.
button
==
3
:
#右键
bar_move_right
=
True
elif
event
.
type
==
pygame
.
MOUSEBUTTONUP
and
event
.
button
==
3
:
bar_move_right
=
False
if
bar_move_left
==
True
and
bar_move_right
==
False
:
self
.
bar_move_left
(
)
if
bar_move_left
==
False
and
bar_move_right
==
True
:
self
.
bar_move_right
(
)
self
.
screen
.
fill
(
BLACK
)
self
.
bar_pos
.
left
=
self
.
bar_pos_x
pygame
.
draw
.
rect
(
self
.
screen
,
WHITE
,
self
.
bar_pos
)
self
.
ball_pos
.
left
+=
self
.
ball_dir_x
*
2
self
.
ball_pos
.
bottom
+=
self
.
ball_dir_y
*
3
pygame
.
draw
.
rect
(
self
.
screen
,
WHITE
,
self
.
ball_pos
)
if
self
.
ball_pos
.
top
<=
0
or
self
.
ball_pos
.
bottom
>=
(
SCREEN_SIZE
[
1
]
-
BAR_SIZE
[
1
]
+
1
)
:
self
.
ball_dir_y
=
self
.
ball_dir_y
*
-
1
if
self
.
ball_pos
.
left
<=
0
or
self
.
ball_pos
.
right
>=
(
SCREEN_SIZE
[
0
]
)
:
self
.
ball_dir_x
=
self
.
ball_dir_x
*
-
1
if
self
.
bar_pos
.
top
<=
self
.
ball_pos
.
bottom
and
(
self
.
bar_pos
.
left
<
self
.
ball_pos
.
right
and
self
.
bar_pos
.
right
>
self
.
ball_pos
.
left
)
:
self
.
score
+=
1
print
(
"Score: "
,
self
.
score
,
end
=
'\r'
)
elif
self
.
bar_pos
.
top
<=
self
.
ball_pos
.
bottom
and
(
self
.
bar_pos
.
left
>
self
.
ball_pos
.
right
or
self
.
bar_pos
.
right
<
self
.
ball_pos
.
left
)
:
print
(
"Game Over: "
,
self
.
score
)
return
self
.
score
pygame
.
display
.
update
(
)
self
.
clock
.
tick
(
60
)
game
=
Game
(
)
game
.
run
(
)
|
自制的垃圾游戏。
操作:按住鼠标左键左移棒子,按住鼠标右键右移棒子。每次接住小方块得一分。

ps. 其实我想做一个俄罗斯方块,呵呵呵呵,留着以后再做。
基于强化学习的AI(TensorFlow)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
import
pygame
import
random
from
pygame
.
locals
import
*
import
numpy
as
np
from
collections
import
deque
import
tensorflow
as
tf
# http://blog.topspeedsnail.com/archives/10116
import
cv2
# http://blog.topspeedsnail.com/archives/4755
BLACK
=
(
0
,
0
,
0
)
WHITE
=
(
255
,
255
,
255
)
SCREEN_SIZE
=
[
320
,
400
]
BAR_SIZE
=
[
50
,
5
]
BALL_SIZE
=
[
15
,
15
]
# 神经网络的输出
MOVE_STAY
=
[
1
,
0
,
0
]
MOVE_LEFT
=
[
0
,
1
,
0
]
MOVE_RIGHT
=
[
0
,
0
,
1
]
class
Game
(
object
)
:
def
__init__
(
self
)
:
pygame
.
init
(
)
self
.
clock
=
pygame
.
time
.
Clock
(
)
self
.
screen
=
pygame
.
display
.
set_mode
(
SCREEN_SIZE
)
pygame
.
display
.
set_caption
(
'Simple Game'
)
self
.
ball_pos_x
=
SCREEN_SIZE
[
0
]
/
/
2
-
BALL_SIZE
[
0
]
/
2
self
.
ball_pos_y
=
SCREEN_SIZE
[
1
]
/
/
2
-
BALL_SIZE
[
1
]
/
2
self
.
ball_dir_x
=
-
1
# -1 = left 1 = right
self
.
ball_dir_y
=
-
1
# -1 = up 1 = down
self
.
ball_pos
=
pygame
.
Rect
(
self
.
ball_pos_x
,
self
.
ball_pos_y
,
BALL_SIZE
[
0
]
,
BALL_SIZE
[
1
]
)
self
.
bar_pos_x
=
SCREEN_SIZE
[
0
]
/
/
2
-
BAR_SIZE
[
0
]
/
/
2
self
.
bar_pos
=
pygame
.
Rect
(
self
.
bar_pos_x
,
SCREEN_SIZE
[
1
]
-
BAR_SIZE
[
1
]
,
BAR_SIZE
[
0
]
,
BAR_SIZE
[
1
]
)
# action是MOVE_STAY、MOVE_LEFT、MOVE_RIGHT
# ai控制棒子左右移动;返回游戏界面像素数和对应的奖励。(像素->奖励->强化棒子往奖励高的方向移动)
def
step
(
self
,
action
)
:
if
action
==
MOVE_LEFT
:
self
.
bar_pos_x
=
self
.
bar_pos_x
-
2
elif
action
==
MOVE_RIGHT
:
self
.
bar_pos_x
=
self
.
bar_pos_x
+
2
else
:
pass
if
self
.
bar_pos_x
<
0
:
self
.
bar_pos_x
=
0
if
self
.
bar_pos_x
>
SCREEN_SIZE
[
0
]
-
BAR_SIZE
[
0
]
:
self
.
bar_pos_x
=
SCREEN_SIZE
[
0
]
-
BAR_SIZE
[
0
]
self
.
screen
.
fill
(
BLACK
)
self
.
bar_pos
.
left
=
self
.
bar_pos_x
pygame
.
draw
.
rect
(
self
.
screen
,
WHITE
,
self
.
bar_pos
)
self
.
ball_pos
.
left
+=
self
.
ball_dir_x
*
2
self
.
ball_pos
.
bottom
+=
self
.
ball_dir_y
*
3
pygame
.
draw
.
rect
(
self
.
screen
,
WHITE
,
self
.
ball_pos
)
if
self
.
ball_pos
.
top
<=
0
or
self
.
ball_pos
.
bottom
>=
(
SCREEN_SIZE
[
1
]
-
BAR_SIZE
[
1
]
+
1
)
:
self
.
ball_dir_y
=
self
.
ball_dir_y
*
-
1
if
self
.
ball_pos
.
left
<=
0
or
self
.
ball_pos
.
right
>=
(
SCREEN_SIZE
[
0
]
)
:
self
.
ball_dir_x
=
self
.
ball_dir_x
*
-
1
reward
=
0
if
self
.
bar_pos
.
top
<=
self
.
ball_pos
.
bottom
and
(
self
.
bar_pos
.
left
<
self
.
ball_pos
.
right
and
self
.
bar_pos
.
right
>
self
.
ball_pos
.
left
)
:
reward
=
1
# 击中奖励
elif
self
.
bar_pos
.
top
<=
self
.
ball_pos
.
bottom
and
(
self
.
bar_pos
.
left
>
self
.
ball_pos
.
right
or
self
.
bar_pos
.
right
<
self
.
ball_pos
.
left
)
:
reward
=
-
1
# 没击中惩罚
# 获得游戏界面像素
screen_image
=
pygame
.
surfarray
.
array3d
(
pygame
.
display
.
get_surface
(
)
)
pygame
.
display
.
update
(
)
# 返回游戏界面像素和对应的奖励
return
reward
,
screen_image
# learning_rate
LEARNING_RATE
=
0.99
# 更新梯度
INITIAL_EPSILON
=
1.0
FINAL_EPSILON
=
0.05
# 测试观测次数
EXPLORE
=
500000
OBSERVE
=
50000
# 存储过往经验大小
REPLAY_MEMORY
=
500000
BATCH
=
100
output
=
3
# 输出层神经元数。代表3种操作-MOVE_STAY:[1, 0, 0] MOVE_LEFT:[0, 1, 0] MOVE_RIGHT:[0, 0, 1]
input_image
=
tf
.
placeholder
(
"float"
,
[
None
,
80
,
100
,
4
]
)
# 游戏像素
action
=
tf
.
placeholder
(
"float"
,
[
None
,
output
]
)
# 操作
# 定义CNN-卷积神经网络 参考:http://blog.topspeedsnail.com/archives/10451
def
convolutional_neural_network
(
input_image
)
:
weights
=
{
'w_conv1'
:
tf
.
Variable
(
tf
.
zeros
(
[
8
,
8
,
4
,
32
]
)
)
,
'w_conv2'
:
tf
.
Variable
(
tf
.
zeros
(
[
4
,
4
,
32
,
64
]
)
)
,
'w_conv3'
:
tf
.
Variable
(
tf
.
zeros
(
[
3
,
3
,
64
,
64
]
)
)
,
'w_fc4'
:
tf
.
Variable
(
tf
.
zeros
(
[
3456
,
784
]
)
)
,
'w_out'
:
tf
.
Variable
(
tf
.
zeros
(
[
784
,
output
]
)
)
}
biases
=
{
'b_conv1'
:
tf
.
Variable
(
tf
.
zeros
(
[
32
]
)
)
,
'b_conv2'
:
tf
.
Variable
(
tf
.
zeros
(
[
64
]
)
)
,
'b_conv3'
:
tf
.
Variable
(
tf
.
zeros
(
[
64
]
)
)
,
'b_fc4'
:
tf
.
Variable
(
tf
.
zeros
(
[
784
]
)
)
,
'b_out'
:
tf
.
Variable
(
tf
.
zeros
(
[
output
]
)
)
}
conv1
=
tf
.
nn
.
relu
(
tf
.
nn
.
conv2d
(
input_image
,
weights
[
'w_conv1'
]
,
strides
=
[
1
,
4
,
4
,
1
]
,
padding
=
"VALID"
)
+
biases
[
'b_conv1'
]
)
conv2
=
tf
.
nn
.
relu
(
tf
.
nn
.
conv2d
(
conv1
,
weights
[
'w_conv2'
]
,
strides
=
[
1
,
2
,
2
,
1
]
,
padding
=
"VALID"
)
+
biases
[
'b_conv2'
]
)
conv3
=
tf
.
nn
.
relu
(
tf
.
nn
.
conv2d
(
conv2
,
weights
[
'w_conv3'
]
,
strides
=
[
1
,
1
,
1
,
1
]
,
padding
=
"VALID"
)
+
biases
[
'b_conv3'
]
)
conv3_flat
=
tf
.
reshape
(
conv3
,
[
-
1
,
3456
]
)
fc4
=
tf
.
nn
.
relu
(
tf
.
matmul
(
conv3_flat
,
weights
[
'w_fc4'
]
)
+
biases
[
'b_fc4'
]
)
output_layer
=
tf
.
matmul
(
fc4
,
weights
[
'w_out'
]
)
+
biases
[
'b_out'
]
return
output_layer
# 深度强化学习入门: https://www.nervanasys.com/demystifying-deep-reinforcement-learning/
# 训练神经网络
def
train_neural_network
(
input_image
)
:
predict_action
=
convolutional_neural_network
(
input_image
)
argmax
=
tf
.
placeholder
(
"float"
,
[
None
,
output
]
)
gt
=
tf
.
placeholder
(
"float"
,
[
None
]
)
action
=
tf
.
reduce_sum
(
tf
.
mul
(
predict_action
,
argmax
)
,
reduction_indices
=
1
)
cost
=
tf
.
reduce_mean
(
tf
.
square
(
action
-
gt
)
)
optimizer
=
tf
.
train
.
AdamOptimizer
(
1e
-
6
)
.
minimize
(
cost
)
game
=
Game
(
)
D
=
deque
(
)
_
,
image
=
game
.
step
(
MOVE_STAY
)
# 转换为灰度值
image
=
cv2
.
cvtColor
(
cv2
.
resize
(
image
,
(
100
,
80
)
)
,
cv2
.
COLOR_BGR2GRAY
)
# 转换为二值
ret
,
image
=
cv2
.
threshold
(
image
,
1
,
255
,
cv2
.
THRESH_BINARY
)
input_image_data
=
np
.
stack
(
(
image
,
image
,
image
,
image
)
,
axis
=
2
)
with
tf
.
Session
(
)
as
sess
:
sess
.
run
(
tf
.
initialize_all_variables
(
)
)
saver
=
tf
.
train
.
Saver
(
)
n
=
0
epsilon
=
INITIAL_EPSILON
while
True
:
action_t
=
predict_action
.
eval
(
feed_dict
=
{
input_image
:
[
input_image_data
]
}
)
[
0
]
argmax_t
=
np
.
zeros
(
[
output
]
,
dtype
=
np
.
int
)
if
(
random
.
random
(
)
<=
INITIAL_EPSILON
)
:
maxIndex
=
random
.
randrange
(
output
)
else
:
maxIndex
=
np
.
argmax
(
action_t
)
argmax_t
[
maxIndex
]
=
1
if
epsilon
>
FINAL_EPSILON
:
epsilon
-=
(
INITIAL_EPSILON
-
FINAL_EPSILON
)
/
EXPLORE
#for event in pygame.event.get(): macOS需要事件循环,否则白屏
# if event.type == QUIT:
# pygame.quit()
# sys.exit()
reward
,
image
=
game
.
step
(
list
(
argmax_t
)
)
image
=
cv2
.
cvtColor
(
cv2
.
resize
(
image
,
(
100
,
80
)
)
,
cv2
.
COLOR_BGR2GRAY
)
ret
,
image
=
cv2
.
threshold
(
image
,
1
,
255
,
cv2
.
THRESH_BINARY
)
image
=
np
.
reshape
(
image
,
(
80
,
100
,
1
)
)
input_image_data1
=
np
.
append
(
image
,
input_image_data
[
:
,
:
,
0
:
3
]
,
axis
=
2
)
D
.
append
(
(
input_image_data
,
argmax_t
,
reward
,
input_image_data1
)
)
if
len
(
D
)
>
REPLAY_MEMORY
:
D
.
popleft
(
)
if
n
>
OBSERVE
:
minibatch
=
random
.
sample
(
D
,
BATCH
)
input_image_data_batch
=
[
d
[
0
]
for
d
in
minibatch
]
argmax_batch
=
[
d
[
1
]
for
d
in
minibatch
]
reward_batch
=
[
d
[
2
]
for
d
in
minibatch
]
input_image_data1_batch
=
[
d
[
3
]
for
d
in
minibatch
]
gt_batch
=
[
]
out_batch
=
predict_action
.
eval
(
feed_dict
=
{
input_image
:
input_image_data1_batch
}
)
for
i
in
range
(
0
,
len
(
minibatch
)
)
:
gt_batch
.
append
(
reward_batch
[
i
]
+
LEARNING_RATE
*
np
.
max
(
out_batch
[
i
]
)
)
optimizer
.
run
(
feed_dict
=
{
gt
:
gt_batch
,
argmax
:
argmax_batch
,
input_image
:
input_image_data_batch
}
)
input_image_data
=
input_image
_data1
n
=
n
+
1
if
n
%
10000
==
0
:
saver
.
save
(
sess
,
'game.cpk'
,
global_step
=
n
)
# 保存模型
print
(
n
,
"epsilon:"
,
epsilon
,
" "
,
"action:"
,
maxIndex
,
" "
,
"reward:"
,
reward
)
train_neural_network
(
input_image
)
|
训练中:
如果你使用Linux,你可以使用htop监控内存使用情况。
刚开始,AI傻傻的,只会控制棒子来回瞎晃,通过try-error,它会慢慢掌握这个游戏。等我一觉醒来,这货已经玩的不亦乐乎了。
ps.准备换一个顶级显卡,CPU玩tensorflow太费劲,看来非游戏玩家也有必要买好显卡。
使用训练出来AI玩游戏
这步要做的就是加载使用前面保存的模型。
上面是自己手动实现的强化学习算法,其实有一个特别好的专门为开发测试AI而设计的库openai gym。OpenAI Gym是一个为比较、构建强化学习Ai的一个Python库,它包含很多测试游戏。
参考:https://www.nervanasys.com/openai/
- OpenAI文档:https://gym.openai.com/docs
- OpenAI源代码:https://github.com/openai/gym

安装Gym
1
2
3
4
5
6
7
|
$
git
clone
https
:
/
/
github
.
com
/
openai
/
gym
$
cd
gym
# 安装依赖
#$ brew install cmake boost boost-python sdl2 swig wget # macOS python2
# brew install boost-python --with-python3 # python3
#$ sudo apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl libboost-all-dev libsdl2-dev swig # Ubuntu
$
pip
install
gym
[
all
]
|