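"""Simple value-based tic-tac-toe: on every turn the agent picks, among the reachable next states, the one with the highest state value and plays the move that leads to it."""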
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import tkinter as tk
import time
class Agent():
    """Tabular value-learning player for 3x3 tic-tac-toe.

    A board is a length-9 integer vector (row-major cells 0..8) whose entries
    are 0 for empty, 1 for player 1 and 2 for player 2.  ``value`` stores one
    float per board configuration and is indexed by ``tuple(board)``.

    Attributes are set from the constructor arguments:
    ``index`` (the mark this agent writes), ``alpha`` (step size),
    ``epsilon`` (probability of a random move) and ``gamma`` (discount).
    """

    def __init__(self, _index, Alpha, Epsilon, Gamma):
        self.index = _index
        self.alpha = Alpha
        self.epsilon = Epsilon
        self.gamma = Gamma
        # Board this agent produced with its latest move (empty board at start).
        self.States = np.zeros(9, dtype=np.int8)
        # One value per configuration: 9 cells, 3 possible contents each.
        self.value = np.zeros((3,) * 9)

    def reset(self):
        """Forget the previously produced board (start of a new game)."""
        self.States = np.zeros(9, dtype=np.int8)

    def move(self, state):
        """Place this agent's mark on an empty cell and return the new board.

        With probability ``epsilon`` a random empty cell is used; otherwise
        the cell whose resulting board has the highest stored value is used,
        ties being broken at random.  After either kind of move the value of
        the board remembered in ``self.States`` is nudged towards
        ``gamma * value[new board]`` by a step of size ``alpha``, and the new
        board becomes the remembered one.  ``state`` itself is not modified.
        """
        empty_cells = np.where(state == 0)[0]
        if np.random.binomial(n=1, p=self.epsilon):
            # Exploratory move: any empty cell, uniformly.
            cell = empty_cells[np.random.randint(0, len(empty_cells))]
        else:
            # Greedy move: score the board obtained from each empty cell.
            candidate_values = np.zeros(len(empty_cells))
            for slot, position in enumerate(empty_cells):
                trial_board = state.copy()
                trial_board[position] = self.index
                candidate_values[slot] = self.value[tuple(trial_board)]
            best_slots = np.where(candidate_values == np.max(candidate_values))[0]
            cell = empty_cells[best_slots[np.random.randint(0, len(best_slots))]]

        Outcome = state.copy()
        Outcome[cell] = self.index

        # Temporal-difference update of the previously produced board.
        previous_key = tuple(self.States)
        td_error = self.value[tuple(Outcome)] * self.gamma - self.value[previous_key]
        self.value[previous_key] += (self.alpha * td_error)
        self.States = Outcome.copy()
        return Outcome

    def Judge(self, state):
        """Return the game status of ``state``.

        1 or 2 when that player owns a complete line (player 1 is checked
        first), 3 when the board is full without a line, 0 otherwise.
        """
        # Cell layout:
        # 0 1 2
        # 3 4 5
        # 6 7 8
        lines = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
            (0, 4, 8), (2, 4, 6),             # diagonals
        )
        for player in (1, 2):
            for a, b, c in lines:
                if state[a] == player and state[b] == player and state[c] == player:
                    return player
        if 0 not in state:
            return 3
        return 0
def is_empty_file(file_path):
    """Return True when ``file_path`` holds no data.

    A file that does not exist is reported as empty as well, so that a caller
    deciding whether a saved model can be loaded falls back to "nothing to
    load" on a first run instead of crashing with ``FileNotFoundError``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the file is missing or has size 0, False otherwise.
    """
    try:
        return os.stat(file_path).st_size == 0
    except FileNotFoundError:
        # No file yet: there is nothing to read from it.
        return True
# main
# Mode 0 trains fresh agents, mode 1 keeps training previously saved agents,
# mode 2 plays against a previously saved agent.
mode = int(input("请选择模式:0 训练模式 1 记忆模式 2 对弈模式\n"))

# Saved models are reused only in modes 1 and 2, and only when both model
# files contain data; the files are checked in order and checking stops at
# the first empty one.
reuse_saved = mode in (1, 2) and not any(
    is_empty_file(path) for path in ("./model1.pkl", "./model2.pkl")
)

if reuse_saved:
    # NOTE: pickle.load can execute code embedded in the file; only load
    # model files written by this script.
    loaded = []
    for path in ("./model1.pkl", "./model2.pkl"):
        with open(path, "rb") as fh:
            loaded.append(pickle.load(fh))
    agent1, agent2 = loaded
else:
    # Fresh agents: agent1 writes mark 1, agent2 writes mark 2.
    agent1, agent2 = (
        Agent(_index=k, Alpha=0.1, Epsilon=0.1, Gamma=1) for k in (1, 2)
    )
if mode == 0 or mode == 1:
    # Self-play training: agent1 always moves first, agent2 second.
    trial = 30000
    Winners = np.zeros(trial, dtype=np.int8)  # per game: 1, 2 or 3 (draw)
    for i in range(trial):
        agent1.reset()
        agent2.reset()
        # From game 20000 on both agents stop exploring and play greedily.
        if i == 20000:
            for learner in (agent1, agent2):
                learner.epsilon = 0
        winner = 0
        State = np.zeros(9, dtype=np.int8)
        while winner == 0:
            Outcome = agent1.move(State)
            winner = agent1.Judge(Outcome)
            if winner == agent1.index:
                reward1, reward2 = 1, -1
            elif winner == 3:
                reward1 = reward2 = 0.5
            else:
                State = agent2.move(Outcome)
                winner = agent2.Judge(State)
                if winner == agent2.index:
                    reward1, reward2 = -1, 1
                elif winner == 3:
                    reward1 = reward2 = 0.5
                else:
                    # No terminal result yet: nothing to write this round.
                    continue
            # Terminal result: pin the value of the last board each agent
            # produced (Outcome for agent1, State for agent2).
            agent1.value[tuple(Outcome)] = reward1
            agent2.value[tuple(State)] = reward2
        Winners[i] = winner

    # Persist both agents; a failure is reported but does not stop the run.
    # (The mode check the original repeated here is always true inside this
    # branch.)
    try:
        for path, trained in (("./model1.pkl", agent1), ("./model2.pkl", agent2)):
            with open(path, "wb") as fh:
                pickle.dump(trained, fh)
    except Exception as e:
        print(f"Error saving models:{e}")

    # Sliding-window statistics: windows of `duration` games, shifted by
    # `step` games.
    step = 250
    duration = 500

    def Rate(Winner):
        """Return per-window shares of results 1, 2 and 3 in ``Winner``.

        Window k covers games ``step*k`` up to ``duration + step*k``
        (exclusive); each share is the count of that result divided by
        ``duration``.  Uses the module-level ``trial``, ``step`` and
        ``duration``.
        """
        n_windows = int((trial - duration) / step) + 1
        rates = [np.zeros(n_windows) for _ in range(3)]
        for k in range(n_windows):
            window = Winner[step * k:duration + step * k]
            for code, rate in enumerate(rates, start=1):
                rate[k] = np.sum(window == code) / duration
        return tuple(rates)

    Rate1, Rate2, Rate3 = Rate(Winners)
    plt.figure(figsize=(10, 6))
    for series, caption in (
        (Rate1, "Rate 1 (Player 1)"),
        (Rate2, "Rate 2 (Player 2)"),
        (Rate3, "Rate 3 (Draw)"),
    ):
        plt.plot(series, marker='.', label=caption)
    # Window index 40 corresponds to game 40 * step = 10000, so the x labels
    # 0, 10, 20, 30 read as thousands of games.
    plt.xticks(np.arange(0, 121, 40), np.arange(0, 32, 10), fontsize=30)
    y_ticks = np.arange(0, 1.1, 0.2)
    plt.yticks(y_ticks, np.round(y_ticks, 2), fontsize=30)
    # Add title, axis labels and legend.
    plt.title("Winning Rate of Players Over Time")
    plt.xlabel("Step")
    plt.ylabel("Winning Rate")
    plt.legend()
    plt.grid(True)
    # Show the figure.
    plt.show()
else:
def play_game():
global canvas
board=[["" for _ in range(3)] for _ in range(3)]
current_sign=["X"]
AI_code=0
Player_code=0
if np.random.binomial(1,p=0.5):#AI先手
AI_code=1
Player_code=2
current_player=[1]
agent=agent1
else:
AI_code=2
Player_code=1
current_player=[1]
agent=agent2
State=np.zeros(9).astype(np.int8)
canvas.delete("all")
for i in range(1,3):
canvas.create_line(i*100,0,i*100,300,width=3)
canvas.create_line(0,i*100,300,i*100,width=3)
def draw(x,y,player):
if player=="X":
canvas.create_line(x-30,y-30,x+30,y+30,width=3,fill="red")
canvas.create_line(x+30,y-30,x-30,y+30,width=3,fill="red")
else:
canvas.create_oval(x-30,y-30,x+30,y+30,width=3,outline="blue")
current_sign[0]="O" if current_sign[0]=='X' else "X"
def on_click(event):
if current_player[0]==AI_code:
return None
row,col=event.y//100,event.x//100
if board[row][col]=="":
board[row][col]=current_sign[0]
current_player[0]=AI_code
State[row*3+col]=Player_code
x,y=col*100+50,row*100+50
draw(x,y,current_sign[0])
# 检查玩家是否获胜
winner=agent.Judge(State)
if winner:
end_game(winner)
return
root.after(500, ai_move) # 让 AI 0.5 秒后行动
def check_ai_turn():
#"""检测是否轮到 AI 走棋"""
if current_player[0] == AI_code:
root.after(500, ai_move) # AI 延迟 0.5 秒落子
def ai_move():
if current_player[0] == Player_code:
return # 轮到玩家,AI 不能动
Outcome = agent.move(State)
mask = np.where(Outcome != State)[0]
if len(mask) > 0:
x = mask[0] % 3 * 100 + 50
y = mask[0] // 3 * 100 + 50
draw(x, y, current_sign[0])
# 检查 AI 是否获胜
winner=agent.Judge(Outcome)
if winner:
end_game(winner)
return
State[:] = Outcome # 更新游戏状态
current_player[0] = Player_code # 轮到玩家
def end_game(winner):
if winner == Player_code:
label.config(text="玩家win!")
elif winner == AI_code:
label.config(text="AI win!")
else:
label.config(text="Draw!")
canvas.bind("<Button-1>",on_click)
check_ai_turn()
root=tk.Tk()
root.geometry("500x500")
canvas=tk.Canvas(root,width=300,height=300,bg="white")
canvas.pack()
root.title("AI")
label = tk.Label(root, text="Train", font=("Ubuntu", 20))
label.pack(pady=20)
button = tk.Button(root, text="Start", command=play_game, font=("Ubuntu", 20))
button.pack(pady=10)
reset_button = tk.Button(root, text="Restart", command=play_game, font=("Ubuntu", 20))
reset_button.pack(pady=10)
root.mainloop()