alpha-beta 剪枝实现棋类AI ——Tic-Tac-Toe

最新推荐文章于 2023-01-05 11:31:51 发布

原创最新推荐文章于 2023-01-05 11:31:51 发布 · 1.8k 阅读

1 ·

CC 4.0 BY-SA版权

我们不生产知识，我们只是互联网的搬运工

编程语言同时被 2 个专栏收录

240 篇文章

订阅专栏

数据结构与算法

25 篇文章

订阅专栏

本文介绍了一个使用Python实现的井字棋游戏，其中包含抽象类棋子Piece、棋盘Board的设计，以及AI算法minimax和alpha-beta剪枝策略的详细解释与实现。通过这些算法，AI能够找到最优策略并进行游戏。

文章目录

定义抽象类
- 棋子 Piece、棋盘 Board
继承抽象类
AI 算法
来一局!

python 版本 3.7 喔！

定义抽象类

棋子 Piece、棋盘 Board

from __future__ import annotations
from typing import NewType, List
from abc import ABC, abstractmethod

Move = NewType('Move', int)


class Piece:
    """Base type for a game piece; concrete games supply their own pieces."""

    @property
    def opposite(self) -> Piece:
        """Return the piece belonging to the other side.

        Raises:
            NotImplementedError: always, in this base class.
        """
        message = "Should be implemented by subclasses."
        raise NotImplementedError(message)


        
class Board(ABC):
    """Abstract interface for the state of a two-player board game."""

    @property
    @abstractmethod
    def turn(self) -> Piece:
        """Piece of the side whose turn it is."""

    @abstractmethod
    def move(self, location: Move) -> Board:
        """Return the board that results from playing at ``location``."""

    @property
    @abstractmethod
    def legal_moves(self) -> List[Move]:
        """Moves that may be played from this position."""

    @property
    @abstractmethod
    def is_win(self) -> bool:
        """Whether this position is a win."""

    @property
    def is_draw(self) -> bool:
        """Whether the position is not a win and has no legal moves left."""
        if self.is_win:
            return False
        return len(self.legal_moves) == 0

    @abstractmethod
    def evaluate(self, player: Piece) -> float:
        """Score this position from the point of view of ``player``."""

继承抽象类

from typing import List
from enum import Enum


class TTTPiece(Piece, Enum):
    """Contents of one tic-tac-toe square: a player's mark or empty."""

    X = "X"
    O = "O"
    E = " "  # placeholder for an empty square

    @property
    def opposite(self) -> TTTPiece:
        """Return the other player's mark; the empty marker maps to itself."""
        if self is TTTPiece.X:
            return TTTPiece.O
        if self is TTTPiece.O:
            return TTTPiece.X
        return TTTPiece.E

    def __str__(self) -> str:
        """Render the piece as its one-character value."""
        return self.value

class TTTBoard(Board):
    """Tic-tac-toe position: nine squares plus the side to move.

    Squares are indexed 0-8 in row-major order (0-2 top row, 3-5 middle row,
    6-8 bottom row). ``move`` never modifies the board it is called on; it
    returns a new board instead.
    """

    # Index triples that make a line: three rows, three columns, two diagonals.
    _WINNING_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self, position: List[TTTPiece] | None = None, turn: TTTPiece = TTTPiece.X) -> None:
        """Create a board.

        Args:
            position: The nine squares. When omitted, a fresh empty board is
                created. A list that is passed in is stored as-is, not copied.
            turn: The piece that moves next.
        """
        # A list literal used as the default would be built once and shared by
        # every board created without arguments, so build a new one per call.
        if position is None:
            position = [TTTPiece.E] * 9
        self.position: List[TTTPiece] = position
        self._turn: TTTPiece = turn

    @property
    def turn(self) -> Piece:
        """Piece of the side to move."""
        return self._turn

    def move(self, location: Move) -> Board:
        """Return a new board with the mover's piece placed at ``location``.

        The square is not checked for being empty, and the turn passes to the
        opposite piece on the returned board.
        """
        next_position: List[TTTPiece] = self.position.copy()
        next_position[location] = self._turn
        return TTTBoard(next_position, self._turn.opposite)

    @property
    def legal_moves(self) -> List[Move]:
        """Indices of all empty squares, in ascending order."""
        return [Move(index) for index, piece in enumerate(self.position) if piece == TTTPiece.E]

    @property
    def is_win(self) -> bool:
        """Whether any row, column or diagonal holds three equal non-empty pieces."""
        squares = self.position
        return any(
            squares[a] != TTTPiece.E and squares[a] == squares[b] == squares[c]
            for a, b, c in self._WINNING_LINES
        )

    def evaluate(self, player: Piece) -> float:
        """Score the position for ``player``: 1 for a win, -1 for a loss, else 0.

        ``move`` hands the turn to the other side, so on a won board the side
        to move is the one that lost.
        """
        if not self.is_win:
            return 0
        return -1 if self.turn == player else 1

    def __repr__(self) -> str:
        """Draw the board as three ``|``-separated rows divided by ``-----`` lines."""
        return f"""{self.position[0]}|{self.position[1]}|{self.position[2]}
-----
{self.position[3]}|{self.position[4]}|{self.position[5]}
-----
{self.position[6]}|{self.position[7]}|{self.position[8]}"""

AI 算法

minimax

原理：

如果已知整个格局树，我方为方形节点，对手为三角节点，节点之间的边代表一个动作，最后一层为终局收益

那么两个头脑正常的玩家会这样对局：我方会选择使自己终局收益最大的动作，对手会选择使我方收益最小的动作

在这里插入图片描述

# Find the best possible outcome for original player
def minimax(board: Board, maximizing: bool, original_player: Piece, max_depth: int = 8) -> float:
    """Return the best outcome ``original_player`` can reach from ``board``.

    Args:
        board: Position to search from.
        maximizing: True when the side to move picks the highest score,
            False when it picks the lowest.
        original_player: Piece whose point of view positions are scored from.
        max_depth: Number of further plies to search before scoring a
            position as it stands.
    """
    # Stop at a finished game or when the search depth is used up.
    if board.is_win or board.is_draw or max_depth == 0:
        return board.evaluate(original_player)

    # Both branches differ only in the selector and its starting value.
    select = max if maximizing else min
    outcome: float = float("-inf") if maximizing else float("inf")
    for candidate in board.legal_moves:
        score: float = minimax(board.move(candidate), not maximizing, original_player, max_depth - 1)
        outcome = select(score, outcome)
    return outcome

alpha-beta 剪枝

原理：http://blog.codinglabs.org/articles/2048-ai-analysis.html
总结：

深度优先
alpha 为收益下界，beta 为收益上界
父节点先将 alpha-beta 值传给子节点，开始时 alpha 初始化为负无穷，beta初始化为正无穷
我方（max）节点的 alpha 值等于子结点 beta 值的最大值，即子节点上界的上界，因为我方要收益最大化
对手（min）节点的 beta 值等于子结点 alpha 值的最小值，即子节点下界的下界，因为对手要坑我
如果节点的 alpha 值大于等于 beta 值，则对该节点剪枝，不再搜索它的其余子节点


def alphabeta(board: Board, maximizing: bool, original_player: Piece, max_depth: int = 8, alpha: float = float("-inf"), beta: float = float("inf")) -> float:
    """Search ``board`` like minimax, skipping moves outside the alpha-beta window.

    Args:
        board: Position to search from.
        maximizing: True when the side to move raises ``alpha``, False when
            it lowers ``beta``.
        original_player: Piece whose point of view positions are scored from.
        max_depth: Number of further plies to search before scoring a
            position as it stands.
        alpha: Best score found so far for the maximizing side.
        beta: Best score found so far for the minimizing side.

    Returns:
        ``alpha`` at a maximizing node and ``beta`` at a minimizing node,
        after the loop ends or is cut off.
    """
    # Stop at a finished game or when the search depth is used up.
    if board.is_win or board.is_draw or max_depth == 0:
        return board.evaluate(original_player)

    plies_left = max_depth - 1

    if not maximizing:
        for candidate in board.legal_moves:
            score: float = alphabeta(board.move(candidate), True, original_player, plies_left, alpha, beta)
            beta = min(score, beta)
            if beta <= alpha:
                # The window has closed, so the remaining moves are not searched.
                return beta
        return beta

    for candidate in board.legal_moves:
        score = alphabeta(board.move(candidate), False, original_player, plies_left, alpha, beta)
        alpha = max(score, alpha)
        if beta <= alpha:
            # The window has closed, so the remaining moves are not searched.
            return alpha
    return alpha

基于 ab剪枝寻找最优策略

最大化子结点的 beta 值

# Find the best possible move in the current position
# looking up to max_depth ahead
def find_best_move(board: Board, max_depth: int = 8) -> Move:
    """Pick the move with the highest alpha-beta score for the side to move.

    Each legal move is played and the resulting position is searched with
    ``alphabeta`` from the opponent's (minimizing) turn, looking up to
    ``max_depth`` plies ahead. The first move with the strictly highest score
    wins ties. ``Move(-1)`` is returned when no move scores above negative
    infinity, which includes the case of no legal moves.
    """
    chosen: Move = Move(-1)
    chosen_score: float = float("-inf")
    for candidate in board.legal_moves:
        score: float = alphabeta(board.move(candidate), False, board.turn, max_depth)
        if score > chosen_score:
            chosen, chosen_score = candidate, score
    return chosen

来一局!

# Module-level game state: created with TTTBoard's defaults and rebound to the
# new board after every move in the game loop below.
board: Board = TTTBoard()


def get_player_move() -> Move:
    """Prompt on stdin until the user enters a legal move for the global ``board``.

    Non-integer input prints a message and prompts again; an integer that is
    not currently a legal move prompts again silently.
    """
    choice: Move = Move(-1)
    while choice not in board.legal_moves:
        try:
            choice = Move(int(input("Enter a legal square (0-8):")))
        except ValueError:
            print('Invalid input, enter again!')
            # Fall back to the sentinel so the loop condition asks again.
            choice = Move(-1)
    return choice



# Game loop: the human moves, then the computer replies. The loop ends as soon
# as either move produces a win or a draw.
while True:
    human_move: Move = get_player_move()
    # To let the AI play both sides, use this line instead of the one above:
    #     human_move: Move = find_best_move(board)
    board = board.move(human_move)
    # The win/draw checks run on the board right after the human's move.
    if board.is_win:
        print("Human wins!")
        break
    elif board.is_draw:
        print("Draw!")
        break
    computer_move: Move = find_best_move(board)
    print(f"Computer move is {computer_move}")
    board = board.move(computer_move)
    # The board is printed only here, after the computer's reply.
    print(board)
    if board.is_win:
        print("Computer wins!")
        break
    elif board.is_draw:
        print("Draw!")
        break


'''
Enter a legal square (0-8):2
Computer move is 4
 | |X
-----
 |O| 
-----
 | | 
Enter a legal square (0-8):0
Computer move is 1
X|O|X
-----
 |O| 
-----
 | | 
Enter a legal square (0-8):7
Computer move is 3
X|O|X
-----
O|O| 
-----
 |X| 
Enter a legal square (0-8):5
Computer move is 8
X|O|X
-----
O|O|X
-----
 |X|O
Enter a legal square (0-8):6
Draw!
'''