Skip to content

Commit c65ac4e

Browse files
AdityaDaflapurkar authored and norvig committed
Include stochastic game class and generic expectiminimax (aimacode#916)
* Add stochastic game class
* Update backgammon class
* Update Expectiminimax
* Fix lint issues
* Correct compute_utility function
1 parent 51299b2 commit c65ac4e

File tree

1 file changed

+69
-42
lines changed

1 file changed

+69
-42
lines changed

games.py

Lines changed: 69 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
infinity = float('inf')
1010
GameState = namedtuple('GameState', 'to_move, utility, board, moves')
11+
StochasticGameState = namedtuple('StochasticGameState', 'to_move, utility, board, moves, chance')
1112

1213
# ______________________________________________________________________________
1314
# Minimax Search
@@ -41,42 +42,38 @@ def min_value(state):
4142

4243
# ______________________________________________________________________________
4344

44-
dice_rolls = list(itertools.combinations_with_replacement([1, 2, 3, 4, 5, 6], 2))
45-
direction = {'W' : -1, 'B' : 1}
4645

4746
def expectiminimax(state, game):
4847
"""Return the best move for a player after dice are thrown. The game tree
4948
includes chance nodes along with min and max nodes. [Figure 5.11]"""
5049
player = game.to_move(state)
5150

52-
def max_value(state, dice_roll):
51+
def max_value(state):
5352
v = -infinity
5453
for a in game.actions(state):
5554
v = max(v, chance_node(state, a))
56-
game.dice_roll = dice_roll
5755
return v
5856

59-
def min_value(state, dice_roll):
57+
def min_value(state):
6058
v = infinity
6159
for a in game.actions(state):
6260
v = min(v, chance_node(state, a))
63-
game.dice_roll = dice_roll
6461
return v
6562

6663
def chance_node(state, action):
6764
res_state = game.result(state, action)
6865
if game.terminal_test(res_state):
6966
return game.utility(res_state, player)
7067
sum_chances = 0
71-
num_chances = 21
72-
for val in dice_rolls:
73-
game.dice_roll = tuple(map((direction[res_state.to_move]).__mul__, val))
68+
num_chances = len(game.chances(res_state))
69+
for chance in game.chances(res_state):
70+
res_state = game.outcome(res_state, chance)
7471
util = 0
7572
if res_state.to_move == player:
76-
util = max_value(res_state, game.dice_roll)
73+
util = max_value(res_state)
7774
else:
78-
util = min_value(res_state, game.dice_roll)
79-
sum_chances += util * (1/36 if val[0] == val[1] else 1/18)
75+
util = min_value(res_state)
76+
sum_chances += util * game.probability(chance)
8077
return sum_chances / num_chances
8178

8279
# Body of expectiminimax:
@@ -256,6 +253,36 @@ def play_game(self, *players):
256253
self.display(state)
257254
return self.utility(state, self.to_move(self.initial))
258255

256+
class StochasticGame(Game):
257+
"""A stochastic game includes uncertain events which influence
258+
the moves of players at each state. To create a stochastic game, subclass
259+
this class and implement chances and outcome along with the other
260+
unimplemented game class methods."""
261+
262+
def chances(self, state):
263+
"""Return a list of all possible uncertain events at a state."""
264+
raise NotImplementedError
265+
266+
def outcome(self, state, chance):
267+
"""Return the state which is the outcome of a chance trial."""
268+
raise NotImplementedError
269+
270+
def probability(self, chance):
271+
"""Return the probability of occurence of a chance."""
272+
raise NotImplementedError
273+
274+
def play_game(self, *players):
275+
"""Play an n-person, move-alternating stochastic game."""
276+
state = self.initial
277+
while True:
278+
for player in players:
279+
chance = random.choice(self.chances(state))
280+
state = self.outcome(state, chance)
281+
move = player(self, state)
282+
state = self.result(state, move)
283+
if self.terminal_test(state):
284+
self.display(state)
285+
return self.utility(state, self.to_move(self.initial))
259286

260287
class Fig52Game(Game):
261288
"""The game represented in [Figure 5.2]. Serves as a simple test case."""
@@ -393,26 +420,25 @@ def actions(self, state):
393420
if y == 1 or (x, y - 1) in state.board]
394421

395422

396-
class Backgammon(Game):
423+
class Backgammon(StochasticGame):
397424
"""A two player game where the goal of each player is to move all the
398425
checkers off the board. The moves for each state are determined by
399426
rolling a pair of dice."""
400427

401428
def __init__(self):
402429
"""Initial state of the game"""
403-
self.dice_roll = tuple(map((direction['W']).__mul__, random.choice(dice_rolls)))
404-
# TODO : Add bar to Board class where a blot is placed when it is hit.
405430
point = {'W' : 0, 'B' : 0}
406431
board = [point.copy() for index in range(24)]
407432
board[0]['B'] = board[23]['W'] = 2
408433
board[5]['W'] = board[18]['B'] = 5
409434
board[7]['W'] = board[16]['B'] = 3
410435
board[11]['B'] = board[12]['W'] = 5
411436
self.allow_bear_off = {'W' : False, 'B' : False}
412-
self.initial = GameState(to_move='W',
413-
utility=0,
414-
board=board,
415-
moves=self.get_all_moves(board, 'W'))
437+
self.direction = {'W' : -1, 'B' : 1}
438+
self.initial = StochasticGameState(to_move='W',
439+
utility=0,
440+
board=board,
441+
moves=self.get_all_moves(board, 'W'), chance=None)
416442

417443
def actions(self, state):
418444
"""Return a list of legal moves for a state."""
@@ -423,21 +449,21 @@ def actions(self, state):
423449
legal_moves = []
424450
for move in moves:
425451
board = copy.deepcopy(state.board)
426-
if self.is_legal_move(board, move, self.dice_roll, player):
452+
if self.is_legal_move(board, move, state.chance, player):
427453
legal_moves.append(move)
428454
return legal_moves
429455

430456
def result(self, state, move):
431457
board = copy.deepcopy(state.board)
432458
player = state.to_move
433-
self.move_checker(board, move[0], self.dice_roll[0], player)
459+
self.move_checker(board, move[0], state.chance[0], player)
434460
if len(move) == 2:
435-
self.move_checker(board, move[1], self.dice_roll[1], player)
461+
self.move_checker(board, move[1], state.chance[1], player)
436462
to_move = ('W' if player == 'B' else 'B')
437-
return GameState(to_move=to_move,
438-
utility=self.compute_utility(board, move, player),
439-
board=board,
440-
moves=self.get_all_moves(board, to_move))
463+
return StochasticGameState(to_move=to_move,
464+
utility=self.compute_utility(board, move, player),
465+
board=board,
466+
moves=self.get_all_moves(board, to_move), chance=None)
441467

442468
def utility(self, state, player):
443469
"""Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
@@ -472,7 +498,7 @@ def display(self, state):
472498

473499
def compute_utility(self, board, move, player):
474500
"""If 'W' wins with this move, return 1; if 'B' wins return -1; else return 0."""
475-
util = {'W' : 1, 'B' : '-1'}
501+
util = {'W' : 1, 'B' : -1}
476502
for idx in range(0, 24):
477503
if board[idx][player] > 0:
478504
return 0
@@ -529,18 +555,19 @@ def is_point_open(self, player, point):
529555
opponent = 'B' if player == 'W' else 'W'
530556
return point[opponent] <= 1
531557

532-
def play_game(self, *players):
533-
"""Play backgammon."""
534-
state = self.initial
535-
while True:
536-
for player in players:
537-
saved_dice_roll = self.dice_roll
538-
move = player(self, state)
539-
self.dice_roll = saved_dice_roll
540-
if move is not None:
541-
state = self.result(state, move)
542-
self.dice_roll = tuple(map((direction[player]).__mul__,
543-
random.choice(dice_rolls)))
544-
if self.terminal_test(state):
545-
self.display(state)
546-
return self.utility(state, self.to_move(self.initial))
558+
def chances(self, state):
559+
"""Return a list of all possible dice rolls at a state."""
560+
dice_rolls = list(itertools.combinations_with_replacement([1, 2, 3, 4, 5, 6], 2))
561+
return dice_rolls
562+
563+
def outcome(self, state, chance):
564+
"""Return the state which is the outcome of a dice roll."""
565+
dice = tuple(map((self.direction[state.to_move]).__mul__, chance))
566+
return StochasticGameState(to_move=state.to_move,
567+
utility=state.utility,
568+
board=state.board,
569+
moves=state.moves, chance=dice)
570+
571+
def probability(self, chance):
572+
"""Return the probability of occurrence of a dice roll."""
573+
return 1/36 if chance[0] == chance[1] else 1/18

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy