8
8
9
9
infinity = float ('inf' )
10
10
GameState = namedtuple ('GameState' , 'to_move, utility, board, moves' )
11
+ StochasticGameState = namedtuple ('StochasticGameState' , 'to_move, utility, board, moves, chance' )
11
12
12
13
# ______________________________________________________________________________
13
14
# Minimax Search
@@ -41,42 +42,38 @@ def min_value(state):
41
42
42
43
# ______________________________________________________________________________
43
44
44
- dice_rolls = list (itertools .combinations_with_replacement ([1 , 2 , 3 , 4 , 5 , 6 ], 2 ))
45
- direction = {'W' : - 1 , 'B' : 1 }
46
45
47
46
def expectiminimax (state , game ):
48
47
"""Return the best move for a player after dice are thrown. The game tree
49
48
includes chance nodes along with min and max nodes. [Figure 5.11]"""
50
49
player = game .to_move (state )
51
50
52
def max_value(state):
    """Return the largest chance-node value over the actions available in
    state, or -infinity when game.actions(state) yields nothing."""
    best = -infinity
    for action in game.actions(state):
        candidate = chance_node(state, action)
        # Only a strictly greater value replaces the current best.
        if candidate > best:
            best = candidate
    return best
58
56
59
def min_value(state):
    """Return the smallest chance-node value over the actions available in
    state, or infinity when game.actions(state) yields nothing."""
    worst = infinity
    for action in game.actions(state):
        candidate = chance_node(state, action)
        # Only a strictly smaller value replaces the current minimum.
        if candidate < worst:
            worst = candidate
    return worst
65
62
66
63
def chance_node(state, action):
    """Return the expected utility, for `player`, of taking action in state.

    The action is applied first. If the resulting state is terminal its
    utility is returned directly; otherwise the value is the sum, over every
    chance event reported by game.chances, of the min/max value of that
    event's outcome weighted by game.probability(chance)."""
    res_state = game.result(state, action)
    if game.terminal_test(res_state):
        return game.utility(res_state, player)
    expected = 0
    for chance in game.chances(res_state):
        # Derive every outcome from the same pre-chance state; res_state is
        # deliberately not rebound inside the loop.
        chance_state = game.outcome(res_state, chance)
        if chance_state.to_move == player:
            util = max_value(chance_state)
        else:
            util = min_value(chance_state)
        expected += util * game.probability(chance)
    # The weights are probabilities, so the weighted sum already is the
    # expectation; it must not be divided by the number of chance events.
    return expected
81
78
82
79
# Body of expectiminimax:
@@ -256,6 +253,36 @@ def play_game(self, *players):
256
253
self .display (state )
257
254
return self .utility (state , self .to_move (self .initial ))
258
255
256
class StochasticGame(Game):
    """A stochastic game includes uncertain events which influence
    the moves of players at each state. To create a stochastic game, subclass
    this class and implement chances, outcome and probability along with the
    other unimplemented game class methods."""

    def chances(self, state):
        """Return a list of all possible uncertain events at a state."""
        raise NotImplementedError

    def outcome(self, state, chance):
        """Return the state which is the outcome of a chance trial."""
        raise NotImplementedError

    def probability(self, chance):
        """Return the probability of occurrence of a chance."""
        raise NotImplementedError

    def _sample_chance(self, state):
        """Pick one event from chances(state), weighted by probability().

        Falls back to a uniform pick when the subclass has not implemented
        probability, which matches the earlier behaviour of play_game."""
        events = self.chances(state)
        try:
            weights = [self.probability(event) for event in events]
        except NotImplementedError:
            return random.choice(events)
        # Inverse-CDF sampling; scaling by the total tolerates weights that
        # do not sum to exactly 1.
        threshold = random.random() * sum(weights)
        cumulative = 0
        for event, weight in zip(events, weights):
            cumulative += weight
            if threshold < cumulative:
                return event
        # Reached only through floating-point round-off (or an empty list,
        # which raises IndexError just as random.choice would).
        return events[-1]

    def play_game(self, *players):
        """Play an n-person, move-alternating stochastic game.

        Before each player's turn a chance event is drawn and applied with
        outcome; the player is then asked for a move on the resulting state.
        Returns the utility of the terminal state for the player who moves
        first in self.initial."""
        state = self.initial
        while True:
            for player in players:
                chance = self._sample_chance(state)
                state = self.outcome(state, chance)
                move = player(self, state)
                state = self.result(state, move)
                if self.terminal_test(state):
                    self.display(state)
                    return self.utility(state, self.to_move(self.initial))
259
286
260
287
class Fig52Game (Game ):
261
288
"""The game represented in [Figure 5.2]. Serves as a simple test case."""
@@ -393,26 +420,25 @@ def actions(self, state):
393
420
if y == 1 or (x , y - 1 ) in state .board ]
394
421
395
422
396
- class Backgammon (Game ):
423
+ class Backgammon (StochasticGame ):
397
424
"""A two player game where the goal of each player is to move all the
398
425
checkers off the board. The moves for each state are determined by
399
426
rolling a pair of dice."""
400
427
401
428
def __init__(self):
    """Build the 24-point starting board and the initial state, 'W' to move."""
    # One independent checker-count dict per point.
    board = [{'W': 0, 'B': 0} for _ in range(24)]
    board[0]['B'] = board[23]['W'] = 2
    board[5]['W'] = board[18]['B'] = 5
    board[7]['W'] = board[16]['B'] = 3
    board[11]['B'] = board[12]['W'] = 5
    self.allow_bear_off = {'W': False, 'B': False}
    # Sign applied to dice values for each player (see outcome).
    self.direction = {'W': -1, 'B': 1}
    # No dice have been rolled yet, hence chance=None.
    self.initial = StochasticGameState(
        to_move='W',
        utility=0,
        board=board,
        moves=self.get_all_moves(board, 'W'),
        chance=None)
416
442
417
443
def actions (self , state ):
418
444
"""Return a list of legal moves for a state."""
@@ -423,21 +449,21 @@ def actions(self, state):
423
449
legal_moves = []
424
450
for move in moves :
425
451
board = copy .deepcopy (state .board )
426
- if self .is_legal_move (board , move , self . dice_roll , player ):
452
+ if self .is_legal_move (board , move , state . chance , player ):
427
453
legal_moves .append (move )
428
454
return legal_moves
429
455
430
456
def result(self, state, move):
    """Return the state reached when the player to move plays move.

    The checker move(s) are applied to a deep copy of the board using the
    dice values stored in state.chance; the returned state has the other
    player to move and chance reset to None."""
    mover = state.to_move
    new_board = copy.deepcopy(state.board)
    # The first checker always moves; the second only for a two-checker move.
    steps = 2 if len(move) == 2 else 1
    for i in range(steps):
        self.move_checker(new_board, move[i], state.chance[i], mover)
    next_player = 'B' if mover != 'B' else 'W'
    return StochasticGameState(
        to_move=next_player,
        utility=self.compute_utility(new_board, move, mover),
        board=new_board,
        moves=self.get_all_moves(new_board, next_player),
        chance=None)
441
467
442
468
def utility (self , state , player ):
443
469
"""Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
@@ -472,7 +498,7 @@ def display(self, state):
472
498
473
499
def compute_utility (self , board , move , player ):
474
500
"""If 'W' wins with this move, return 1; if 'B' wins return -1; else return 0."""
475
- util = {'W' : 1 , 'B' : '-1' }
501
+ util = {'W' : 1 , 'B' : - 1 }
476
502
for idx in range (0 , 24 ):
477
503
if board [idx ][player ] > 0 :
478
504
return 0
@@ -529,18 +555,19 @@ def is_point_open(self, player, point):
529
555
opponent = 'B' if player == 'W' else 'W'
530
556
return point [opponent ] <= 1
531
557
532
- def play_game (self , * players ):
533
- """Play backgammon."""
534
- state = self .initial
535
- while True :
536
- for player in players :
537
- saved_dice_roll = self .dice_roll
538
- move = player (self , state )
539
- self .dice_roll = saved_dice_roll
540
- if move is not None :
541
- state = self .result (state , move )
542
- self .dice_roll = tuple (map ((direction [player ]).__mul__ ,
543
- random .choice (dice_rolls )))
544
- if self .terminal_test (state ):
545
- self .display (state )
546
- return self .utility (state , self .to_move (self .initial ))
558
def chances(self, state):
    """Return a list of all possible dice rolls at a state.

    Rolls are unordered pairs (low, high) of die faces 1-6, so (1, 2) is
    listed but (2, 1) is not."""
    return [(low, high)
            for low in range(1, 7)
            for high in range(low, 7)]
562
+
563
def outcome(self, state, chance):
    """Return a copy of state whose chance field holds the given dice roll,
    each die multiplied by the direction sign of the player to move."""
    sign = self.direction[state.to_move]
    signed_roll = tuple(sign * pips for pips in chance)
    return StochasticGameState(
        to_move=state.to_move,
        utility=state.utility,
        board=state.board,
        moves=state.moves,
        chance=signed_roll)
570
+
571
def probability(self, chance):
    """Return the probability of occurrence of a dice roll: 1/36 when both
    dice show the same value, 1/18 otherwise."""
    first, second = chance[0], chance[1]
    if first == second:
        return 1 / 36
    return 1 / 18
0 commit comments