Skip to content

Commit 8debcd8

Browse files
ad71 authored and norvig committed
Fixes problems in mdp.py (aimacode#918)
* Added MDP2 class
* Updated loop termination condition in value_iteration
1 parent aba4854 commit 8debcd8

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

mdp.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,19 @@ def check_consistency(self):
104104
assert abs(s - 1) < 0.001
105105

106106

107+
class MDP2(MDP):
    """MDP variant with explicit handling of terminal states.

    Subclass of MDP intended to handle terminal states, and transitions to
    and from terminal states, better than the base class.
    """

    def __init__(self, init, actlist, terminals, transitions, reward=None, gamma=0.9):
        """Forward every argument unchanged to the MDP constructor."""
        MDP.__init__(self, init, actlist, terminals, transitions, reward, gamma=gamma)

    def T(self, state, action):
        """Return the (probability, next_state) pairs for taking action in state.

        A None action yields the single pair (0.0, state); any other action
        is looked up in self.transitions[state][action].
        """
        if action is not None:
            return self.transitions[state][action]
        return [(0.0, state)]
118+
119+
107120
class GridMDP(MDP):
108121

109122
"""A two-dimensional grid MDP, as in [Figure 17.1]. All you have to do is
@@ -186,7 +199,7 @@ def value_iteration(mdp, epsilon=0.001):
186199
U1[s] = R(s) + gamma * max(sum(p*U[s1] for (p, s1) in T(s, a))
187200
for a in mdp.actions(s))
188201
delta = max(delta, abs(U1[s] - U[s]))
189-
if delta < epsilon*(1 - gamma)/gamma:
202+
if delta <= epsilon*(1 - gamma)/gamma:
190203
return U
191204

192205

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy