>,\n",
- " {(0, 0): 0.05460040868097919,\n",
- " (0, 1): 0.10374209705939107,\n",
- " (0, 2): 0.1325347830202934,\n",
- " (1, 0): 0.07539875443960498,\n",
- " (1, 2): 0.19259892322613212,\n",
- " (2, 0): -0.0437928007023705,\n",
- " (2, 1): -0.009680447057460156,\n",
- " (2, 2): 0.30829164610044535,\n",
- " (3, 0): 0.0,\n",
- " (3, 1): -0.7641890167582844,\n",
- " (3, 2): 0.4106787728880888})"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"U"
]
@@ -514,17 +585,9 @@
},
{
"cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{(0, 1): 0.3984432178350045, (1, 2): 0.649585681261095, (3, 2): 1.0, (0, 0): 0.2962883154554812, (3, 0): 0.12987274656746342, (3, 1): -1.0, (2, 1): 0.48644001739269643, (2, 0): 0.3447542300124158, (2, 2): 0.7953620878466678, (1, 0): 0.25386699846479516, (0, 2): 0.5093943765842497}\n"
- ]
- }
- ],
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"print(value_iteration(sequential_decision_environment))"
]
@@ -564,7 +627,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.1"
+ "version": "3.6.3"
}
},
"nbformat": 4,
diff --git a/rl.py b/rl.py
index 94664b130..1b7e20c33 100644
--- a/rl.py
+++ b/rl.py
@@ -7,6 +7,71 @@
import random
+class PassiveDUEAgent:
+    """Passive (non-learning) agent that uses direct utility estimation
+    on a given MDP and policy: the utility of each state is estimated
+    as the average reward-to-go observed from that state over trials.
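+
+    A minimal usage sketch, mirroring tests/test_rl.py (`policy` here is
+    assumed to be a {state: action} dict for the given MDP):
+
+        agent = PassiveDUEAgent(policy, sequential_decision_environment)
+        for _ in range(200):
+            run_single_trial(agent, sequential_decision_environment)
+        print(agent.estimate_U())
+    """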
+ def __init__(self, pi, mdp):
+ self.pi = pi
+ self.mdp = mdp
+ self.U = {}
+ self.s = None
+ self.a = None
+ self.s_history = []
+ self.r_history = []
+ self.init = mdp.init
+
+ def __call__(self, percept):
+ s1, r1 = percept
+ self.s_history.append(s1)
+ self.r_history.append(r1)
+        # a terminal percept ends the current trial;
+        # otherwise keep following the fixed policy
+ if s1 in self.mdp.terminals:
+ self.s = self.a = None
+ else:
+ self.s, self.a = s1, self.pi[s1]
+ return self.a
+
+ def estimate_U(self):
+        # this method may only be called once the MDP has reached a
+        # terminal state; it also resets the recorded trial history
+ assert self.a is None, 'MDP is not in terminal state'
+ assert len(self.s_history) == len(self.r_history)
+        # calculate the utility estimates from the current trial
+        U2 = {s: [] for s in set(self.s_history)}
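+        # the reward-to-go of the i-th visited state is the sum of all
+        # rewards observed from step i to the end of the trial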
+        for i in range(len(self.s_history)):
+            s = self.s_history[i]
+            U2[s].append(sum(self.r_history[i:]))
+        U2 = {k: sum(v) / max(len(v), 1) for k, v in U2.items()}
+ # resetting history
+ self.s_history, self.r_history = [], []
+        # update each utility to the average of the previous estimate
+        # and the estimate from this trial
+        for k in U2:
+            if k in self.U:
+                self.U[k] = (self.U[k] + U2[k]) / 2
+ else:
+ self.U[k] = U2[k]
+ return self.U
+
+    def update_state(self, percept):
+        """To be overridden in most cases. The default case
+        assumes the percept to be of type (state, reward)."""
+        return percept
+
+
class PassiveADPAgent:
"""Passive (non-learning) agent that uses adaptive dynamic programming
diff --git a/tests/test_rl.py b/tests/test_rl.py
index 932b34ae5..95a0e2224 100644
--- a/tests/test_rl.py
+++ b/tests/test_rl.py
@@ -15,7 +15,20 @@
(0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,
}
-
+
+
+def test_PassiveDUEAgent():
+ agent = PassiveDUEAgent(policy, sequential_decision_environment)
+    for _ in range(200):
+        run_single_trial(agent, sequential_decision_environment)
+ agent.estimate_U()
+    # The agent's estimates vary between runs, so only check that they
+    # fall within a plausible range.
+    assert agent.U[(0, 0)] > 0.15  # true utility is around 0.3
+    assert agent.U[(0, 1)] > 0.15  # true utility is around 0.4
+    assert agent.U[(1, 0)] > 0     # true utility is around 0.2
+
+
def test_PassiveADPAgent():
agent = PassiveADPAgent(policy, sequential_decision_environment)