aimacode · norvig · Apr 6, 2017 · Mar 31, 2017 · Mar 31, 2017 · Mar 31, 2017
diff --git a/tests/test_text.py b/tests/test_text.py
@@ -99,6 +99,17 @@ def test_shift_decoding():
     assert msg == 'This is a secret message.'
 
 
+def test_permutation_decoder():
+    gutenberg = DataFile("EN-text/gutenberg.txt").read()
+    flatland = DataFile("EN-text/flatland.txt").read()
+
+    pd = PermutationDecoder(canonicalize(gutenberg))
+    assert pd.decode('aba') in ('ece', 'ete', 'tat', 'tit', 'txt')
+
+    pd = PermutationDecoder(canonicalize(flatland))
+    assert pd.decode('aba') in ('ded', 'did', 'ece', 'ele', 'eme', 'ere', 'eve', 'eye', 'iti', 'mom', 'ses', 'tat', 'tit')
+
+
 def test_rot13_encoding():
     code = rot13('Hello, world!')
 

diff --git a/text.py b/text.py
@@ -4,7 +4,7 @@
 Then we show a very simple Information Retrieval system, and an example
 working on a tiny sample of Unix manual pages."""
 
-from utils import argmin
+from utils import argmin, argmax, hashabledict
 from learning import CountingProbDist
 import search
 
@@ -60,7 +60,7 @@ def add_sequence(self, words):
         n = self.n
         words = self.add_empty(words, n)
 
-        for i in range(len(words) - n):
+        for i in range(len(words) - n + 1):
             self.add(tuple(words[i:i + n]))
 
     def samples(self, nwords):
@@ -350,40 +350,59 @@ class PermutationDecoder:
     def __init__(self, training_text, ciphertext=None):
         self.Pwords = UnigramTextModel(words(training_text))
         self.P1 = UnigramTextModel(training_text)  # By letter
-        self.P2 = NgramTextModel(2, training_text)  # By letter pair
+        self.P2 = NgramTextModel(2, words(training_text))  # By letter pair
 
     def decode(self, ciphertext):
         """Search for a decoding of the ciphertext."""
-        self.ciphertext = ciphertext
+        self.ciphertext = canonicalize(ciphertext)
+        # reduce domain to speed up search
+        self.chardomain = {c for c in self.ciphertext if c is not ' '}
         problem = PermutationDecoderProblem(decoder=self)
-        return search.best_first_tree_search(
+        solution =  search.best_first_graph_search(
             problem, lambda node: self.score(node.state))
+        print(solution.state, len(solution.state))
+        solution.state[' '] = ' '
+        return translate(self.ciphertext, lambda c: solution.state[c])
+
 
     def score(self, code):
         """Score is product of word scores, unigram scores, and bigram scores.
         This can get very small, so we use logs and exp."""
 
-        # TODO: Implement the permutation_decode function
-        text = permutation_decode(self.ciphertext, code)  # noqa
+        # remake code dictionary to contain translation for all characters
+        full_code = code.copy()
+        full_code.update({x:x for x in self.chardomain if x not in code})
+        full_code[' '] = ' '
+        text = translate(self.ciphertext, lambda c: full_code[c])
 
-        logP = (sum([log(self.Pwords[word]) for word in words(text)]) +
-                sum([log(self.P1[c]) for c in text]) +
-                sum([log(self.P2[b]) for b in bigrams(text)]))
-        return exp(logP)
+        # add small positive value to prevent computing log(0)
+        # TODO: Modify the values to make score more accurate
+        logP = (sum([log(self.Pwords[word] + 1e-20) for word in words(text)]) +
+                sum([log(self.P1[c] + 1e-5) for c in text]) +
+                sum([log(self.P2[b] + 1e-10) for b in bigrams(text)]))
+        return -exp(logP)
 
 
 class PermutationDecoderProblem(search.Problem):
 
     def __init__(self, initial=None, goal=None, decoder=None):
-        self.initial = initial or {}
+        self.initial = initial or hashabledict()
         self.decoder = decoder
 
     def actions(self, state):
-        # Find the best
-        p, plainchar = max([(self.decoder.P1[c], c)
-                            for c in alphabet if c not in state])
-        succs = [extend(state, plainchar, cipherchar)]  # ???? # noqa
+        search_list = [c for c in self.decoder.chardomain if c not in state]
+        target_list = [c for c in alphabet if c not in state.values()]
+        # Find the best charater to replace
+        plainchar = argmax(search_list, key=lambda c: self.decoder.P1[c])
+        for cipherchar in target_list:
+            yield (plainchar, cipherchar)
+
+    def result(self, state, action):
+        new_state = hashabledict(state)  # copy to prevent hash issues
+        assert type(new_state) == hashabledict
+        new_state[action[0]] = action[1]
+        return new_state
 
     def goal_test(self, state):
-        """We're done when we get all 26 letters assigned."""
-        return len(state) >= 26
+        """We're done when all letters in search domain are assigned."""
+        return len(state) >= len(self.decoder.chardomain)
diff --git a/utils.py b/utils.py
@@ -568,6 +568,33 @@ def __missing__(self, key):
         return result
 
 
+class hashabledict(dict):
+    """Allows hashing by representing a dictionary as tuple of key:value pairs
+       May cause problems as the hash value may change during runtime
+    """
+    def __tuplify__(self):
+        return tuple(sorted(self.items()))
+
+    def __hash__(self):
+        return hash(self.__tuplify__())
+
+    def __lt__(self, odict):
+        assert type(odict) is hashabledict
+        return self.__tuplify__() < odict.__tuplify__()
+
+    def __gt__(self, odict):
+        assert type(odict) is hashabledict
+        return self.__tuplify__() > odict.__tuplify__()
+
+    def __le__(self, odict):
+        assert type(odict) is hashabledict
+        return self.__tuplify__() <= odict.__tuplify__()
+
+    def __ge__(self, odict):
+        assert type(odict) is hashabledict
+        return self.__tuplify__() >= odict.__tuplify__()
+
+
 # ______________________________________________________________________________
 # Queues: Stack, FIFOQueue, PriorityQueue