Skip to content

Changes to hashable dict #482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 12, 2017
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implemented permutation decoder
  • Loading branch information
Chipe1 committed Mar 31, 2017
commit 05eff787276c7b27baabfa4130814e9b8d1b1f40
48 changes: 32 additions & 16 deletions text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Then we show a very simple Information Retrieval system, and an example
working on a tiny sample of Unix manual pages."""

from utils import argmin
from utils import argmin, argmax, hashabledict
from learning import CountingProbDist
import search

Expand Down Expand Up @@ -60,7 +60,7 @@ def add_sequence(self, words):
n = self.n
words = self.add_empty(words, n)

for i in range(len(words) - n):
for i in range(len(words) - n + 1):
self.add(tuple(words[i:i + n]))

def samples(self, nwords):
Expand Down Expand Up @@ -350,39 +350,55 @@ class PermutationDecoder:
def __init__(self, training_text, ciphertext=None):
self.Pwords = UnigramTextModel(words(training_text))
self.P1 = UnigramTextModel(training_text) # By letter
self.P2 = NgramTextModel(2, training_text) # By letter pair
self.P2 = NgramTextModel(2, words(training_text)) # By letter pair

def decode(self, ciphertext):
"""Search for a decoding of the ciphertext."""
self.ciphertext = ciphertext
self.ciphertext = canonicalize(ciphertext)
problem = PermutationDecoderProblem(decoder=self)
return search.best_first_tree_search(
solution = search.best_first_graph_search(
problem, lambda node: self.score(node.state))
print(solution.state, len(solution.state))
solution.state[' '] = ' '
return translate(self.ciphertext, lambda c: solution.state[c])


def score(self, code):
"""Score is product of word scores, unigram scores, and bigram scores.
This can get very small, so we use logs and exp."""

# TODO: Implement the permutation_decode function
text = permutation_decode(self.ciphertext, code) # noqa
# remake code dictionary to contain translation for all characters
full_code = code.copy()
full_code.update({x:x for x in alphabet + ' ' if x not in code})
text = translate(self.ciphertext, lambda c: full_code[c])

logP = (sum([log(self.Pwords[word]) for word in words(text)]) +
sum([log(self.P1[c]) for c in text]) +
sum([log(self.P2[b]) for b in bigrams(text)]))
return exp(logP)
# add small positive value to prevent computing log(0)
# TODO: Modify the values to make score more accurate
logP = (sum([log(self.Pwords[word] + 1e-20) for word in words(text)]) +
sum([log(self.P1[c] + 1e-5) for c in text]) +
sum([log(self.P2[b] + 1e-10) for b in bigrams(text)]))
return -exp(logP)


class PermutationDecoderProblem(search.Problem):

def __init__(self, initial=None, goal=None, decoder=None):
self.initial = initial or {}
self.initial = initial or hashabledict()
self.decoder = decoder

def actions(self, state):
# Find the best
p, plainchar = max([(self.decoder.P1[c], c)
for c in alphabet if c not in state])
succs = [extend(state, plainchar, cipherchar)] # ???? # noqa
search_list = [c for c in alphabet if c not in state]
target_list = [c for c in alphabet if c not in state.values()]
# Find the best charater to replace
plainchar = argmax(search_list, key=lambda c: self.decoder.P1[c])
for cipherchar in target_list:
yield (plainchar, cipherchar)

def result(self, state, action):
new_state = hashabledict(state) # copy to prevent hash issues
assert type(new_state) == hashabledict
new_state[action[0]] = action[1]
return new_state

def goal_test(self, state):
"""We're done when we get all 26 letters assigned."""
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy