-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Implemented PermutationDecoder #456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
Then we show a very simple Information Retrieval system, and an example | ||
working on a tiny sample of Unix manual pages.""" | ||
|
||
from utils import argmin | ||
from utils import argmin, argmax, hashabledict | ||
from learning import CountingProbDist | ||
import search | ||
|
||
|
@@ -60,7 +60,7 @@ def add_sequence(self, words): | |
n = self.n | ||
words = self.add_empty(words, n) | ||
|
||
for i in range(len(words) - n): | ||
for i in range(len(words) - n + 1): | ||
self.add(tuple(words[i:i + n])) | ||
|
||
def samples(self, nwords): | ||
|
@@ -350,40 +350,59 @@ class PermutationDecoder: | |
def __init__(self, training_text, ciphertext=None): | ||
self.Pwords = UnigramTextModel(words(training_text)) | ||
self.P1 = UnigramTextModel(training_text) # By letter | ||
self.P2 = NgramTextModel(2, training_text) # By letter pair | ||
self.P2 = NgramTextModel(2, words(training_text)) # By letter pair | ||
|
||
def decode(self, ciphertext): | ||
"""Search for a decoding of the ciphertext.""" | ||
self.ciphertext = ciphertext | ||
self.ciphertext = canonicalize(ciphertext) | ||
# reduce domain to speed up search | ||
self.chardomain = {c for c in self.ciphertext if c is not ' '} | ||
problem = PermutationDecoderProblem(decoder=self) | ||
return search.best_first_tree_search( | ||
solution = search.best_first_graph_search( | ||
problem, lambda node: self.score(node.state)) | ||
print(solution.state, len(solution.state)) | ||
solution.state[' '] = ' ' | ||
return translate(self.ciphertext, lambda c: solution.state[c]) | ||
|
||
|
||
def score(self, code): | ||
"""Score is product of word scores, unigram scores, and bigram scores. | ||
This can get very small, so we use logs and exp.""" | ||
|
||
# TODO: Implement the permutation_decode function | ||
text = permutation_decode(self.ciphertext, code) # noqa | ||
# remake code dictionary to contain translation for all characters | ||
full_code = code.copy() | ||
full_code.update({x:x for x in self.chardomain if x not in code}) | ||
full_code[' '] = ' ' | ||
text = translate(self.ciphertext, lambda c: full_code[c]) | ||
|
||
logP = (sum([log(self.Pwords[word]) for word in words(text)]) + | ||
sum([log(self.P1[c]) for c in text]) + | ||
sum([log(self.P2[b]) for b in bigrams(text)])) | ||
return exp(logP) | ||
# add small positive value to prevent computing log(0) | ||
# TODO: Modify the values to make score more accurate | ||
logP = (sum([log(self.Pwords[word] + 1e-20) for word in words(text)]) + | ||
sum([log(self.P1[c] + 1e-5) for c in text]) + | ||
sum([log(self.P2[b] + 1e-10) for b in bigrams(text)])) | ||
return -exp(logP) | ||
|
||
|
||
class PermutationDecoderProblem(search.Problem): | ||
|
||
def __init__(self, initial=None, goal=None, decoder=None): | ||
self.initial = initial or {} | ||
self.initial = initial or hashabledict() | ||
self.decoder = decoder | ||
|
||
def actions(self, state): | ||
# Find the best | ||
p, plainchar = max([(self.decoder.P1[c], c) | ||
for c in alphabet if c not in state]) | ||
succs = [extend(state, plainchar, cipherchar)] # ???? # noqa | ||
search_list = [c for c in self.decoder.chardomain if c not in state] | ||
target_list = [c for c in alphabet if c not in state.values()] | ||
# Find the best charater to replace | ||
plainchar = argmax(search_list, key=lambda c: self.decoder.P1[c]) | ||
for cipherchar in target_list: | ||
yield (plainchar, cipherchar) | ||
|
||
def result(self, state, action): | ||
new_state = hashabledict(state) # copy to prevent hash issues | ||
assert type(new_state) == hashabledict | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't need this line. |
||
new_state[action[0]] = action[1] | ||
return new_state | ||
|
||
def goal_test(self, state): | ||
"""We're done when we get all 26 letters assigned.""" | ||
return len(state) >= 26 | ||
"""We're done when all letters in search domain are assigned.""" | ||
return len(state) >= len(self.decoder.chardomain) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -568,6 +568,33 @@ def __missing__(self, key): | |
return result | ||
|
||
|
||
class hashabledict(dict): | ||
"""Allows hashing by representing a dictionary as tuple of key:value pairs | ||
May cause problems as the hash value may change during runtime | ||
""" | ||
def __tuplify__(self): | ||
return tuple(sorted(self.items())) | ||
|
||
def __hash__(self): | ||
return hash(self.__tuplify__()) | ||
|
||
def __lt__(self, odict): | ||
assert type(odict) is hashabledict | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be isinstance(odict, hashabledict) |
||
return self.__tuplify__() < odict.__tuplify__() | ||
|
||
def __gt__(self, odict): | ||
assert type(odict) is hashabledict | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be isinstance(odict, hashabledict) |
||
return self.__tuplify__() > odict.__tuplify__() | ||
|
||
def __le__(self, odict): | ||
assert type(odict) is hashabledict | ||
return self.__tuplify__() <= odict.__tuplify__() | ||
|
||
def __ge__(self, odict): | ||
assert type(odict) is hashabledict | ||
return self.__tuplify__() >= odict.__tuplify__() | ||
|
||
|
||
# ______________________________________________________________________________ | ||
# Queues: Stack, FIFOQueue, PriorityQueue | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There should be a comment about how slow this will be.