Skip to content

Implemented PermutationDecoder #456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ def test_shift_decoding():
assert msg == 'This is a secret message.'


def test_permutation_decoder():
    """Check that decoding 'aba' yields an accepted word for each corpus."""
    gutenberg = DataFile("EN-text/gutenberg.txt").read()
    flatland = DataFile("EN-text/flatland.txt").read()

    # Each training corpus is paired with the decodings accepted for 'aba'.
    cases = (
        (gutenberg, ('ece', 'ete', 'tat', 'tit', 'txt')),
        (flatland, ('ded', 'did', 'ece', 'ele', 'eme', 'ere', 'eve', 'eye',
                    'iti', 'mom', 'ses', 'tat', 'tit')),
    )
    for corpus, accepted in cases:
        decoder = PermutationDecoder(canonicalize(corpus))
        assert decoder.decode('aba') in accepted


def test_rot13_encoding():
code = rot13('Hello, world!')

Expand Down
55 changes: 37 additions & 18 deletions text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Then we show a very simple Information Retrieval system, and an example
working on a tiny sample of Unix manual pages."""

from utils import argmin
from utils import argmin, argmax, hashabledict
from learning import CountingProbDist
import search

Expand Down Expand Up @@ -60,7 +60,7 @@ def add_sequence(self, words):
n = self.n
words = self.add_empty(words, n)

for i in range(len(words) - n):
for i in range(len(words) - n + 1):
self.add(tuple(words[i:i + n]))

def samples(self, nwords):
Expand Down Expand Up @@ -350,40 +350,59 @@ class PermutationDecoder:
def __init__(self, training_text, ciphertext=None):
self.Pwords = UnigramTextModel(words(training_text))
self.P1 = UnigramTextModel(training_text) # By letter
self.P2 = NgramTextModel(2, training_text) # By letter pair
self.P2 = NgramTextModel(2, words(training_text)) # By letter pair

def decode(self, ciphertext):
"""Search for a decoding of the ciphertext."""
self.ciphertext = ciphertext
self.ciphertext = canonicalize(ciphertext)
# reduce domain to speed up search
self.chardomain = {c for c in self.ciphertext if c is not ' '}
problem = PermutationDecoderProblem(decoder=self)
return search.best_first_tree_search(
solution = search.best_first_graph_search(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be a comment about how slow this will be.

problem, lambda node: self.score(node.state))
print(solution.state, len(solution.state))
solution.state[' '] = ' '
return translate(self.ciphertext, lambda c: solution.state[c])


def score(self, code):
"""Score is product of word scores, unigram scores, and bigram scores.
This can get very small, so we use logs and exp."""

# TODO: Implement the permutation_decode function
text = permutation_decode(self.ciphertext, code) # noqa
# remake code dictionary to contain translation for all characters
full_code = code.copy()
full_code.update({x:x for x in self.chardomain if x not in code})
full_code[' '] = ' '
text = translate(self.ciphertext, lambda c: full_code[c])

logP = (sum([log(self.Pwords[word]) for word in words(text)]) +
sum([log(self.P1[c]) for c in text]) +
sum([log(self.P2[b]) for b in bigrams(text)]))
return exp(logP)
# add small positive value to prevent computing log(0)
# TODO: Modify the values to make score more accurate
logP = (sum([log(self.Pwords[word] + 1e-20) for word in words(text)]) +
sum([log(self.P1[c] + 1e-5) for c in text]) +
sum([log(self.P2[b] + 1e-10) for b in bigrams(text)]))
return -exp(logP)


class PermutationDecoderProblem(search.Problem):

def __init__(self, initial=None, goal=None, decoder=None):
self.initial = initial or {}
self.initial = initial or hashabledict()
self.decoder = decoder

def actions(self, state):
# Find the best
p, plainchar = max([(self.decoder.P1[c], c)
for c in alphabet if c not in state])
succs = [extend(state, plainchar, cipherchar)] # ???? # noqa
search_list = [c for c in self.decoder.chardomain if c not in state]
target_list = [c for c in alphabet if c not in state.values()]
# Find the best character to replace
plainchar = argmax(search_list, key=lambda c: self.decoder.P1[c])
for cipherchar in target_list:
yield (plainchar, cipherchar)

def result(self, state, action):
new_state = hashabledict(state) # copy to prevent hash issues
assert type(new_state) == hashabledict
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need this line.

new_state[action[0]] = action[1]
return new_state

def goal_test(self, state):
"""We're done when we get all 26 letters assigned."""
return len(state) >= 26
"""We're done when all letters in search domain are assigned."""
return len(state) >= len(self.decoder.chardomain)
27 changes: 27 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,33 @@ def __missing__(self, key):
return result


class hashabledict(dict):
    """A dict that can be hashed and ordered, e.g. to serve as a search state.

    Both the hash and the ordering are derived from the sorted tuple of
    ``(key, value)`` items, so the items must be mutually orderable and the
    values must be hashable.

    Warning: the dictionary stays mutable, so its hash value changes whenever
    its contents change. Do not mutate an instance while it is stored in a
    set or used as a dictionary key.
    """

    def __tuplify__(self):
        """Return the items as a tuple of ``(key, value)`` pairs, sorted."""
        return tuple(sorted(self.items()))

    def __hash__(self):
        """Hash the sorted item tuple so that insertion order is irrelevant."""
        return hash(self.__tuplify__())

    # The comparisons below return NotImplemented for foreign operands instead
    # of asserting: asserts are stripped under ``python -O``, and an exact
    # ``type(...) is`` check would wrongly reject subclasses.

    def __lt__(self, odict):
        """Return True if this dict's sorted items sort before ``odict``'s."""
        if not isinstance(odict, hashabledict):
            return NotImplemented
        return self.__tuplify__() < odict.__tuplify__()

    def __gt__(self, odict):
        """Return True if this dict's sorted items sort after ``odict``'s."""
        if not isinstance(odict, hashabledict):
            return NotImplemented
        return self.__tuplify__() > odict.__tuplify__()

    def __le__(self, odict):
        """Return True if this dict's sorted items sort before or equal."""
        if not isinstance(odict, hashabledict):
            return NotImplemented
        return self.__tuplify__() <= odict.__tuplify__()

    def __ge__(self, odict):
        """Return True if this dict's sorted items sort after or equal."""
        if not isinstance(odict, hashabledict):
            return NotImplemented
        return self.__tuplify__() >= odict.__tuplify__()


# ______________________________________________________________________________
# Queues: Stack, FIFOQueue, PriorityQueue

Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy