diff --git a/tests/test_text.py b/tests/test_text.py index 577ad661b..d884e02a2 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -47,6 +47,32 @@ def test_text_models(): assert P3.cond_prob['in', 'order'].dictionary == {'to': 6} + test_string = 'unigram' + wordseq = words(test_string) + + P1 = UnigramTextModel(wordseq) + + assert P1.dictionary == {('unigram'): 1} + + test_string = 'bigram text' + wordseq = words(test_string) + + P2 = NgramTextModel(2, wordseq) + + assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or + P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1}) + + + test_string = 'test trigram text' + wordseq = words(test_string) + + P3 = NgramTextModel(3, wordseq) + + assert ('', '', 'test') in P3.dictionary + assert ('', 'test', 'trigram') in P3.dictionary + assert ('test', 'trigram', 'text') in P3.dictionary + assert len(P3.dictionary) == 3 + def test_viterbi_segmentation(): flatland = DataFile("EN-text/flatland.txt").read() diff --git a/text.py b/text.py index 855e89aaf..e064b6049 100644 --- a/text.py +++ b/text.py @@ -55,7 +55,7 @@ def add_sequence(self, words): Prefix some copies of the empty word, '', to make the start work.""" n = self.n words = ['', ] * (n - 1) + words - for i in range(len(words) - n): + for i in range(len(words) - n + 1): self.add(tuple(words[i:i + n])) def samples(self, nwords):
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: