diff --git a/tests/test_text.py b/tests/test_text.py index 577ad661b..d884e02a2 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -47,6 +47,32 @@ def test_text_models(): assert P3.cond_prob['in', 'order'].dictionary == {'to': 6} + test_string = 'unigram' + wordseq = words(test_string) + + P1 = UnigramTextModel(wordseq) + + assert P1.dictionary == {('unigram'): 1} + + test_string = 'bigram text' + wordseq = words(test_string) + + P2 = NgramTextModel(2, wordseq) + + assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or + P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1}) + + + test_string = 'test trigram text' + wordseq = words(test_string) + + P3 = NgramTextModel(3, wordseq) + + assert ('', '', 'test') in P3.dictionary + assert ('', 'test', 'trigram') in P3.dictionary + assert ('test', 'trigram', 'text') in P3.dictionary + assert len(P3.dictionary) == 3 + def test_viterbi_segmentation(): flatland = DataFile("EN-text/flatland.txt").read() diff --git a/text.py b/text.py index 855e89aaf..e064b6049 100644 --- a/text.py +++ b/text.py @@ -55,7 +55,7 @@ def add_sequence(self, words): Prefix some copies of the empty word, '', to make the start work.""" n = self.n words = ['', ] * (n - 1) + words - for i in range(len(words) - n): + for i in range(len(words) - n + 1): self.add(tuple(words[i:i + n])) def samples(self, nwords): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy