From 7899ec4f772f48e3037ee664e4dc0fb07fc49218 Mon Sep 17 00:00:00 2001 From: Lucas Moura Date: Fri, 24 Mar 2017 14:00:57 -0300 Subject: [PATCH 1/2] Fix NgramTextModel bug --- text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text.py b/text.py index 855e89aaf..e064b6049 100644 --- a/text.py +++ b/text.py @@ -55,7 +55,7 @@ def add_sequence(self, words): Prefix some copies of the empty word, '', to make the start work.""" n = self.n words = ['', ] * (n - 1) + words - for i in range(len(words) - n): + for i in range(len(words) - n + 1): self.add(tuple(words[i:i + n])) def samples(self, nwords): From 96dbe26c5125e8d35d9c3d4a8e2363f84925dfa0 Mon Sep 17 00:00:00 2001 From: Lucas Moura Date: Fri, 24 Mar 2017 14:01:12 -0300 Subject: [PATCH 2/2] Add new tests for NgramTextModel --- tests/test_text.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_text.py b/tests/test_text.py index 577ad661b..d884e02a2 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -47,6 +47,32 @@ def test_text_models(): assert P3.cond_prob['in', 'order'].dictionary == {'to': 6} + test_string = 'unigram' + wordseq = words(test_string) + + P1 = UnigramTextModel(wordseq) + + assert P1.dictionary == {('unigram'): 1} + + test_string = 'bigram text' + wordseq = words(test_string) + + P2 = NgramTextModel(2, wordseq) + + assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or + P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1}) + + + test_string = 'test trigram text' + wordseq = words(test_string) + + P3 = NgramTextModel(3, wordseq) + + assert ('', '', 'test') in P3.dictionary + assert ('', 'test', 'trigram') in P3.dictionary + assert ('test', 'trigram', 'text') in P3.dictionary + assert len(P3.dictionary) == 3 + def test_viterbi_segmentation(): flatland = DataFile("EN-text/flatland.txt").read() pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy