Skip to content

Commit c2ded84

Browse files
committed
Add tests to NgramCharModel
1 parent 28c4948 commit c2ded84

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

tests/test_text.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,72 @@ def test_text_models():
7474
assert len(P3.dictionary) == 3
7575

7676

77+
def test_char_models():
    """Check the character n-gram tables built by NgramCharModel.

    Unigram models are expected to hold one entry per distinct character.
    Bigram and trigram models are expected to hold exactly the listed
    n-grams (including the ones padded with leading spaces, such as
    (' ', 'b') or (' ', ' ', 't')), each counted once per occurrence of
    the word in the input text.
    """
    # Unigrams of a single word: every character of 'unigram' is distinct,
    # so the table size equals the string length.
    test_string = 'unigram'
    P1 = NgramCharModel(1, words(test_string))

    assert len(P1.dictionary) == len(test_string)
    for char in test_string:
        assert tuple(char) in P1.dictionary

    # Unigrams across several one-letter words.
    test_string = 'a b c'
    P1 = NgramCharModel(1, words(test_string))

    letters = test_string.split()
    assert len(P1.dictionary) == len(letters)
    for char in letters:
        assert tuple(char) in P1.dictionary

    bigrams = [(' ', 'b'), ('b', 'i'), ('i', 'g'),
               ('g', 'r'), ('r', 'a'), ('a', 'm')]
    trigrams = [(' ', ' ', 't'), (' ', 't', 'r'), ('t', 'r', 'i'),
                ('r', 'i', 'g'), ('i', 'g', 'r'), ('g', 'r', 'a'),
                ('r', 'a', 'm')]

    # (n, input text, expected n-grams, expected count of each n-gram)
    ngram_cases = [
        (2, 'bigram', bigrams, 1),
        (2, 'bigram bigram', bigrams, 2),
        (3, 'trigram', trigrams, 1),
        (3, 'trigram trigram trigram', trigrams, 3),
    ]

    for n, text, ngrams, expected_count in ngram_cases:
        model = NgramCharModel(n, words(text))

        # Same size plus per-key checks means the table holds exactly
        # the expected n-grams and nothing else.
        assert len(model.dictionary) == len(ngrams)
        for ngram in ngrams:
            assert ngram in model.dictionary
            assert model.dictionary[ngram] == expected_count
141+
142+
77143
def test_viterbi_segmentation():
78144
flatland = DataFile("EN-text/flatland.txt").read()
79145
wordseq = words(flatland)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy