Skip to content

Commit 4b06c18

Browse files
committed
The data structure used in unaccent is a trie, not a suffix tree.
Fix the term used in variable and struct names, and in comments. Author: Alexander Korotkov
1 parent 2ffa66f commit 4b06c18

File tree

1 file changed

+28
-28
lines changed

1 file changed

+28
-28
lines changed

contrib/unaccent/unaccent.c

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -23,30 +23,29 @@
2323
PG_MODULE_MAGIC;
2424

2525
/*
26-
* Unaccent dictionary uses uncompressed suffix tree to find a
27-
* character to replace. Each node of tree is an array of
28-
* SuffixChar struct with length = 256 (n-th element of array
26+
* Unaccent dictionary uses a trie to find a character to replace. Each node of
27+
* the trie is an array of 256 TrieChar structs (n-th element of array
2928
* corresponds to byte)
3029
*/
31-
typedef struct SuffixChar
30+
typedef struct TrieChar
3231
{
33-
struct SuffixChar *nextChar;
32+
struct TrieChar *nextChar;
3433
char *replaceTo;
3534
int replacelen;
36-
} SuffixChar;
35+
} TrieChar;
3736

3837
/*
39-
* placeChar - put str into tree's structure, byte by byte.
38+
* placeChar - put str into trie's structure, byte by byte.
4039
*/
41-
static SuffixChar *
42-
placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
40+
static TrieChar *
41+
placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
4342
{
44-
SuffixChar *curnode;
43+
TrieChar *curnode;
4544

4645
if (!node)
4746
{
48-
node = palloc(sizeof(SuffixChar) * 256);
49-
memset(node, 0, sizeof(SuffixChar) * 256);
47+
node = palloc(sizeof(TrieChar) * 256);
48+
memset(node, 0, sizeof(TrieChar) * 256);
5049
}
5150

5251
curnode = node + *str;
@@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int
7170
}
7271

7372
/*
74-
* initSuffixTree - create suffix tree from file. Function converts
75-
* UTF8-encoded file into current encoding.
73+
* initTrie - create trie from file.
74+
*
75+
* Function converts UTF8-encoded file into current encoding.
7676
*/
77-
static SuffixChar *
78-
initSuffixTree(char *filename)
77+
static TrieChar *
78+
initTrie(char *filename)
7979
{
80-
SuffixChar *volatile rootSuffixTree = NULL;
80+
TrieChar *volatile rootTrie = NULL;
8181
MemoryContext ccxt = CurrentMemoryContext;
8282
tsearch_readline_state trst;
8383
volatile bool skip;
@@ -161,7 +161,7 @@ initSuffixTree(char *filename)
161161
}
162162

163163
if (state >= 3)
164-
rootSuffixTree = placeChar(rootSuffixTree,
164+
rootTrie = placeChar(rootTrie,
165165
(unsigned char *) src, srclen,
166166
trg, trglen);
167167

@@ -192,14 +192,14 @@ initSuffixTree(char *filename)
192192

193193
tsearch_readline_end(&trst);
194194

195-
return rootSuffixTree;
195+
return rootTrie;
196196
}
197197

198198
/*
199-
* findReplaceTo - find multibyte character in tree
199+
* findReplaceTo - find multibyte character in trie
200200
*/
201-
static SuffixChar *
202-
findReplaceTo(SuffixChar *node, unsigned char *src, int srclen)
201+
static TrieChar *
202+
findReplaceTo(TrieChar *node, unsigned char *src, int srclen)
203203
{
204204
while (node)
205205
{
@@ -221,7 +221,7 @@ Datum
221221
unaccent_init(PG_FUNCTION_ARGS)
222222
{
223223
List *dictoptions = (List *) PG_GETARG_POINTER(0);
224-
SuffixChar *rootSuffixTree = NULL;
224+
TrieChar *rootTrie = NULL;
225225
bool fileloaded = false;
226226
ListCell *l;
227227

@@ -235,7 +235,7 @@ unaccent_init(PG_FUNCTION_ARGS)
235235
ereport(ERROR,
236236
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
237237
errmsg("multiple Rules parameters")));
238-
rootSuffixTree = initSuffixTree(defGetString(defel));
238+
rootTrie = initTrie(defGetString(defel));
239239
fileloaded = true;
240240
}
241241
else
@@ -254,29 +254,29 @@ unaccent_init(PG_FUNCTION_ARGS)
254254
errmsg("missing Rules parameter")));
255255
}
256256

257-
PG_RETURN_POINTER(rootSuffixTree);
257+
PG_RETURN_POINTER(rootTrie);
258258
}
259259

260260
PG_FUNCTION_INFO_V1(unaccent_lexize);
261261
Datum unaccent_lexize(PG_FUNCTION_ARGS);
262262
Datum
263263
unaccent_lexize(PG_FUNCTION_ARGS)
264264
{
265-
SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
265+
TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0);
266266
char *srcchar = (char *) PG_GETARG_POINTER(1);
267267
int32 len = PG_GETARG_INT32(2);
268268
char *srcstart,
269269
*trgchar = NULL;
270270
int charlen;
271271
TSLexeme *res = NULL;
272-
SuffixChar *node;
272+
TrieChar *node;
273273

274274
srcstart = srcchar;
275275
while (srcchar - srcstart < len)
276276
{
277277
charlen = pg_mblen(srcchar);
278278

279-
node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
279+
node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen);
280280
if (node && node->replaceTo)
281281
{
282282
if (!res)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy