Skip to content

Commit 25bd9ce

Browse files
committed
Add matchorig, matchsynonyms, and keepsynonyms options to contrib/dict_xsyn.
Sergey Karpov
1 parent 23dc89d commit 25bd9ce

File tree

4 files changed

+282
-52
lines changed

4 files changed

+282
-52
lines changed

contrib/dict_xsyn/dict_xsyn.c

Lines changed: 69 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2007-2009, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.6 2009/01/01 17:23:32 momjian Exp $
9+
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.7 2009/08/05 18:06:49 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -33,7 +33,10 @@ typedef struct
3333
int len;
3434
Syn *syn;
3535

36+
bool matchorig;
3637
bool keeporig;
38+
bool matchsynonyms;
39+
bool keepsynonyms;
3740
} DictSyn;
3841

3942

@@ -88,34 +91,45 @@ read_dictionary(DictSyn *d, char *filename)
8891
{
8992
char *value;
9093
char *key;
91-
char *end = NULL;
94+
char *pos;
95+
char *end;
9296

9397
if (*line == '\0')
9498
continue;
9599

96100
value = lowerstr(line);
97101
pfree(line);
98102

99-
key = find_word(value, &end);
100-
if (!key)
103+
pos = value;
104+
while ((key = find_word(pos, &end)) != NULL)
101105
{
102-
pfree(value);
103-
continue;
104-
}
106+
/* Enlarge syn structure if full */
107+
if (cur == d->len)
108+
{
109+
d->len = (d->len > 0) ? 2 * d->len : 16;
110+
if (d->syn)
111+
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
112+
else
113+
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
114+
}
105115

106-
if (cur == d->len)
107-
{
108-
d->len = (d->len > 0) ? 2 * d->len : 16;
109-
if (d->syn)
110-
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
111-
else
112-
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
113-
}
116+
/* Save first word only if we will match it */
117+
if (pos != value || d->matchorig)
118+
{
119+
d->syn[cur].key = pnstrdup(key, end - key);
120+
d->syn[cur].value = pstrdup(value);
114121

115-
d->syn[cur].key = pnstrdup(key, end - key);
116-
d->syn[cur].value = value;
122+
cur++;
123+
}
124+
125+
pos = end;
117126

118-
cur++;
127+
/* Don't bother scanning synonyms if we will not match them */
128+
if (!d->matchsynonyms)
129+
break;
130+
}
131+
132+
pfree(value);
119133
}
120134

121135
tsearch_readline_end(&trst);
@@ -133,23 +147,40 @@ dxsyn_init(PG_FUNCTION_ARGS)
133147
List *dictoptions = (List *) PG_GETARG_POINTER(0);
134148
DictSyn *d;
135149
ListCell *l;
150+
char *filename = NULL;
136151

137152
d = (DictSyn *) palloc0(sizeof(DictSyn));
138153
d->len = 0;
139154
d->syn = NULL;
155+
d->matchorig = true;
140156
d->keeporig = true;
157+
d->matchsynonyms = false;
158+
d->keepsynonyms = true;
141159

142160
foreach(l, dictoptions)
143161
{
144162
DefElem *defel = (DefElem *) lfirst(l);
145163

146-
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
164+
if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
165+
{
166+
d->matchorig = defGetBoolean(defel);
167+
}
168+
else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
147169
{
148170
d->keeporig = defGetBoolean(defel);
149171
}
172+
else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
173+
{
174+
d->matchsynonyms = defGetBoolean(defel);
175+
}
176+
else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
177+
{
178+
d->keepsynonyms = defGetBoolean(defel);
179+
}
150180
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
151181
{
152-
read_dictionary(d, defGetString(defel));
182+
/* we can't read the rules before parsing all options! */
183+
filename = defGetString(defel);
153184
}
154185
else
155186
{
@@ -160,6 +191,9 @@ dxsyn_init(PG_FUNCTION_ARGS)
160191
}
161192
}
162193

194+
if (filename)
195+
read_dictionary(d, filename);
196+
163197
PG_RETURN_POINTER(d);
164198
}
165199

@@ -194,41 +228,33 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
194228

195229
/* Parse string of synonyms and return array of words */
196230
{
197-
char *value = pstrdup(found->value);
198-
int value_length = strlen(value);
199-
char *pos = value;
231+
char *value = found->value;
232+
char *syn;
233+
char *pos;
234+
char *end;
200235
int nsyns = 0;
201-
bool is_first = true;
202236

203-
res = palloc(0);
237+
res = palloc(sizeof(TSLexeme));
204238

205-
while (pos < value + value_length)
239+
pos = value;
240+
while ((syn = find_word(pos, &end)) != NULL)
206241
{
207-
char *end;
208-
char *syn = find_word(pos, &end);
209-
210-
if (!syn)
211-
break;
212-
*end = '\0';
213-
214242
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
215-
res[nsyns].lexeme = NULL;
216243

217-
/* first word is added to result only if KEEPORIG flag is set */
218-
if (d->keeporig || !is_first)
244+
/* The first word is output only if keeporig=true */
245+
if (pos != value || d->keeporig)
219246
{
220-
res[nsyns].lexeme = pstrdup(syn);
221-
res[nsyns + 1].lexeme = NULL;
222-
247+
res[nsyns].lexeme = pnstrdup(syn, end - syn);
223248
nsyns++;
224249
}
225250

226-
is_first = false;
251+
pos = end;
227252

228-
pos = end + 1;
253+
/* Stop if we are not to output the synonyms */
254+
if (!d->keepsynonyms)
255+
break;
229256
}
230-
231-
pfree(value);
257+
res[nsyns].lexeme = NULL;
232258
}
233259

234260
PG_RETURN_POINTER(res);

contrib/dict_xsyn/expected/dict_xsyn.out

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,76 @@
55
SET client_min_messages = warning;
66
\set ECHO none
77
RESET client_min_messages;
8-
--configuration
9-
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
8+
-- default configuration - match first word and return it along with all synonyms
9+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1010
--lexize
1111
SELECT ts_lexize('xsyn', 'supernova');
12+
ts_lexize
13+
--------------------------
14+
{supernova,sn,sne,1987a}
15+
(1 row)
16+
17+
SELECT ts_lexize('xsyn', 'sn');
18+
ts_lexize
19+
-----------
20+
21+
(1 row)
22+
23+
SELECT ts_lexize('xsyn', 'grb');
24+
ts_lexize
25+
-----------
26+
27+
(1 row)
28+
29+
-- the same, but return only synonyms
30+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
31+
SELECT ts_lexize('xsyn', 'supernova');
32+
ts_lexize
33+
----------------
34+
{sn,sne,1987a}
35+
(1 row)
36+
37+
SELECT ts_lexize('xsyn', 'sn');
38+
ts_lexize
39+
-----------
40+
41+
(1 row)
42+
43+
SELECT ts_lexize('xsyn', 'grb');
44+
ts_lexize
45+
-----------
46+
47+
(1 row)
48+
49+
-- match any word and return all words
50+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn', 'supernova');
52+
ts_lexize
53+
--------------------------
54+
{supernova,sn,sne,1987a}
55+
(1 row)
56+
57+
SELECT ts_lexize('xsyn', 'sn');
58+
ts_lexize
59+
--------------------------
60+
{supernova,sn,sne,1987a}
61+
(1 row)
62+
63+
SELECT ts_lexize('xsyn', 'grb');
64+
ts_lexize
65+
-----------
66+
67+
(1 row)
68+
69+
-- match any word and return all words except first one
70+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
71+
SELECT ts_lexize('xsyn', 'supernova');
72+
ts_lexize
73+
----------------
74+
{sn,sne,1987a}
75+
(1 row)
76+
77+
SELECT ts_lexize('xsyn', 'sn');
1278
ts_lexize
1379
----------------
1480
{sn,sne,1987a}
@@ -20,3 +86,63 @@ SELECT ts_lexize('xsyn', 'grb');
2086

2187
(1 row)
2288

89+
-- match any synonym but not first word, and return first word instead
90+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
91+
SELECT ts_lexize('xsyn', 'supernova');
92+
ts_lexize
93+
-----------
94+
95+
(1 row)
96+
97+
SELECT ts_lexize('xsyn', 'sn');
98+
ts_lexize
99+
-------------
100+
{supernova}
101+
(1 row)
102+
103+
SELECT ts_lexize('xsyn', 'grb');
104+
ts_lexize
105+
-----------
106+
107+
(1 row)
108+
109+
-- do not match or return anything
110+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
111+
SELECT ts_lexize('xsyn', 'supernova');
112+
ts_lexize
113+
-----------
114+
115+
(1 row)
116+
117+
SELECT ts_lexize('xsyn', 'sn');
118+
ts_lexize
119+
-----------
120+
121+
(1 row)
122+
123+
SELECT ts_lexize('xsyn', 'grb');
124+
ts_lexize
125+
-----------
126+
127+
(1 row)
128+
129+
-- match any word but return nothing
130+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
131+
SELECT ts_lexize('xsyn', 'supernova');
132+
ts_lexize
133+
-----------
134+
{}
135+
(1 row)
136+
137+
SELECT ts_lexize('xsyn', 'sn');
138+
ts_lexize
139+
-----------
140+
{}
141+
(1 row)
142+
143+
SELECT ts_lexize('xsyn', 'grb');
144+
ts_lexize
145+
-----------
146+
147+
(1 row)
148+

contrib/dict_xsyn/sql/dict_xsyn.sql

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,46 @@ SET client_min_messages = warning;
88
\set ECHO all
99
RESET client_min_messages;
1010

11-
--configuration
12-
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
11+
-- default configuration - match first word and return it along with all synonyms
12+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1313

1414
--lexize
1515
SELECT ts_lexize('xsyn', 'supernova');
16+
SELECT ts_lexize('xsyn', 'sn');
17+
SELECT ts_lexize('xsyn', 'grb');
18+
19+
-- the same, but return only synonyms
20+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
21+
SELECT ts_lexize('xsyn', 'supernova');
22+
SELECT ts_lexize('xsyn', 'sn');
23+
SELECT ts_lexize('xsyn', 'grb');
24+
25+
-- match any word and return all words
26+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
27+
SELECT ts_lexize('xsyn', 'supernova');
28+
SELECT ts_lexize('xsyn', 'sn');
29+
SELECT ts_lexize('xsyn', 'grb');
30+
31+
-- match any word and return all words except first one
32+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
33+
SELECT ts_lexize('xsyn', 'supernova');
34+
SELECT ts_lexize('xsyn', 'sn');
35+
SELECT ts_lexize('xsyn', 'grb');
36+
37+
-- match any synonym but not first word, and return first word instead
38+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
39+
SELECT ts_lexize('xsyn', 'supernova');
40+
SELECT ts_lexize('xsyn', 'sn');
41+
SELECT ts_lexize('xsyn', 'grb');
42+
43+
-- do not match or return anything
44+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
45+
SELECT ts_lexize('xsyn', 'supernova');
46+
SELECT ts_lexize('xsyn', 'sn');
47+
SELECT ts_lexize('xsyn', 'grb');
48+
49+
-- match any word but return nothing
50+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn', 'supernova');
52+
SELECT ts_lexize('xsyn', 'sn');
1653
SELECT ts_lexize('xsyn', 'grb');

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy