Skip to content

Commit 7351b5f

Browse files
committed
Cleanup for some problems in tsearch patch:
- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verifymbstr, with database encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch configuration files, by doing this in a single t_readline() subroutine (replacing direct usages of fgets). Cleaned up API for readstoplist too.

Heikki Linnakangas
1 parent b918bf8 commit 7351b5f

File tree

14 files changed

+341
-328
lines changed

14 files changed

+341
-328
lines changed

src/backend/snowball/dict_snowball.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -192,7 +192,6 @@ dsnowball_init(PG_FUNCTION_ARGS)
192192
ListCell *l;
193193

194194
d = (DictSnowball *) palloc0(sizeof(DictSnowball));
195-
d->stoplist.wordop = recode_and_lowerstr;
196195

197196
foreach(l, dictoptions)
198197
{
@@ -204,8 +203,7 @@ dsnowball_init(PG_FUNCTION_ARGS)
204203
ereport(ERROR,
205204
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
206205
errmsg("multiple StopWords parameters")));
207-
readstoplist(defGetString(defel), &d->stoplist);
208-
sortstoplist(&d->stoplist);
206+
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
209207
stoploaded = true;
210208
}
211209
else if (pg_strcasecmp("Language", defel->defname) == 0)

src/backend/tsearch/dict_ispell.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -39,7 +39,6 @@ dispell_init(PG_FUNCTION_ARGS)
3939
ListCell *l;
4040

4141
d = (DictISpell *) palloc0(sizeof(DictISpell));
42-
d->stoplist.wordop = recode_and_lowerstr;
4342

4443
foreach(l, dictoptions)
4544
{
@@ -73,8 +72,7 @@ dispell_init(PG_FUNCTION_ARGS)
7372
ereport(ERROR,
7473
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
7574
errmsg("multiple StopWords parameters")));
76-
readstoplist(defGetString(defel), &(d->stoplist));
77-
sortstoplist(&(d->stoplist));
75+
readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
7876
stoploaded = true;
7977
}
8078
else

src/backend/tsearch/dict_simple.c

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,19 +23,17 @@
2323
typedef struct
2424
{
2525
StopList stoplist;
26-
} DictExample;
26+
} DictSimple;
2727

2828

2929
Datum
3030
dsimple_init(PG_FUNCTION_ARGS)
3131
{
3232
List *dictoptions = (List *) PG_GETARG_POINTER(0);
33-
DictExample *d = (DictExample *) palloc0(sizeof(DictExample));
33+
DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
3434
bool stoploaded = false;
3535
ListCell *l;
3636

37-
d->stoplist.wordop = recode_and_lowerstr;
38-
3937
foreach(l, dictoptions)
4038
{
4139
DefElem *defel = (DefElem *) lfirst(l);
@@ -46,8 +44,7 @@ dsimple_init(PG_FUNCTION_ARGS)
4644
ereport(ERROR,
4745
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4846
errmsg("multiple StopWords parameters")));
49-
readstoplist(defGetString(defel), &d->stoplist);
50-
sortstoplist(&d->stoplist);
47+
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
5148
stoploaded = true;
5249
}
5350
else
@@ -65,16 +62,16 @@ dsimple_init(PG_FUNCTION_ARGS)
6562
Datum
6663
dsimple_lexize(PG_FUNCTION_ARGS)
6764
{
68-
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
65+
DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0);
6966
char *in = (char *) PG_GETARG_POINTER(1);
7067
int32 len = PG_GETARG_INT32(2);
71-
char *txt = lowerstr_with_len(in, len);
68+
char *txt;
7269
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
7370

71+
txt = lowerstr_with_len(in, len);
72+
7473
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
75-
{
7674
pfree(txt);
77-
}
7875
else
7976
res[0].lexeme = txt;
8077

src/backend/tsearch/dict_synonym.c

Lines changed: 48 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.2 2007/08/22 04:13:15 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -20,9 +20,6 @@
2020
#include "tsearch/ts_utils.h"
2121
#include "utils/builtins.h"
2222

23-
24-
#define SYNBUFLEN 4096
25-
2623
typedef struct
2724
{
2825
char *in;
@@ -31,23 +28,34 @@ typedef struct
3128

3229
typedef struct
3330
{
34-
int len;
31+
int len; /* length of syn array */
3532
Syn *syn;
3633
} DictSyn;
3734

35+
/*
36+
* Finds the next whitespace-delimited word within the 'in' string.
37+
* Returns a pointer to the first character of the word, and a pointer
38+
* to the next byte after the last character in the word (in *end).
39+
*/
3840
static char *
3941
findwrd(char *in, char **end)
4042
{
4143
char *start;
4244

43-
*end = NULL;
45+
/* Skip leading spaces */
4446
while (*in && t_isspace(in))
4547
in += pg_mblen(in);
4648

49+
/* Return NULL on empty lines */
4750
if (*in == '\0')
51+
{
52+
*end = NULL;
4853
return NULL;
54+
}
55+
4956
start = in;
5057

58+
/* Find end of word */
5159
while (*in && !t_isspace(in))
5260
in += pg_mblen(in);
5361

@@ -70,12 +78,11 @@ dsynonym_init(PG_FUNCTION_ARGS)
7078
ListCell *l;
7179
char *filename = NULL;
7280
FILE *fin;
73-
char buf[SYNBUFLEN];
7481
char *starti,
7582
*starto,
7683
*end = NULL;
7784
int cur = 0;
78-
int slen;
85+
char *line = NULL;
7986

8087
foreach(l, dictoptions)
8188
{
@@ -105,10 +112,33 @@ dsynonym_init(PG_FUNCTION_ARGS)
105112

106113
d = (DictSyn *) palloc0(sizeof(DictSyn));
107114

108-
while (fgets(buf, SYNBUFLEN, fin))
115+
while ((line = t_readline(fin)) != NULL)
109116
{
110-
slen = strlen(buf);
111-
pg_verifymbstr(buf, slen, false);
117+
starti = findwrd(line, &end);
118+
if (!starti)
119+
{
120+
/* Empty line */
121+
goto skipline;
122+
}
123+
*end = '\0';
124+
if (end >= line + strlen(line))
125+
{
126+
/* A line with only one word. Ignore silently. */
127+
goto skipline;
128+
}
129+
130+
starto = findwrd(end + 1, &end);
131+
if (!starto)
132+
{
133+
/* A line with only one word. Ignore silently. */
134+
goto skipline;
135+
}
136+
*end = '\0';
137+
138+
/* starti now points to the first word, and starto to the second
139+
* word on the line, with a \0 terminator at the end of both words.
140+
*/
141+
112142
if (cur == d->len)
113143
{
114144
if (d->len == 0)
@@ -123,36 +153,19 @@ dsynonym_init(PG_FUNCTION_ARGS)
123153
}
124154
}
125155

126-
starti = findwrd(buf, &end);
127-
if (!starti)
128-
continue;
129-
*end = '\0';
130-
if (end >= buf + slen)
131-
continue;
132-
133-
starto = findwrd(end + 1, &end);
134-
if (!starto)
135-
continue;
136-
*end = '\0';
137-
138-
d->syn[cur].in = recode_and_lowerstr(starti);
139-
d->syn[cur].out = recode_and_lowerstr(starto);
140-
if (!(d->syn[cur].in && d->syn[cur].out))
141-
{
142-
FreeFile(fin);
143-
ereport(ERROR,
144-
(errcode(ERRCODE_OUT_OF_MEMORY),
145-
errmsg("out of memory")));
146-
}
156+
d->syn[cur].in = lowerstr(starti);
157+
d->syn[cur].out = lowerstr(starto);
147158

148159
cur++;
160+
161+
skipline:
162+
pfree(line);
149163
}
150164

151165
FreeFile(fin);
152166

153167
d->len = cur;
154-
if (cur > 1)
155-
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
168+
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
156169

157170
PG_RETURN_POINTER(d);
158171
}
@@ -179,8 +192,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
179192
if (!found)
180193
PG_RETURN_POINTER(NULL);
181194

182-
res = palloc(sizeof(TSLexeme) * 2);
183-
memset(res, 0, sizeof(TSLexeme) * 2);
195+
res = palloc0(sizeof(TSLexeme) * 2);
184196
res[0].lexeme = pstrdup(found->out);
185197

186198
PG_RETURN_POINTER(res);

src/backend/tsearch/dict_thesaurus.c

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -170,10 +170,10 @@ static void
170170
thesaurusRead(char *filename, DictThesaurus * d)
171171
{
172172
FILE *fh;
173-
char str[BUFSIZ];
174173
int lineno = 0;
175174
uint16 idsubst = 0;
176175
bool useasis = false;
176+
char *line;
177177

178178
filename = get_tsearch_config_filename(filename, "ths");
179179
fh = AllocateFile(filename, "r");
@@ -183,27 +183,28 @@ thesaurusRead(char *filename, DictThesaurus * d)
183183
errmsg("could not open thesaurus file \"%s\": %m",
184184
filename)));
185185

186-
while (fgets(str, sizeof(str), fh))
186+
while ((line = t_readline(fh)) != NULL)
187187
{
188-
char *ptr,
189-
*recoded;
188+
char *ptr;
190189
int state = TR_WAITLEX;
191190
char *beginwrd = NULL;
192191
uint16 posinsubst = 0;
193192
uint16 nwrd = 0;
194193

195-
ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
196-
GetDatabaseEncoding(), PG_UTF8);
197-
if (recoded == NULL)
198-
elog(ERROR, "encoding conversion failed");
199-
200194
lineno++;
201195

202-
/* is it comment ? */
203-
while (t_isspace(ptr))
196+
ptr = line;
197+
198+
/* is it a comment? */
199+
while (*ptr && t_isspace(ptr))
204200
ptr += pg_mblen(ptr);
205-
if (t_iseq(recoded, '#') || *recoded == '\0' || t_iseq(recoded, '\n') || t_iseq(recoded, '\r'))
201+
202+
if (t_iseq(ptr, '#') || *ptr == '\0' ||
203+
t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
204+
{
205+
pfree(line);
206206
continue;
207+
}
207208

208209
while (*ptr)
209210
{
@@ -301,8 +302,7 @@ thesaurusRead(char *filename, DictThesaurus * d)
301302
lineno, filename)));
302303
}
303304

304-
if (recoded != str)
305-
pfree(recoded);
305+
pfree(line);
306306
}
307307

308308
d->nsubst = idsubst;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy