Skip to content

Commit 1ea47dd

Browse files
committed
Fix shared tsvector/tsquery input code so that we don't say "syntax error in tsvector" when we are really parsing a tsquery. Report the bogus input, too. Make the style of some related error messages more consistent.
1 parent dfc6f13 commit 1ea47dd

File tree

4 files changed

+81
-68
lines changed

4 files changed

+81
-68
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.7 2007/09/11 16:01:40 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.8 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state,
141141
{
142142
ereport(ERROR,
143143
(errcode(ERRCODE_SYNTAX_ERROR),
144-
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
144+
errmsg("syntax error in tsquery: \"%s\"",
145145
state->buffer)));
146146
}
147147
else if (!t_isspace(state->buf))
@@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state,
159159
else
160160
ereport(ERROR,
161161
(errcode(ERRCODE_SYNTAX_ERROR),
162-
errmsg("no operand in tsearch query: \"%s\"",
162+
errmsg("no operand in tsquery: \"%s\"",
163163
state->buffer)));
164164
}
165165
break;
@@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
232232
if (distance >= MAXSTRPOS)
233233
ereport(ERROR,
234234
(errcode(ERRCODE_SYNTAX_ERROR),
235-
errmsg("value is too big in tsearch query: \"%s\"",
235+
errmsg("value is too big in tsquery: \"%s\"",
236236
state->buffer)));
237237
if (lenval >= MAXSTRLEN)
238238
ereport(ERROR,
239239
(errcode(ERRCODE_SYNTAX_ERROR),
240-
errmsg("operand is too long in tsearch query: \"%s\"",
240+
errmsg("operand is too long in tsquery: \"%s\"",
241241
state->buffer)));
242242

243243
tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
@@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
264264
if (lenval >= MAXSTRLEN)
265265
ereport(ERROR,
266266
(errcode(ERRCODE_SYNTAX_ERROR),
267-
errmsg("word is too long in tsearch query: \"%s\"",
267+
errmsg("word is too long in tsquery: \"%s\"",
268268
state->buffer)));
269269

270270
INIT_CRC32(valcrc);
@@ -372,7 +372,7 @@ makepol(TSQueryParserState state,
372372
default:
373373
ereport(ERROR,
374374
(errcode(ERRCODE_SYNTAX_ERROR),
375-
errmsg("syntax error in tsearch query: \"%s\"",
375+
errmsg("syntax error in tsquery: \"%s\"",
376376
state->buffer)));
377377
}
378378
}
@@ -478,7 +478,7 @@ parse_tsquery(char *buf,
478478
state.polstr = NIL;
479479

480480
/* init value parser's state */
481-
state.valstate = init_tsvector_parser(NULL, true);
481+
state.valstate = init_tsvector_parser(state.buffer, true, true);
482482

483483
/* init list of operand */
484484
state.sumlen = 0;

src/backend/utils/adt/tsvector.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS)
204204

205205
pg_verifymbstr(buf, strlen(buf), false);
206206

207-
state = init_tsvector_parser(buf, false);
207+
state = init_tsvector_parser(buf, false, false);
208208

209209
arrlen = 64;
210210
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS)
224224
if (cur - tmpbuf > MAXSTRPOS)
225225
ereport(ERROR,
226226
(errcode(ERRCODE_SYNTAX_ERROR),
227-
errmsg("position value too large")));
227+
errmsg("position value is too large")));
228228

229229
/*
230230
* Enlarge buffers if needed
@@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
496496
datalen += lex_len;
497497

498498
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
499-
elog(ERROR, "lexemes are unordered");
499+
elog(ERROR, "lexemes are misordered");
500500

501501
/* Receive positions */
502502

@@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
523523
{
524524
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
525525
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
526-
elog(ERROR, "position information is unordered");
526+
elog(ERROR, "position information is misordered");
527527
}
528528

529529
datalen += (npos + 1) * sizeof(WordEntry);

src/backend/utils/adt/tsvector_parser.c

Lines changed: 63 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.2 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -20,35 +20,49 @@
2020
#include "tsearch/ts_utils.h"
2121
#include "utils/memutils.h"
2222

23+
24+
/*
25+
* Private state of tsvector parser. Note that tsquery also uses this code to
26+
* parse its input, hence the boolean flags. The two flags are both true or
27+
* both false in current usage, but we keep them separate for clarity.
28+
* is_tsquery affects *only* the content of error messages.
29+
*/
2330
struct TSVectorParseStateData
2431
{
25-
char *prsbuf;
26-
char *word; /* buffer to hold the current word */
27-
int len; /* size in bytes allocated for 'word' */
28-
bool oprisdelim;
32+
char *prsbuf; /* next input character */
33+
char *bufstart; /* whole string (used only for errors) */
34+
char *word; /* buffer to hold the current word */
35+
int len; /* size in bytes allocated for 'word' */
36+
int eml; /* max bytes per character */
37+
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
38+
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
2939
};
3040

41+
3142
/*
3243
* Initializes parser for the input string. If oprisdelim is set, the
3344
* following characters are treated as delimiters in addition to whitespace:
3445
* ! | & ( )
3546
*/
3647
TSVectorParseState
37-
init_tsvector_parser(char *input, bool oprisdelim)
48+
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
3849
{
3950
TSVectorParseState state;
4051

4152
state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
4253
state->prsbuf = input;
54+
state->bufstart = input;
4355
state->len = 32;
4456
state->word = (char *) palloc(state->len);
57+
state->eml = pg_database_encoding_max_length();
4558
state->oprisdelim = oprisdelim;
59+
state->is_tsquery = is_tsquery;
4660

4761
return state;
4862
}
4963

5064
/*
51-
* Reinitializes parser for parsing 'input', instead of previous input.
65+
* Reinitializes parser to parse 'input', instead of previous input.
5266
*/
5367
void
5468
reset_tsvector_parser(TSVectorParseState state, char *input)
@@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state)
6680
pfree(state);
6781
}
6882

83+
/* increase the size of 'word' if needed to hold one more character */
6984
#define RESIZEPRSBUF \
7085
do { \
71-
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
86+
int clen = curpos - state->word; \
87+
if ( clen + state->eml >= state->len ) \
7288
{ \
73-
int clen = curpos - state->word; \
7489
state->len *= 2; \
75-
state->word = (char*)repalloc( (void*)state->word, state->len ); \
90+
state->word = (char *) repalloc(state->word, state->len); \
7691
curpos = state->word + clen; \
7792
} \
7893
} while (0)
7994

80-
8195
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
8296

83-
/* Fills the output parameters, and returns true */
97+
/* Fills gettoken_tsvector's output parameters, and returns true */
8498
#define RETURN_TOKEN \
8599
do { \
86100
if (pos_ptr != NULL) \
@@ -111,18 +125,34 @@ do { \
111125
#define WAITPOSDELIM 7
112126
#define WAITCHARCMPLX 8
113127

128+
#define PRSSYNTAXERROR prssyntaxerror(state)
129+
130+
static void
131+
prssyntaxerror(TSVectorParseState state)
132+
{
133+
ereport(ERROR,
134+
(errcode(ERRCODE_SYNTAX_ERROR),
135+
state->is_tsquery ?
136+
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
137+
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
138+
}
139+
140+
114141
/*
115-
* Get next token from string being parsed. Returns false if
116-
* end of input string is reached, otherwise strval, lenval, pos_ptr
117-
* and poslen output parameters are filled in:
142+
* Get next token from string being parsed. Returns true if successful,
143+
* false if end of input string is reached. On success, these output
144+
* parameters are filled in:
118145
*
119-
* *strval token
120-
* *lenval length of*strval
146+
* *strval pointer to token
147+
* *lenval length of *strval
121148
* *pos_ptr pointer to a palloc'd array of positions and weights
122149
* associated with the token. If the caller is not interested
123150
* in the information, NULL can be supplied. Otherwise
124151
* the caller is responsible for pfreeing the array.
125152
* *poslen number of elements in *pos_ptr
153+
* *endptr scan resumption point
154+
*
155+
* Pass NULL for unwanted output parameters.
126156
*/
127157
bool
128158
gettoken_tsvector(TSVectorParseState state,
@@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state,
155185
oldstate = WAITENDWORD;
156186
}
157187
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
158-
ereport(ERROR,
159-
(errcode(ERRCODE_SYNTAX_ERROR),
160-
errmsg("syntax error in tsvector")));
188+
PRSSYNTAXERROR;
161189
else if (!t_isspace(state->prsbuf))
162190
{
163191
COPYCHAR(curpos, state->prsbuf);
@@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state,
170198
if (*(state->prsbuf) == '\0')
171199
ereport(ERROR,
172200
(errcode(ERRCODE_SYNTAX_ERROR),
173-
errmsg("there is no escaped character")));
201+
errmsg("there is no escaped character: \"%s\"",
202+
state->bufstart)));
174203
else
175204
{
176205
RESIZEPRSBUF;
@@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state,
192221
{
193222
RESIZEPRSBUF;
194223
if (curpos == state->word)
195-
ereport(ERROR,
196-
(errcode(ERRCODE_SYNTAX_ERROR),
197-
errmsg("syntax error in tsvector")));
224+
PRSSYNTAXERROR;
198225
*(curpos) = '\0';
199226
RETURN_TOKEN;
200227
}
201228
else if (t_iseq(state->prsbuf, ':'))
202229
{
203230
if (curpos == state->word)
204-
ereport(ERROR,
205-
(errcode(ERRCODE_SYNTAX_ERROR),
206-
errmsg("syntax error in tsvector")));
231+
PRSSYNTAXERROR;
207232
*(curpos) = '\0';
208233
if (state->oprisdelim)
209234
RETURN_TOKEN;
@@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state,
229254
oldstate = WAITENDCMPLX;
230255
}
231256
else if (*(state->prsbuf) == '\0')
232-
ereport(ERROR,
233-
(errcode(ERRCODE_SYNTAX_ERROR),
234-
errmsg("syntax error in tsvector")));
257+
PRSSYNTAXERROR;
235258
else
236259
{
237260
RESIZEPRSBUF;
@@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
253276
RESIZEPRSBUF;
254277
*(curpos) = '\0';
255278
if (curpos == state->word)
256-
ereport(ERROR,
257-
(errcode(ERRCODE_SYNTAX_ERROR),
258-
errmsg("syntax error in tsvector")));
279+
PRSSYNTAXERROR;
259280
if (state->oprisdelim)
260281
{
261282
/* state->prsbuf+=pg_mblen(state->prsbuf); */
@@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state,
290311
}
291312
npos++;
292313
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
314+
/* we cannot get here in tsquery, so no need for 2 errmsgs */
293315
if (WEP_GETPOS(pos[npos - 1]) == 0)
294316
ereport(ERROR,
295317
(errcode(ERRCODE_SYNTAX_ERROR),
296-
errmsg("wrong position info in tsvector")));
318+
errmsg("wrong position info in tsvector: \"%s\"",
319+
state->bufstart)));
297320
WEP_SETWEIGHT(pos[npos - 1], 0);
298321
statecode = WAITPOSDELIM;
299322
}
300323
else
301-
ereport(ERROR,
302-
(errcode(ERRCODE_SYNTAX_ERROR),
303-
errmsg("syntax error in tsvector")));
324+
PRSSYNTAXERROR;
304325
}
305326
else if (statecode == WAITPOSDELIM)
306327
{
@@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state,
309330
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
310331
{
311332
if (WEP_GETWEIGHT(pos[npos - 1]))
312-
ereport(ERROR,
313-
(errcode(ERRCODE_SYNTAX_ERROR),
314-
errmsg("syntax error in tsvector")));
333+
PRSSYNTAXERROR;
315334
WEP_SETWEIGHT(pos[npos - 1], 3);
316335
}
317336
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
318337
{
319338
if (WEP_GETWEIGHT(pos[npos - 1]))
320-
ereport(ERROR,
321-
(errcode(ERRCODE_SYNTAX_ERROR),
322-
errmsg("syntax error in tsvector")));
339+
PRSSYNTAXERROR;
323340
WEP_SETWEIGHT(pos[npos - 1], 2);
324341
}
325342
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
326343
{
327344
if (WEP_GETWEIGHT(pos[npos - 1]))
328-
ereport(ERROR,
329-
(errcode(ERRCODE_SYNTAX_ERROR),
330-
errmsg("syntax error in tsvector")));
345+
PRSSYNTAXERROR;
331346
WEP_SETWEIGHT(pos[npos - 1], 1);
332347
}
333348
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
334349
{
335350
if (WEP_GETWEIGHT(pos[npos - 1]))
336-
ereport(ERROR,
337-
(errcode(ERRCODE_SYNTAX_ERROR),
338-
errmsg("syntax error in tsvector")));
351+
PRSSYNTAXERROR;
339352
WEP_SETWEIGHT(pos[npos - 1], 0);
340353
}
341354
else if (t_isspace(state->prsbuf) ||
342355
*(state->prsbuf) == '\0')
343356
RETURN_TOKEN;
344357
else if (!t_isdigit(state->prsbuf))
345-
ereport(ERROR,
346-
(errcode(ERRCODE_SYNTAX_ERROR),
347-
errmsg("syntax error in tsvector")));
358+
PRSSYNTAXERROR;
348359
}
349360
else /* internal error */
350361
elog(ERROR, "internal error in gettoken_tsvector");

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy