Skip to content

Commit 8e35bbd

Browse files
committed
Remove fixed-size literal buffer from scan.l, and repair
boundary-condition bug in myinput() which caused flex scanner to fail on tokens larger than a bufferload. Turns out flex doesn't want null-terminated input ... and if it gives you a 1-character buffer, you'd better supply a character, not a null, lest you be thought to be reporting end of input.
1 parent d07766f commit 8e35bbd

File tree

2 files changed

+73
-62
lines changed

2 files changed

+73
-62
lines changed

src/backend/parser/Makefile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Makefile for parser
55
#
66
# IDENTIFICATION
7-
# $Header: /cvsroot/pgsql/src/backend/parser/Makefile,v 1.20 1999/05/03 19:09:40 momjian Exp $
7+
# $Header: /cvsroot/pgsql/src/backend/parser/Makefile,v 1.21 1999/10/18 02:42:31 tgl Exp $
88
#
99
#-------------------------------------------------------------------------
1010

@@ -37,9 +37,7 @@ gram.c parse.h: gram.y
3737

3838
scan.c: scan.l
3939
$(LEX) $<
40-
sed -e 's/#define YY_BUF_SIZE .*/#define YY_BUF_SIZE 65536/' \
41-
<lex.yy.c >scan.c
42-
rm -f lex.yy.c
40+
mv lex.yy.c scan.c
4341

4442
# The following dependencies on parse.h are computed by
4543
# make depend, but we state them here explicitly anyway because

src/backend/parser/scan.l

Lines changed: 71 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.59 1999/10/09 01:32:38 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.60 1999/10/18 02:42:31 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -20,6 +20,7 @@
2020
#include <errno.h>
2121

2222
#include "postgres.h"
23+
2324
#include "miscadmin.h"
2425
#include "nodes/parsenodes.h"
2526
#include "nodes/pg_list.h"
@@ -29,16 +30,6 @@
2930
#include "parser/scansup.h"
3031
#include "utils/builtins.h"
3132

32-
#ifdef YY_READ_BUF_SIZE
33-
#undef YY_READ_BUF_SIZE
34-
#endif
35-
#define YY_READ_BUF_SIZE MAX_PARSE_BUFFER
36-
37-
#ifdef YY_READ_BUF_SIZE
38-
#undef YY_READ_BUF_SIZE
39-
#endif
40-
#define YY_READ_BUF_SIZE MAX_PARSE_BUFFER
41-
4233
extern char *parseString;
4334
static char *parseCh;
4435

@@ -47,9 +38,8 @@ static char *parseCh;
4738
#undef yywrap
4839
#endif /* yywrap */
4940

41+
/* set up my input handler --- need one flavor for flex, one for lex */
5042
#if defined(FLEX_SCANNER)
51-
/* MAX_PARSE_BUFFER is defined in miscadmin.h */
52-
#define YYLMAX MAX_PARSE_BUFFER
5343
#define YY_NO_UNPUT
5444
static int myinput(char* buf, int max);
5545
#undef YY_INPUT
@@ -63,8 +53,18 @@ void unput(char);
6353

6454
extern YYSTYPE yylval;
6555

66-
int llen;
67-
char literal[MAX_PARSE_BUFFER];
56+
/*
57+
* literalbuf is used to accumulate literal values when multiple rules
58+
* are needed to parse a single literal. Call startlit to reset buffer
59+
* to empty, addlit to add text. Note that the buffer is palloc'd and
60+
* starts life afresh on every parse cycle.
61+
*/
62+
static char *literalbuf; /* expandable buffer */
63+
static int literallen; /* actual current length */
64+
static int literalalloc; /* current allocated buffer size */
65+
66+
#define startlit() (literalbuf[0] = '\0', literallen = 0)
67+
static void addlit(char *ytext, int yleng);
6868

6969
%}
7070
/* OK, here is a short description of lex/flex rules behavior.
@@ -153,17 +153,14 @@ self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
153153
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
154154
operator {op_and_self}+
155155

156-
/* we do not allow unary minus in numbers.
157-
* instead we pass it verbatim to parser. there it gets
156+
/* we no longer allow unary minus in numbers.
157+
* instead we pass it separately to parser. there it gets
158158
* coerced via doNegate() -- Leon aug 20 1999
159159
*/
160160

161161
integer {digit}+
162162
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
163163
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
164-
/*
165-
real (((({digit}*\.{digit}+)|({digit}+\.{digit}*))([Ee][-+]?{digit}+)?)|({digit}+[Ee][-+]?{digit}+))
166-
*/
167164

168165
param \${integer}
169166

@@ -199,88 +196,77 @@ other .
199196

200197
{xbstart} {
201198
BEGIN(xb);
202-
llen = 0;
203-
*literal = '\0';
199+
startlit();
204200
}
205201
<xb>{xbstop} {
206202
char* endptr;
207203

208204
BEGIN(INITIAL);
209205
errno = 0;
210-
yylval.ival = strtol((char *)literal,&endptr,2);
206+
yylval.ival = strtol(literalbuf, &endptr, 2);
211207
if (*endptr != '\0' || errno == ERANGE)
212-
elog(ERROR,"Bad binary integer input '%s'",literal);
208+
elog(ERROR, "Bad binary integer input '%s'",
209+
literalbuf);
213210
return ICONST;
214211
}
215212
<xh>{xhinside} |
216213
<xb>{xbinside} {
217-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
218-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
219-
memcpy(literal+llen, yytext, yyleng+1);
220-
llen += yyleng;
214+
addlit(yytext, yyleng);
221215
}
222216
<xh>{xhcat} |
223217
<xb>{xbcat} {
224218
}
225219

226220
{xhstart} {
227221
BEGIN(xh);
228-
llen = 0;
229-
*literal = '\0';
222+
startlit();
230223
}
231224
<xh>{xhstop} {
232225
char* endptr;
233226

234227
BEGIN(INITIAL);
235228
errno = 0;
236-
yylval.ival = strtol((char *)literal,&endptr,16);
229+
yylval.ival = strtol(literalbuf, &endptr, 16);
237230
if (*endptr != '\0' || errno == ERANGE)
238-
elog(ERROR,"Bad hexadecimal integer input '%s'",literal);
231+
elog(ERROR, "Bad hexadecimal integer input '%s'",
232+
literalbuf);
239233
return ICONST;
240234
}
241235

242236
{xqstart} {
243237
BEGIN(xq);
244-
llen = 0;
245-
*literal = '\0';
238+
startlit();
246239
}
247240
<xq>{xqstop} {
248241
BEGIN(INITIAL);
249-
yylval.str = scanstr(literal);
242+
yylval.str = scanstr(literalbuf);
250243
return SCONST;
251244
}
252245
<xq>{xqdouble} |
253246
<xq>{xqinside} |
254247
<xq>{xqliteral} {
255-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
256-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
257-
memcpy(literal+llen, yytext, yyleng+1);
258-
llen += yyleng;
248+
addlit(yytext, yyleng);
259249
}
260250
<xq>{xqcat} {
261251
}
262252

263253

264254
{xdstart} {
265255
BEGIN(xd);
266-
llen = 0;
267-
*literal = '\0';
256+
startlit();
268257
}
269258
<xd>{xdstop} {
270259
BEGIN(INITIAL);
271-
yylval.str = pstrdup(literal);
260+
yylval.str = pstrdup(literalbuf);
272261
return IDENT;
273262
}
274263
<xd>{xdinside} {
275-
if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
276-
elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
277-
memcpy(literal+llen, yytext, yyleng+1);
278-
llen += yyleng;
264+
addlit(yytext, yyleng);
279265
}
280266

281267
{typecast} { return TYPECAST; }
282268

283-
{self} { return yytext[0]; }
269+
{self} { return yytext[0]; }
284270

285271
{operator} {
286272
if (strcmp((char*)yytext,"!=") == 0)
@@ -391,14 +377,37 @@ init_io()
391377
because input()/myinput() checks the non-nullness of parseCh
392378
to know when to pass the string to lex/flex */
393379
parseCh = NULL;
380+
381+
/* initialize literal buffer to a reasonable but expansible size */
382+
literalalloc = 128;
383+
literalbuf = (char *) palloc(literalalloc);
384+
startlit();
385+
394386
#if defined(FLEX_SCANNER)
395387
if (YY_CURRENT_BUFFER)
396388
yy_flush_buffer(YY_CURRENT_BUFFER);
397389
#endif /* FLEX_SCANNER */
398390
BEGIN INITIAL;
399391
}
400392

393+
static void
394+
addlit(char *ytext, int yleng)
395+
{
396+
/* enlarge buffer if needed */
397+
if ((literallen+yleng) >= literalalloc)
398+
{
399+
do {
400+
literalalloc *= 2;
401+
} while ((literallen+yleng) >= literalalloc);
402+
literalbuf = (char *) repalloc(literalbuf, literalalloc);
403+
}
404+
/* append data --- note we assume ytext is null-terminated */
405+
memcpy(literalbuf+literallen, ytext, yleng+1);
406+
literallen += yleng;
407+
}
408+
401409
#if !defined(FLEX_SCANNER)
410+
402411
/* get lex input from a string instead of from stdin */
403412
int
404413
input()
@@ -420,27 +429,31 @@ unput(char c)
420429
else if (c != 0)
421430
*--parseCh = c;
422431
}
432+
423433
#endif /* !defined(FLEX_SCANNER) */
424434

425435
#ifdef FLEX_SCANNER
436+
426437
/* input routine for flex to read input from a string instead of a file */
427438
static int
428439
myinput(char* buf, int max)
429440
{
430-
int len, copylen;
441+
int len;
431442

432443
if (parseCh == NULL)
433444
parseCh = parseString;
434445
len = strlen(parseCh); /* remaining data available */
435-
if (len >= max)
436-
copylen = max - 1;
437-
else
438-
copylen = len;
439-
if (copylen > 0)
440-
memcpy(buf, parseCh, copylen);
441-
buf[copylen] = '\0';
442-
parseCh += copylen;
443-
return copylen;
446+
/* Note: this code used to think that flex wants a null-terminated
447+
* string. It does NOT, and returning 1 less character than it asks
448+
* for will cause failure under the right boundary conditions. So
449+
* shut up and fill the buffer to the limit, you hear?
450+
*/
451+
if (len > max)
452+
len = max;
453+
if (len > 0)
454+
memcpy(buf, parseCh, len);
455+
parseCh += len;
456+
return len;
444457
}
445-
#endif /* FLEX_SCANNER */
446458

459+
#endif /* FLEX_SCANNER */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy