Skip to content

Commit 5ada9ef

Browse files
committed
Teach plpgsql's lexer about dollar-quoted literals.
Andrew Dunstan, some help from Tom Lane.
1 parent fa7a3ab commit 5ada9ef

File tree

4 files changed

+118
-30
lines changed

4 files changed

+118
-30
lines changed

src/pl/plpgsql/src/gram.y

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* procedural language
55
*
66
* IDENTIFICATION
7-
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.50 2003/12/23 00:01:57 tgl Exp $
7+
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.51 2004/02/25 18:10:51 tgl Exp $
88
*
99
* This software is copyrighted by Jan Wieck - Hamburg.
1010
*
@@ -1235,7 +1235,7 @@ stmt_raise : K_RAISE lno raise_level raise_msg raise_params ';'
12351235

12361236
raise_msg : T_STRING
12371237
{
1238-
$$ = strdup(yytext);
1238+
$$ = plpgsql_get_string_value();
12391239
}
12401240
;
12411241

src/pl/plpgsql/src/pl_exec.c

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* procedural language
44
*
55
* IDENTIFICATION
6-
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.96 2004/02/24 01:44:33 tgl Exp $
6+
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.97 2004/02/25 18:10:51 tgl Exp $
77
*
88
* This software is copyrighted by Jan Wieck - Hamburg.
99
*
@@ -1805,7 +1805,7 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
18051805
for (cp = stmt->message; *cp; cp++)
18061806
{
18071807
/*
1808-
* Occurences of a single % are replaced by the next argument's
1808+
* Occurrences of a single % are replaced by the next argument's
18091809
* external representation. Double %'s are converted to one %.
18101810
*/
18111811
if ((c[0] = *cp) == '%')
@@ -1834,21 +1834,6 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
18341834
continue;
18351835
}
18361836

1837-
/*
1838-
* Occurrences of single ' are removed. double ' are reduced to
1839-
* single ones. We must do this because the parameter stored by
1840-
* the grammar is the raw T_STRING input literal, rather than the
1841-
* de-lexed string as you might expect ...
1842-
*/
1843-
if (*cp == '\'')
1844-
{
1845-
cp++;
1846-
if (*cp == '\'')
1847-
plpgsql_dstring_append(&ds, c);
1848-
else
1849-
cp--;
1850-
continue;
1851-
}
18521837
plpgsql_dstring_append(&ds, c);
18531838
}
18541839

src/pl/plpgsql/src/plpgsql.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* procedural language
44
*
55
* IDENTIFICATION
6-
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.43 2003/11/29 19:52:12 pgsql Exp $
6+
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.44 2004/02/25 18:10:51 tgl Exp $
77
*
88
* This software is copyrighted by Jan Wieck - Hamburg.
99
*
@@ -694,5 +694,6 @@ extern void plpgsql_push_back_token(int token);
694694
extern int plpgsql_scanner_lineno(void);
695695
extern void plpgsql_scanner_init(const char *str, int functype);
696696
extern void plpgsql_scanner_finish(void);
697+
extern char *plpgsql_get_string_value(void);
697698

698699
#endif /* PLPGSQL_H */

src/pl/plpgsql/src/scan.l

Lines changed: 112 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* procedural language
55
*
66
* IDENTIFICATION
7-
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.31 2004/02/24 22:06:32 tgl Exp $
7+
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.32 2004/02/25 18:10:51 tgl Exp $
88
*
99
* This software is copyrighted by Jan Wieck - Hamburg.
1010
*
@@ -57,6 +57,8 @@ static int lookahead_token;
5757
static bool have_lookahead_token;
5858
static const char *cur_line_start;
5959
static int cur_line_num;
60+
static char *dolqstart; /* current $foo$ quote start string */
61+
static int dolqlen; /* signal to plpgsql_get_string_value */
6062

6163
int plpgsql_SpaceScanned = 0;
6264
%}
@@ -70,7 +72,9 @@ int plpgsql_SpaceScanned = 0;
7072
%option case-insensitive
7173

7274

73-
%x IN_STRING IN_COMMENT
75+
%x IN_STRING
76+
%x IN_COMMENT
77+
%x IN_DOLLARQUOTE
7478

7579
digit [0-9]
7680
ident_start [A-Za-z\200-\377_]
@@ -84,6 +88,14 @@ param \${digit}+
8488

8589
space [ \t\n\r\f]
8690

91+
/* $foo$ style quotes ("dollar quoting")
92+
* copied straight from the backend SQL parser
93+
*/
94+
dolq_start [A-Za-z\200-\377_]
95+
dolq_cont [A-Za-z\200-\377_0-9]
96+
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
97+
dolqinside [^$]+
98+
8799
%%
88100
/* ----------
89101
* Local variables in scanner to remember where
@@ -97,7 +109,7 @@ space [ \t\n\r\f]
97109
* Reset the state when entering the scanner
98110
* ----------
99111
*/
100-
BEGIN INITIAL;
112+
BEGIN(INITIAL);
101113
plpgsql_SpaceScanned = 0;
102114

103115
/* ----------
@@ -247,9 +259,9 @@ dump { return O_DUMP; }
247259
--[^\r\n]* ;
248260

249261
\/\* { start_lineno = plpgsql_scanner_lineno();
250-
BEGIN IN_COMMENT;
262+
BEGIN(IN_COMMENT);
251263
}
252-
<IN_COMMENT>\*\/ { BEGIN INITIAL; plpgsql_SpaceScanned = 1; }
264+
<IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = 1; }
253265
<IN_COMMENT>\n ;
254266
<IN_COMMENT>. ;
255267
<IN_COMMENT><<EOF>> {
@@ -260,7 +272,7 @@ dump { return O_DUMP; }
260272
}
261273

262274
/* ----------
263-
* Collect anything inside of ''s and return one STRING
275+
* Collect anything inside of ''s and return one STRING token
264276
*
265277
* Hacking yytext/yyleng here lets us avoid using yymore(), which is
266278
* a win for performance. It's safe because we know the underlying
@@ -270,15 +282,18 @@ dump { return O_DUMP; }
270282
' {
271283
start_lineno = plpgsql_scanner_lineno();
272284
start_charpos = yytext;
273-
BEGIN IN_STRING;
285+
BEGIN(IN_STRING);
274286
}
275287
<IN_STRING>\\. { }
276288
<IN_STRING>\\ { /* can only happen with \ at EOF */ }
277289
<IN_STRING>'' { }
278290
<IN_STRING>' {
279-
yyleng -= (yytext - start_charpos);
291+
/* tell plpgsql_get_string_value it's not a dollar quote */
292+
dolqlen = 0;
293+
/* adjust yytext/yyleng to describe whole string token */
294+
yyleng += (yytext - start_charpos);
280295
yytext = start_charpos;
281-
BEGIN INITIAL;
296+
BEGIN(INITIAL);
282297
return T_STRING;
283298
}
284299
<IN_STRING>[^'\\]+ { }
@@ -289,6 +304,43 @@ dump { return O_DUMP; }
289304
errmsg("unterminated string")));
290305
}
291306

307+
{dolqdelim} {
308+
start_lineno = plpgsql_scanner_lineno();
309+
start_charpos = yytext;
310+
dolqstart = pstrdup(yytext);
311+
BEGIN(IN_DOLLARQUOTE);
312+
}
313+
<IN_DOLLARQUOTE>{dolqdelim} {
314+
if (strcmp(yytext, dolqstart) == 0)
315+
{
316+
pfree(dolqstart);
317+
/* tell plpgsql_get_string_value it is a dollar quote */
318+
dolqlen = yyleng;
319+
/* adjust yytext/yyleng to describe whole string token */
320+
yyleng += (yytext - start_charpos);
321+
yytext = start_charpos;
322+
BEGIN(INITIAL);
323+
return T_STRING;
324+
}
325+
else
326+
{
327+
/*
328+
* When we fail to match $...$ to dolqstart, transfer
329+
* the $... part to the output, but put back the final
330+
* $ for rescanning. Consider $delim$...$junk$delim$
331+
*/
332+
yyless(yyleng-1);
333+
}
334+
}
335+
<IN_DOLLARQUOTE>{dolqinside} { }
336+
<IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ }
337+
<IN_DOLLARQUOTE><<EOF>> {
338+
plpgsql_error_lineno = start_lineno;
339+
ereport(ERROR,
340+
(errcode(ERRCODE_DATATYPE_MISMATCH),
341+
errmsg("unterminated dollar-quoted string")));
342+
}
343+
292344
/* ----------
293345
* Any unmatched character is returned as is
294346
* ----------
@@ -429,7 +481,6 @@ plpgsql_scanner_init(const char *str, int functype)
429481
BEGIN(INITIAL);
430482
}
431483

432-
433484
/*
434485
* Called after parsing is done to clean up after plpgsql_scanner_init()
435486
*/
@@ -439,3 +490,54 @@ plpgsql_scanner_finish(void)
439490
yy_delete_buffer(scanbufhandle);
440491
pfree(scanbuf);
441492
}
493+
494+
/*
495+
* Called after a T_STRING token is read to get the string literal's value
496+
* as a malloc'd string. (We make this a separate call because in many
497+
* scenarios there's no need to get the decoded value.)
498+
*
499+
* Note: we expect the literal to be the most recently lexed token. This
500+
* would not work well if we supported multiple-token pushback or if
501+
* plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
502+
*/
503+
char *
504+
plpgsql_get_string_value(void)
505+
{
506+
char *result;
507+
const char *cp;
508+
int len;
509+
510+
if (dolqlen > 0)
511+
{
512+
/* Token is a $foo$...$foo$ string */
513+
len = yyleng - 2 * dolqlen;
514+
Assert(len >= 0);
515+
result = (char *) malloc(len + 1);
516+
memcpy(result, yytext + dolqlen, len);
517+
result[len] = '\0';
518+
}
519+
else
520+
{
521+
/* Token is a '...' string */
522+
result = (char *) malloc(yyleng + 1); /* more than enough room */
523+
len = 0;
524+
for (cp = yytext; *cp; cp++)
525+
{
526+
if (*cp == '\'')
527+
{
528+
if (cp[1] == '\'')
529+
result[len++] = *cp++;
530+
/* else it must be string start or end quote */
531+
}
532+
else if (*cp == '\\')
533+
{
534+
if (cp[1] != '\0') /* just a paranoid check */
535+
result[len++] = *(++cp);
536+
}
537+
else
538+
result[len++] = *cp;
539+
}
540+
result[len] = '\0';
541+
}
542+
return result;
543+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy. © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy