Skip to content

Commit 799ac99

Browse files
committed
Sync psql's scanner with recent changes in backend scanner's flex rules.
Marko Kreen, Tom Lane
1 parent 3686bcb commit 799ac99

File tree

2 files changed: 47 additions and 7 deletions

2 files changed: 47 additions and 7 deletions

src/backend/parser/scan.l

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.161 2009/09/25 21:13:06 petere Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -571,18 +571,16 @@ other .
571571

572572
BEGIN(xe);
573573
}
574-
<xeu>. |
575-
<xeu>\n |
574+
<xeu>. { yyerror("invalid Unicode surrogate pair"); }
575+
<xeu>\n { yyerror("invalid Unicode surrogate pair"); }
576576
<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
577-
578577
<xe,xeu>{xeunicodefail} {
579578
ereport(ERROR,
580579
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
581580
errmsg("invalid Unicode escape"),
582581
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
583582
lexer_errposition()));
584-
}
585-
583+
}
586584
<xe>{xeescape} {
587585
if (yytext[1] == '\'')
588586
{

src/bin/psql/psqlscan.l

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
* Portions Copyright (c) 1994, Regents of the University of California
3434
*
3535
* IDENTIFICATION
36-
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.28 2009/01/01 17:23:55 momjian Exp $
36+
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
3737
*
3838
*-------------------------------------------------------------------------
3939
*/
@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
117117
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
118118
char **txtcopy);
119119
static void emit(const char *txt, int len);
120+
static bool is_utf16_surrogate_first(uint32 c);
120121

121122
#define ECHO emit(yytext, yyleng)
122123

@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
158159
* <xdolq> $foo$ quoted strings
159160
* <xui> quoted identifier with Unicode escapes
160161
* <xus> quoted string with Unicode escapes
162+
* <xeu> Unicode surrogate pair in extended quoted string
161163
*/
162164

163165
%x xb
@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
169171
%x xdolq
170172
%x xui
171173
%x xus
174+
%x xeu
172175
/* Additional exclusive states for psql only: lex backslash commands */
173176
%x xslashcmd
174177
%x xslasharg
@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
192195
* did not end with a newline.
193196
*
194197
* XXX perhaps \f (formfeed) should be treated as a newline as well?
198+
*
199+
* XXX if you change the set of whitespace characters, fix scanner_isspace()
200+
* to agree, and see also the plpgsql lexer.
195201
*/
196202

197203
space [ \t\n\r\f]
@@ -253,6 +259,8 @@ xeinside [^\\']+
253259
xeescape [\\][^0-7]
254260
xeoctesc [\\][0-7]{1,3}
255261
xehexesc [\\]x[0-9A-Fa-f]{1,2}
262+
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
263+
xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
256264
257265
/* Extended quote
258266
* xqdouble implements embedded quote, ''''
@@ -334,6 +342,10 @@ identifier {ident_start}{ident_cont}*
334342

335343
typecast "::"
336344

345+
/* these two token types are used by PL/pgsql, though not in core SQL */
346+
dot_dot \.\.
347+
colon_equals ":="
348+
337349
/*
338350
* "self" is the set of chars that should be returned as single-character
339351
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
@@ -511,6 +523,22 @@ other .
511523
<xe>{xeinside} {
512524
ECHO;
513525
}
526+
<xe>{xeunicode} {
527+
uint32 c = strtoul(yytext+2, NULL, 16);
528+
529+
if (is_utf16_surrogate_first(c))
530+
BEGIN(xeu);
531+
ECHO;
532+
}
533+
<xeu>{xeunicode} {
534+
BEGIN(xe);
535+
ECHO;
536+
}
537+
<xeu>. { ECHO; }
538+
<xeu>\n { ECHO; }
539+
<xe,xeu>{xeunicodefail} {
540+
ECHO;
541+
}
514542
<xe>{xeescape} {
515543
ECHO;
516544
}
@@ -605,6 +633,14 @@ other .
605633
ECHO;
606634
}
607635

636+
{dot_dot} {
637+
ECHO;
638+
}
639+
640+
{colon_equals} {
641+
ECHO;
642+
}
643+
608644
/*
609645
* These rules are specific to psql --- they implement parenthesis
610646
* counting and detection of command-ending semicolon. These must
@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
16901726
}
16911727
}
16921728
}
1729+
1730+
static bool
1731+
is_utf16_surrogate_first(uint32 c)
1732+
{
1733+
return (c >= 0xD800 && c <= 0xDBFF);
1734+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy