Skip to content

Commit 22c9222

Browse files
committed
Fix de-escaping checks so that we will reject \000 as well as other invalidly
encoded sequences. Per discussion of a couple of days ago.
1 parent: c1c40e5 · commit: 22c9222

File tree

2 files changed

+28
-26
lines changed

2 files changed

+28
-26
lines changed

src/backend/commands/copy.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.307 2009/03/31 22:12:46 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.308 2009/04/19 21:08:54 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -2718,7 +2718,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27182718
char *start_ptr;
27192719
char *end_ptr;
27202720
int input_len;
2721-
bool saw_high_bit = false;
2721+
bool saw_non_ascii = false;
27222722

27232723
/* Make sure space remains in fieldvals[] */
27242724
if (fieldno >= maxfields)
@@ -2783,8 +2783,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27832783
}
27842784
}
27852785
c = val & 0377;
2786-
if (IS_HIGHBIT_SET(c))
2787-
saw_high_bit = true;
2786+
if (c == '\0' || IS_HIGHBIT_SET(c))
2787+
saw_non_ascii = true;
27882788
}
27892789
break;
27902790
case 'x':
@@ -2808,8 +2808,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
28082808
}
28092809
}
28102810
c = val & 0xff;
2811-
if (IS_HIGHBIT_SET(c))
2812-
saw_high_bit = true;
2811+
if (c == '\0' || IS_HIGHBIT_SET(c))
2812+
saw_non_ascii = true;
28132813
}
28142814
}
28152815
break;
@@ -2847,11 +2847,11 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
28472847
*output_ptr++ = '\0';
28482848

28492849
/*
2850-
* If we de-escaped a char with the high bit set, make sure we still
2850+
* If we de-escaped a non-7-bit-ASCII char, make sure we still
28512851
* have valid data for the db encoding. Avoid calling strlen here for
28522852
* the sake of efficiency.
28532853
*/
2854-
if (saw_high_bit)
2854+
if (saw_non_ascii)
28552855
{
28562856
char *fld = fieldvals[fieldno];
28572857

src/backend/parser/scan.l

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.150 2009/04/14 22:18:47 tgl Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.151 2009/04/19 21:08:54 tgl Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -60,7 +60,7 @@ bool escape_string_warning = true;
6060
bool standard_conforming_strings = false;
6161

6262
static bool warn_on_first_escape;
63-
static bool saw_high_bit = false;
63+
static bool saw_non_ascii = false;
6464

6565
/*
6666
* literalbuf is used to accumulate literal values when multiple rules
@@ -453,7 +453,7 @@ other .
453453

454454
{xqstart} {
455455
warn_on_first_escape = true;
456-
saw_high_bit = false;
456+
saw_non_ascii = false;
457457
SET_YYLLOC();
458458
if (standard_conforming_strings)
459459
BEGIN(xq);
@@ -463,7 +463,7 @@ other .
463463
}
464464
{xestart} {
465465
warn_on_first_escape = false;
466-
saw_high_bit = false;
466+
saw_non_ascii = false;
467467
SET_YYLLOC();
468468
BEGIN(xe);
469469
startlit();
@@ -477,10 +477,11 @@ other .
477477
<xq,xe>{quotefail} {
478478
yyless(1);
479479
BEGIN(INITIAL);
480-
/* check that the data remains valid if it might have been
480+
/*
481+
* check that the data remains valid if it might have been
481482
* made invalid by unescaping any chars.
482483
*/
483-
if (saw_high_bit)
484+
if (saw_non_ascii)
484485
pg_verifymbstr(literalbuf, literallen, false);
485486
yylval.str = litbufdup();
486487
return SCONST;
@@ -526,16 +527,16 @@ other .
526527

527528
check_escape_warning();
528529
addlitchar(c);
529-
if (IS_HIGHBIT_SET(c))
530-
saw_high_bit = true;
530+
if (c == '\0' || IS_HIGHBIT_SET(c))
531+
saw_non_ascii = true;
531532
}
532533
<xe>{xehexesc} {
533534
unsigned char c = strtoul(yytext+2, NULL, 16);
534535

535536
check_escape_warning();
536537
addlitchar(c);
537-
if (IS_HIGHBIT_SET(c))
538-
saw_high_bit = true;
538+
if (c == '\0' || IS_HIGHBIT_SET(c))
539+
saw_non_ascii = true;
539540
}
540541
<xq,xe,xus>{quotecontinue} {
541542
/* ignore */
@@ -1083,21 +1084,18 @@ litbuf_udeescape(unsigned char escape)
10831084
}
10841085

10851086
*out = '\0';
1087+
/*
1088+
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1089+
* codes; but it's probably not worth the trouble, since this isn't
1090+
* likely to be a performance-critical path.
1091+
*/
10861092
pg_verifymbstr(new, out - new, false);
10871093
return new;
10881094
}
10891095

10901096
static unsigned char
10911097
unescape_single_char(unsigned char c)
10921098
{
1093-
/* Normally we wouldn't expect to see \n where n has its high bit set
1094-
* but we set the flag to check the string if we do get it, so
1095-
* that this doesn't become a way of getting around the coding validity
1096-
* checks.
1097-
*/
1098-
if (IS_HIGHBIT_SET(c))
1099-
saw_high_bit = true;
1100-
11011099
switch (c)
11021100
{
11031101
case 'b':
@@ -1111,6 +1109,10 @@ unescape_single_char(unsigned char c)
11111109
case 't':
11121110
return '\t';
11131111
default:
1112+
/* check for backslash followed by non-7-bit-ASCII */
1113+
if (c == '\0' || IS_HIGHBIT_SET(c))
1114+
saw_non_ascii = true;
1115+
11141116
return c;
11151117
}
11161118
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy