Skip to content

Commit eb0a773

Browse files
committed
Perform post-escaping encoding validity checks on SQL literals and COPY input
so that invalidly encoded data cannot enter the database by these means.
1 parent 22b613e commit eb0a773

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

src/backend/commands/copy.c

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.286 2007/09/07 20:59:26 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.287 2007/09/12 20:49:27 adunstan Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -2685,6 +2685,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
26852685
char *start_ptr;
26862686
char *end_ptr;
26872687
int input_len;
2688+
bool saw_high_bit = false;
26882689

26892690
/* Make sure space remains in fieldvals[] */
26902691
if (fieldno >= maxfields)
@@ -2749,6 +2750,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27492750
}
27502751
}
27512752
c = val & 0377;
2753+
if (IS_HIGHBIT_SET(c))
2754+
saw_high_bit = true;
27522755
}
27532756
break;
27542757
case 'x':
@@ -2772,6 +2775,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27722775
}
27732776
}
27742777
c = val & 0xff;
2778+
if (IS_HIGHBIT_SET(c))
2779+
saw_high_bit = true;
27752780
}
27762781
}
27772782
break;
@@ -2799,7 +2804,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
27992804
* literally
28002805
*/
28012806
}
2802-
}
2807+
}
28032808

28042809
/* Add c to output string */
28052810
*output_ptr++ = c;
@@ -2808,6 +2813,16 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
28082813
/* Terminate attribute value in output area */
28092814
*output_ptr++ = '\0';
28102815

2816+
/* If we de-escaped a char with the high bit set, make sure
2817+
* we still have valid data for the db encoding. Avoid calling strlen
2818+
* here for the sake of efficiency.
2819+
*/
2820+
if (saw_high_bit)
2821+
{
2822+
char *fld = fieldvals[fieldno];
2823+
pg_verifymbstr(fld, output_ptr - (fld + 1), false);
2824+
}
2825+
28112826
/* Check whether raw input matched null marker */
28122827
input_len = end_ptr - start_ptr;
28132828
if (input_len == cstate->null_print_len &&

src/backend/parser/scan.l

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.140 2007/08/12 20:18:06 tgl Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -60,6 +60,7 @@ bool escape_string_warning = true;
6060
bool standard_conforming_strings = false;
6161

6262
static bool warn_on_first_escape;
63+
static bool saw_high_bit = false;
6364

6465
/*
6566
* literalbuf is used to accumulate literal values when multiple rules
@@ -426,6 +427,7 @@ other .
426427

427428
{xqstart} {
428429
warn_on_first_escape = true;
430+
saw_high_bit = false;
429431
SET_YYLLOC();
430432
if (standard_conforming_strings)
431433
BEGIN(xq);
@@ -435,6 +437,7 @@ other .
435437
}
436438
{xestart} {
437439
warn_on_first_escape = false;
440+
saw_high_bit = false;
438441
SET_YYLLOC();
439442
BEGIN(xe);
440443
startlit();
@@ -443,6 +446,11 @@ other .
443446
<xq,xe>{quotefail} {
444447
yyless(1);
445448
BEGIN(INITIAL);
449+
/* check that the data remains valid if it might have been
450+
* made invalid by unescaping any chars.
451+
*/
452+
if (saw_high_bit)
453+
pg_verifymbstr(literalbuf, literallen, false);
446454
yylval.str = litbufdup();
447455
return SCONST;
448456
}
@@ -475,12 +483,16 @@ other .
475483

476484
check_escape_warning();
477485
addlitchar(c);
486+
if (IS_HIGHBIT_SET(c))
487+
saw_high_bit = true;
478488
}
479489
<xe>{xehexesc} {
480490
unsigned char c = strtoul(yytext+2, NULL, 16);
481491

482492
check_escape_warning();
483493
addlitchar(c);
494+
if (IS_HIGHBIT_SET(c))
495+
saw_high_bit = true;
484496
}
485497
<xq,xe>{quotecontinue} {
486498
/* ignore */
@@ -892,6 +904,14 @@ litbufdup(void)
892904
static unsigned char
893905
unescape_single_char(unsigned char c)
894906
{
907+
/* Normally we wouldn't expect to see \n where n has its high bit set
908+
* but we set the flag to check the string if we do get it, so
909+
* that this doesn't become a way of getting around the coding validity
910+
* checks.
911+
*/
912+
if (IS_HIGHBIT_SET(c))
913+
saw_high_bit = true;
914+
895915
switch (c)
896916
{
897917
case 'b':

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy