Skip to content

Commit 7d43ca6

Browse files
committed
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
1 parent 439776b commit 7d43ca6

File tree

5 files changed

+117
-1
lines changed

5 files changed

+117
-1
lines changed

src/common/wchar.c

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,25 @@
1616
#include "utils/ascii.h"
1717

1818

19+
/*
20+
* In today's multibyte encodings other than UTF8, this two-byte sequence
21+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
22+
*
23+
* For historical reasons, several verifychar implementations opt to reject
24+
* this pair specifically. Byte pair range constraints, in encoding
25+
* originator documentation, always excluded this pair. No core conversion
26+
* could translate it. However, longstanding verifychar implementations
27+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
28+
* pairs not valid per encoding originator documentation. To avoid tightening
29+
* core or non-core conversions in a security patch, we sought this one pair.
30+
*
31+
* PQescapeString() historically used spaces for BYTE1; many other values
32+
* could suffice for BYTE1.
33+
*/
34+
#define NONUTF8_INVALID_BYTE0 (0x8d)
35+
#define NONUTF8_INVALID_BYTE1 (' ')
36+
37+
1938
/*
2039
* Operations on multi-byte encodings are driven by a table of helper
2140
* functions.
@@ -1465,6 +1484,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
14651484
if (len < l)
14661485
return -1;
14671486

1487+
if (l == 2 &&
1488+
s[0] == NONUTF8_INVALID_BYTE0 &&
1489+
s[1] == NONUTF8_INVALID_BYTE1)
1490+
return -1;
1491+
14681492
while (--l > 0)
14691493
{
14701494
if (*++s == '\0')
@@ -1514,6 +1538,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
15141538
if (len < l)
15151539
return -1;
15161540

1541+
if (l == 2 &&
1542+
s[0] == NONUTF8_INVALID_BYTE0 &&
1543+
s[1] == NONUTF8_INVALID_BYTE1)
1544+
return -1;
1545+
15171546
while (--l > 0)
15181547
{
15191548
if (*++s == '\0')
@@ -1563,6 +1592,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
15631592
if (len < l)
15641593
return -1;
15651594

1595+
if (l == 2 &&
1596+
s[0] == NONUTF8_INVALID_BYTE0 &&
1597+
s[1] == NONUTF8_INVALID_BYTE1)
1598+
return -1;
1599+
15661600
while (--l > 0)
15671601
{
15681602
if (*++s == '\0')
@@ -2007,6 +2041,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
20072041
}
20082042

20092043

2044+
/*
2045+
* Fills the provided buffer with two bytes such that:
2046+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
2047+
*/
2048+
void
2049+
pg_encoding_set_invalid(int encoding, char *dst)
2050+
{
2051+
Assert(pg_encoding_max_length(encoding) > 1);
2052+
2053+
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2054+
dst[1] = NONUTF8_INVALID_BYTE1;
2055+
}
2056+
20102057
/*
20112058
*-------------------------------------------------------------------
20122059
* encoding info table
@@ -2128,5 +2175,11 @@ pg_encoding_max_length(int encoding)
21282175
{
21292176
Assert(PG_VALID_ENCODING(encoding));
21302177

2131-
return pg_wchar_table[encoding].maxmblen;
2178+
/*
2179+
* Check for the encoding despite the assert, due to some mingw versions
2180+
* otherwise issuing bogus warnings.
2181+
*/
2182+
return PG_VALID_ENCODING(encoding) ?
2183+
pg_wchar_table[encoding].maxmblen :
2184+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
21322185
}

src/include/mb/pg_wchar.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,7 @@ extern int pg_valid_server_encoding_id(int encoding);
662662
* (in addition to the ones just above). The constant tables declared
663663
* earlier in this file are also available from libpgcommon.
664664
*/
665+
extern void pg_encoding_set_invalid(int encoding, char *dst);
665666
extern int pg_encoding_mblen(int encoding, const char *mbstr);
666667
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
667668
extern int pg_encoding_dsplen(int encoding, const char *mbstr);

src/test/regress/expected/conversion.out

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
\getenv libdir PG_LIBDIR
66
\getenv dlsuffix PG_DLSUFFIX
77
\set regresslib :libdir '/regress' :dlsuffix
8+
CREATE FUNCTION test_enc_setup() RETURNS void
9+
AS :'regresslib', 'test_enc_setup'
10+
LANGUAGE C STRICT;
11+
SELECT FROM test_enc_setup();
12+
--
13+
(1 row)
14+
815
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
916
AS :'regresslib', 'test_enc_conversion'
1017
LANGUAGE C STRICT;

src/test/regress/regress.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
11051105
PG_RETURN_NULL();
11061106
}
11071107

1108+
/* one-time tests for encoding infrastructure */
1109+
PG_FUNCTION_INFO_V1(test_enc_setup);
1110+
Datum
1111+
test_enc_setup(PG_FUNCTION_ARGS)
1112+
{
1113+
/* Test pg_encoding_set_invalid() */
1114+
for (int i = 0; i < _PG_LAST_ENCODING_; i++)
1115+
{
1116+
char buf[2],
1117+
bigbuf[16];
1118+
int len,
1119+
mblen,
1120+
valid;
1121+
1122+
if (pg_encoding_max_length(i) == 1)
1123+
continue;
1124+
pg_encoding_set_invalid(i, buf);
1125+
len = strnlen(buf, 2);
1126+
if (len != 2)
1127+
elog(WARNING,
1128+
"official invalid string for encoding \"%s\" has length %d",
1129+
pg_enc2name_tbl[i].name, len);
1130+
mblen = pg_encoding_mblen(i, buf);
1131+
if (mblen != 2)
1132+
elog(WARNING,
1133+
"official invalid string for encoding \"%s\" has mblen %d",
1134+
pg_enc2name_tbl[i].name, mblen);
1135+
valid = pg_encoding_verifymbstr(i, buf, len);
1136+
if (valid != 0)
1137+
elog(WARNING,
1138+
"official invalid string for encoding \"%s\" has valid prefix of length %d",
1139+
pg_enc2name_tbl[i].name, valid);
1140+
valid = pg_encoding_verifymbstr(i, buf, 1);
1141+
if (valid != 0)
1142+
elog(WARNING,
1143+
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
1144+
pg_enc2name_tbl[i].name, valid);
1145+
memset(bigbuf, ' ', sizeof(bigbuf));
1146+
bigbuf[0] = buf[0];
1147+
bigbuf[1] = buf[1];
1148+
valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
1149+
if (valid != 0)
1150+
elog(WARNING,
1151+
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
1152+
pg_enc2name_tbl[i].name, valid);
1153+
}
1154+
1155+
PG_RETURN_VOID();
1156+
}
1157+
11081158
/*
11091159
* Call an encoding conversion or verification function.
11101160
*

src/test/regress/sql/conversion.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
\set regresslib :libdir '/regress' :dlsuffix
1010

11+
CREATE FUNCTION test_enc_setup() RETURNS void
12+
AS :'regresslib', 'test_enc_setup'
13+
LANGUAGE C STRICT;
14+
SELECT FROM test_enc_setup();
15+
1116
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
1217
AS :'regresslib', 'test_enc_conversion'
1318
LANGUAGE C STRICT;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy