Skip to content

Commit 703b3fd

Browse files
committed
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string. Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that. As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function. Author: Noah Misch <noah@leadboat.com> Reviewed-by: Andres Freund <andres@anarazel.de> Backpatch-through: 13 Security: CVE-2025-1094
1 parent 0fb4598 commit 703b3fd

File tree

5 files changed

+117
-1
lines changed

5 files changed

+117
-1
lines changed

src/common/wchar.c

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,25 @@
1616
#include "utils/ascii.h"
1717

1818

19+
/*
20+
* In today's multibyte encodings other than UTF8, this two-byte sequence
21+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
22+
*
23+
* For historical reasons, several verifychar implementations opt to reject
24+
* this pair specifically. Byte pair range constraints, in encoding
25+
* originator documentation, always excluded this pair. No core conversion
26+
* could translate it. However, longstanding verifychar implementations
27+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
28+
* pairs not valid per encoding originator documentation. To avoid tightening
29+
* core or non-core conversions in a security patch, we sought this one pair.
30+
*
31+
* PQescapeString() historically used spaces for BYTE1; many other values
32+
* could suffice for BYTE1.
33+
*/
34+
#define NONUTF8_INVALID_BYTE0 (0x8d)
35+
#define NONUTF8_INVALID_BYTE1 (' ')
36+
37+
1938
/*
2039
* Operations on multi-byte encodings are driven by a table of helper
2140
* functions.
@@ -1526,6 +1545,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
15261545
if (len < l)
15271546
return -1;
15281547

1548+
if (l == 2 &&
1549+
s[0] == NONUTF8_INVALID_BYTE0 &&
1550+
s[1] == NONUTF8_INVALID_BYTE1)
1551+
return -1;
1552+
15291553
while (--l > 0)
15301554
{
15311555
if (*++s == '\0')
@@ -1575,6 +1599,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
15751599
if (len < l)
15761600
return -1;
15771601

1602+
if (l == 2 &&
1603+
s[0] == NONUTF8_INVALID_BYTE0 &&
1604+
s[1] == NONUTF8_INVALID_BYTE1)
1605+
return -1;
1606+
15781607
while (--l > 0)
15791608
{
15801609
if (*++s == '\0')
@@ -1624,6 +1653,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
16241653
if (len < l)
16251654
return -1;
16261655

1656+
if (l == 2 &&
1657+
s[0] == NONUTF8_INVALID_BYTE0 &&
1658+
s[1] == NONUTF8_INVALID_BYTE1)
1659+
return -1;
1660+
16271661
while (--l > 0)
16281662
{
16291663
if (*++s == '\0')
@@ -2067,6 +2101,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
20672101
}
20682102

20692103

2104+
/*
2105+
* Fills the provided buffer with two bytes such that:
2106+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
2107+
*/
2108+
void
2109+
pg_encoding_set_invalid(int encoding, char *dst)
2110+
{
2111+
Assert(pg_encoding_max_length(encoding) > 1);
2112+
2113+
dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
2114+
dst[1] = NONUTF8_INVALID_BYTE1;
2115+
}
2116+
20702117
/*
20712118
*-------------------------------------------------------------------
20722119
* encoding info table
@@ -2189,5 +2236,11 @@ pg_encoding_max_length(int encoding)
21892236
{
21902237
Assert(PG_VALID_ENCODING(encoding));
21912238

2192-
return pg_wchar_table[encoding].maxmblen;
2239+
/*
2240+
* Check for the encoding despite the assert, due to some mingw versions
2241+
* otherwise issuing bogus warnings.
2242+
*/
2243+
return PG_VALID_ENCODING(encoding) ?
2244+
pg_wchar_table[encoding].maxmblen :
2245+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
21932246
}

src/include/mb/pg_wchar.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ extern int pg_valid_server_encoding_id(int encoding);
573573
* (in addition to the ones just above). The constant tables declared
574574
* earlier in this file are also available from libpgcommon.
575575
*/
576+
extern void pg_encoding_set_invalid(int encoding, char *dst);
576577
extern int pg_encoding_mblen(int encoding, const char *mbstr);
577578
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
578579
extern int pg_encoding_dsplen(int encoding, const char *mbstr);

src/test/regress/expected/conversion.out

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
\getenv libdir PG_LIBDIR
66
\getenv dlsuffix PG_DLSUFFIX
77
\set regresslib :libdir '/regress' :dlsuffix
8+
CREATE FUNCTION test_enc_setup() RETURNS void
9+
AS :'regresslib', 'test_enc_setup'
10+
LANGUAGE C STRICT;
11+
SELECT FROM test_enc_setup();
12+
--
13+
(1 row)
14+
815
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
916
AS :'regresslib', 'test_enc_conversion'
1017
LANGUAGE C STRICT;

src/test/regress/regress.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
10991099
PG_RETURN_NULL();
11001100
}
11011101

1102+
/* one-time tests for encoding infrastructure */
1103+
PG_FUNCTION_INFO_V1(test_enc_setup);
1104+
Datum
1105+
test_enc_setup(PG_FUNCTION_ARGS)
1106+
{
1107+
/* Test pg_encoding_set_invalid() */
1108+
for (int i = 0; i < _PG_LAST_ENCODING_; i++)
1109+
{
1110+
char buf[2],
1111+
bigbuf[16];
1112+
int len,
1113+
mblen,
1114+
valid;
1115+
1116+
if (pg_encoding_max_length(i) == 1)
1117+
continue;
1118+
pg_encoding_set_invalid(i, buf);
1119+
len = strnlen(buf, 2);
1120+
if (len != 2)
1121+
elog(WARNING,
1122+
"official invalid string for encoding \"%s\" has length %d",
1123+
pg_enc2name_tbl[i].name, len);
1124+
mblen = pg_encoding_mblen(i, buf);
1125+
if (mblen != 2)
1126+
elog(WARNING,
1127+
"official invalid string for encoding \"%s\" has mblen %d",
1128+
pg_enc2name_tbl[i].name, mblen);
1129+
valid = pg_encoding_verifymbstr(i, buf, len);
1130+
if (valid != 0)
1131+
elog(WARNING,
1132+
"official invalid string for encoding \"%s\" has valid prefix of length %d",
1133+
pg_enc2name_tbl[i].name, valid);
1134+
valid = pg_encoding_verifymbstr(i, buf, 1);
1135+
if (valid != 0)
1136+
elog(WARNING,
1137+
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
1138+
pg_enc2name_tbl[i].name, valid);
1139+
memset(bigbuf, ' ', sizeof(bigbuf));
1140+
bigbuf[0] = buf[0];
1141+
bigbuf[1] = buf[1];
1142+
valid = pg_encoding_verifymbstr(i, bigbuf, sizeof(bigbuf));
1143+
if (valid != 0)
1144+
elog(WARNING,
1145+
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
1146+
pg_enc2name_tbl[i].name, valid);
1147+
}
1148+
1149+
PG_RETURN_VOID();
1150+
}
1151+
11021152
/*
11031153
* Call an encoding conversion or verification function.
11041154
*

src/test/regress/sql/conversion.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
\set regresslib :libdir '/regress' :dlsuffix
1010

11+
CREATE FUNCTION test_enc_setup() RETURNS void
12+
AS :'regresslib', 'test_enc_setup'
13+
LANGUAGE C STRICT;
14+
SELECT FROM test_enc_setup();
15+
1116
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
1217
AS :'regresslib', 'test_enc_conversion'
1318
LANGUAGE C STRICT;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy