Skip to content

Commit 26a944c

Browse files
committed
Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.

Since the existing bit number argument can't exceed INT32_MAX, it's not possible for these functions to manipulate bits beyond the first 256MB of a bytea value. Lift that restriction by redeclaring the bit number arguments as int8 (which requires a catversion bump, hence is not back-patchable).

The similarly-named functions for bit/varbit don't really have a problem because we restrict those types to at most VARBITMAXLEN bits; hence leave them alone.

While here, extend the encode/decode functions in utils/adt/encode.c to allow dealing with values wider than 1GB. This is not a live bug or restriction in current usage, because no input could be more than 1GB, and since none of the encoders can expand a string more than 4X, the result size couldn't overflow uint32. But it might be desirable to support more in future, so make the input length values size_t and the potential-output-length values uint64.

Also add some test cases to improve the miserable code coverage of these functions.

Movead Li, editorialized some by me; also reviewed by Ashutosh Bapat

Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca
1 parent 9c74ceb commit 26a944c

File tree

8 files changed

+217
-71
lines changed

8 files changed

+217
-71
lines changed

doc/src/sgml/func.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,7 +2905,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
29052905
<indexterm>
29062906
<primary>get_bit</primary>
29072907
</indexterm>
2908-
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>int</type>)</function></literal>
2908+
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>bigint</type>)</function></literal>
29092909
</entry>
29102910
<entry><type>int</type></entry>
29112911
<entry>
@@ -2990,7 +2990,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
29902990
<primary>set_bit</primary>
29912991
</indexterm>
29922992
<literal><function>set_bit(<parameter>bytes</parameter> <type>bytea</type>,
2993-
<parameter>offset</parameter> <type>int</type>,
2993+
<parameter>offset</parameter> <type>bigint</type>,
29942994
<parameter>newvalue</parameter> <type>int</type>)</function></literal>
29952995
</entry>
29962996
<entry><type>bytea</type></entry>

src/backend/utils/adt/encode.c

Lines changed: 85 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,24 @@
1616
#include <ctype.h>
1717

1818
#include "utils/builtins.h"
19+
#include "utils/memutils.h"
1920

2021

22+
/*
23+
* Encoding conversion API.
24+
* encode_len() and decode_len() compute the amount of space needed, while
25+
* encode() and decode() perform the actual conversions. It is okay for
26+
* the _len functions to return an overestimate, but not an underestimate.
27+
* (Having said that, large overestimates could cause unnecessary errors,
28+
* so it's better to get it right.) The conversion routines write to the
29+
* buffer at *res and return the true length of their output.
30+
*/
2131
struct pg_encoding
2232
{
23-
unsigned (*encode_len) (const char *data, unsigned dlen);
24-
unsigned (*decode_len) (const char *data, unsigned dlen);
25-
unsigned (*encode) (const char *data, unsigned dlen, char *res);
26-
unsigned (*decode) (const char *data, unsigned dlen, char *res);
33+
uint64 (*encode_len) (const char *data, size_t dlen);
34+
uint64 (*decode_len) (const char *data, size_t dlen);
35+
uint64 (*encode) (const char *data, size_t dlen, char *res);
36+
uint64 (*decode) (const char *data, size_t dlen, char *res);
2737
};
2838

2939
static const struct pg_encoding *pg_find_encoding(const char *name);
@@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS)
3949
Datum name = PG_GETARG_DATUM(1);
4050
text *result;
4151
char *namebuf;
42-
int datalen,
43-
resultlen,
44-
res;
52+
char *dataptr;
53+
size_t datalen;
54+
uint64 resultlen;
55+
uint64 res;
4556
const struct pg_encoding *enc;
4657

47-
datalen = VARSIZE_ANY_EXHDR(data);
48-
4958
namebuf = TextDatumGetCString(name);
5059

5160
enc = pg_find_encoding(namebuf);
@@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS)
5463
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5564
errmsg("unrecognized encoding: \"%s\"", namebuf)));
5665

57-
resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
66+
dataptr = VARDATA_ANY(data);
67+
datalen = VARSIZE_ANY_EXHDR(data);
68+
69+
resultlen = enc->encode_len(dataptr, datalen);
70+
71+
/*
72+
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
73+
* unsafe to rely on palloc's internal check.
74+
*/
75+
if (resultlen > MaxAllocSize - VARHDRSZ)
76+
ereport(ERROR,
77+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
78+
errmsg("result of encoding conversion is too large")));
79+
5880
result = palloc(VARHDRSZ + resultlen);
5981

60-
res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
82+
res = enc->encode(dataptr, datalen, VARDATA(result));
6183

6284
/* Make this FATAL 'cause we've trodden on memory ... */
6385
if (res > resultlen)
@@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS)
7597
Datum name = PG_GETARG_DATUM(1);
7698
bytea *result;
7799
char *namebuf;
78-
int datalen,
79-
resultlen,
80-
res;
100+
char *dataptr;
101+
size_t datalen;
102+
uint64 resultlen;
103+
uint64 res;
81104
const struct pg_encoding *enc;
82105

83-
datalen = VARSIZE_ANY_EXHDR(data);
84-
85106
namebuf = TextDatumGetCString(name);
86107

87108
enc = pg_find_encoding(namebuf);
@@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS)
90111
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
91112
errmsg("unrecognized encoding: \"%s\"", namebuf)));
92113

93-
resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
114+
dataptr = VARDATA_ANY(data);
115+
datalen = VARSIZE_ANY_EXHDR(data);
116+
117+
resultlen = enc->decode_len(dataptr, datalen);
118+
119+
/*
120+
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
121+
* unsafe to rely on palloc's internal check.
122+
*/
123+
if (resultlen > MaxAllocSize - VARHDRSZ)
124+
ereport(ERROR,
125+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
126+
errmsg("result of decoding conversion is too large")));
127+
94128
result = palloc(VARHDRSZ + resultlen);
95129

96-
res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
130+
res = enc->decode(dataptr, datalen, VARDATA(result));
97131

98132
/* Make this FATAL 'cause we've trodden on memory ... */
99133
if (res > resultlen)
@@ -122,8 +156,8 @@ static const int8 hexlookup[128] = {
122156
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123157
};
124158

125-
unsigned
126-
hex_encode(const char *src, unsigned len, char *dst)
159+
uint64
160+
hex_encode(const char *src, size_t len, char *dst)
127161
{
128162
const char *end = src + len;
129163

@@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst)
133167
*dst++ = hextbl[*src & 0xF];
134168
src++;
135169
}
136-
return len * 2;
170+
return (uint64) len * 2;
137171
}
138172

139173
static inline char
@@ -152,8 +186,8 @@ get_hex(char c)
152186
return (char) res;
153187
}
154188

155-
unsigned
156-
hex_decode(const char *src, unsigned len, char *dst)
189+
uint64
190+
hex_decode(const char *src, size_t len, char *dst)
157191
{
158192
const char *s,
159193
*srcend;
@@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst)
184218
return p - dst;
185219
}
186220

187-
static unsigned
188-
hex_enc_len(const char *src, unsigned srclen)
221+
static uint64
222+
hex_enc_len(const char *src, size_t srclen)
189223
{
190-
return srclen << 1;
224+
return (uint64) srclen << 1;
191225
}
192226

193-
static unsigned
194-
hex_dec_len(const char *src, unsigned srclen)
227+
static uint64
228+
hex_dec_len(const char *src, size_t srclen)
195229
{
196-
return srclen >> 1;
230+
return (uint64) srclen >> 1;
197231
}
198232

199233
/*
@@ -214,8 +248,8 @@ static const int8 b64lookup[128] = {
214248
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
215249
};
216250

217-
static unsigned
218-
pg_base64_encode(const char *src, unsigned len, char *dst)
251+
static uint64
252+
pg_base64_encode(const char *src, size_t len, char *dst)
219253
{
220254
char *p,
221255
*lend = dst + 76;
@@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst)
261295
return p - dst;
262296
}
263297

264-
static unsigned
265-
pg_base64_decode(const char *src, unsigned len, char *dst)
298+
static uint64
299+
pg_base64_decode(const char *src, size_t len, char *dst)
266300
{
267301
const char *srcend = src + len,
268302
*s = src;
@@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst)
331365
}
332366

333367

334-
static unsigned
335-
pg_base64_enc_len(const char *src, unsigned srclen)
368+
static uint64
369+
pg_base64_enc_len(const char *src, size_t srclen)
336370
{
337371
/* 3 bytes will be converted to 4, linefeed after 76 chars */
338-
return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
372+
return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
339373
}
340374

341-
static unsigned
342-
pg_base64_dec_len(const char *src, unsigned srclen)
375+
static uint64
376+
pg_base64_dec_len(const char *src, size_t srclen)
343377
{
344-
return (srclen * 3) >> 2;
378+
return ((uint64) srclen * 3) >> 2;
345379
}
346380

347381
/*
@@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen)
361395
#define VAL(CH) ((CH) - '0')
362396
#define DIG(VAL) ((VAL) + '0')
363397

364-
static unsigned
365-
esc_encode(const char *src, unsigned srclen, char *dst)
398+
static uint64
399+
esc_encode(const char *src, size_t srclen, char *dst)
366400
{
367401
const char *end = src + srclen;
368402
char *rp = dst;
369-
int len = 0;
403+
uint64 len = 0;
370404

371405
while (src < end)
372406
{
@@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst)
400434
return len;
401435
}
402436

403-
static unsigned
404-
esc_decode(const char *src, unsigned srclen, char *dst)
437+
static uint64
438+
esc_decode(const char *src, size_t srclen, char *dst)
405439
{
406440
const char *end = src + srclen;
407441
char *rp = dst;
408-
int len = 0;
442+
uint64 len = 0;
409443

410444
while (src < end)
411445
{
@@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst)
448482
return len;
449483
}
450484

451-
static unsigned
452-
esc_enc_len(const char *src, unsigned srclen)
485+
static uint64
486+
esc_enc_len(const char *src, size_t srclen)
453487
{
454488
const char *end = src + srclen;
455-
int len = 0;
489+
uint64 len = 0;
456490

457491
while (src < end)
458492
{
@@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen)
469503
return len;
470504
}
471505

472-
static unsigned
473-
esc_dec_len(const char *src, unsigned srclen)
506+
static uint64
507+
esc_dec_len(const char *src, size_t srclen)
474508
{
475509
const char *end = src + srclen;
476-
int len = 0;
510+
uint64 len = 0;
477511

478512
while (src < end)
479513
{

src/backend/utils/adt/varlena.c

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ byteaout(PG_FUNCTION_ARGS)
389389
{
390390
/* Print traditional escaped format */
391391
char *vp;
392-
int len;
392+
uint64 len;
393393
int i;
394394

395395
len = 1; /* empty string has 1 char */
@@ -403,7 +403,18 @@ byteaout(PG_FUNCTION_ARGS)
403403
else
404404
len++;
405405
}
406+
407+
/*
408+
* In principle len can't overflow uint32 if the input fit in 1GB, but
409+
* for safety let's check rather than relying on palloc's internal
410+
* check.
411+
*/
412+
if (len > MaxAllocSize)
413+
ereport(ERROR,
414+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
415+
errmsg_internal("result of bytea output conversion is too large")));
406416
rp = result = (char *) palloc(len);
417+
407418
vp = VARDATA_ANY(vlena);
408419
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
409420
{
@@ -3456,22 +3467,23 @@ Datum
34563467
byteaGetBit(PG_FUNCTION_ARGS)
34573468
{
34583469
bytea *v = PG_GETARG_BYTEA_PP(0);
3459-
int32 n = PG_GETARG_INT32(1);
3470+
int64 n = PG_GETARG_INT64(1);
34603471
int byteNo,
34613472
bitNo;
34623473
int len;
34633474
int byte;
34643475

34653476
len = VARSIZE_ANY_EXHDR(v);
34663477

3467-
if (n < 0 || n >= len * 8)
3478+
if (n < 0 || n >= (int64) len * 8)
34683479
ereport(ERROR,
34693480
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3470-
errmsg("index %d out of valid range, 0..%d",
3471-
n, len * 8 - 1)));
3481+
errmsg("index %lld out of valid range, 0..%lld",
3482+
(long long) n, (long long) len * 8 - 1)));
34723483

3473-
byteNo = n / 8;
3474-
bitNo = n % 8;
3484+
/* n/8 is now known < len, so safe to cast to int */
3485+
byteNo = (int) (n / 8);
3486+
bitNo = (int) (n % 8);
34753487

34763488
byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
34773489

@@ -3525,7 +3537,7 @@ Datum
35253537
byteaSetBit(PG_FUNCTION_ARGS)
35263538
{
35273539
bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3528-
int32 n = PG_GETARG_INT32(1);
3540+
int64 n = PG_GETARG_INT64(1);
35293541
int32 newBit = PG_GETARG_INT32(2);
35303542
int len;
35313543
int oldByte,
@@ -3535,14 +3547,15 @@ byteaSetBit(PG_FUNCTION_ARGS)
35353547

35363548
len = VARSIZE(res) - VARHDRSZ;
35373549

3538-
if (n < 0 || n >= len * 8)
3550+
if (n < 0 || n >= (int64) len * 8)
35393551
ereport(ERROR,
35403552
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3541-
errmsg("index %d out of valid range, 0..%d",
3542-
n, len * 8 - 1)));
3553+
errmsg("index %lld out of valid range, 0..%lld",
3554+
(long long) n, (long long) len * 8 - 1)));
35433555

3544-
byteNo = n / 8;
3545-
bitNo = n % 8;
3556+
/* n/8 is now known < len, so safe to cast to int */
3557+
byteNo = (int) (n / 8);
3558+
bitNo = (int) (n % 8);
35463559

35473560
/*
35483561
* sanity check!

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202004062
56+
#define CATALOG_VERSION_NO 202004071
5757

5858
#endif

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy