Skip to content

Commit 6aebedc

Browse files
committed
Grab the low-hanging fruit from forcing sizeof(Datum) to 8.
Remove conditionally-compiled code for smaller Datum widths, and simplify comments that describe cases no longer of interest. I also fixed up a few more places that were not using DatumGetIntXX where they should, and made some cosmetic adjustments such as using sizeof(int64) not sizeof(Datum) in places where that fit better with the surrounding code.

One thing I remembered while preparing this part is that SP-GiST stores pass-by-value prefix keys as Datums, so that the on-disk representation depends on sizeof(Datum). That's even more unfortunate than the existing commentary makes it out to be, because now there is a hazard that the change of sizeof(Datum) will break SP-GiST indexes on 32-bit machines. It appears that there are no existing SP-GiST opclasses that are actually affected; and if there are some that I didn't find, the number of installations that are using them on 32-bit machines is doubtless tiny. So I'm proceeding on the assumption that we can get away with this, but it's something to worry about.

(gininsert.c looks like it has a similar problem, but it's okay because the "tuples" it's constructing are just transient data within the tuplesort step. That's pretty poorly documented though, so I added some comments.)

Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us
1 parent 2a600a9 commit 6aebedc

File tree

17 files changed

+72
-284
lines changed

17 files changed

+72
-284
lines changed

doc/src/sgml/xfunc.sgml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2051,8 +2051,7 @@ PG_MODULE_MAGIC_EXT(
20512051
</para>
20522052

20532053
<para>
2054-
By-value types can only be 1, 2, or 4 bytes in length
2055-
(also 8 bytes, if <literal>sizeof(Datum)</literal> is 8 on your machine).
2054+
By-value types can only be 1, 2, 4, or 8 bytes in length.
20562055
You should be careful to define your types such that they will be the
20572056
same size (in bytes) on all architectures. For example, the
20582057
<literal>long</literal> type is dangerous because it is 4 bytes on some

src/backend/access/gin/gininsert.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2189,7 +2189,10 @@ typedef struct
21892189
* we simply copy the whole Datum, so that we don't have to care about stuff
21902190
* like endianness etc. We could make it a little bit smaller, but it's not
21912191
* worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
2192-
* start of the TID list anyway. So we wouldn't save anything.
2192+
* start of the TID list anyway. So we wouldn't save anything. (This would
2193+
* not be a good idea for the permanent in-index data, since we'd prefer
2194+
* that that not depend on sizeof(Datum). But this is just a transient
2195+
* representation to use while sorting the data.)
21932196
*
21942197
* The TID list is serialized as compressed - it's highly compressible, and
21952198
* we already have ginCompressPostingList for this purpose. The list may be

src/backend/access/gist/gistproc.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,8 +1707,8 @@ gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
17071707
* Abbreviated version of Z-order comparison
17081708
*
17091709
* The abbreviated format is a Z-order value computed from the two 32-bit
1710-
* floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
1711-
* abbreviated Datum, otherwise use its most significant bits.
1710+
* floats. Now that sizeof(Datum) is always 8, the 64-bit Z-order value
1711+
* always fits fully in the abbreviated Datum.
17121712
*/
17131713
static Datum
17141714
gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
@@ -1718,11 +1718,7 @@ gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
17181718

17191719
z = point_zorder_internal(p->x, p->y);
17201720

1721-
#if SIZEOF_DATUM == 8
1722-
return (Datum) z;
1723-
#else
1724-
return (Datum) (z >> 32);
1725-
#endif
1721+
return UInt64GetDatum(z);
17261722
}
17271723

17281724
/*

src/backend/access/nbtree/nbtcompare.c

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -278,32 +278,12 @@ btint8cmp(PG_FUNCTION_ARGS)
278278
PG_RETURN_INT32(A_LESS_THAN_B);
279279
}
280280

281-
#if SIZEOF_DATUM < 8
282-
static int
283-
btint8fastcmp(Datum x, Datum y, SortSupport ssup)
284-
{
285-
int64 a = DatumGetInt64(x);
286-
int64 b = DatumGetInt64(y);
287-
288-
if (a > b)
289-
return A_GREATER_THAN_B;
290-
else if (a == b)
291-
return 0;
292-
else
293-
return A_LESS_THAN_B;
294-
}
295-
#endif
296-
297281
Datum
298282
btint8sortsupport(PG_FUNCTION_ARGS)
299283
{
300284
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
301285

302-
#if SIZEOF_DATUM >= 8
303286
ssup->comparator = ssup_datum_signed_cmp;
304-
#else
305-
ssup->comparator = btint8fastcmp;
306-
#endif
307287
PG_RETURN_VOID();
308288
}
309289

src/backend/catalog/pg_type.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,16 +285,14 @@ TypeCreate(Oid newTypeOid,
285285
errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d",
286286
alignment, internalSize)));
287287
}
288-
#if SIZEOF_DATUM == 8
289-
else if (internalSize == (int16) sizeof(Datum))
288+
else if (internalSize == (int16) sizeof(int64))
290289
{
291290
if (alignment != TYPALIGN_DOUBLE)
292291
ereport(ERROR,
293292
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
294293
errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d",
295294
alignment, internalSize)));
296295
}
297-
#endif
298296
else
299297
ereport(ERROR,
300298
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),

src/backend/utils/adt/mac.c

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -481,33 +481,26 @@ macaddr_abbrev_convert(Datum original, SortSupport ssup)
481481
Datum res;
482482

483483
/*
484-
* On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
485-
* the MAC address in. There will be two bytes of zero padding on the end
486-
* of the least significant bits.
484+
* Zero out the 8-byte Datum and copy in the 6 bytes of the MAC address.
485+
* There will be two bytes of zero padding on the end of the least
486+
* significant bits.
487487
*/
488-
#if SIZEOF_DATUM == 8
489-
memset(&res, 0, SIZEOF_DATUM);
488+
StaticAssertStmt(sizeof(res) >= sizeof(macaddr),
489+
"Datum is too small for macaddr");
490+
memset(&res, 0, sizeof(res));
490491
memcpy(&res, authoritative, sizeof(macaddr));
491-
#else /* SIZEOF_DATUM != 8 */
492-
memcpy(&res, authoritative, SIZEOF_DATUM);
493-
#endif
494492
uss->input_count += 1;
495493

496494
/*
497-
* Cardinality estimation. The estimate uses uint32, so on a 64-bit
498-
* architecture, XOR the two 32-bit halves together to produce slightly
499-
* more entropy. The two zeroed bytes won't have any practical impact on
500-
* this operation.
495+
* Cardinality estimation. The estimate uses uint32, so XOR the two 32-bit
496+
* halves together to produce slightly more entropy. The two zeroed bytes
497+
* won't have any practical impact on this operation.
501498
*/
502499
if (uss->estimating)
503500
{
504501
uint32 tmp;
505502

506-
#if SIZEOF_DATUM == 8
507-
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
508-
#else /* SIZEOF_DATUM != 8 */
509-
tmp = (uint32) res;
510-
#endif
503+
tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
511504

512505
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
513506
}

src/backend/utils/adt/network.c

Lines changed: 12 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -567,24 +567,11 @@ network_abbrev_abort(int memtupcount, SortSupport ssup)
567567
*
568568
* When generating abbreviated keys for SortSupport, we pack as much as we can
569569
* into a datum while ensuring that when comparing those keys as integers,
570-
* these rules will be respected. Exact contents depend on IP family and datum
571-
* size.
570+
* these rules will be respected. Exact contents depend on IP family:
572571
*
573572
* IPv4
574573
* ----
575574
*
576-
* 4 byte datums:
577-
*
578-
* Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in
579-
* every case below) followed by all but 1 of the netmasked bits.
580-
*
581-
* +----------+---------------------+
582-
* | 1 bit IP | 31 bits network | (1 bit network
583-
* | family | (truncated) | omitted)
584-
* +----------+---------------------+
585-
*
586-
* 8 byte datums:
587-
*
588575
* We have space to store all netmasked bits, followed by the netmask size,
589576
* followed by 25 bits of the subnet (25 bits is usually more than enough in
590577
* practice). cidr datums always have all-zero subnet bits.
@@ -597,15 +584,6 @@ network_abbrev_abort(int memtupcount, SortSupport ssup)
597584
* IPv6
598585
* ----
599586
*
600-
* 4 byte datums:
601-
*
602-
* +----------+---------------------+
603-
* | 1 bit IP | 31 bits network | (up to 97 bits
604-
* | family | (truncated) | network omitted)
605-
* +----------+---------------------+
606-
*
607-
* 8 byte datums:
608-
*
609587
* +----------+---------------------------------+
610588
* | 1 bit IP | 63 bits network | (up to 65 bits
611589
* | family | (truncated) | network omitted)
@@ -628,8 +606,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
628606
/*
629607
* Get an unsigned integer representation of the IP address by taking its
630608
* first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take
631-
* the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes
632-
* otherwise.
609+
* the first 8 bytes of an IPv6 address.
633610
*
634611
* We're consuming an array of unsigned char, so byteswap on little endian
635612
* systems (an inet's ipaddr field stores the most significant byte
@@ -659,7 +636,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
659636
ipaddr_datum = DatumBigEndianToNative(ipaddr_datum);
660637

661638
/* Initialize result with ipfamily (most significant) bit set */
662-
res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1);
639+
res = ((Datum) 1) << (sizeof(Datum) * BITS_PER_BYTE - 1);
663640
}
664641

665642
/*
@@ -668,8 +645,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
668645
* while low order bits go in "subnet" component when there is space for
669646
* one. This is often accomplished by generating a temp datum subnet
670647
* bitmask, which we may reuse later when generating the subnet bits
671-
* themselves. (Note that subnet bits are only used with IPv4 datums on
672-
* platforms where datum is 8 bytes.)
648+
* themselves.
673649
*
674650
* The number of bits in subnet is used to generate a datum subnet
675651
* bitmask. For example, with a /24 IPv4 datum there are 8 subnet bits
@@ -681,14 +657,14 @@ network_abbrev_convert(Datum original, SortSupport ssup)
681657
subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative);
682658
Assert(subnet_size >= 0);
683659
/* subnet size must work with prefix ipaddr cases */
684-
subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE;
660+
subnet_size %= sizeof(Datum) * BITS_PER_BYTE;
685661
if (ip_bits(authoritative) == 0)
686662
{
687663
/* Fit as many ipaddr bits as possible into subnet */
688664
subnet_bitmask = ((Datum) 0) - 1;
689665
network = 0;
690666
}
691-
else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE)
667+
else if (ip_bits(authoritative) < sizeof(Datum) * BITS_PER_BYTE)
692668
{
693669
/* Split ipaddr bits between network and subnet */
694670
subnet_bitmask = (((Datum) 1) << subnet_size) - 1;
@@ -701,12 +677,11 @@ network_abbrev_convert(Datum original, SortSupport ssup)
701677
network = ipaddr_datum;
702678
}
703679

704-
#if SIZEOF_DATUM == 8
705680
if (ip_family(authoritative) == PGSQL_AF_INET)
706681
{
707682
/*
708-
* IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size,
709-
* and most significant 25 subnet bits
683+
* IPv4: keep all 32 netmasked bits, netmask size, and most
684+
* significant 25 subnet bits
710685
*/
711686
Datum netmask_size = (Datum) ip_bits(authoritative);
712687
Datum subnet;
@@ -750,12 +725,11 @@ network_abbrev_convert(Datum original, SortSupport ssup)
750725
res |= network | netmask_size | subnet;
751726
}
752727
else
753-
#endif
754728
{
755729
/*
756-
* 4 byte datums, or IPv6 with 8 byte datums: Use as many of the
757-
* netmasked bits as will fit in final abbreviated key. Avoid
758-
* clobbering the ipfamily bit that was set earlier.
730+
* IPv6: Use as many of the netmasked bits as will fit in final
731+
* abbreviated key. Avoid clobbering the ipfamily bit that was set
732+
* earlier.
759733
*/
760734
res |= network >> 1;
761735
}
@@ -767,11 +741,7 @@ network_abbrev_convert(Datum original, SortSupport ssup)
767741
{
768742
uint32 tmp;
769743

770-
#if SIZEOF_DATUM == 8
771-
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
772-
#else /* SIZEOF_DATUM != 8 */
773-
tmp = (uint32) res;
774-
#endif
744+
tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
775745

776746
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
777747
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy