Commit 1230be1
Fix re-distributing previously distributed invalidation messages during logical decoding.
Commit 4909b38 introduced logic to distribute invalidation messages from
catalog-modifying transactions to all concurrent in-progress transactions.
However, since each transaction distributes not only its original
invalidation messages but also previously distributed messages to other
transactions, this leads to an exponential increase in allocation request
size for invalidation messages, ultimately causing memory allocation
failure.

This commit fixes the issue by tracking distributed invalidation messages
separately per decoded transaction and not redistributing these messages
to other in-progress transactions. The maximum size of distributed
invalidation messages that one transaction can store is limited to
MAX_DISTR_INVAL_MSG_PER_TXN (8MB). Once the size of the distributed
invalidation messages exceeds this threshold, we invalidate all caches in
locations where distributed invalidation messages need to be executed.

Back-patch to all supported versions where commit 4909b38 introduced the
original fix.

Note that this commit adds two new fields to ReorderBufferTXN to store the
distributed invalidation messages. This change breaks ABI compatibility in
back branches, affecting third-party extensions that depend on the size of
the ReorderBufferTXN struct, though this scenario seems unlikely.

Additionally, it adds a new flag to the txn_flags field of ReorderBufferTXN
to indicate distributed invalidation message overflow. This should not
affect existing implementations, as it is unlikely that third-party
extensions use unused bits in the txn_flags field.

Bug: #18938 #18942
Author: vignesh C <vignesh21@gmail.com>
Reported-by: Duncan Sands <duncan.sands@deepbluecap.com>
Reported-by: John Hutchins <john.hutchins@wicourts.gov>
Reported-by: Laurence Parry <greenreaper@hotmail.com>
Reported-by: Max Madden <maxmmadden@gmail.com>
Reported-by: Braulio Fdo Gonzalez <brauliofg@gmail.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Discussion: https://postgr.es/m/680bdaf6-f7d1-4536-b580-05c2760c67c6@deepbluecap.com
Discussion: https://postgr.es/m/18942-0ab1e5ae156613ad@postgresql.org
Discussion: https://postgr.es/m/18938-57c9a1c463b68ce0@postgresql.org
Discussion: https://postgr.es/m/CAD1FGCT2sYrP_70RTuo56QTizyc+J3wJdtn2gtO3VttQFpdMZg@mail.gmail.com
Discussion: https://postgr.es/m/CANO2=B=2BT1hSYCE=nuuTnVTnjidMg0+-FfnRnqM6kd23qoygg@mail.gmail.com
Backpatch-through: 13
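To make the overflow policy concrete, here is a small standalone C model of it. This is not PostgreSQL code: the Msg/Txn types, the tiny MAX_DISTR_MSGS cap, and the two functions are invented for illustration; the real patch uses MAX_DISTR_INVAL_MSG_PER_TXN, RBTXN_DISTR_INVAL_OVERFLOWED, and InvalidateSystemCaches(), as shown in the reorderbuffer.c diff below.

/*
 * Standalone model (not PostgreSQL code) of the overflow policy described
 * above: distributed inval messages are accumulated per transaction up to a
 * fixed cap; past the cap the array is dropped, an "overflowed" flag is set,
 * and execution falls back to invalidating everything.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

typedef struct Msg { int id; } Msg;     /* stand-in for SharedInvalidationMessage */

#define MAX_DISTR_MSGS 4                /* tiny cap for the demo; the patch uses 8MB worth */

typedef struct Txn
{
    bool    overflowed;                 /* models RBTXN_DISTR_INVAL_OVERFLOWED */
    size_t  nmsgs;                      /* models ninvalidations_distributed */
    Msg    *msgs;                       /* models invalidations_distributed */
} Txn;

/* Accumulate distributed messages, switching to "overflowed" past the cap. */
static void
add_distributed(Txn *txn, const Msg *newmsgs, size_t n)
{
    if (txn->overflowed)
        return;                         /* already lost track; nothing to store */

    if (txn->nmsgs + n >= MAX_DISTR_MSGS)
    {
        txn->overflowed = true;         /* remember that messages were dropped */
        free(txn->msgs);
        txn->msgs = NULL;
        txn->nmsgs = 0;
        return;
    }

    txn->msgs = realloc(txn->msgs, (txn->nmsgs + n) * sizeof(Msg));
    memcpy(txn->msgs + txn->nmsgs, newmsgs, n * sizeof(Msg));
    txn->nmsgs += n;
}

/* Execute invalidations: precise replay if possible, blanket reset otherwise. */
static void
execute_distributed(const Txn *txn)
{
    if (txn->overflowed)
        printf("invalidate all caches (overflowed)\n"); /* models InvalidateSystemCaches() */
    else
        for (size_t i = 0; i < txn->nmsgs; i++)
            printf("invalidate msg %d\n", txn->msgs[i].id);
}

int
main(void)
{
    Txn     txn = {0};
    Msg     batch[] = {{1}, {2}, {3}};

    add_distributed(&txn, batch, 3);    /* fits under the cap */
    add_distributed(&txn, batch, 3);    /* pushes past the cap -> overflow */
    execute_distributed(&txn);
    free(txn.msgs);
    return 0;
}

The key property is that once the cap is hit the per-transaction array stops growing, so the worst case becomes a blanket cache reset rather than an ever-larger allocation request.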
1 parent: dd3df0b

5 files changed: +207 additions, −30 deletions

contrib/test_decoding/expected/invalidation_distribution.out

Lines changed: 22 additions & 1 deletion
@@ -1,4 +1,4 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
 starting permutation: s1_insert_tbl1 s1_begin s1_insert_tbl1 s2_alter_pub_add_tbl s1_commit s1_insert_tbl1 s2_get_binary_changes
 step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
@@ -18,3 +18,24 @@ count
 stop
 (1 row)
 
+
+starting permutation: s1_begin s1_insert_tbl1 s3_begin s3_insert_tbl1 s2_alter_pub_add_tbl s1_insert_tbl1 s1_commit s3_commit s2_get_binary_changes
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s3_begin: BEGIN;
+step s3_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (2, 2);
+step s2_alter_pub_add_tbl: ALTER PUBLICATION pub ADD TABLE tbl1;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s3_commit: COMMIT;
+step s2_get_binary_changes: SELECT count(data) FROM pg_logical_slot_get_binary_changes('isolation_slot', NULL, NULL, 'proto_version', '1', 'publication_names', 'pub') WHERE get_byte(data, 0) = 73;
+count
+-----
+0
+(1 row)
+
+?column?
+--------
+stop
+(1 row)
+

contrib/test_decoding/specs/invalidation_distribution.spec

Lines changed: 11 additions & 0 deletions
@@ -28,5 +28,16 @@ setup { SET synchronous_commit=on; }
 step "s2_alter_pub_add_tbl" { ALTER PUBLICATION pub ADD TABLE tbl1; }
 step "s2_get_binary_changes" { SELECT count(data) FROM pg_logical_slot_get_binary_changes('isolation_slot', NULL, NULL, 'proto_version', '1', 'publication_names', 'pub') WHERE get_byte(data, 0) = 73; }
 
+session "s3"
+setup { SET synchronous_commit=on; }
+step "s3_begin" { BEGIN; }
+step "s3_insert_tbl1" { INSERT INTO tbl1 (val1, val2) VALUES (2, 2); }
+step "s3_commit" { COMMIT; }
+
 # Expect to get one insert change. LOGICAL_REP_MSG_INSERT = 'I'
 permutation "s1_insert_tbl1" "s1_begin" "s1_insert_tbl1" "s2_alter_pub_add_tbl" "s1_commit" "s1_insert_tbl1" "s2_get_binary_changes"
+
+# Expect to get no change because both s1's and s3's transactions
+# use the snapshot from before adding the table tbl1 to the
+# publication by "s2_alter_pub_add_tbl".
+permutation "s1_begin" "s1_insert_tbl1" "s3_begin" "s3_insert_tbl1" "s2_alter_pub_add_tbl" "s1_insert_tbl1" "s1_commit" "s3_commit" "s2_get_binary_changes"

src/backend/replication/logical/reorderbuffer.c

Lines changed: 145 additions & 24 deletions
@@ -103,12 +103,24 @@
 #include "storage/sinval.h"
 #include "utils/builtins.h"
 #include "utils/combocid.h"
+#include "utils/inval.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/relfilenodemap.h"
 
 
+/*
+ * Each transaction has an 8MB limit for invalidation messages distributed from
+ * other transactions. This limit is set considering scenarios with many
+ * concurrent logical decoding operations. When the distributed invalidation
+ * messages reach this threshold, the transaction is marked as
+ * RBTXN_DISTR_INVAL_OVERFLOWED to invalidate the complete cache as we have lost
+ * some inval messages and hence don't know what needs to be invalidated.
+ */
+#define MAX_DISTR_INVAL_MSG_PER_TXN \
+    ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
+
 /* entry for a hash table we use to map from xid to our transaction state */
 typedef struct ReorderBufferTXNByIdEnt
 {
@@ -220,7 +232,8 @@ static void ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn,
 static ReorderBufferChange *ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state);
 static void ReorderBufferIterTXNFinish(ReorderBuffer *rb,
                                        ReorderBufferIterTXNState *state);
-static void ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferExecuteInvalidations(uint32 nmsgs,
+                                              SharedInvalidationMessage *msgs);
 
 /*
  * ---------------------------------------
@@ -406,6 +419,12 @@ ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
         txn->invalidations = NULL;
     }
 
+    if (txn->invalidations_distributed)
+    {
+        pfree(txn->invalidations_distributed);
+        txn->invalidations_distributed = NULL;
+    }
+
     /* Reset the toast hash */
     ReorderBufferToastReset(rb, txn);
 
@@ -1883,7 +1902,8 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
                          * see new catalog contents, so execute all
                          * invalidations.
                          */
-                        ReorderBufferExecuteInvalidations(rb, txn);
+                        ReorderBufferExecuteInvalidations(txn->ninvalidations,
+                                                          txn->invalidations);
                     }
 
                     break;
@@ -1921,7 +1941,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
         AbortCurrentTransaction();
 
         /* make sure there's no cache pollution */
-        ReorderBufferExecuteInvalidations(rb, txn);
+        if (rbtxn_distr_inval_overflowed(txn))
+        {
+            Assert(txn->ninvalidations_distributed == 0);
+            InvalidateSystemCaches();
+        }
+        else
+        {
+            ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+            ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+                                              txn->invalidations_distributed);
+        }
 
         if (using_subtxn)
             RollbackAndReleaseCurrentSubTransaction();
@@ -1947,7 +1977,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
         AbortCurrentTransaction();
 
         /* make sure there's no cache pollution */
-        ReorderBufferExecuteInvalidations(rb, txn);
+        if (rbtxn_distr_inval_overflowed(txn))
+        {
+            Assert(txn->ninvalidations_distributed == 0);
+            InvalidateSystemCaches();
+        }
+        else
+        {
+            ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+            ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+                                              txn->invalidations_distributed);
+        }
 
         if (using_subtxn)
             RollbackAndReleaseCurrentSubTransaction();
@@ -2060,9 +2100,10 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
     txn->final_lsn = lsn;
 
     /*
-     * Process cache invalidation messages if there are any. Even if we're not
-     * interested in the transaction's contents, it could have manipulated the
-     * catalog and we need to update the caches according to that.
+     * Process only cache invalidation messages in this transaction if there
+     * are any. Even if we're not interested in the transaction's contents, it
+     * could have manipulated the catalog and we need to update the caches
+     * according to that.
      */
     if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
         ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
@@ -2253,6 +2294,36 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
     txn->ntuplecids++;
 }
 
+/*
+ * A helper function for ReorderBufferAddInvalidations() and
+ * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
+ * messages to the **invals_out.
+ */
+static void
+ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out,
+                                     uint32 *ninvals_out,
+                                     SharedInvalidationMessage *msgs_new,
+                                     Size nmsgs_new)
+{
+    if (*ninvals_out == 0)
+    {
+        *ninvals_out = nmsgs_new;
+        *invals_out = (SharedInvalidationMessage *)
+            palloc(sizeof(SharedInvalidationMessage) * nmsgs_new);
+        memcpy(*invals_out, msgs_new, sizeof(SharedInvalidationMessage) * nmsgs_new);
+    }
+    else
+    {
+        /* Enlarge the array of inval messages */
+        *invals_out = (SharedInvalidationMessage *)
+            repalloc(*invals_out, sizeof(SharedInvalidationMessage) *
+                     (*ninvals_out + nmsgs_new));
+        memcpy(*invals_out + *ninvals_out, msgs_new,
+               nmsgs_new * sizeof(SharedInvalidationMessage));
+        *ninvals_out += nmsgs_new;
+    }
+}
+
 /*
  * Setup the invalidation of the toplevel transaction.
  *
@@ -2282,24 +2353,74 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
 
     Assert(nmsgs > 0);
 
-    /* Accumulate invalidations. */
-    if (txn->ninvalidations == 0)
+    ReorderBufferAccumulateInvalidations(&txn->invalidations,
+                                         &txn->ninvalidations,
+                                         msgs, nmsgs);
+
+    MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * Accumulate the invalidations distributed by other committed transactions
+ * for executing them later.
+ *
+ * This function is similar to ReorderBufferAddInvalidations() but stores
+ * the given inval messages to the txn->invalidations_distributed with the
+ * overflow check.
+ *
+ * This needs to be called by committed transactions to distribute their
+ * inval messages to in-progress transactions.
+ */
+void
+ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid,
+                                         XLogRecPtr lsn, Size nmsgs,
+                                         SharedInvalidationMessage *msgs)
+{
+    ReorderBufferTXN *txn;
+    MemoryContext oldcontext;
+
+    txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+    oldcontext = MemoryContextSwitchTo(rb->context);
+
+    /*
+     * Collect all the invalidations under the top transaction, if available,
+     * so that we can execute them all together.
+     */
+    if (txn->toplevel_xid)
     {
-        txn->ninvalidations = nmsgs;
-        txn->invalidations = (SharedInvalidationMessage *)
-            palloc(sizeof(SharedInvalidationMessage) * nmsgs);
-        memcpy(txn->invalidations, msgs,
-               sizeof(SharedInvalidationMessage) * nmsgs);
+        txn = ReorderBufferTXNByXid(rb, txn->toplevel_xid, true, NULL, lsn,
+                                    true);
     }
-    else
+
+    Assert(nmsgs > 0);
+
+    if (!rbtxn_distr_inval_overflowed(txn))
     {
-        txn->invalidations = (SharedInvalidationMessage *)
-            repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) *
-                     (txn->ninvalidations + nmsgs));
+        /*
+         * Check the transaction has enough space for storing distributed
+         * invalidation messages.
+         */
+        if (txn->ninvalidations_distributed + nmsgs >= MAX_DISTR_INVAL_MSG_PER_TXN)
+        {
+            /*
+             * Mark the invalidation message as overflowed and free up the
+             * messages accumulated so far.
+             */
+            txn->txn_flags |= RBTXN_DISTR_INVAL_OVERFLOWED;
 
-        memcpy(txn->invalidations + txn->ninvalidations, msgs,
-               nmsgs * sizeof(SharedInvalidationMessage));
-        txn->ninvalidations += nmsgs;
+            if (txn->invalidations_distributed)
+            {
+                pfree(txn->invalidations_distributed);
+                txn->invalidations_distributed = NULL;
+                txn->ninvalidations_distributed = 0;
+            }
+        }
+        else
+            ReorderBufferAccumulateInvalidations(&txn->invalidations_distributed,
+                                                 &txn->ninvalidations_distributed,
                                                  msgs, nmsgs);
     }
 
     MemoryContextSwitchTo(oldcontext);
@@ -2310,12 +2431,12 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
  * in the changestream but we don't know which those are.
  */
 static void
-ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn)
+ReorderBufferExecuteInvalidations(uint32 nmsgs, SharedInvalidationMessage *msgs)
 {
     int         i;
 
-    for (i = 0; i < txn->ninvalidations; i++)
-        LocalExecuteInvalidationMessage(&txn->invalidations[i]);
+    for (i = 0; i < nmsgs; i++)
+        LocalExecuteInvalidationMessage(&msgs[i]);
 }
 
 /*

src/backend/replication/logical/snapbuild.c

Lines changed: 10 additions & 2 deletions
@@ -927,6 +927,13 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
      * contents built by the current transaction even after its decoding,
      * which should have been invalidated due to concurrent catalog
      * changing transaction.
+     *
+     * Distribute only the invalidation messages generated by the current
+     * committed transaction. Invalidation messages received from other
+     * transactions would have already been propagated to the relevant
+     * in-progress transactions. This transaction would have processed
+     * those invalidations, ensuring that subsequent transactions observe
+     * a consistent cache state.
      */
     if (txn->xid != xid)
     {
@@ -940,8 +947,9 @@
         {
             Assert(msgs != NULL);
 
-            ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn,
-                                          ninvalidations, msgs);
+            ReorderBufferAddDistributedInvalidations(builder->reorder,
+                                                     txn->xid, lsn,
+                                                     ninvalidations, msgs);
         }
     }
 }
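
The hunk above is the call site of the fan-out. As a rough standalone model of the rule stated in the new comment (names invented; this is not the actual SnapBuildDistributeSnapshotAndInval code), only the committed transaction's own invalidation messages are handed to the other in-progress transactions; messages it merely received from earlier commits are not redistributed, which is what previously caused the exponential growth.

/*
 * Rough standalone model (invented names; not PostgreSQL code) of the
 * distribution rule: on commit, fan out only the committed transaction's own
 * inval messages; messages it received from earlier commits are not re-sent.
 */
#include <stdio.h>
#include <stddef.h>

typedef struct Txn
{
    int     xid;
    size_t  n_own;          /* inval messages the txn generated itself */
    size_t  n_received;     /* inval messages distributed to it by others */
} Txn;

static void
distribute_on_commit(const Txn *committed, Txn *in_progress, size_t ntxns)
{
    for (size_t i = 0; i < ntxns; i++)
    {
        if (in_progress[i].xid == committed->xid)
            continue;                       /* skip the committing txn itself */

        /* Only the committed txn's own messages are fanned out ... */
        in_progress[i].n_received += committed->n_own;

        /* ... its received messages are deliberately NOT re-sent. */
    }
}

int
main(void)
{
    Txn     committed = {.xid = 1, .n_own = 2, .n_received = 5};
    Txn     others[] = {{.xid = 2}, {.xid = 3}};

    distribute_on_commit(&committed, others, 2);
    printf("txn 2 received %zu, txn 3 received %zu\n",
           others[0].n_received, others[1].n_received);     /* 2 and 2, not 7 */
    return 0;
}

In this model each in-progress transaction picks up only the two messages the committed transaction generated itself, not the five it had previously received, so the per-transaction message count no longer compounds across commits.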

src/include/replication/reorderbuffer.h

Lines changed: 19 additions & 3 deletions
@@ -160,9 +160,10 @@ typedef struct ReorderBufferChange
 } ReorderBufferChange;
 
 /* ReorderBufferTXN txn_flags */
-#define RBTXN_HAS_CATALOG_CHANGES 0x0001
-#define RBTXN_IS_SUBXACT          0x0002
-#define RBTXN_IS_SERIALIZED       0x0004
+#define RBTXN_HAS_CATALOG_CHANGES    0x0001
+#define RBTXN_IS_SUBXACT             0x0002
+#define RBTXN_IS_SERIALIZED          0x0004
+#define RBTXN_DISTR_INVAL_OVERFLOWED 0x0008
 
 /* Does the transaction have catalog changes? */
 #define rbtxn_has_catalog_changes(txn) \
@@ -182,6 +183,12 @@ typedef struct ReorderBufferChange
     ((txn)->txn_flags & RBTXN_IS_SERIALIZED) != 0 \
 )
 
+/* Is the array of distributed inval messages overflowed? */
+#define rbtxn_distr_inval_overflowed(txn) \
+( \
+    ((txn)->txn_flags & RBTXN_DISTR_INVAL_OVERFLOWED) != 0 \
+)
+
 typedef struct ReorderBufferTXN
 {
     /* See above */
@@ -311,6 +318,12 @@ typedef struct ReorderBufferTXN
      * Size of this transaction (changes currently in memory, in bytes).
      */
     Size        size;
+
+    /*
+     * Stores cache invalidation messages distributed by other transactions.
+     */
+    uint32      ninvalidations_distributed;
+    SharedInvalidationMessage *invalidations_distributed;
 } ReorderBufferTXN;
 
 /* so we can define the callbacks used inside struct ReorderBuffer itself */
@@ -451,6 +464,9 @@ void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr ls
                                   CommandId cmin, CommandId cmax, CommandId combocid);
 void        ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
                                           Size nmsgs, SharedInvalidationMessage *msgs);
+void        ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid,
+                                                     XLogRecPtr lsn, Size nmsgs,
+                                                     SharedInvalidationMessage *msgs);
 void        ReorderBufferImmediateInvalidation(ReorderBuffer *, uint32 ninvalidations,
                                                SharedInvalidationMessage *invalidations);
 void        ReorderBufferProcessXid(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn);
