Skip to content

Commit d9f01a2

Browse files
author
Amit Kapila
committed
Fix a deadlock during ALTER SUBSCRIPTION ... DROP PUBLICATION.
A deadlock can occur when the DDL command and the apply worker acquire catalog locks in different orders while dropping replication origins.

The issue is rare in PG16 and higher branches because, in most cases, the tablesync worker performs the origin drop in those branches, and its locking sequence does not conflict with DDL operations.

This patch ensures consistent lock acquisition to prevent such deadlocks.

As per buildfarm.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Author: Ajin Cherian <itsajin@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 14, where it was introduced
Discussion: https://postgr.es/m/bab95e12-6cc5-4ebb-80a8-3e41956aa297@gmail.com
1 parent 8891433 commit d9f01a2

File tree

3 files changed

+49
-8
lines changed

3 files changed

+49
-8
lines changed

src/backend/catalog/pg_subscription.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,17 +319,32 @@ AddSubscriptionRelState(Oid subid, Oid relid, char state,
319319
*/
320320
void
321321
UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
322-
XLogRecPtr sublsn)
322+
XLogRecPtr sublsn, bool already_locked)
323323
{
324324
Relation rel;
325325
HeapTuple tup;
326326
bool nulls[Natts_pg_subscription_rel];
327327
Datum values[Natts_pg_subscription_rel];
328328
bool replaces[Natts_pg_subscription_rel];
329329

330-
LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock);
330+
if (already_locked)
331+
{
332+
#ifdef USE_ASSERT_CHECKING
333+
LOCKTAG tag;
331334

332-
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
335+
Assert(CheckRelationOidLockedByMe(SubscriptionRelRelationId,
336+
RowExclusiveLock, true));
337+
SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0);
338+
Assert(LockHeldByMe(&tag, AccessShareLock, true));
339+
#endif
340+
341+
rel = table_open(SubscriptionRelRelationId, NoLock);
342+
}
343+
else
344+
{
345+
LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock);
346+
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
347+
}
333348

334349
/* Try finding existing mapping. */
335350
tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP,

src/backend/replication/logical/tablesync.c

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,8 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn)
316316
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
317317
MyLogicalRepWorker->relid,
318318
MyLogicalRepWorker->relstate,
319-
MyLogicalRepWorker->relstate_lsn);
319+
MyLogicalRepWorker->relstate_lsn,
320+
false);
320321

321322
/*
322323
* End streaming so that LogRepWorkerWalRcvConn can be used to drop
@@ -425,6 +426,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
425426
ListCell *lc;
426427
bool started_tx = false;
427428
bool should_exit = false;
429+
Relation rel = NULL;
428430

429431
Assert(!IsTransactionState());
430432

@@ -492,7 +494,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
492494
* worker to remove the origin tracking as if there is any
493495
* error while dropping we won't restart it to drop the
494496
* origin. So passing missing_ok = true.
497+
*
498+
* Lock the subscription and origin in the same order as we
499+
* are doing during DDL commands to avoid deadlocks. See
500+
* AlterSubscription_refresh.
495501
*/
502+
LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
503+
0, AccessShareLock);
504+
505+
if (!rel)
506+
rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
507+
496508
ReplicationOriginNameForLogicalRep(MyLogicalRepWorker->subid,
497509
rstate->relid,
498510
originname,
@@ -504,7 +516,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
504516
*/
505517
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
506518
rstate->relid, rstate->state,
507-
rstate->lsn);
519+
rstate->lsn, true);
508520
}
509521
}
510522
else
@@ -555,7 +567,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
555567
* This is required to avoid any undetected deadlocks
556568
* due to any existing lock as deadlock detector won't
557569
* be able to detect the waits on the latch.
570+
*
571+
* Also close any tables prior to the commit.
558572
*/
573+
if (rel)
574+
{
575+
table_close(rel, NoLock);
576+
rel = NULL;
577+
}
559578
CommitTransactionCommand();
560579
pgstat_report_stat(false);
561580
}
@@ -622,6 +641,11 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
622641
}
623642
}
624643

644+
/* Close table if opened */
645+
if (rel)
646+
table_close(rel, NoLock);
647+
648+
625649
if (started_tx)
626650
{
627651
/*
@@ -1413,7 +1437,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
14131437
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
14141438
MyLogicalRepWorker->relid,
14151439
MyLogicalRepWorker->relstate,
1416-
MyLogicalRepWorker->relstate_lsn);
1440+
MyLogicalRepWorker->relstate_lsn,
1441+
false);
14171442
CommitTransactionCommand();
14181443
pgstat_report_stat(true);
14191444

@@ -1546,7 +1571,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
15461571
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
15471572
MyLogicalRepWorker->relid,
15481573
SUBREL_STATE_FINISHEDCOPY,
1549-
MyLogicalRepWorker->relstate_lsn);
1574+
MyLogicalRepWorker->relstate_lsn,
1575+
false);
15501576

15511577
CommitTransactionCommand();
15521578

src/include/catalog/pg_subscription_rel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ typedef struct SubscriptionRelState
8585
extern void AddSubscriptionRelState(Oid subid, Oid relid, char state,
8686
XLogRecPtr sublsn, bool retain_lock);
8787
extern void UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
88-
XLogRecPtr sublsn);
88+
XLogRecPtr sublsn, bool already_locked);
8989
extern char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn);
9090
extern void RemoveSubscriptionRel(Oid subid, Oid relid);
9191

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy