Content-Length: 496783 | pFad | https://github.com/postgrespro/postgres/commit/f6c5edb8abcac04eb3eac6da356e59d399b2bcef

99 Drop replication origen slots before tablesync worker exits. · postgrespro/postgres@f6c5edb · GitHub
Skip to content

Commit f6c5edb

Browse files
author
Amit Kapila
committed
Drop replication origen slots before tablesync worker exits.
Currently, the replication origen tracking of the tablesync worker is dropped by the apply worker. So, there will be a small lag between the tablesync worker exit and its origen tracking got removed. In the meantime, new tablesync workers can be launched and will try to set up a new origen tracking. This can lead the system to reach max configured limit (max_replication_slots) even if the user has configured the max limit considering the number of tablesync workers required in the system. We decided not to back-patch as this can occur in very narrow circumstances and users have to option to increase the configured limit by increasing max_replication_slots. Reported-by: Hubert Depesz Lubaczewski Author: Ajin Cherian Reviwed-by: Masahiko Sawada, Peter Smith, Hou Zhijie, Amit Kapila Discussion: https://postgr.es/m/20220714115155.GA5439@depesz.com
1 parent 8654246 commit f6c5edb

File tree

2 files changed

+41
-34
lines changed

2 files changed

+41
-34
lines changed

src/backend/commands/subscriptioncmds.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -919,22 +919,19 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data,
919919
logicalrep_worker_stop(sub->oid, relid);
920920

921921
/*
922-
* For READY state, we would have already dropped the
923-
* tablesync origen.
922+
* For READY state and SYNCDONE state, we would have already
923+
* dropped the tablesync origen.
924924
*/
925-
if (state != SUBREL_STATE_READY)
925+
if (state != SUBREL_STATE_READY && state != SUBREL_STATE_SYNCDONE)
926926
{
927927
char origenname[NAMEDATALEN];
928928

929929
/*
930930
* Drop the tablesync's origen tracking if exists.
931931
*
932932
* It is possible that the origen is not yet created for
933-
* tablesync worker, this can happen for the states before
934-
* SUBREL_STATE_FINISHEDCOPY. The apply worker can also
935-
* concurrently try to drop the origen and by this time
936-
* the origen might be already removed. For these reasons,
937-
* passing missing_ok = true.
933+
* tablesync worker so passing missing_ok = true. This can
934+
* happen for the states before SUBREL_STATE_FINISHEDCOPY.
938935
*/
939936
ReplicationOriginNameForTablesync(sub->oid, relid, origenname,
940937
sizeof(origenname));
@@ -1507,13 +1504,19 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
15071504
/*
15081505
* Drop the tablesync's origen tracking if exists.
15091506
*
1507+
* For SYNCDONE/READY states, the tablesync origen tracking is known
1508+
* to have already been dropped by the tablesync worker.
1509+
*
15101510
* It is possible that the origen is not yet created for tablesync
15111511
* worker so passing missing_ok = true. This can happen for the states
15121512
* before SUBREL_STATE_FINISHEDCOPY.
15131513
*/
1514-
ReplicationOriginNameForTablesync(subid, relid, origenname,
1515-
sizeof(origenname));
1516-
replorigen_drop_by_name(origenname, true, false);
1514+
if (rstate->state != SUBREL_STATE_SYNCDONE)
1515+
{
1516+
ReplicationOriginNameForTablesync(subid, relid, origenname,
1517+
sizeof(origenname));
1518+
replorigen_drop_by_name(origenname, true, false);
1519+
}
15171520
}
15181521

15191522
/* Clean up dependencies */

src/backend/replication/logical/tablesync.c

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn)
291291
{
292292
TimeLineID tli;
293293
char syncslotname[NAMEDATALEN] = {0};
294+
char origenname[NAMEDATALEN] = {0};
294295

295296
MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCDONE;
296297
MyLogicalRepWorker->relstate_lsn = current_lsn;
@@ -309,6 +310,30 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn)
309310
MyLogicalRepWorker->relstate,
310311
MyLogicalRepWorker->relstate_lsn);
311312

313+
/*
314+
* Cleanup the tablesync origen tracking.
315+
*
316+
* Resetting the origen session removes the ownership of the slot.
317+
* This is needed to allow the origen to be dropped.
318+
*/
319+
ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid,
320+
MyLogicalRepWorker->relid,
321+
origenname,
322+
sizeof(origenname));
323+
replorigen_session_reset();
324+
replorigen_session_origen = InvalidRepOriginId;
325+
replorigen_session_origen_lsn = InvalidXLogRecPtr;
326+
replorigen_session_origen_timestamp = 0;
327+
328+
/*
329+
* We expect that origen must be present. The concurrent operations
330+
* that remove origen like a refresh for the subscription take an
331+
* access exclusive lock on pg_subscription which prevent the previous
332+
* operation to update the rel state to SUBREL_STATE_SYNCDONE to
333+
* succeed.
334+
*/
335+
replorigen_drop_by_name(origenname, false, false);
336+
312337
/*
313338
* End streaming so that LogRepWorkerWalRcvConn can be used to drop
314339
* the slot.
@@ -318,7 +343,7 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn)
318343
/*
319344
* Cleanup the tablesync slot.
320345
*
321-
* This has to be done after updating the state because otherwise if
346+
* This has to be done after the data changes because otherwise if
322347
* there is an error while doing the database operations we won't be
323348
* able to rollback dropped slot.
324349
*/
@@ -441,8 +466,6 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
441466
*/
442467
if (current_lsn >= rstate->lsn)
443468
{
444-
char origenname[NAMEDATALEN];
445-
446469
rstate->state = SUBREL_STATE_READY;
447470
rstate->lsn = current_lsn;
448471
if (!started_tx)
@@ -452,26 +475,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
452475
}
453476

454477
/*
455-
* Remove the tablesync origen tracking if exists.
456-
*
457-
* The normal case origen drop is done here instead of in the
458-
* process_syncing_tables_for_sync function because we don't
459-
* allow to drop the origen till the process owning the origen
460-
* is alive.
461-
*
462-
* There is a chance that the user is concurrently performing
463-
* refresh for the subscription where we remove the table
464-
* state and its origen and by this time the origen might be
465-
* already removed. So passing missing_ok = true.
466-
*/
467-
ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid,
468-
rstate->relid,
469-
origenname,
470-
sizeof(origenname));
471-
replorigen_drop_by_name(origenname, true, false);
472-
473-
/*
474-
* Update the state to READY only after the origen cleanup.
478+
* Update the state to READY.
475479
*/
476480
UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
477481
rstate->relid, rstate->state,

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/postgrespro/postgres/commit/f6c5edb8abcac04eb3eac6da356e59d399b2bcef

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy