Skip to content

Commit f204c05

Browse files
committed
fix more races in referee mode
1 parent 756c6e9 commit f204c05

File tree

1 file changed

+29
-15
lines changed

1 file changed

+29
-15
lines changed

state.c

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ MtmRefreshClusterStatus()
395395

396396
/*
397397
* Check for referee decision when only half of nodes are visible.
398+
* Do not hold lock here, but recheck later whether mask changed.
398399
*/
399400
if (MtmRefereeConnStr && *MtmRefereeConnStr && !Mtm->refereeWinnerId &&
400401
countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes) == Mtm->nAllNodes/2)
@@ -406,29 +407,40 @@ MtmRefreshClusterStatus()
406407
Mtm->refereeWinnerId = winner_node_id;
407408
if (!BIT_CHECK(SELF_CONNECTIVITY_MASK, winner_node_id - 1))
408409
{
409-
MTM_LOG1("[STATE] Referee allowed to proceed with half of the nodes (winner_id = %d)",
410-
winner_node_id);
411-
Mtm->refereeGrant = true;
410+
/*
411+
* By the time we enter this block we can already see other nodes.
412+
* So recheck old conditions under lock.
413+
*/
412414
MtmLock(LW_EXCLUSIVE);
413-
if (countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes) == 1)
415+
if (countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes) == Mtm->nAllNodes/2 &&
416+
!BIT_CHECK(SELF_CONNECTIVITY_MASK, winner_node_id - 1))
414417
{
415-
// XXXX: that is valid for two nodes. Better idea is to parametrize MtmPollStatus*
416-
// functions.
417-
int neighbor_node_id = MtmNodeId == 1 ? 2 : 1;
418-
MtmPollStatusOfPreparedTransactionsForDisabledNode(neighbor_node_id, true);
418+
MTM_LOG1("[STATE] Referee allowed to proceed with half of the nodes (winner_id = %d)",
419+
winner_node_id);
420+
Mtm->refereeGrant = true;
421+
if (countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes) == 1)
422+
{
423+
// XXXX: that is valid for two nodes. Better idea is to parametrize MtmPollStatus*
424+
// functions.
425+
int neighbor_node_id = MtmNodeId == 1 ? 2 : 1;
426+
MtmPollStatusOfPreparedTransactionsForDisabledNode(neighbor_node_id, true);
427+
}
428+
MtmEnableNode(MtmNodeId);
429+
MtmCheckState();
419430
}
420-
MtmEnableNode(MtmNodeId);
421-
MtmCheckState();
422431
MtmUnlock();
423432
}
424433
}
425434
}
426435

427436
/*
428-
* Clear winner if we again have all nodes online.
437+
* Clear winner if we again have all nodes recovered.
438+
* We should clean old value based on disabledNodeMask instead of SELF_CONNECTIVITY_MASK
439+
* because we can clean old value before failed node starts its recovery and that node
440+
* can get refereeGrant before start of walsender, so it starts in recovered mode.
429441
*/
430-
if (MtmRefereeConnStr && *MtmRefereeConnStr && Mtm->refereeWinnerId &&
431-
countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes) == Mtm->nAllNodes)
442+
if (MtmRefereeConnStr && *MtmRefereeConnStr && Mtm->refereeWinnerId &&
443+
countZeroBits(Mtm->disabledNodeMask, Mtm->nAllNodes) == Mtm->nAllNodes)
432444
{
433445
if (MtmRefereeClearWinner())
434446
{
@@ -438,8 +450,10 @@ MtmRefreshClusterStatus()
438450
}
439451
}
440452

441-
/* Do not check clique with referee grant */
442-
if (Mtm->refereeWinnerId)
453+
/*
454+
* Do not check clique with referee grant, because we can disable ourself.
455+
*/
456+
if (Mtm->refereeGrant)
443457
return;
444458

445459
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy