Skip to content

Commit ed8b893

Browse files
committed
Start rewriting multimaster to 2PC
1 parent 9972466 commit ed8b893

File tree

4 files changed

+51
-151
lines changed

4 files changed

+51
-151
lines changed

contrib/mmts/arbiter.c

Lines changed: 14 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ static void MtmAppendBuffer(MtmBuffer* txBuffer, TransactionId xid, int node, Mt
458458
MTM_TRACE("Send message %s CSN=%ld to node %d from node %d for global transaction %d/local transaction %d\n",
459459
messageText[ts->cmd], ts->csn, node+1, MtmNodeId, ts->gtid.xid, ts->xid);
460460
Assert(ts->cmd != MSG_INVALID);
461-
buf->data[buf->used].code = ts->cmd;
461+
buf->data[buf->used].code = ts->status == TRANSACTION_STATUS_ABORTED ? MSG_ABORTED : MSG_PREPARED;
462462
buf->data[buf->used].dxid = xid;
463463
buf->data[buf->used].sxid = ts->xid;
464464
buf->data[buf->used].csn = ts->csn;
@@ -467,21 +467,6 @@ static void MtmAppendBuffer(MtmBuffer* txBuffer, TransactionId xid, int node, Mt
467467
buf->used += 1;
468468
}
469469

470-
static void MtmBroadcastMessage(MtmBuffer* txBuffer, MtmTransState* ts)
471-
{
472-
int i;
473-
int n = 1;
474-
for (i = 0; i < MtmNodes; i++)
475-
{
476-
if (TransactionIdIsValid(ts->xids[i])) {
477-
Assert(i+1 != MtmNodeId);
478-
MtmAppendBuffer(txBuffer, ts->xids[i], i, ts);
479-
n += 1;
480-
}
481-
}
482-
Assert(n == ds->nNodes);
483-
}
484-
485470
static void MtmTransSender(Datum arg)
486471
{
487472
int nNodes = MtmNodes;
@@ -508,12 +493,7 @@ static void MtmTransSender(Datum arg)
508493
MtmLock(LW_SHARED);
509494

510495
for (ts = ds->votingTransactions; ts != NULL; ts = ts->nextVoting) {
511-
if (MtmIsCoordinator(ts)) {
512-
MtmBroadcastMessage(txBuffer, ts);
513-
} else {
514-
MtmAppendBuffer(txBuffer, ts->gtid.xid, ts->gtid.node-1, ts);
515-
}
516-
ts->cmd = MSG_INVALID;
496+
MtmAppendBuffer(txBuffer, ts->gtid.xid, ts->gtid.node-1, ts);
517497
}
518498
ds->votingTransactions = NULL;
519499
MtmUnlock();
@@ -634,109 +614,31 @@ static void MtmTransReceiver(Datum arg)
634614
MtmArbiterMessage* msg = &rxBuffer[i].data[j];
635615
MtmTransState* ts = (MtmTransState*)hash_search(xid2state, &msg->dxid, HASH_FIND, NULL);
636616
Assert(ts != NULL);
637-
Assert(ts->cmd == MSG_INVALID);
638617
Assert(msg->node > 0 && msg->node <= nNodes && msg->node != MtmNodeId);
639-
ts->xids[msg->node-1] = msg->sxid;
640-
641-
if (MtmIsCoordinator(ts)) {
642-
switch (msg->code) {
643-
case MSG_READY:
644-
Assert(ts->status == TRANSACTION_STATUS_ABORTED || ts->status == TRANSACTION_STATUS_IN_PROGRESS);
645-
Assert(ts->nVotes < ds->nNodes);
646-
ds->nodeTransDelay[msg->node-1] += MtmGetCurrentTime() - ts->csn;
647-
if (++ts->nVotes == ds->nNodes) {
648-
/* All nodes are finished their transactions */
649-
if (ts->status == TRANSACTION_STATUS_IN_PROGRESS) {
650-
ts->nVotes = 1; /* I voted myself */
651-
ts->cmd = MSG_PREPARE;
652-
} else {
653-
ts->status = TRANSACTION_STATUS_ABORTED;
654-
ts->cmd = MSG_ABORT;
655-
MtmAdjustSubtransactions(ts);
656-
MtmWakeUpBackend(ts);
657-
}
658-
MtmSendNotificationMessage(ts);
659-
}
660-
break;
618+
Assert (MtmIsCoordinator(ts));
619+
switch (msg->code) {
661620
case MSG_PREPARED:
662-
Assert(ts->status == TRANSACTION_STATUS_IN_PROGRESS);
663-
Assert(ts->nVotes < ds->nNodes);
664-
if (msg->csn > ts->csn) {
665-
ts->csn = msg->csn;
666-
MtmSyncClock(ts->csn);
667-
}
668-
if (++ts->nVotes == ds->nNodes) {
669-
/* ts->csn is maximum of CSNs at all nodes */
670-
ts->nVotes = 1; /* I voted myself */
671-
ts->cmd = MSG_COMMIT;
672-
ts->csn = MtmAssignCSN();
673-
ts->status = TRANSACTION_STATUS_UNKNOWN;
674-
MtmAdjustSubtransactions(ts);
675-
MtmSendNotificationMessage(ts);
676-
}
677-
break;
678-
case MSG_COMMITTED:
679-
Assert(ts->status == TRANSACTION_STATUS_UNKNOWN);
680-
Assert(ts->nVotes < ds->nNodes);
681-
if (++ts->nVotes == ds->nNodes) {
682-
/* All nodes have the same CSN */
683-
MtmWakeUpBackend(ts);
621+
if (ts->status == TRANSACTION_STATUS_IN_PROGRESS) {
622+
if (msg->csn > ts->csn) {
623+
ts->csn = msg->csn;
624+
MtmSyncClock(ts->csn);
625+
}
626+
if (++ts->nVotes == ds->nNodes) {
627+
MtmWakeUpBackend(ts);
628+
}
684629
}
685630
break;
686631
case MSG_ABORTED:
687-
Assert(ts->status == TRANSACTION_STATUS_ABORTED || ts->status == TRANSACTION_STATUS_IN_PROGRESS);
688-
Assert(ts->nVotes < ds->nNodes);
689-
ts->status = TRANSACTION_STATUS_ABORTED;
690-
if (++ts->nVotes == ds->nNodes) {
691-
ts->cmd = MSG_ABORT;
692-
MtmAdjustSubtransactions(ts);
693-
MtmSendNotificationMessage(ts);
694-
MtmWakeUpBackend(ts);
695-
}
696-
break;
697-
default:
698-
Assert(false);
699-
}
700-
} else { /* replica */
701-
switch (msg->code) {
702-
case MSG_PREPARE:
703-
Assert(ts->status == TRANSACTION_STATUS_IN_PROGRESS);
704-
if ((msg->disabledNodeMask & ~ds->disabledNodeMask) != 0) {
705-
/* Coordinator's disabled mask is wider than my: so reject such transaction to avoid
706-
commit on smaller subset of nodes */
632+
if (ts->status == TRANSACTION_STATUS_IN_PROGRESS) {
707633
ts->status = TRANSACTION_STATUS_ABORTED;
708-
ts->cmd = MSG_ABORT;
709-
MtmAdjustSubtransactions(ts);
710-
MtmWakeUpBackend(ts);
711-
} else {
712-
ts->status = TRANSACTION_STATUS_UNKNOWN;
713-
ts->csn = MtmAssignCSN();
714-
ts->cmd = MSG_PREPARED;
715-
}
716-
MtmSendNotificationMessage(ts);
717-
break;
718-
case MSG_COMMIT:
719-
Assert(ts->status == TRANSACTION_STATUS_UNKNOWN);
720-
Assert(ts->csn < msg->csn);
721-
ts->csn = msg->csn;
722-
MtmSyncClock(ts->csn);
723-
ts->cmd = MSG_COMMITTED;
724-
MtmAdjustSubtransactions(ts);
725-
MtmSendNotificationMessage(ts);
726-
MtmWakeUpBackend(ts);
727-
break;
728-
case MSG_ABORT:
729-
if (ts->status != TRANSACTION_STATUS_ABORTED) {
730-
Assert(ts->status == TRANSACTION_STATUS_UNKNOWN || ts->status == TRANSACTION_STATUS_IN_PROGRESS);
731-
ts->status = TRANSACTION_STATUS_ABORTED;
732634
MtmAdjustSubtransactions(ts);
733635
MtmWakeUpBackend(ts);
734636
}
735637
break;
736638
default:
737639
Assert(false);
738640
}
739-
}
641+
}
740642
}
741643
MtmUnlock();
742644

contrib/mmts/multimaster.c

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ typedef struct {
6464
bool isReplicated; /* transaction on replica */
6565
bool isDistributed; /* transaction performed INSERT/UPDATE/DELETE and has to be replicated to other nodes */
6666
bool containsDML; /* transaction contains DML statements */
67-
bool isPrepared; /* transaction is prepared as part of 2PC */
6867
csn_t snapshot; /* transaction snaphsot */
6968
} MtmCurrentTrans;
7069

@@ -100,8 +99,8 @@ static void MtmSetTransactionStatus(TransactionId xid, int nsubxids, Transaction
10099
static void MtmInitialize(void);
101100
static void MtmXactCallback(XactEvent event, void *arg);
102101
static void MtmBeginTransaction(MtmCurrentTrans* x);
103-
static void MtmPrecommitTransaction(MtmCurrentTrans* x);
104102
static bool MtmCommitTransaction(TransactionId xid, int nsubxids, TransactionId *subxids);
103+
static void MtmPrePrepareTransaction(MtmCurrentTrans* x);
105104
static void MtmPrepareTransaction(MtmCurrentTrans* x);
106105
static void MtmCommitPreparedTransaction(MtmCurrentTrans* x);
107106
static void MtmEndTransaction(MtmCurrentTrans* x, bool commit);
@@ -347,15 +346,6 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
347346
return PgXidInMVCCSnapshot(xid, snapshot);
348347
}
349348

350-
static uint32 MtmXidHashFunc(const void *key, Size keysize)
351-
{
352-
return (uint32)*(TransactionId*)key;
353-
}
354-
355-
static int MtmXidMatchFunc(const void *key1, const void *key2, Size keysize)
356-
{
357-
return *(TransactionId*)key1 - *(TransactionId*)key2;
358-
}
359349

360350
static void MtmTransactionListAppend(MtmTransState* ts)
361351
{
@@ -489,8 +479,8 @@ MtmXactCallback(XactEvent event, void *arg)
489479
case XACT_EVENT_START:
490480
MtmBeginTransaction(&dtmTx);
491481
break;
492-
case XACT_EVENT_PRE_COMMIT:
493-
MtmPrecommitTransaction(&dtmTx);
482+
case XACT_EVENT_PRE_PREPARE:
483+
MtmPrePrepareTransaction(&dtmTx);
494484
break;
495485
case XACT_EVENT_PREPARE:
496486
MtmPrepareTransaction(&dtmTx);
@@ -598,16 +588,14 @@ MtmCheckClusterLock()
598588
}
599589

600590
/*
601-
* This functions is called as pre-commit callback.
602-
* We need to pass snapshot to WAL-sender, so create record in transaction status hash table
603-
* before commit
591+
* Prepare transaction for two-phase commit
604592
*/
605-
static void MtmPrecommitTransaction(MtmCurrentTrans* x)
593+
MtmPrePrepareTransaction(MtmCurrentTrans* x)
606594
{
607595
MtmTransState* ts;
608596
int i;
609597

610-
if (!x->isDistributed || x->isPrepared) {
598+
if (!x->isDistributed) {
611599
return;
612600
}
613601

@@ -632,7 +620,6 @@ static void MtmPrecommitTransaction(MtmCurrentTrans* x)
632620
ts->snapshot = x->isReplicated || !x->containsDML ? INVALID_CSN : x->snapshot;
633621
ts->csn = MtmAssignCSN();
634622
ts->gtid = x->gtid;
635-
ts->cmd = MSG_INVALID;
636623
ts->procno = MyProc->pgprocno;
637624
ts->nVotes = 0;
638625
ts->voteCompleted = false;
@@ -644,24 +631,41 @@ static void MtmPrecommitTransaction(MtmCurrentTrans* x)
644631
ts->gtid.xid = x->xid;
645632
ts->gtid.node = MtmNodeId;
646633
}
647-
for (i = 0; i < MtmNodes; i++) {
648-
ts->xids[i] = InvalidTransactionId;
649-
}
650634
MtmTransactionListAppend(ts);
651635

652636
MtmUnlock();
653637

654638
MTM_TRACE("%d: MtmPrepareTransaction prepare commit of %d CSN=%ld\n", MyProcPid, x->xid, ts->csn);
655639
}
656640

657-
static void
658641
MtmPrepareTransaction(MtmCurrentTrans* x)
659-
{
660-
MtmPrecommitTransaction(x);
661-
MTM_TRACE("Prepare transaction %d", x->xid);
662-
x->isPrepared = true;
642+
{
643+
MtmLock(LW_EXCLUSIVE);
644+
if (ts->status = TRANSACTION_STATUS_IN_PROGRESS) {
645+
ts->status = TRANSACTION_STATUS_UNKNOWN;
646+
MtmAdjustSubtransactions(ts);
647+
}
648+
649+
if (!MtmIsCoordinator(ts)) {
650+
MtmSendNotificationMessage(ts); /* send notification to coordinator */
651+
MtmUnlock();
652+
} else {
653+
/* wait N commits or just one ABORT */
654+
ts->nVotes += 1;
655+
while (ts->nVotes != dtm->nNodes && ts->status == TRANSACTION_STATUS_PROGRESS) {
656+
MtmUnlock();
657+
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, -1);
658+
ResetLatch(&MyProc->procLatch);
659+
MtmLock(LW_SHARED);
660+
}
661+
MtmUnlock();
662+
if (ts->status == TRANSACTION_STATUS_ABORTED) {
663+
elog(ERROR, "Distributed transaction %d is rejected by DTM", x->xid);
664+
}
665+
}
663666
}
664667

668+
665669
static void
666670
MtmCommitPreparedTransaction(MtmCurrentTrans* x)
667671
{
@@ -1679,14 +1683,12 @@ HTAB* MtmCreateHash(void)
16791683
Assert(MtmNodes > 0);
16801684
memset(&info, 0, sizeof(info));
16811685
info.keysize = sizeof(TransactionId);
1682-
info.entrysize = sizeof(MtmTransState) + (MtmNodes-1)*sizeof(TransactionId);
1683-
info.hash = MtmXidHashFunc;
1684-
info.match = MtmXidMatchFunc;
1686+
info.entrysize = sizeof(MtmTransState);
16851687
htab = ShmemInitHash(
16861688
"xid2state",
16871689
MTM_HASH_SIZE, MTM_HASH_SIZE,
16881690
&info,
1689-
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE
1691+
HASH_ELEM
16901692
);
16911693
return htab;
16921694
}

contrib/mmts/multimaster.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define MULTIMASTER_SLOT_PATTERN "mtm_slot_%d"
2020
#define MULTIMASTER_MIN_PROTO_VERSION 1
2121
#define MULTIMASTER_MAX_PROTO_VERSION 1
22+
#define MULTIMASTER_MAX_GID 32
2223

2324
#define USEC 1000000
2425

@@ -51,12 +52,7 @@ typedef enum
5152
{
5253
MSG_INVALID,
5354
MSG_HANDSHAKE,
54-
MSG_READY,
55-
MSG_PREPARE,
56-
MSG_COMMIT,
57-
MSG_ABORT,
5855
MSG_PREPARED,
59-
MSG_COMMITTED,
6056
MSG_ABORTED,
6157
MSG_STATUS
6258
} MtmMessageCode;
@@ -88,11 +84,10 @@ typedef struct MtmTransState
8884
finally should be nNodes-1 */
8985
int procno; /* pgprocno of transaction coordinator waiting for responses from replicas,
9086
used to notify coordinator by arbiter */
91-
MtmMessageCode cmd; /* Notification message code to be sent */
87+
bool voteCompleted; /* Responses necessary to make a decision are received by coordinator of transaction */
9288
int nSubxids; /* Number of subtransanctions */
9389
struct MtmTransState* nextVoting; /* Next element in L1-list of voting transactions. */
9490
struct MtmTransState* next; /* Next element in L1 list of all finished transaction present in xid2state hash */
95-
bool voteCompleted; /* Responses necessary to make a decision are received by coordinator of transaction */
9691
TransactionId xids[1]; /* transaction ID at replicas: varying size MtmNodes */
9792
} MtmTransState;
9893

src/backend/replication/logical/decode.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,8 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
289289
* transactions in the changestream allowing for a kind of
290290
* distributed 2PC.
291291
*/
292-
// ReorderBufferProcessXid(reorder, XLogRecGetXid(r), buf->origptr);
292+
elog(WARNING, "DECODE process XLOG_XACT_PREPARE");
293+
//ReorderBufferProcessXid(reorder, XLogRecGetXid(r), buf->origptr);
293294
break;
294295

295296
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy