Skip to content

Commit a088277

Browse files
committed
2 parents c2989af + db8fb8d commit a088277

17 files changed

+1827
-4
lines changed

contrib/mmts/multimaster.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,14 +1220,44 @@ void MtmOnNodeConnect(int nodeId)
12201220
*/
12211221
void* PaxosGet(char const* key, int* size, PaxosTimestamp* ts, bool nowait)
12221222
{
1223-
if (size != NULL) {
1223+
unsigned enclen, declen, len;
1224+
char *enc, *dec;
1225+
Assert(ts == NULL); // not implemented
1226+
1227+
enc = raftable_get(key);
1228+
if (enc == NULL)
1229+
{
12241230
*size = 0;
1231+
return NULL;
1232+
}
1233+
1234+
enclen = strlen(enc);
1235+
declen = hex_dec_len(enc, enclen);
1236+
dec = palloc(declen);
1237+
len = hex_decode(enc, enclen, dec);
1238+
pfree(enc);
1239+
Assert(len == declen);
1240+
1241+
if (size != NULL) {
1242+
*size = declen;
12251243
}
1226-
return NULL;
1244+
return dec;
12271245
}
12281246

12291247
void PaxosSet(char const* key, void const* value, int size, bool nowait)
1230-
{}
1248+
{
1249+
unsigned enclen, declen, len;
1250+
char *enc, *dec;
1251+
1252+
enclen = hex_enc_len(value, size);
1253+
enc = palloc(enclen) + 1;
1254+
len = hex_encode(value, size, enc);
1255+
Assert(len == enclen);
1256+
enc[len] = '\0';
1257+
1258+
raftable_set(key, enc, nowait ? 1 : INT_MAX);
1259+
pfree(enc);
1260+
}
12311261

12321262

12331263
/*

contrib/mmts/multimaster.control

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ comment = 'Multimaster'
22
default_version = '1.0'
33
module_pathname = '$libdir/multimaster'
44
schema = mtm
5-
relocatable = false
5+
relocatable = false
6+
requires = raftable

contrib/raftable/Makefile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
MODULE_big = raftable
2+
OBJS = raftable.o worker.o state.o blockmem.o
3+
EXTENSION = raftable
4+
DATA = raftable--1.0.sql
5+
6+
EXTRA_INSTALL = contrib/raftable
7+
8+
RAFT_PREFIX = $(HOME)/raft
9+
override LDFLAGS += -L$(RAFT_PREFIX)/lib -Wl,-whole-archive -lraft -Wl,-no-whole-archive
10+
override CFLAGS += -Wfatal-errors
11+
override CPPFLAGS += -I$(RAFT_PREFIX)/include
12+
13+
ifdef USE_PGXS
14+
PG_CONFIG = pg_config
15+
PGXS := $(shell $(PG_CONFIG) --pgxs)
16+
include $(PGXS)
17+
else
18+
subdir = contrib/raftable
19+
top_builddir = ../..
20+
include $(top_builddir)/src/Makefile.global
21+
include $(top_srcdir)/contrib/contrib-global.mk
22+
endif
23+
24+
check:
25+
$(prove_check)

contrib/raftable/README

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
Raftable
2+
========
3+
4+
This extension allows you to have a key-value table replicated between several
5+
Postgres instances over Raft protocol.
6+
7+
Depends on
8+
----------
9+
10+
Raft implementation in C: https://github.com/kvap/raft
11+
Please compile the raft library with -fpic flag.
12+
13+
Internals
14+
---------
15+
16+
Each 'postgres' instance starts a background worker 'raftable' and creates a
17+
shared memory segment for storing the 'state'. All 'raftable' workers are
18+
communicating Raft protocol over UDP.
19+
20+
When 'frontend' issues a read command, the 'backend' returns the data from the
21+
local replica of the 'state' which is in the shared memory of current instance.
22+
23+
When 'frontend' issues a write command, the 'backend' connects to the current
24+
'raftable' leader directly through TCP and sends an update. Raftable leader
25+
returns 'ok' to the backend when the update gets applied on current instance.
26+
27+
The backend can also issue commands to itself through C API.
28+
29+
┓ ┏━━━━━━━━━┓┏━━━━━━━━━┓┏━━━━━━━━━┓ ┏━━━━━━━━━┓┏━━━━━━━━━┓┏━━━━━━━━━┓ ┏
30+
┃ ┃ backend ┃┃ backend ┃┃ backend ┠─┐ ┃ backend ┃┃ backend ┃┃ backend ┃ ┃
31+
┛ ┗━━━━┯━━━━┛┗━━━━┯━━━━┛┗━━━━┯━━━━┛ │ ┗━━━━┯━━━━┛┗━━━━┯━━━━┛┗━━━━┯━━━━┛ ┗
32+
╗ ╔════╧══════════╧══════════╧════╗ T ╔════╧══════════╧══════════╧════╗ ╔
33+
║ ║ state in shared memory ║ C ║ state in shared memory ║ ║
34+
╝ ╚═══════════════╤═══════════════╝ P ╚═══════════════╤═══════════════╝ ╚
35+
┓ ┏━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┓ │ ┏━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┓ ┏
36+
┃ ┃ raftable worker ┃ └─┨ raftable worker ┃ ┃
37+
┛ ┗━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┛ ┗
38+
┆ ┆
39+
╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┴╶╶╶╶╶╶╶╶╶╶ Raft over UDP ╴╴╴╴╴╴╴╴╴╴┴╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶
40+
41+
C API:
42+
/* Gets value by key. Returns the value or NULL if not found. */
43+
char *raftable_get(char *key);
44+
45+
/*
46+
* Adds/updates value by key. Returns when the value gets replicated on
47+
* current machine. Storing NULL will delete the item from the table.
48+
*/
49+
void raftable_set(char *key, char *value);
50+
51+
/*
52+
* Iterates over all items in the table, calling func(key, value, arg)
53+
* for each of them.
54+
*/
55+
void raftable_every(void (*func)(char *, char *, void *), void *arg);
56+
57+
SQL API:
58+
-- set
59+
raftable(key varchar(64), value text, tries int);
60+
61+
-- get
62+
raftable(key varchar(64)) returns text;
63+
64+
-- list
65+
raftable() returns table (key varchar(64), value text);

contrib/raftable/blockmem.c

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#include "blockmem.h"
2+
3+
#include <stddef.h>
4+
#include <assert.h>
5+
#include <stdbool.h>
6+
#include <string.h>
7+
8+
#define BLOCK_LEN (256)
9+
#define BLOCK_DATA_LEN ((BLOCK_LEN) - offsetof(block_t, data))
10+
#define META(ORIGIN) ((meta_t *)(ORIGIN))
11+
#define BLOCK(ORIGIN, ID) ((block_t *)((char *)(ORIGIN) + BLOCK_LEN * ID))
12+
#define TAIL(ORIGIN, ID) (BLOCK(ORIGIN, ID)->next)
13+
#define USED(ORIGIN, ID) (BLOCK(ORIGIN, ID)->used)
14+
#define LEN(ORIGIN, ID) (BLOCK(ORIGIN, ID)->len)
15+
#define DATA(ORIGIN, ID) (BLOCK(ORIGIN, ID)->data)
16+
#define FREE(ORIGIN) (META(ORIGIN)->freeblock)
17+
18+
typedef struct meta_t
19+
{
20+
int freeblock; /* the id of first free block, or -1 if none */
21+
char data[1];
22+
} meta_t;
23+
24+
typedef struct block_t
25+
{
26+
bool used;
27+
int next;
28+
int len;
29+
char data[1];
30+
} block_t;
31+
32+
int
33+
blockmem_format(void *origin, size_t size)
34+
{
35+
block_t *block;
36+
meta_t *meta;
37+
int id;
38+
int blocks = (size - 1) / BLOCK_LEN;
39+
if (blocks <= 0) return 0;
40+
41+
FREE(origin) = 1;
42+
43+
for (id = 1; id <= blocks; id++)
44+
{
45+
USED(origin, id) = 0;
46+
TAIL(origin, id) = id + 1;
47+
}
48+
TAIL(origin, blocks) = 0; /* the last block has no tail */
49+
50+
return 1;
51+
}
52+
53+
static size_t
54+
block_fill(void *origin, int id, void *src, size_t len)
55+
{
56+
if (len > BLOCK_DATA_LEN)
57+
len = BLOCK_DATA_LEN;
58+
LEN(origin, id) = len;
59+
memcpy(DATA(origin, id), src, len);
60+
return len;
61+
}
62+
63+
void
64+
block_clear(void *origin, int id)
65+
{
66+
TAIL(origin, id) = 0;
67+
USED(origin, id) = true;
68+
LEN(origin, id) = 0;
69+
}
70+
71+
static int
72+
block_alloc(void *origin)
73+
{
74+
int newborn = FREE(origin);
75+
if (!newborn) return 0;
76+
77+
FREE(origin) = TAIL(origin, newborn);
78+
block_clear(origin, newborn);
79+
return newborn;
80+
}
81+
82+
static void
83+
block_free(void *origin, int id)
84+
{
85+
/* behead the victim, and repeat for the tail */
86+
while (id > 0)
87+
{
88+
int tail;
89+
assert(USED(origin, id));
90+
91+
USED(origin, id) = false;
92+
tail = TAIL(origin, id);
93+
TAIL(origin, id) = FREE(origin);
94+
FREE(origin) = id;
95+
id = tail;
96+
}
97+
}
98+
99+
int
100+
blockmem_put(void *origin, void *src, size_t len)
101+
{
102+
int head = 0;
103+
int id = 0;
104+
char *cursor = src;
105+
size_t bytesleft = len;
106+
107+
while (bytesleft > 0)
108+
{
109+
int copied;
110+
int newid = block_alloc(origin);
111+
if (!newid) goto failure;
112+
113+
copied = block_fill(origin, newid, cursor, bytesleft);
114+
115+
cursor += copied;
116+
bytesleft -= copied;
117+
if (id)
118+
TAIL(origin, id) = newid;
119+
else
120+
head = newid;
121+
id = newid;
122+
}
123+
124+
return head;
125+
failure:
126+
block_free(origin, head);
127+
return -1;
128+
}
129+
130+
size_t
131+
blockmem_len(void *origin, int id)
132+
{
133+
size_t len = 0;
134+
while (id > 0)
135+
{
136+
assert(USED(origin, id));
137+
len += LEN(origin, id);
138+
id = TAIL(origin, id);
139+
}
140+
assert(len > 0);
141+
return len;
142+
}
143+
144+
size_t
145+
blockmem_get(void *origin, int id, void *dst, size_t len)
146+
{
147+
size_t copied = 0;
148+
char *cursor = dst;
149+
while ((id > 0) && (copied < len))
150+
{
151+
size_t tocopy = LEN(origin, id);
152+
if (tocopy > len - copied)
153+
tocopy = len - copied;
154+
assert(tocopy > 0);
155+
assert(USED(origin, id));
156+
memcpy(cursor, DATA(origin, id), tocopy);
157+
copied += tocopy;
158+
cursor += tocopy;
159+
id = TAIL(origin, id);
160+
}
161+
return len;
162+
}
163+
164+
void
165+
blockmem_forget(void *origin, int id)
166+
{
167+
block_free(origin, id);
168+
}

contrib/raftable/blockmem.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#ifndef BLOCKMEM_H
2+
#define BLOCKMEM_H
3+
4+
#include <stdlib.h>
5+
6+
/*
7+
* Formats the memory region of 'size' bytes starting at 'origin'. Use this
8+
* before any other functions related to blockmem. Returns 1 on success,
9+
* 0 otherwise.
10+
*/
11+
int blockmem_format(void *origin, size_t size);
12+
13+
/*
14+
* Allocates blocks in the formatted region starting at 'origin' and stores
15+
* 'len' bytes from 'src' inside those blocks. Returns the id for later _get(),
16+
* _len() and _forget() operations. Returns 0 if not enough free blocks.
17+
*/
18+
int blockmem_put(void *origin, void *src, size_t len);
19+
20+
/*
21+
* Returns the length of data identified by 'id' that was previously stored
22+
* with a call to _put().
23+
*/
24+
size_t blockmem_len(void *origin, int id);
25+
26+
/*
27+
* Retrieves into 'dst' no more than 'len' bytes of data identified by 'id'
28+
* that were previously stored with a call to _put(). Returns the length of data
29+
* actually copied.
30+
*/
31+
size_t blockmem_get(void *origin, int id, void *dst, size_t len);
32+
33+
/*
34+
* Frees the blocks occupied by data identified by 'id'.
35+
*/
36+
void blockmem_forget(void *origin, int id);
37+
38+
#endif

contrib/raftable/raftable--1.0.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
2+
\echo Use "CREATE EXTENSION raftable" to load this file. \quit
3+
4+
-- get
5+
CREATE FUNCTION raftable(key varchar(64))
6+
RETURNS text
7+
AS 'MODULE_PATHNAME','raftable_sql_get'
8+
LANGUAGE C;
9+
10+
-- set
11+
CREATE FUNCTION raftable(key varchar(64), value text, tries int)
12+
RETURNS void
13+
AS 'MODULE_PATHNAME','raftable_sql_set'
14+
LANGUAGE C;
15+
16+
-- list
17+
CREATE FUNCTION raftable()
18+
RETURNS table (key varchar(64), value text)
19+
AS 'MODULE_PATHNAME','raftable_sql_list'
20+
LANGUAGE C;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy