Skip to content

Commit ec159fa

Browse files
committed
2 parents d5e8622 + 98eb376 commit ec159fa

File tree

1 file changed

+181
-43
lines changed

1 file changed

+181
-43
lines changed

contrib/raftable/raftable.c

Lines changed: 181 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "worker.h"
2222
#include "state.h"
2323

24+
#include <poll.h>
2425
#include <sys/socket.h>
2526
#include <netinet/in.h>
2627
#include <netinet/tcp.h>
@@ -80,17 +81,111 @@ static void disconnect_leader(void)
8081
leadersock = -1;
8182
}
8283

83-
static bool connect_leader(void)
84+
static bool poll_until_writable(int sock, int timeout_ms)
85+
{
86+
struct pollfd pfd = {sock, POLLOUT, 0};
87+
int r = poll(&pfd, 1, timeout_ms);
88+
if (r != 1) return false;
89+
return pfd.revents & POLLOUT;
90+
}
91+
92+
static bool poll_until_readable(int sock, int timeout_ms)
93+
{
94+
struct pollfd pfd = {sock, POLLIN, 0};
95+
int r = poll(&pfd, 1, timeout_ms);
96+
if (r != 1) return false;
97+
return pfd.revents & POLLIN;
98+
}
99+
100+
static long msec(TimestampTz timer)
101+
{
102+
long sec;
103+
int usec;
104+
TimestampDifference(0, timer, &sec, &usec);
105+
return sec * 1000 + usec / 1000;
106+
}
107+
108+
static bool timed_write(int sock, void *data, size_t len, int timeout_ms)
109+
{
110+
TimestampTz start, now;
111+
int sent = 0;
112+
113+
now = start = GetCurrentTimestamp();
114+
115+
while (sent < len)
116+
{
117+
int newbytes;
118+
now = GetCurrentTimestamp();
119+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms)) {
120+
elog(WARNING, "write timed out");
121+
return false;
122+
}
123+
124+
newbytes = write(sock, (char *)data + sent, len - sent);
125+
if (newbytes == -1)
126+
{
127+
if (errno == EAGAIN) {
128+
if (poll_until_writable(sock, timeout_ms - msec(now - start))) {
129+
continue;
130+
}
131+
}
132+
elog(WARNING, "failed to write: %s", strerror(errno));
133+
return false;
134+
}
135+
sent += newbytes;
136+
}
137+
138+
return true;
139+
}
140+
141+
static bool timed_read(int sock, void *data, size_t len, int timeout_ms)
142+
{
143+
int recved = 0;
144+
TimestampTz start, now;
145+
now = start = GetCurrentTimestamp();
146+
147+
while (recved < len)
148+
{
149+
int newbytes;
150+
now = GetCurrentTimestamp();
151+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms)) {
152+
elog(WARNING, "read timed out");
153+
return false;
154+
}
155+
156+
newbytes = read(sock, (char *)data + recved, len - recved);
157+
if (newbytes == -1)
158+
{
159+
if (errno == EAGAIN) {
160+
if (poll_until_readable(sock, timeout_ms - msec(now - start))) {
161+
continue;
162+
}
163+
}
164+
elog(WARNING, "failed to read: %s", strerror(errno));
165+
return false;
166+
}
167+
recved += newbytes;
168+
}
169+
170+
return true;
171+
}
172+
173+
static bool connect_leader(int timeout_ms)
84174
{
85175
struct addrinfo *addrs = NULL;
86176
struct addrinfo hint;
87177
char portstr[6];
88178
struct addrinfo *a;
89179
int rc;
90180

181+
TimestampTz now;
182+
int elapsed_ms;
183+
184+
HostPort *leaderhp;
185+
91186
if (*shared.leader == NOBODY) select_next_peer();
92187

93-
HostPort *leaderhp = wcfg.peers + *shared.leader;
188+
leaderhp = wcfg.peers + *shared.leader;
94189

95190
memset(&hint, 0, sizeof(hint));
96191
hint.ai_socktype = SOCK_STREAM;
@@ -108,11 +203,13 @@ static bool connect_leader(void)
108203
}
109204

110205
fprintf(stderr, "trying [%d] %s:%d\n", *shared.leader, leaderhp->host, leaderhp->port);
206+
elapsed_ms = 0;
207+
now = GetCurrentTimestamp();
111208
for (a = addrs; a != NULL; a = a->ai_next)
112209
{
113210
int one = 1;
114211

115-
int sd = socket(a->ai_family, a->ai_socktype, a->ai_protocol);
212+
int sd = socket(a->ai_family, SOCK_STREAM | SOCK_NONBLOCK, 0);
116213
if (sd == -1)
117214
{
118215
perror("failed to create a socket");
@@ -122,9 +219,34 @@ static bool connect_leader(void)
122219

123220
if (connect(sd, a->ai_addr, a->ai_addrlen) == -1)
124221
{
125-
perror("failed to connect to an address");
126-
close(sd);
127-
continue;
222+
if (errno == EINPROGRESS)
223+
{
224+
while ((elapsed_ms <= timeout_ms) || (timeout_ms == -1))
225+
{
226+
TimestampTz past = now;
227+
228+
if (poll_until_writable(sd, timeout_ms - elapsed_ms))
229+
{
230+
int err;
231+
socklen_t optlen = sizeof(err);
232+
getsockopt(sd, SOL_SOCKET, SO_ERROR, &err, &optlen);
233+
if (err == 0)
234+
{
235+
// success
236+
break;
237+
}
238+
}
239+
240+
now = GetCurrentTimestamp();
241+
elapsed_ms += msec(now - past);
242+
}
243+
}
244+
else
245+
{
246+
perror("failed to connect to an address");
247+
close(sd);
248+
continue;
249+
}
128250
}
129251

130252
/* success */
@@ -138,15 +260,14 @@ static bool connect_leader(void)
138260
return false;
139261
}
140262

141-
static int get_connection(void)
263+
static int get_connection(int timeout_ms)
142264
{
143265
if (leadersock < 0)
144266
{
145-
if (connect_leader()) return leadersock;
146-
147-
int timeout_ms = 100;
148-
struct timespec timeout = {0, timeout_ms * 1000000};
149-
nanosleep(&timeout, NULL);
267+
if (connect_leader(timeout_ms)) return leadersock;
268+
// int timeout_ms = 100;
269+
// struct timespec timeout = {0, timeout_ms * 1000000};
270+
// nanosleep(&timeout, NULL);
150271
}
151272
return leadersock;
152273
}
@@ -162,11 +283,12 @@ raftable_sql_get(PG_FUNCTION_ARGS)
162283
{
163284
RaftableKey key;
164285
size_t len;
286+
char *s;
165287
text_to_cstring_buffer(PG_GETARG_TEXT_P(0), key.data, sizeof(key.data));
166288

167289
Assert(shared.state);
168290

169-
char *s = state_get(shared.state, key.data, &len);
291+
s = state_get(shared.state, key.data, &len);
170292
if (s)
171293
{
172294
text *t = cstring_to_text_with_len(s, len);
@@ -177,54 +299,65 @@ raftable_sql_get(PG_FUNCTION_ARGS)
177299
PG_RETURN_NULL();
178300
}
179301

180-
static long msec(TimestampTz timer)
302+
static bool try_sending_update(RaftableUpdate *ru, size_t size, int timeout_ms)
181303
{
182-
long sec;
183-
int usec;
184-
TimestampDifference(0, timer, &sec, &usec);
185-
return sec * 1000 + usec / 1000;
186-
}
304+
int s, status;
305+
TimestampTz start, now;
187306

188-
static bool try_sending_update(RaftableUpdate *ru, size_t size)
189-
{
190-
int s = get_connection();
307+
now = start = GetCurrentTimestamp();
191308

309+
s = get_connection(timeout_ms - (now - start));
192310
if (s < 0) return false;
193311

194-
int sent = 0, recved = 0;
195-
int status;
312+
now = GetCurrentTimestamp();
313+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms))
314+
{
315+
elog(WARNING, "update: connect() timed out");
316+
return false;
317+
}
196318

197-
if (write(s, &size, sizeof(size)) != sizeof(size))
319+
if (!timed_write(s, &size, sizeof(size), timeout_ms - msec(now - start)))
198320
{
199-
disconnect_leader();
200321
elog(WARNING, "failed to send the update size to the leader");
201322
return false;
202323
}
203324

204-
while (sent < size)
325+
now = GetCurrentTimestamp();
326+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms))
205327
{
206-
int newbytes = write(s, (char *)ru + sent, size - sent);
207-
if (newbytes == -1)
208-
{
209-
disconnect_leader();
210-
elog(WARNING, "failed to send the update to the leader");
211-
return false;
212-
}
213-
sent += newbytes;
328+
elog(WARNING, "update: send(size) timed out");
329+
return false;
214330
}
215331

216-
recved = read(s, &status, sizeof(status));
217-
if (recved != sizeof(status))
332+
if (!timed_write(s, ru, size, timeout_ms - msec(now - start)))
333+
{
334+
elog(WARNING, "failed to send the update to the leader");
335+
return false;
336+
}
337+
338+
now = GetCurrentTimestamp();
339+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms))
340+
{
341+
elog(WARNING, "update: send(body) timed out");
342+
return false;
343+
}
344+
345+
if (!timed_read(s, &status, sizeof(status), timeout_ms - msec(now - start)))
218346
{
219-
disconnect_leader();
220347
elog(WARNING, "failed to recv the update status from the leader");
221348
return false;
222349
}
223350

351+
now = GetCurrentTimestamp();
352+
if ((timeout_ms != -1) && (msec(now - start) > timeout_ms))
353+
{
354+
elog(WARNING, "update: recv(status) timed out");
355+
return false;
356+
}
357+
224358
if (status != 1)
225359
{
226-
disconnect_leader();
227-
elog(WARNING, "leader returned %d", status);
360+
elog(WARNING, "update: leader returned status = %d", status);
228361
return false;
229362
}
230363

@@ -233,6 +366,7 @@ static bool try_sending_update(RaftableUpdate *ru, size_t size)
233366

234367
bool raftable_set(const char *key, const char *value, size_t vallen, int timeout_ms)
235368
{
369+
RaftableField *f;
236370
RaftableUpdate *ru;
237371
size_t size = sizeof(RaftableUpdate);
238372
size_t keylen = 0;
@@ -251,7 +385,7 @@ bool raftable_set(const char *key, const char *value, size_t vallen, int timeout
251385
ru->expector = wcfg.id;
252386
ru->fieldnum = 1;
253387

254-
RaftableField *f = (RaftableField *)ru->data;
388+
f = (RaftableField *)ru->data;
255389
f->keylen = keylen;
256390
f->vallen = vallen;
257391
memcpy(f->data, key, keylen);
@@ -262,17 +396,21 @@ bool raftable_set(const char *key, const char *value, size_t vallen, int timeout
262396
while ((elapsed_ms <= timeout_ms) || (timeout_ms == -1))
263397
{
264398
TimestampTz past = now;
265-
if (try_sending_update(ru, size))
399+
if (try_sending_update(ru, size, timeout_ms - elapsed_ms))
266400
{
267401
pfree(ru);
268402
return true;
269403
}
404+
else
405+
{
406+
disconnect_leader();
407+
}
270408
now = GetCurrentTimestamp();
271409
elapsed_ms += msec(now - past);
272410
}
273411

274412
pfree(ru);
275-
elog(WARNING, "failed to set raftable value after %d ms", timeout_ms);
413+
elog(WARNING, "failed to set raftable value after %d ms", elapsed_ms);
276414
return false;
277415
}
278416

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy