Skip to content

Commit ad458cf

Browse files
committed
Don't use O_DIRECT when writing WAL files if archiving or streaming is
enabled. Bypassing the kernel cache is counter-productive in that case, because the archiver/walsender process will read from the WAL file soon after it's written, and if it's not cached the read will cause a physical read, eating I/O bandwidth available on the WAL drive. Also, the walreceiver process performs unaligned writes, so disable O_DIRECT in the walreceiver process for that reason too.
1 parent 94f610b commit ad458cf

File tree

4 files changed

+47
-26
lines changed

4 files changed

+47
-26
lines changed

src/backend/access/transam/xlog.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.376 2010/02/19 01:04:03 itagaki Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.377 2010/02/19 10:51:03 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -2686,13 +2686,10 @@ XLogFileClose(void)
26862686
* WAL segment files will not be re-read in normal operation, so we advise
26872687
* the OS to release any cached pages. But do not do so if WAL archiving
26882688
* or streaming is active, because archiver and walsender process could use
2689-
* the cache to read the WAL segment. Also, don't bother with it if we
2690-
* are using O_DIRECT, since the kernel is presumably not caching in that
2691-
* case.
2689+
* the cache to read the WAL segment.
26922690
*/
26932691
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
2694-
if (!XLogIsNeeded() &&
2695-
(get_sync_bit(sync_method) & PG_O_DIRECT) == 0)
2692+
if (!XLogIsNeeded())
26962693
(void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
26972694
#endif
26982695

@@ -7652,10 +7649,29 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
76527649
static int
76537650
get_sync_bit(int method)
76547651
{
7652+
int o_direct_flag = 0;
7653+
76557654
/* If fsync is disabled, never open in sync mode */
76567655
if (!enableFsync)
76577656
return 0;
76587657

7658+
/*
7659+
* Optimize writes by bypassing kernel cache with O_DIRECT when using
7660+
* O_SYNC, O_DSYNC or O_FSYNC. But only if archiving and streaming are
7661+
* disabled, otherwise the archive command or walsender process will
7662+
* read the WAL soon after writing it, which is guaranteed to cause a
7663+
* physical read if we bypassed the kernel cache. We also skip the
7664+
* posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the
7665+
* same reason.
7666+
*
7667+
* Never use O_DIRECT in walreceiver process for similar reasons; the WAL
7668+
* written by walreceiver is normally read by the startup process soon
7669+
* after it's written. Also, walreceiver performs unaligned writes, which
7670+
* don't work with O_DIRECT, so it is required for correctness too.
7671+
*/
7672+
if (!XLogIsNeeded() && !am_walreceiver)
7673+
o_direct_flag = PG_O_DIRECT;
7674+
76597675
switch (method)
76607676
{
76617677
/*
@@ -7670,11 +7686,11 @@ get_sync_bit(int method)
76707686
return 0;
76717687
#ifdef OPEN_SYNC_FLAG
76727688
case SYNC_METHOD_OPEN:
7673-
return OPEN_SYNC_FLAG;
7689+
return OPEN_SYNC_FLAG | o_direct_flag;
76747690
#endif
76757691
#ifdef OPEN_DATASYNC_FLAG
76767692
case SYNC_METHOD_OPEN_DSYNC:
7677-
return OPEN_DATASYNC_FLAG;
7693+
return OPEN_DATASYNC_FLAG | o_direct_flag;
76787694
#endif
76797695
default:
76807696
/* can't happen (unless we are out of sync with option array) */

src/backend/replication/walreceiver.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
*
3131
* IDENTIFICATION
32-
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.4 2010/02/17 04:19:39 tgl Exp $
32+
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.5 2010/02/19 10:51:04 heikki Exp $
3333
*
3434
*-------------------------------------------------------------------------
3535
*/
@@ -50,6 +50,9 @@
5050
#include "utils/ps_status.h"
5151
#include "utils/resowner.h"
5252

53+
/* Global variable to indicate if this process is a walreceiver process */
54+
bool am_walreceiver;
55+
5356
/* libpqreceiver hooks to these when loaded */
5457
walrcv_connect_type walrcv_connect = NULL;
5558
walrcv_receive_type walrcv_receive = NULL;
@@ -158,6 +161,8 @@ WalReceiverMain(void)
158161
/* use volatile pointer to prevent code rearrangement */
159162
volatile WalRcvData *walrcv = WalRcv;
160163

164+
am_walreceiver = true;
165+
161166
/*
162167
* WalRcv should be set up already (if we are a backend, we inherit
163168
* this by fork() or EXEC_BACKEND mechanism from the postmaster).
@@ -424,16 +429,18 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
424429
bool use_existent;
425430

426431
/*
427-
* XLOG segment files will be re-read in recovery operation soon,
428-
* so we don't need to advise the OS to release any cache page.
432+
* fsync() and close current file before we switch to next one.
433+
* We would otherwise have to reopen this file to fsync it later
429434
*/
430435
if (recvFile >= 0)
431436
{
437+
XLogWalRcvFlush();
438+
432439
/*
433-
* fsync() before we switch to next file. We would otherwise
434-
* have to reopen this file to fsync it later
440+
* XLOG segment files will be re-read by recovery in startup
441+
* process soon, so we don't advise the OS to release cache
442+
* pages associated with the file like XLogFileClose() does.
435443
*/
436-
XLogWalRcvFlush();
437444
if (close(recvFile) != 0)
438445
ereport(PANIC,
439446
(errcode_for_file_access(),
@@ -445,8 +452,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
445452
/* Create/use new log file */
446453
XLByteToSeg(recptr, recvId, recvSeg);
447454
use_existent = true;
448-
recvFile = XLogFileInit(recvId, recvSeg,
449-
&use_existent, true);
455+
recvFile = XLogFileInit(recvId, recvSeg, &use_existent, true);
450456
recvOff = 0;
451457
}
452458

src/include/access/xlogdefs.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.25 2010/01/15 09:19:06 heikki Exp $
10+
* $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.26 2010/02/19 10:51:04 heikki Exp $
1111
*/
1212
#ifndef XLOG_DEFS_H
1313
#define XLOG_DEFS_H
@@ -106,23 +106,20 @@ typedef uint32 TimeLineID;
106106
* configure determined whether fdatasync() is.
107107
*/
108108
#if defined(O_SYNC)
109-
#define BARE_OPEN_SYNC_FLAG O_SYNC
109+
#define OPEN_SYNC_FLAG O_SYNC
110110
#elif defined(O_FSYNC)
111-
#define BARE_OPEN_SYNC_FLAG O_FSYNC
112-
#endif
113-
#ifdef BARE_OPEN_SYNC_FLAG
114-
#define OPEN_SYNC_FLAG (BARE_OPEN_SYNC_FLAG | PG_O_DIRECT)
111+
#define OPEN_SYNC_FLAG O_FSYNC
115112
#endif
116113

117114
#if defined(O_DSYNC)
118115
#if defined(OPEN_SYNC_FLAG)
119116
/* O_DSYNC is distinct? */
120-
#if O_DSYNC != BARE_OPEN_SYNC_FLAG
121-
#define OPEN_DATASYNC_FLAG (O_DSYNC | PG_O_DIRECT)
117+
#if O_DSYNC != OPEN_SYNC_FLAG
118+
#define OPEN_DATASYNC_FLAG O_DSYNC
122119
#endif
123120
#else /* !defined(OPEN_SYNC_FLAG) */
124121
/* Win32 only has O_DSYNC */
125-
#define OPEN_DATASYNC_FLAG (O_DSYNC | PG_O_DIRECT)
122+
#define OPEN_DATASYNC_FLAG O_DSYNC
126123
#endif
127124
#endif
128125

src/include/replication/walreceiver.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group
77
*
8-
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.6 2010/02/03 09:47:19 heikki Exp $
8+
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.7 2010/02/19 10:51:04 heikki Exp $
99
*
1010
*-------------------------------------------------------------------------
1111
*/
@@ -15,6 +15,8 @@
1515
#include "access/xlogdefs.h"
1616
#include "storage/spin.h"
1717

18+
extern bool am_walreceiver;
19+
1820
/*
1921
* MAXCONNINFO: maximum size of a connection string.
2022
*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy