Skip to content

Commit f8c183a

Browse files
committed
Speed up CREATE DATABASE by deferring the fsyncs until after copying
all the data and using posix_fadvise to nudge the OS into flushing it earlier. This also hopefully makes CREATE DATABASE avoid spamming the cache. Tests show a big speedup on Linux, at least on some filesystems. Idea and patch from Andres Freund.
1 parent e26c539 commit f8c183a

File tree

3 files changed

+76
-29
lines changed

3 files changed

+76
-29
lines changed

src/backend/storage/file/fd.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.153 2010/01/12 02:42:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.154 2010/02/15 00:50:57 stark Exp $
1111
*
1212
* NOTES:
1313
*
@@ -319,6 +319,22 @@ pg_fdatasync(int fd)
319319
return 0;
320320
}
321321

322+
/*
323+
* pg_flush_data --- advise OS that the data described won't be needed soon
324+
*
325+
* Not all platforms have posix_fadvise; treat as noop if not available.
326+
*/
327+
int
328+
pg_flush_data(int fd, off_t offset, off_t amount)
329+
{
330+
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
331+
return posix_fadvise(fd, offset, amount, POSIX_FADV_DONTNEED);
332+
#else
333+
return 0;
334+
#endif
335+
}
336+
337+
322338
/*
323339
* InitFileAccess --- initialize this module during backend startup
324340
*

src/include/storage/fd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.66 2010/01/02 16:58:08 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.67 2010/02/15 00:50:57 stark Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -98,6 +98,7 @@ extern int pg_fsync(int fd);
9898
extern int pg_fsync_no_writethrough(int fd);
9999
extern int pg_fsync_writethrough(int fd);
100100
extern int pg_fdatasync(int fd);
101+
extern int pg_flush_data(int fd, off_t offset, off_t amount);
101102

102103
/* Filename components for OpenTemporaryFile */
103104
#define PG_TEMP_FILES_DIR "pgsql_tmp"

src/port/copydir.c

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* as a service.
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.25 2010/02/14 17:50:52 stark Exp $
14+
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.26 2010/02/15 00:50:57 stark Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -37,6 +37,7 @@
3737

3838

3939
static void copy_file(char *fromfile, char *tofile);
40+
static void fsync_fname(char *fname);
4041

4142

4243
/*
@@ -91,27 +92,32 @@ copydir(char *fromdir, char *todir, bool recurse)
9192
copy_file(fromfile, tofile);
9293
}
9394

94-
FreeDir(xldir);
95-
9695
/*
97-
* fsync the directory to make sure not just the data but also the
98-
* new directory file entries have reached the disk. While needed
99-
* by most filesystems, the window got bigger with newer ones like
100-
* ext4.
96+
* Be paranoid here and fsync all files to ensure we catch problems.
10197
*/
102-
dirfd = BasicOpenFile(todir,
103-
O_RDONLY | PG_BINARY,
104-
S_IRUSR | S_IWUSR);
105-
if(dirfd == -1)
106-
ereport(ERROR,
107-
(errcode_for_file_access(),
108-
errmsg("could not open directory for fsync \"%s\": %m", todir)));
109-
110-
if(pg_fsync(dirfd) == -1)
98+
if (xldir == NULL)
11199
ereport(ERROR,
112100
(errcode_for_file_access(),
113-
errmsg("could not fsync directory \"%s\": %m", todir)));
114-
close(dirfd);
101+
errmsg("could not open directory \"%s\": %m", fromdir)));
102+
103+
while ((xlde = ReadDir(xldir, fromdir)) != NULL)
104+
{
105+
if (strcmp(xlde->d_name, ".") == 0 ||
106+
strcmp(xlde->d_name, "..") == 0)
107+
continue;
108+
109+
snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name);
110+
fsync_fname(tofile);
111+
}
112+
FreeDir(xldir);
113+
114+
/* It's important to fsync the destination directory itself as
115+
* individual file fsyncs don't guarantee that the directory entry
116+
* for the file is synced. Recent versions of ext4 have made the
117+
* window much wider but it's been true for ext3 and other
118+
* filesystems in the past.
119+
*/
120+
fsync_fname(todir);
115121
}
116122

117123
/*
@@ -124,6 +130,7 @@ copy_file(char *fromfile, char *tofile)
124130
int srcfd;
125131
int dstfd;
126132
int nbytes;
133+
off_t offset;
127134

128135
/* Use palloc to ensure we get a maxaligned buffer */
129136
#define COPY_BUF_SIZE (8 * BLCKSZ)
@@ -149,7 +156,7 @@ copy_file(char *fromfile, char *tofile)
149156
/*
150157
* Do the data copying.
151158
*/
152-
for (;;)
159+
for (offset=0; ; offset+=nbytes)
153160
{
154161
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
155162
if (nbytes < 0)
@@ -168,15 +175,14 @@ copy_file(char *fromfile, char *tofile)
168175
(errcode_for_file_access(),
169176
errmsg("could not write to file \"%s\": %m", tofile)));
170177
}
171-
}
172178

173-
/*
174-
* Be paranoid here to ensure we catch problems.
175-
*/
176-
if (pg_fsync(dstfd) != 0)
177-
ereport(ERROR,
178-
(errcode_for_file_access(),
179-
errmsg("could not fsync file \"%s\": %m", tofile)));
179+
/*
180+
* We fsync the files later but first flush them to avoid spamming
181+
* the cache and hopefully get the kernel to start writing them
182+
* out before the fsync comes.
183+
*/
184+
pg_flush_data(dstfd, offset, nbytes);
185+
}
180186

181187
if (close(dstfd))
182188
ereport(ERROR,
@@ -187,3 +193,27 @@ copy_file(char *fromfile, char *tofile)
187193

188194
pfree(buffer);
189195
}
196+
197+
198+
199+
/*
200+
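* fsync_fname --- open the named file or directory read-only, fsync it, and close it; ERROR if the open or the fsync fails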
201+
*/
202+
static void
203+
fsync_fname(char *fname)
204+
{
205+
int fd = BasicOpenFile(fname,
206+
O_RDONLY | PG_BINARY,
207+
S_IRUSR | S_IWUSR);
208+
209+
if (fd < 0)
210+
ereport(ERROR,
211+
(errcode_for_file_access(),
212+
errmsg("could not open file \"%s\": %m", fname)));
213+
214+
if (pg_fsync(fd) != 0)
215+
ereport(ERROR,
216+
(errcode_for_file_access(),
217+
errmsg("could not fsync file \"%s\": %m", fname)));
218+
close(fd);
219+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy