Skip to content

Commit 899bd78

Browse files
committed
Avoid SIGBUS on Linux when a DSM memory request overruns tmpfs.
On Linux, shared memory segments created with shm_open() are backed by swap files created in tmpfs. If the swap file needs to be extended, but there's no tmpfs space left, you get a very unfriendly SIGBUS trap. To avoid this, force allocation of the full request size when we create the segment. This adds a few cycles, but none that we wouldn't expend later anyway, assuming the request isn't hugely bigger than the actual need. Make this code #ifdef __linux__, because (a) there's not currently a reason to think the same problem exists on other platforms, and (b) applying posix_fallocate() to an FD created by shm_open() isn't very portable anyway. Back-patch to 9.4 where the DSM code came in. Thomas Munro, per a bug report from Amul Sul Discussion: https://postgr.es/m/1002664500.12301802.1471008223422.JavaMail.yahoo@mail.yahoo.com
1 parent 716ea62 commit 899bd78

File tree

5 files changed

+60
-4
lines changed

5 files changed

+60
-4
lines changed

configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12970,7 +12970,7 @@ fi
1297012970
LIBS_including_readline="$LIBS"
1297112971
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
1297212972

12973-
for ac_func in cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l
12973+
for ac_func in cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l
1297412974
do :
1297512975
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
1297612976
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"

configure.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1399,7 +1399,7 @@ PGAC_FUNC_WCSTOMBS_L
13991399
LIBS_including_readline="$LIBS"
14001400
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
14011401

1402-
AC_CHECK_FUNCS([cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l])
1402+
AC_CHECK_FUNCS([cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l])
14031403

14041404
AC_REPLACE_FUNCS(fseeko)
14051405
case $host_os in

src/backend/storage/ipc/dsm_impl.c

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
7474
void **impl_private, void **mapped_address,
7575
Size *mapped_size, int elevel);
76+
static int dsm_impl_posix_resize(int fd, off_t size);
7677
#endif
7778
#ifdef USE_DSM_SYSV
7879
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
@@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
319320
}
320321
request_size = st.st_size;
321322
}
322-
else if (*mapped_size != request_size && ftruncate(fd, request_size))
323+
else if (*mapped_size != request_size &&
324+
dsm_impl_posix_resize(fd, request_size) != 0)
323325
{
324326
int save_errno;
325327

@@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
392394

393395
return true;
394396
}
395-
#endif
397+
398+
/*
 * Set the size of a virtual memory region associated with a file descriptor.
 * If necessary, also ensure that virtual memory is actually allocated by the
 * operating system, to avoid nasty surprises later.
 *
 * fd is the descriptor to resize; size is the new length in bytes.
 *
 * Returns zero on success.  Returns non-zero if either truncation or
 * allocation fails, with errno set to describe the failure.
 */
static int
dsm_impl_posix_resize(int fd, off_t size)
{
	int			rc;

	/* Truncate (or extend) the file to the requested size. */
	rc = ftruncate(fd, size);

	/*
	 * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
	 * ftruncate, the file may contain a hole.  Accessing memory backed by a
	 * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
	 * is no more tmpfs space available.  So we ask tmpfs to allocate pages
	 * here, so we can fail gracefully with ENOSPC now rather than risking
	 * SIGBUS later.
	 */
#if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
	if (rc == 0)
	{
		/*
		 * Unlike ftruncate(), posix_fallocate() reports failure by returning
		 * the error number itself; it never returns -1 and does not set
		 * errno.  So every check below must look at rc, not errno.
		 *
		 * We may get interrupted, if so just retry.
		 */
		do
		{
			rc = posix_fallocate(fd, 0, size);
		} while (rc == EINTR);

		if (rc == ENOSYS)
		{
			/*
			 * Kernel too old (< 2.6.23).  Rather than fail, just trust that
			 * we won't hit the problem (it typically doesn't show up without
			 * many-GB-sized requests, anyway).
			 */
			rc = 0;
		}
		else if (rc != 0)
		{
			/*
			 * The caller expects errno to describe the failure, so copy the
			 * returned error number into it before reporting failure.
			 */
			errno = rc;
		}
	}
#endif   /* HAVE_POSIX_FALLOCATE && __linux__ */

	return rc;
}
444+
445+
#endif /* USE_DSM_POSIX */
396446

397447
#ifdef USE_DSM_SYSV
398448
/*

src/include/pg_config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,9 @@
393393
/* Define to 1 if you have the `posix_fadvise' function. */
394394
#undef HAVE_POSIX_FADVISE
395395

396+
/* Define to 1 if you have the `posix_fallocate' function. */
397+
#undef HAVE_POSIX_FALLOCATE
398+
396399
/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
397400
#undef HAVE_PPC_LWARX_MUTEX_HINT
398401

src/include/pg_config.h.win32

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,9 @@
261261
/* Define to 1 if you have the <poll.h> header file. */
262262
/* #undef HAVE_POLL_H */
263263

264+
/* Define to 1 if you have the `posix_fallocate' function. */
265+
/* #undef HAVE_POSIX_FALLOCATE */
266+
264267
/* Define to 1 if you have the `pstat' function. */
265268
/* #undef HAVE_PSTAT */
266269

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy