Skip to content

Commit 1c6821b

Browse files
committed
Fix and enhance the assertion of no palloc's in a critical section.
The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by using separate memory contexts for the allocations made within those code blocks. This patch introduces a mechanism for marking any memory context as allowed in a critical section. Previously, ErrorContext was exempt as a special case. Instead of a blanket exemption for the checkpointer process, only exempt the memory context used for the pending ops hash table.
1 parent a749a23 commit 1c6821b

File tree

9 files changed

+147
-42
lines changed

9 files changed

+147
-42
lines changed

src/backend/access/transam/xlog.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#include "storage/spin.h"
6161
#include "utils/builtins.h"
6262
#include "utils/guc.h"
63+
#include "utils/memutils.h"
6364
#include "utils/ps_status.h"
6465
#include "utils/relmapper.h"
6566
#include "utils/snapmgr.h"
@@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
736737
static int MyLockNo = 0;
737738
static bool holdingAllLocks = false;
738739

740+
#ifdef WAL_DEBUG
741+
static MemoryContext walDebugCxt = NULL;
742+
#endif
743+
739744
static void readRecoveryCommandFile(void);
740745
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
741746
static bool recoveryStopsBefore(XLogRecord *record);
@@ -1258,6 +1263,7 @@ begin:;
12581263
if (XLOG_DEBUG)
12591264
{
12601265
StringInfoData buf;
1266+
MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
12611267

12621268
initStringInfo(&buf);
12631269
appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1282,10 +1288,11 @@ begin:;
12821288

12831289
appendStringInfoString(&buf, " - ");
12841290
RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
1285-
pfree(recordbuf.data);
12861291
}
12871292
elog(LOG, "%s", buf.data);
1288-
pfree(buf.data);
1293+
1294+
MemoryContextSwitchTo(oldCxt);
1295+
MemoryContextReset(walDebugCxt);
12891296
}
12901297
#endif
12911298

@@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
48074814
char *allocptr;
48084815
int i;
48094816

4817+
#ifdef WAL_DEBUG
4818+
/*
4819+
* Create a memory context for WAL debugging that's exempt from the
4820+
* normal "no pallocs in critical section" rule. Yes, that can lead to a
4821+
* PANIC if an allocation fails, but wal_debug is not for production use
4822+
* anyway.
4823+
*/
4824+
if (walDebugCxt == NULL)
4825+
{
4826+
walDebugCxt = AllocSetContextCreate(TopMemoryContext,
4827+
"WAL Debug",
4828+
ALLOCSET_DEFAULT_MINSIZE,
4829+
ALLOCSET_DEFAULT_INITSIZE,
4830+
ALLOCSET_DEFAULT_MAXSIZE);
4831+
MemoryContextAllowInCriticalSection(walDebugCxt, true);
4832+
}
4833+
#endif
4834+
48104835
ControlFile = (ControlFileData *)
48114836
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
48124837
XLogCtl = (XLogCtlData *)

src/backend/postmaster/checkpointer.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
13051305
if (!AmCheckpointerProcess())
13061306
return;
13071307

1308-
/*
1309-
* We have to PANIC if we fail to absorb all the pending requests (eg,
1310-
* because our hashtable runs out of memory). This is because the system
1311-
* cannot run safely if we are unable to fsync what we have been told to
1312-
* fsync. Fortunately, the hashtable is so small that the problem is
1313-
* quite unlikely to arise in practice.
1314-
*/
1315-
START_CRIT_SECTION();
1316-
1317-
/*
1318-
* We try to avoid holding the lock for a long time by copying the request
1319-
* array.
1320-
*/
13211308
LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
13221309

13231310
/* Transfer stats counts into pending pgstats message */
@@ -1327,23 +1314,36 @@ AbsorbFsyncRequests(void)
13271314
CheckpointerShmem->num_backend_writes = 0;
13281315
CheckpointerShmem->num_backend_fsync = 0;
13291316

1317+
/*
1318+
* We try to avoid holding the lock for a long time by copying the request
1319+
* array, and processing the requests after releasing the lock.
1320+
*
1321+
* Once we have cleared the requests from shared memory, we have to PANIC
1322+
* if we then fail to absorb them (eg, because our hashtable runs out of
1323+
* memory). This is because the system cannot run safely if we are unable
1324+
* to fsync what we have been told to fsync. Fortunately, the hashtable
1325+
* is so small that the problem is quite unlikely to arise in practice.
1326+
*/
13301327
n = CheckpointerShmem->num_requests;
13311328
if (n > 0)
13321329
{
13331330
requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
13341331
memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
13351332
}
1333+
1334+
START_CRIT_SECTION();
1335+
13361336
CheckpointerShmem->num_requests = 0;
13371337

13381338
LWLockRelease(CheckpointerCommLock);
13391339

13401340
for (request = requests; n > 0; request++, n--)
13411341
RememberFsyncRequest(request->rnode, request->forknum, request->segno);
13421342

1343+
END_CRIT_SECTION();
1344+
13431345
if (requests)
13441346
pfree(requests);
1345-
1346-
END_CRIT_SECTION();
13471347
}
13481348

13491349
/*

src/backend/storage/lmgr/lwlock.c

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ typedef struct lwlock_stats
104104
int spin_delay_count;
105105
} lwlock_stats;
106106

107-
static int counts_for_pid = 0;
108107
static HTAB *lwlock_stats_htab;
108+
static lwlock_stats lwlock_stats_dummy;
109109
#endif
110110

111111
#ifdef LOCK_DEBUG
@@ -142,21 +142,39 @@ static void
142142
init_lwlock_stats(void)
143143
{
144144
HASHCTL ctl;
145+
static MemoryContext lwlock_stats_cxt = NULL;
146+
static bool exit_registered = false;
145147

146-
if (lwlock_stats_htab != NULL)
147-
{
148-
hash_destroy(lwlock_stats_htab);
149-
lwlock_stats_htab = NULL;
150-
}
148+
if (lwlock_stats_cxt != NULL)
149+
MemoryContextDelete(lwlock_stats_cxt);
150+
151+
/*
152+
* The LWLock stats will be updated within a critical section, which
153+
* requires allocating new hash entries. Allocations within a critical
154+
* section are normally not allowed because running out of memory would
155+
* lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
156+
* turned on in production, so that's an acceptable risk. The hash entries
157+
* are small, so the risk of running out of memory is minimal in practice.
158+
*/
159+
lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
160+
"LWLock stats",
161+
ALLOCSET_DEFAULT_MINSIZE,
162+
ALLOCSET_DEFAULT_INITSIZE,
163+
ALLOCSET_DEFAULT_MAXSIZE);
164+
MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
151165

152166
MemSet(&ctl, 0, sizeof(ctl));
153167
ctl.keysize = sizeof(lwlock_stats_key);
154168
ctl.entrysize = sizeof(lwlock_stats);
155169
ctl.hash = tag_hash;
170+
ctl.hcxt = lwlock_stats_cxt;
156171
lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
157-
HASH_ELEM | HASH_FUNCTION);
158-
counts_for_pid = MyProcPid;
159-
on_shmem_exit(print_lwlock_stats, 0);
172+
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
173+
if (!exit_registered)
174+
{
175+
on_shmem_exit(print_lwlock_stats, 0);
176+
exit_registered = true;
177+
}
160178
}
161179

162180
static void
@@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
190208
lwlock_stats *lwstats;
191209
bool found;
192210

193-
/* Set up local count state first time through in a given process */
194-
if (counts_for_pid != MyProcPid)
195-
init_lwlock_stats();
211+
/*
212+
* During shared memory initialization, the hash table doesn't exist yet.
213+
* Stats of that phase aren't very interesting, so just collect operations
214+
* on all locks in a single dummy entry.
215+
*/
216+
if (lwlock_stats_htab == NULL)
217+
return &lwlock_stats_dummy;
196218

197219
/* Fetch or create the entry. */
198220
key.tranche = lock->tranche;
@@ -361,6 +383,16 @@ CreateLWLocks(void)
361383
LWLockRegisterTranche(0, &MainLWLockTranche);
362384
}
363385

386+
/*
387+
* InitLWLockAccess - initialize backend-local state needed to hold LWLocks
388+
*/
389+
void
390+
InitLWLockAccess(void)
391+
{
392+
#ifdef LWLOCK_STATS
393+
init_lwlock_stats();
394+
#endif
395+
}
364396

365397
/*
366398
* LWLockAssign - assign a dynamically-allocated LWLock number

src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,9 @@ InitProcess(void)
411411

412412
/*
413413
* Now that we have a PGPROC, we could try to acquire locks, so initialize
414-
* the deadlock checker.
414+
* local state needed for LWLocks, and the deadlock checker.
415415
*/
416+
InitLWLockAccess();
416417
InitDeadLockChecking();
417418
}
418419

src/backend/storage/smgr/md.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ typedef struct _MdfdVec
115115
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
116116
} MdfdVec;
117117

118-
static MemoryContext MdCxt; /* context for all md.c allocations */
118+
static MemoryContext MdCxt; /* context for all MdfdVec objects */
119119

120120

121121
/*
@@ -157,6 +157,7 @@ typedef struct
157157

158158
static HTAB *pendingOpsTable = NULL;
159159
static List *pendingUnlinks = NIL;
160+
static MemoryContext pendingOpsCxt; /* context for the above */
160161

161162
static CycleCtr mdsync_cycle_ctr = 0;
162163
static CycleCtr mdckpt_cycle_ctr = 0;
@@ -209,11 +210,27 @@ mdinit(void)
209210
{
210211
HASHCTL hash_ctl;
211212

213+
/*
214+
* XXX: The checkpointer needs to add entries to the pending ops table
215+
* when absorbing fsync requests. That is done within a critical
216+
* section, which isn't usually allowed, but we make an exception.
217+
* It means that there's a theoretical possibility that you run out of
218+
* memory while absorbing fsync requests, which leads to a PANIC.
219+
* Fortunately the hash table is small so that's unlikely to happen in
220+
* practice.
221+
*/
222+
pendingOpsCxt = AllocSetContextCreate(MdCxt,
223+
"Pending Ops Context",
224+
ALLOCSET_DEFAULT_MINSIZE,
225+
ALLOCSET_DEFAULT_INITSIZE,
226+
ALLOCSET_DEFAULT_MAXSIZE);
227+
MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
228+
212229
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
213230
hash_ctl.keysize = sizeof(RelFileNode);
214231
hash_ctl.entrysize = sizeof(PendingOperationEntry);
215232
hash_ctl.hash = tag_hash;
216-
hash_ctl.hcxt = MdCxt;
233+
hash_ctl.hcxt = pendingOpsCxt;
217234
pendingOpsTable = hash_create("Pending Ops Table",
218235
100L,
219236
&hash_ctl,
@@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
15161533
else if (segno == UNLINK_RELATION_REQUEST)
15171534
{
15181535
/* Unlink request: put it in the linked list */
1519-
MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
1536+
MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
15201537
PendingUnlinkEntry *entry;
15211538

15221539
/* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
@@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
15331550
else
15341551
{
15351552
/* Normal case: enter a request to fsync this segment */
1536-
MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
1553+
MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
15371554
PendingOperationEntry *entry;
15381555
bool found;
15391556

src/backend/utils/mmgr/mcxt.c

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
6060
* You should not do memory allocations within a critical section, because
6161
* an out-of-memory error will be escalated to a PANIC. To enforce that
6262
* rule, the allocation functions Assert that.
63-
*
64-
* There are a two exceptions: 1) error recovery uses ErrorContext, which
65-
* has some memory set aside so that you don't run out. And 2) checkpointer
66-
* currently just hopes for the best, which is wrong and ought to be fixed,
67-
* but it's a known issue so let's not complain about in the meanwhile.
6863
*/
6964
#define AssertNotInCriticalSection(context) \
70-
Assert(CritSectionCount == 0 || (context) == ErrorContext || \
71-
AmCheckpointerProcess())
65+
Assert(CritSectionCount == 0 || (context)->allowInCritSection)
7266

7367
/*****************************************************************************
7468
* EXPORTED ROUTINES *
@@ -120,7 +114,10 @@ MemoryContextInit(void)
120114
* require it to contain at least 8K at all times. This is the only case
121115
* where retained memory in a context is *essential* --- we want to be
122116
* sure ErrorContext still has some memory even if we've run out
123-
* elsewhere!
117+
* elsewhere! Also, allow allocations in ErrorContext within a critical
118+
* section. Otherwise a PANIC will cause an assertion failure in the
119+
* error reporting code, before printing out the real cause of the
120+
* failure.
124121
*
125122
* This should be the last step in this function, as elog.c assumes memory
126123
* management works once ErrorContext is non-null.
@@ -130,6 +127,7 @@ MemoryContextInit(void)
130127
8 * 1024,
131128
8 * 1024,
132129
8 * 1024);
130+
MemoryContextAllowInCriticalSection(ErrorContext, true);
133131
}
134132

135133
/*
@@ -305,6 +303,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
305303
}
306304
}
307305

306+
/*
307+
* MemoryContextAllowInCriticalSection
308+
* Allow/disallow allocations in this memory context within a critical
309+
* section.
310+
*
311+
* Normally, memory allocations are not allowed within a critical section,
312+
* because a failure would lead to PANIC. There are a few exceptions to
313+
* that, like allocations related to debugging code that is not supposed to
314+
* be enabled in production. This function can be used to exempt specific
315+
* memory contexts from the assertion in palloc().
316+
*/
317+
void
318+
MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
319+
{
320+
AssertArg(MemoryContextIsValid(context));
321+
#ifdef USE_ASSERT_CHECKING
322+
context->allowInCritSection = allow;
323+
#endif
324+
}
325+
308326
/*
309327
* GetMemoryChunkSpace
310328
* Given a currently-allocated chunk, determine the total space
@@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
533551
MemoryContext node;
534552
Size needed = size + strlen(name) + 1;
535553

554+
/* creating new memory contexts is not allowed in a critical section */
536555
Assert(CritSectionCount == 0);
537556

538557
/* Get space for node and name */
@@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
570589
node->parent = parent;
571590
node->nextchild = parent->firstchild;
572591
parent->firstchild = node;
592+
593+
#ifdef USE_ASSERT_CHECKING
594+
/* inherit allowInCritSection flag from parent */
595+
node->allowInCritSection = parent->allowInCritSection;
596+
#endif
573597
}
574598

575599
VALGRIND_CREATE_MEMPOOL(node, 0, false);

src/include/nodes/memnodes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ typedef struct MemoryContextData
6060
MemoryContext nextchild; /* next child of same parent */
6161
char *name; /* context name (just for debugging) */
6262
bool isReset; /* T = no space alloced since last reset */
63+
#ifdef USE_ASSERT_CHECKING
64+
bool allowInCritSection; /* allow palloc in critical section */
65+
#endif
6366
} MemoryContextData;
6467

6568
/* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */

src/include/storage/lwlock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
182182

183183
extern Size LWLockShmemSize(void);
184184
extern void CreateLWLocks(void);
185+
extern void InitLWLockAccess(void);
185186

186187
/*
187188
* The traditional method for obtaining an lwlock for use by an extension is

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy