Skip to content

Commit defe934

Browse files
committed
Make the world safe for full_page_writes.

Allow XLOG records that try to update no-longer-existing pages to fall through as no-ops, but make a note of each page number referenced by such records. If we don't see a later XLOG entry dropping the table or truncating away the page, complain at the end of XLOG replay. Since this fixes the known failure mode for full_page_writes = off, revert my previous band-aid patch that disabled that GUC variable.
1 parent 0fcc3c2 commit defe934

File tree

4 files changed

+226
-22
lines changed

4 files changed

+226
-22
lines changed

src/backend/access/transam/xlog.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.234 2006/04/05 03:34:05 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.235 2006/04/14 20:27:24 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -980,7 +980,8 @@ XLogCheckBuffer(XLogRecData *rdata,
980980
*/
981981
*lsn = page->pd_lsn;
982982

983-
if (XLByteLE(page->pd_lsn, RedoRecPtr))
983+
if (fullPageWrites &&
984+
XLByteLE(page->pd_lsn, RedoRecPtr))
984985
{
985986
/*
986987
* The page needs to be backed up, so set up *bkpb
@@ -4786,6 +4787,12 @@ StartupXLOG(void)
47864787
RmgrTable[rmid].rm_cleanup();
47874788
}
47884789

4790+
/*
4791+
* Check to see if the XLOG sequence contained any unresolved
4792+
* references to uninitialized pages.
4793+
*/
4794+
XLogCheckInvalidPages();
4795+
47894796
/*
47904797
* Reset pgstat data, because it may be invalid after recovery.
47914798
*/

src/backend/access/transam/xlogutils.c

Lines changed: 209 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
14-
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.43 2006/03/31 23:32:06 tgl Exp $
14+
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.44 2006/04/14 20:27:24 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -24,6 +24,176 @@
2424
#include "utils/hsearch.h"
2525

2626

27+
/*
28+
* During XLOG replay, we may see XLOG records for incremental updates of
29+
* pages that no longer exist, because their relation was later dropped or
30+
* truncated. (Note: this is only possible when full_page_writes = OFF,
31+
* since when it's ON, the first reference we see to a page should always
32+
* be a full-page rewrite not an incremental update.) Rather than simply
33+
* ignoring such records, we make a note of the referenced page, and then
34+
* complain if we don't actually see a drop or truncate covering the page
35+
* later in replay.
36+
*/
37+
typedef struct xl_invalid_page_key
38+
{
39+
RelFileNode node; /* the relation */
40+
BlockNumber blkno; /* the page */
41+
} xl_invalid_page_key;
42+
43+
typedef struct xl_invalid_page
44+
{
45+
xl_invalid_page_key key; /* hash key ... must be first */
46+
bool present; /* page existed but contained zeroes */
47+
} xl_invalid_page;
48+
49+
static HTAB *invalid_page_tab = NULL;
50+
51+
52+
/* Log a reference to an invalid page */
53+
static void
54+
log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
55+
{
56+
xl_invalid_page_key key;
57+
xl_invalid_page *hentry;
58+
bool found;
59+
60+
/*
61+
* Log references to invalid pages at DEBUG1 level. This allows some
62+
* tracing of the cause (note the elog context mechanism will tell us
63+
* something about the XLOG record that generated the reference).
64+
*/
65+
if (present)
66+
elog(DEBUG1, "page %u of relation %u/%u/%u is uninitialized",
67+
blkno, node.spcNode, node.dbNode, node.relNode);
68+
else
69+
elog(DEBUG1, "page %u of relation %u/%u/%u does not exist",
70+
blkno, node.spcNode, node.dbNode, node.relNode);
71+
72+
if (invalid_page_tab == NULL)
73+
{
74+
/* create hash table when first needed */
75+
HASHCTL ctl;
76+
77+
memset(&ctl, 0, sizeof(ctl));
78+
ctl.keysize = sizeof(xl_invalid_page_key);
79+
ctl.entrysize = sizeof(xl_invalid_page);
80+
ctl.hash = tag_hash;
81+
82+
invalid_page_tab = hash_create("XLOG invalid-page table",
83+
100,
84+
&ctl,
85+
HASH_ELEM | HASH_FUNCTION);
86+
}
87+
88+
/* we currently assume xl_invalid_page_key contains no padding */
89+
key.node = node;
90+
key.blkno = blkno;
91+
hentry = (xl_invalid_page *)
92+
hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found);
93+
94+
if (!found)
95+
{
96+
/* hash_search already filled in the key */
97+
hentry->present = present;
98+
}
99+
else
100+
{
101+
/* repeat reference ... leave "present" as it was */
102+
}
103+
}
104+
105+
/* Forget any invalid pages >= minblkno, because they've been dropped */
106+
static void
107+
forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
108+
{
109+
HASH_SEQ_STATUS status;
110+
xl_invalid_page *hentry;
111+
112+
if (invalid_page_tab == NULL)
113+
return; /* nothing to do */
114+
115+
hash_seq_init(&status, invalid_page_tab);
116+
117+
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
118+
{
119+
if (RelFileNodeEquals(hentry->key.node, node) &&
120+
hentry->key.blkno >= minblkno)
121+
{
122+
elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped",
123+
hentry->key.blkno, hentry->key.node.spcNode,
124+
hentry->key.node.dbNode, hentry->key.node.relNode);
125+
126+
if (hash_search(invalid_page_tab,
127+
(void *) &hentry->key,
128+
HASH_REMOVE, NULL) == NULL)
129+
elog(ERROR, "hash table corrupted");
130+
}
131+
}
132+
}
133+
134+
/* Forget any invalid pages in a whole database */
135+
static void
136+
forget_invalid_pages_db(Oid dbid)
137+
{
138+
HASH_SEQ_STATUS status;
139+
xl_invalid_page *hentry;
140+
141+
if (invalid_page_tab == NULL)
142+
return; /* nothing to do */
143+
144+
hash_seq_init(&status, invalid_page_tab);
145+
146+
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
147+
{
148+
if (hentry->key.node.dbNode == dbid)
149+
{
150+
elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped",
151+
hentry->key.blkno, hentry->key.node.spcNode,
152+
hentry->key.node.dbNode, hentry->key.node.relNode);
153+
154+
if (hash_search(invalid_page_tab,
155+
(void *) &hentry->key,
156+
HASH_REMOVE, NULL) == NULL)
157+
elog(ERROR, "hash table corrupted");
158+
}
159+
}
160+
}
161+
162+
/* Complain about any remaining invalid-page entries */
163+
void
164+
XLogCheckInvalidPages(void)
165+
{
166+
HASH_SEQ_STATUS status;
167+
xl_invalid_page *hentry;
168+
bool foundone = false;
169+
170+
if (invalid_page_tab == NULL)
171+
return; /* nothing to do */
172+
173+
hash_seq_init(&status, invalid_page_tab);
174+
175+
/*
176+
* Our strategy is to emit WARNING messages for all remaining entries
177+
* and only PANIC after we've dumped all the available info.
178+
*/
179+
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
180+
{
181+
if (hentry->present)
182+
elog(WARNING, "page %u of relation %u/%u/%u was uninitialized",
183+
hentry->key.blkno, hentry->key.node.spcNode,
184+
hentry->key.node.dbNode, hentry->key.node.relNode);
185+
else
186+
elog(WARNING, "page %u of relation %u/%u/%u did not exist",
187+
hentry->key.blkno, hentry->key.node.spcNode,
188+
hentry->key.node.dbNode, hentry->key.node.relNode);
189+
foundone = true;
190+
}
191+
192+
if (foundone)
193+
elog(PANIC, "WAL contains references to invalid pages");
194+
}
195+
196+
27197
/*
28198
* XLogReadBuffer
29199
* Read a page during XLOG replay
@@ -40,12 +210,11 @@
40210
* the page being "new" (all zeroes).
41211
*
42212
* If "init" is false then the caller needs the page to be valid already.
43-
* If the page doesn't exist or contains zeroes, we report failure.
44-
*
45-
* If the return value is InvalidBuffer (only possible when init = false),
46-
* the caller should silently skip the update on this page. This currently
47-
* never happens, but we retain it as part of the API spec for possible future
48-
* use.
213+
* If the page doesn't exist or contains zeroes, we return InvalidBuffer.
214+
* In this case the caller should silently skip the update on this page.
215+
* (In this situation, we expect that the page was later dropped or truncated.
216+
* If we don't see evidence of that later in the WAL sequence, we'll complain
217+
* at the end of WAL replay.)
49218
*/
50219
Buffer
51220
XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
@@ -64,9 +233,10 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
64233
{
65234
/* hm, page doesn't exist in file */
66235
if (!init)
67-
elog(PANIC, "block %u of relation %u/%u/%u does not exist",
68-
blkno, reln->rd_node.spcNode,
69-
reln->rd_node.dbNode, reln->rd_node.relNode);
236+
{
237+
log_invalid_page(reln->rd_node, blkno, false);
238+
return InvalidBuffer;
239+
}
70240
/* OK to extend the file */
71241
/* we do this in recovery only - no rel-extension lock needed */
72242
Assert(InRecovery);
@@ -89,9 +259,11 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
89259
Page page = (Page) BufferGetPage(buffer);
90260

91261
if (PageIsNew((PageHeader) page))
92-
elog(PANIC, "block %u of relation %u/%u/%u is uninitialized",
93-
blkno, reln->rd_node.spcNode,
94-
reln->rd_node.dbNode, reln->rd_node.relNode);
262+
{
263+
UnlockReleaseBuffer(buffer);
264+
log_invalid_page(reln->rd_node, blkno, true);
265+
return InvalidBuffer;
266+
}
95267
}
96268

97269
return buffer;
@@ -195,6 +367,7 @@ void
195367
XLogInitRelationCache(void)
196368
{
197369
_xl_init_rel_cache();
370+
invalid_page_tab = NULL;
198371
}
199372

200373
void
@@ -300,22 +473,26 @@ XLogOpenRelation(RelFileNode rnode)
300473
*
301474
* Currently, we don't bother to physically remove the relation from the
302475
* cache, we just let it age out normally.
476+
*
477+
* This also takes care of removing any open "invalid-page" records for
478+
* the relation.
303479
*/
304480
void
305481
XLogDropRelation(RelFileNode rnode)
306482
{
307-
XLogRelDesc *rdesc;
308483
XLogRelCacheEntry *hentry;
309484

310485
hentry = (XLogRelCacheEntry *)
311486
hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);
312487

313-
if (!hentry)
314-
return; /* not in cache so no work */
488+
if (hentry)
489+
{
490+
XLogRelDesc *rdesc = hentry->rdesc;
315491

316-
rdesc = hentry->rdesc;
492+
RelationCloseSmgr(&(rdesc->reldata));
493+
}
317494

318-
RelationCloseSmgr(&(rdesc->reldata));
495+
forget_invalid_pages(rnode, 0);
319496
}
320497

321498
/*
@@ -338,4 +515,18 @@ XLogDropDatabase(Oid dbid)
338515
if (hentry->rnode.dbNode == dbid)
339516
RelationCloseSmgr(&(rdesc->reldata));
340517
}
518+
519+
forget_invalid_pages_db(dbid);
520+
}
521+
522+
/*
523+
* Truncate a relation during XLOG replay
524+
*
525+
* We don't need to do anything to the fake relcache, but we do need to
526+
* clean up any open "invalid-page" records for the dropped pages.
527+
*/
528+
void
529+
XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks)
530+
{
531+
forget_invalid_pages(rnode, nblocks);
341532
}

src/backend/storage/smgr/smgr.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@
1111
*
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.98 2006/03/30 22:11:55 tgl Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.99 2006/04/14 20:27:24 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
1818
#include "postgres.h"
1919

2020
#include "access/xact.h"
21+
#include "access/xlogutils.h"
2122
#include "commands/tablespace.h"
2223
#include "pgstat.h"
2324
#include "storage/bufmgr.h"
@@ -942,6 +943,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
942943
reln->smgr_rnode.dbNode,
943944
reln->smgr_rnode.relNode,
944945
xlrec->blkno)));
946+
947+
/* Also tell xlogutils.c about it */
948+
XLogTruncateRelation(xlrec->rnode, xlrec->blkno);
945949
}
946950
else
947951
elog(PANIC, "smgr_redo: unknown op code %u", info);

src/include/access/xlogutils.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.20 2006/03/29 21:17:39 tgl Exp $
9+
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.21 2006/04/14 20:27:24 tgl Exp $
1010
*/
1111
#ifndef XLOG_UTILS_H
1212
#define XLOG_UTILS_H
@@ -16,11 +16,13 @@
1616

1717

1818
extern void XLogInitRelationCache(void);
19+
extern void XLogCheckInvalidPages(void);
1920
extern void XLogCloseRelationCache(void);
2021

2122
extern Relation XLogOpenRelation(RelFileNode rnode);
2223
extern void XLogDropRelation(RelFileNode rnode);
2324
extern void XLogDropDatabase(Oid dbid);
25+
extern void XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks);
2426

2527
extern Buffer XLogReadBuffer(Relation reln, BlockNumber blkno, bool init);
2628

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy