Skip to content

Commit 0851e12

Browse files
committed
Reorganize clog's error reporting so that PANIC on clog I/O error can be reduced to a plain ERROR.
This should make it at least a little less painful to deal with data-corruption problems.
1 parent 6bfa2df commit 0851e12

File tree

1 file changed

+135
-30
lines changed
  • src/backend/access/transam

1 file changed

+135
-30
lines changed

src/backend/access/transam/clog.c

Lines changed: 135 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
1414
* Portions Copyright (c) 1994, Regents of the University of California
1515
*
16-
* $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.11 2002/09/26 22:58:33 tgl Exp $
16+
* $Header: /cvsroot/pgsql/src/backend/access/transam/clog.c,v 1.12 2003/04/14 17:31:33 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -123,7 +123,7 @@ typedef enum
123123
CLOG_PAGE_READ_IN_PROGRESS, /* CLOG page is being read in */
124124
CLOG_PAGE_CLEAN, /* CLOG page is valid and not dirty */
125125
CLOG_PAGE_DIRTY, /* CLOG page is valid but needs write */
126-
CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written out in */
126+
CLOG_PAGE_WRITE_IN_PROGRESS /* CLOG page is being written out */
127127
} ClogPageStatus;
128128

129129
/*
@@ -180,12 +180,25 @@ static char ClogDir[MAXPGPATH];
180180
ClogCtl->page_lru_count[slotno] = 0; \
181181
} while (0)
182182

183+
/* Saved info for CLOGReportIOError */
184+
typedef enum
185+
{
186+
CLOG_OPEN_FAILED,
187+
CLOG_CREATE_FAILED,
188+
CLOG_SEEK_FAILED,
189+
CLOG_READ_FAILED,
190+
CLOG_WRITE_FAILED
191+
} ClogErrorCause;
192+
static ClogErrorCause clog_errcause;
193+
static int clog_errno;
194+
183195

184196
static int ZeroCLOGPage(int pageno, bool writeXlog);
185-
static int ReadCLOGPage(int pageno);
197+
static int ReadCLOGPage(int pageno, TransactionId xid);
186198
static void WriteCLOGPage(int slotno);
187-
static void CLOGPhysicalReadPage(int pageno, int slotno);
188-
static void CLOGPhysicalWritePage(int pageno, int slotno);
199+
static bool CLOGPhysicalReadPage(int pageno, int slotno);
200+
static bool CLOGPhysicalWritePage(int pageno, int slotno);
201+
static void CLOGReportIOError(int pageno, TransactionId xid);
189202
static int SelectLRUCLOGPage(int pageno);
190203
static bool ScanCLOGDirectory(int cutoffPage, bool doDeletions);
191204
static bool CLOGPagePrecedes(int page1, int page2);
@@ -212,7 +225,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
212225

213226
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
214227

215-
slotno = ReadCLOGPage(pageno);
228+
slotno = ReadCLOGPage(pageno, xid);
216229
byteptr = ClogCtl->page_buffer[slotno] + byteno;
217230

218231
/* Current state should be 0 or target state */
@@ -244,7 +257,7 @@ TransactionIdGetStatus(TransactionId xid)
244257

245258
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
246259

247-
slotno = ReadCLOGPage(pageno);
260+
slotno = ReadCLOGPage(pageno, xid);
248261
byteptr = ClogCtl->page_buffer[slotno] + byteno;
249262

250263
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
@@ -362,18 +375,22 @@ ZeroCLOGPage(int pageno, bool writeXlog)
362375
* Find a CLOG page in a shared buffer, reading it in if necessary.
363376
* The page number must correspond to an already-initialized page.
364377
*
378+
* The passed-in xid is used only for error reporting, and may be
379+
* InvalidTransactionId if no specific xid is associated with the action.
380+
*
365381
* Return value is the shared-buffer slot number now holding the page.
366382
* The buffer's LRU access info is updated.
367383
*
368384
* Control lock must be held at entry, and will be held at exit.
369385
*/
370386
static int
371-
ReadCLOGPage(int pageno)
387+
ReadCLOGPage(int pageno, TransactionId xid)
372388
{
373389
/* Outer loop handles restart if we lose the buffer to someone else */
374390
for (;;)
375391
{
376392
int slotno;
393+
bool ok;
377394

378395
/* See if page already is in memory; if not, pick victim slot */
379396
slotno = SelectLRUCLOGPage(pageno);
@@ -424,18 +441,22 @@ ReadCLOGPage(int pageno)
424441
}
425442

426443
/* Okay, do the read */
427-
CLOGPhysicalReadPage(pageno, slotno);
444+
ok = CLOGPhysicalReadPage(pageno, slotno);
428445

429446
/* Re-acquire shared control lock and update page state */
430447
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
431448

432449
Assert(ClogCtl->page_number[slotno] == pageno &&
433450
ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS);
434451

435-
ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
452+
ClogCtl->page_status[slotno] = ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_EMPTY;
436453

437454
LWLockRelease(ClogBufferLocks[slotno]);
438455

456+
/* Now it's okay to elog if we failed */
457+
if (!ok)
458+
CLOGReportIOError(pageno, xid);
459+
439460
ClogRecentlyUsed(slotno);
440461
return slotno;
441462
}
@@ -456,6 +477,7 @@ static void
456477
WriteCLOGPage(int slotno)
457478
{
458479
int pageno;
480+
bool ok;
459481

460482
/* Do nothing if page does not need writing */
461483
if (ClogCtl->page_status[slotno] != CLOG_PAGE_DIRTY &&
@@ -499,7 +521,7 @@ WriteCLOGPage(int slotno)
499521
ClogCtl->page_status[slotno] = CLOG_PAGE_WRITE_IN_PROGRESS;
500522

501523
/* Okay, do the write */
502-
CLOGPhysicalWritePage(pageno, slotno);
524+
ok = CLOGPhysicalWritePage(pageno, slotno);
503525

504526
/* Re-acquire shared control lock and update page state */
505527
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
@@ -510,18 +532,26 @@ WriteCLOGPage(int slotno)
510532

511533
/* Cannot set CLEAN if someone re-dirtied page since write started */
512534
if (ClogCtl->page_status[slotno] == CLOG_PAGE_WRITE_IN_PROGRESS)
513-
ClogCtl->page_status[slotno] = CLOG_PAGE_CLEAN;
535+
ClogCtl->page_status[slotno] = ok ? CLOG_PAGE_CLEAN : CLOG_PAGE_DIRTY;
514536

515537
LWLockRelease(ClogBufferLocks[slotno]);
538+
539+
/* Now it's okay to elog if we failed */
540+
if (!ok)
541+
CLOGReportIOError(pageno, InvalidTransactionId);
516542
}
517543

518544
/*
519545
* Physical read of a (previously existing) page into a buffer slot
520546
*
547+
* On failure, we cannot just elog(ERROR) since caller has put state in
548+
* shared memory that must be undone. So, we return FALSE and save enough
549+
* info in static variables to let CLOGReportIOError make the report.
550+
*
521551
* For now, assume it's not worth keeping a file pointer open across
522552
* read/write operations. We could cache one virtual file pointer ...
523553
*/
524-
static void
554+
static bool
525555
CLOGPhysicalReadPage(int pageno, int slotno)
526556
{
527557
int segno = pageno / CLOG_PAGES_PER_SEGMENT;
@@ -543,31 +573,47 @@ CLOGPhysicalReadPage(int pageno, int slotno)
543573
if (fd < 0)
544574
{
545575
if (errno != ENOENT || !InRecovery)
546-
elog(PANIC, "open of %s failed: %m", path);
576+
{
577+
clog_errcause = CLOG_OPEN_FAILED;
578+
clog_errno = errno;
579+
return false;
580+
}
581+
547582
elog(LOG, "clog file %s doesn't exist, reading as zeroes", path);
548583
MemSet(ClogCtl->page_buffer[slotno], 0, CLOG_BLCKSZ);
549-
return;
584+
return true;
550585
}
551586

552587
if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
553-
elog(PANIC, "lseek of clog file %u, offset %u failed: %m",
554-
segno, offset);
588+
{
589+
clog_errcause = CLOG_SEEK_FAILED;
590+
clog_errno = errno;
591+
return false;
592+
}
555593

556594
errno = 0;
557595
if (read(fd, ClogCtl->page_buffer[slotno], CLOG_BLCKSZ) != CLOG_BLCKSZ)
558-
elog(PANIC, "read of clog file %u, offset %u failed: %m",
559-
segno, offset);
596+
{
597+
clog_errcause = CLOG_READ_FAILED;
598+
clog_errno = errno;
599+
return false;
600+
}
560601

561602
close(fd);
603+
return true;
562604
}
563605

564606
/*
565607
* Physical write of a page from a buffer slot
566608
*
609+
* On failure, we cannot just elog(ERROR) since caller has put state in
610+
* shared memory that must be undone. So, we return FALSE and save enough
611+
* info in static variables to let CLOGReportIOError make the report.
612+
*
567613
* For now, assume it's not worth keeping a file pointer open across
568614
* read/write operations. We could cache one virtual file pointer ...
569615
*/
570-
static void
616+
static bool
571617
CLOGPhysicalWritePage(int pageno, int slotno)
572618
{
573619
int segno = pageno / CLOG_PAGES_PER_SEGMENT;
@@ -595,28 +641,85 @@ CLOGPhysicalWritePage(int pageno, int slotno)
595641
if (fd < 0)
596642
{
597643
if (errno != ENOENT)
598-
elog(PANIC, "open of %s failed: %m", path);
644+
{
645+
clog_errcause = CLOG_OPEN_FAILED;
646+
clog_errno = errno;
647+
return false;
648+
}
649+
599650
fd = BasicOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
600651
S_IRUSR | S_IWUSR);
601652
if (fd < 0)
602-
elog(PANIC, "creation of file %s failed: %m", path);
653+
{
654+
clog_errcause = CLOG_CREATE_FAILED;
655+
clog_errno = errno;
656+
return false;
657+
}
603658
}
604659

605660
if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
606-
elog(PANIC, "lseek of clog file %u, offset %u failed: %m",
607-
segno, offset);
661+
{
662+
clog_errcause = CLOG_SEEK_FAILED;
663+
clog_errno = errno;
664+
return false;
665+
}
608666

609667
errno = 0;
610668
if (write(fd, ClogCtl->page_buffer[slotno], CLOG_BLCKSZ) != CLOG_BLCKSZ)
611669
{
612670
/* if write didn't set errno, assume problem is no disk space */
613671
if (errno == 0)
614672
errno = ENOSPC;
615-
elog(PANIC, "write of clog file %u, offset %u failed: %m",
616-
segno, offset);
673+
clog_errcause = CLOG_WRITE_FAILED;
674+
clog_errno = errno;
675+
return false;
617676
}
618677

619678
close(fd);
679+
return true;
680+
}
681+
682+
/*
683+
* Issue the error message after failure of CLOGPhysicalReadPage or
684+
* CLOGPhysicalWritePage. Call this after cleaning up shared-memory state.
685+
*/
686+
static void
687+
CLOGReportIOError(int pageno, TransactionId xid)
688+
{
689+
int segno = pageno / CLOG_PAGES_PER_SEGMENT;
690+
int rpageno = pageno % CLOG_PAGES_PER_SEGMENT;
691+
int offset = rpageno * CLOG_BLCKSZ;
692+
char path[MAXPGPATH];
693+
694+
/* XXX TODO: provide xid as context in error messages */
695+
696+
ClogFileName(path, segno);
697+
errno = clog_errno;
698+
switch (clog_errcause)
699+
{
700+
case CLOG_OPEN_FAILED:
701+
elog(ERROR, "open of %s failed: %m", path);
702+
break;
703+
case CLOG_CREATE_FAILED:
704+
elog(ERROR, "creation of file %s failed: %m", path);
705+
break;
706+
case CLOG_SEEK_FAILED:
707+
elog(ERROR, "lseek of clog file %u, offset %u failed: %m",
708+
segno, offset);
709+
break;
710+
case CLOG_READ_FAILED:
711+
elog(ERROR, "read of clog file %u, offset %u failed: %m",
712+
segno, offset);
713+
break;
714+
case CLOG_WRITE_FAILED:
715+
elog(ERROR, "write of clog file %u, offset %u failed: %m",
716+
segno, offset);
717+
break;
718+
default:
719+
/* can't get here, we trust */
720+
elog(ERROR, "unknown CLOG I/O error");
721+
break;
722+
}
620723
}
621724

622725
/*
@@ -679,7 +782,8 @@ SelectLRUCLOGPage(int pageno)
679782
* the read to complete.
680783
*/
681784
if (ClogCtl->page_status[bestslot] == CLOG_PAGE_READ_IN_PROGRESS)
682-
(void) ReadCLOGPage(ClogCtl->page_number[bestslot]);
785+
(void) ReadCLOGPage(ClogCtl->page_number[bestslot],
786+
InvalidTransactionId);
683787
else
684788
WriteCLOGPage(bestslot);
685789

@@ -857,7 +961,8 @@ restart:;
857961
* This is the same logic as in SelectLRUCLOGPage.
858962
*/
859963
if (ClogCtl->page_status[slotno] == CLOG_PAGE_READ_IN_PROGRESS)
860-
(void) ReadCLOGPage(ClogCtl->page_number[slotno]);
964+
(void) ReadCLOGPage(ClogCtl->page_number[slotno],
965+
InvalidTransactionId);
861966
else
862967
WriteCLOGPage(slotno);
863968
goto restart;
@@ -886,7 +991,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
886991

887992
cldir = opendir(ClogDir);
888993
if (cldir == NULL)
889-
elog(PANIC, "could not open transaction-commit log directory (%s): %m",
994+
elog(ERROR, "could not open transaction-commit log directory (%s): %m",
890995
ClogDir);
891996

892997
errno = 0;
@@ -911,7 +1016,7 @@ ScanCLOGDirectory(int cutoffPage, bool doDeletions)
9111016
errno = 0;
9121017
}
9131018
if (errno)
914-
elog(PANIC, "could not read transaction-commit log directory (%s): %m",
1019+
elog(ERROR, "could not read transaction-commit log directory (%s): %m",
9151020
ClogDir);
9161021
closedir(cldir);
9171022

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy