Commit 391eb5e

Reimplement free-space-map management as per recent discussions.
Adjustable threshold is gone in favor of keeping track of total requested page storage and doling out proportional fractions to each relation (with a minimum amount per relation, and some quantization of the results to avoid thrashing with small changes in page counts). Provide special-case code for indexes so as not to waste space storing useless page free space counts. Restructure internal data storage to be a flat array instead of list-of-chunks; this may cost a little more work in data copying when reorganizing, but allows binary search to be used during lookup_fsm_page_entry().
1 parent a455c94 commit 391eb5e
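
The allocation policy described in the commit message (track total requested page storage, give each relation a proportional share, with a per-relation minimum and some quantization to avoid thrashing) can be illustrated with a small standalone program. This is a hypothetical sketch only: the function name, the minimum, and the quantization step are invented for illustration and are not the actual FSM code, which is not among the hunks shown on this page.

#include <stdio.h>

#define MAX_FSM_PAGES      20000   /* total page slots in the shared map (new default) */
#define MIN_PAGES_PER_REL     10   /* hypothetical per-relation floor */
#define QUANTUM               16   /* hypothetical quantization step, in slots */

/* Hypothetical: how many slots one relation gets out of the shared pool. */
static int
proportional_target(double rel_request, double total_request)
{
    /* proportional share of the total slot pool */
    int     target = (int) (MAX_FSM_PAGES * (rel_request / total_request));

    /* quantize so small drifts in requested page counts don't reshuffle everything */
    target = (target / QUANTUM) * QUANTUM;

    /* enforce a minimum so small relations are not starved */
    if (target < MIN_PAGES_PER_REL)
        target = MIN_PAGES_PER_REL;

    return target;
}

int
main(void)
{
    double  requests[] = {50000.0, 12000.0, 300.0};    /* pages requested per relation */
    double  total = 50000.0 + 12000.0 + 300.0;
    int     i;

    for (i = 0; i < 3; i++)
        printf("relation %d: %d slots\n", i, proportional_target(requests[i], total));
    return 0;
}

With requests of 50000, 12000, and 300 pages against a 20000-slot map, this prints targets of 16048, 3840, and 96 slots; the quantization step keeps those targets stable when the request counts drift by a few pages.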

File tree

10 files changed: +1102 additions, -678 deletions

doc/src/sgml/runtime.sgml

Lines changed: 9 additions & 5 deletions
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.169 2003/02/19 04:06:28 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.170 2003/03/04 21:51:19 tgl Exp $
 -->

 <Chapter Id="runtime">
@@ -1725,7 +1725,9 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
      <listitem>
       <para>
        Sets the maximum number of disk pages for which free space will
-       be tracked in the shared free-space map. The default is 10000.
+       be tracked in the shared free-space map. Six bytes of shared memory
+       are consumed for each page slot. This setting must be more than
+       16 * <varname>max_fsm_relations</varname>. The default is 20000.
        This option can only be set at server start.
       </para>
      </listitem>
@@ -1735,9 +1737,11 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
      <term><varname>MAX_FSM_RELATIONS</varname> (<type>integer</type>)</term>
      <listitem>
       <para>
-       Sets the maximum number of relations (tables) for which free
-       space will be tracked in the shared free-space map. The default
-       is 1000. This option can only be set at server start.
+       Sets the maximum number of relations (tables and indexes) for which
+       free space will be tracked in the shared free-space map. Roughly
+       fifty bytes of shared memory are consumed for each slot.
+       The default is 1000.
+       This option can only be set at server start.
      </para>
     </listitem>
    </varlistentry>
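
The documentation hunk above quotes three sizing rules: about six bytes of shared memory per max_fsm_pages slot, roughly fifty bytes per max_fsm_relations slot, and the requirement that max_fsm_pages be more than 16 * max_fsm_relations. A back-of-envelope check with the defaults, treating the byte counts as the documented approximations rather than exact struct sizes:

#include <stdio.h>

int
main(void)
{
    long    max_fsm_pages = 20000;      /* new default from the hunk above */
    long    max_fsm_relations = 1000;   /* unchanged default */
    long    bytes;

    if (max_fsm_pages <= 16 * max_fsm_relations)
        printf("max_fsm_pages should exceed 16 * max_fsm_relations = %ld\n",
               16 * max_fsm_relations);

    bytes = max_fsm_pages * 6 + max_fsm_relations * 50;
    printf("approximate FSM shared memory: %ld bytes (~%.0f kB)\n",
           bytes, bytes / 1024.0);
    return 0;
}

With the defaults this comes to roughly 166 kB of shared memory, and the 16x rule is comfortably satisfied (20000 > 16000).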

src/backend/access/nbtree/nbtpage.c

Lines changed: 2 additions & 7 deletions
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.63 2003/02/23 23:20:52 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.64 2003/03/04 21:51:20 tgl Exp $
  *
  * NOTES
  *    Postgres btree pages look like ordinary relation pages. The opaque
@@ -401,15 +401,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
     * that the page is still free. (For example, an already-free page
     * could have been re-used between the time the last VACUUM scanned
     * it and the time the VACUUM made its FSM updates.)
-    *
-    * The request size should be more than half of what btvacuumcleanup
-    * logs as the per-page free space. We use BLCKSZ/2 and BLCKSZ-1
-    * to try to get some use out of FSM's space management algorithm.
-    * XXX this needs some more thought...
     */
    for (;;)
    {
-       blkno = GetPageWithFreeSpace(&rel->rd_node, BLCKSZ/2);
+       blkno = GetFreeIndexPage(&rel->rd_node);
        if (blkno == InvalidBlockNumber)
            break;
        buf = ReadBuffer(rel, blkno);

src/backend/access/nbtree/nbtree.c

Lines changed: 9 additions & 20 deletions
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.100 2003/02/24 00:57:17 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.101 2003/03/04 21:51:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -697,7 +697,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
    BlockNumber num_pages;
    BlockNumber blkno;
-   PageFreeSpaceInfo *pageSpaces;
+   BlockNumber *freePages;
    int nFreePages,
        maxFreePages;
    BlockNumber pages_deleted = 0;
@@ -712,7 +712,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
    maxFreePages = MaxFSMPages;
    if ((BlockNumber) maxFreePages > num_pages)
        maxFreePages = (int) num_pages + 1;     /* +1 to avoid palloc(0) */
-   pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo));
+   freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
    nFreePages = 0;

    /* Create a temporary memory context to run _bt_pagedel in */
@@ -740,12 +740,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
        {
            /* Okay to recycle this page */
            if (nFreePages < maxFreePages)
-           {
-               pageSpaces[nFreePages].blkno = blkno;
-               /* claimed avail-space must be < BLCKSZ */
-               pageSpaces[nFreePages].avail = BLCKSZ-1;
-               nFreePages++;
-           }
+               freePages[nFreePages++] = blkno;
            pages_deleted++;
        }
        else if (P_ISDELETED(opaque))
@@ -781,12 +776,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
    if (ndel && info->vacuum_full)
    {
        if (nFreePages < maxFreePages)
-       {
-           pageSpaces[nFreePages].blkno = blkno;
-           /* claimed avail-space must be < BLCKSZ */
-           pageSpaces[nFreePages].avail = BLCKSZ-1;
-           nFreePages++;
-       }
+           freePages[nFreePages++] = blkno;
    }

    MemoryContextSwitchTo(oldcontext);
@@ -805,8 +795,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
    {
        BlockNumber new_pages = num_pages;

-       while (nFreePages > 0 &&
-              pageSpaces[nFreePages-1].blkno == new_pages-1)
+       while (nFreePages > 0 && freePages[nFreePages-1] == new_pages-1)
        {
            new_pages--;
            pages_deleted--;
@@ -841,12 +830,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS)

    /*
     * Update the shared Free Space Map with the info we now have about
-    * free space in the index, discarding any old info the map may have.
+    * free pages in the index, discarding any old info the map may have.
     * We do not need to sort the page numbers; they're in order already.
     */
-   MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces);
+   RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);

-   pfree(pageSpaces);
+   pfree(freePages);

    MemoryContextDelete(mycontext);

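
The nbtree hunks above replace per-page (blkno, avail) records with a plain array of block numbers: a btree page handed back by VACUUM is either wholly reusable or not, so storing a byte count for it would waste FSM space. The stubs below are a toy, in-memory model that only mimics the shape of the RecordIndexFreeSpace / GetFreeIndexPage calls visible in this diff; the real FSM routines take a RelFileNode, enforce the shared-map limits, and do their own locking.

#include <stdio.h>

typedef unsigned int BlockNumber;
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)

#define MAX_TRACKED 100

static BlockNumber index_free_pages[MAX_TRACKED];
static int  n_index_free_pages = 0;

/* stub: remember which index pages are wholly free (input assumed sorted) */
static void
record_index_free_space(const BlockNumber *pages, int npages)
{
    int     i;

    if (npages > MAX_TRACKED)
        npages = MAX_TRACKED;
    n_index_free_pages = npages;
    for (i = 0; i < npages; i++)
        index_free_pages[i] = pages[i];
}

/* stub: hand out one recorded free page, or InvalidBlockNumber if none left */
static BlockNumber
get_free_index_page(void)
{
    if (n_index_free_pages == 0)
        return InvalidBlockNumber;
    return index_free_pages[--n_index_free_pages];
}

int
main(void)
{
    BlockNumber freePages[] = {7, 42, 99};  /* as btvacuumcleanup would collect them */
    BlockNumber blkno;

    record_index_free_space(freePages, 3);
    while ((blkno = get_free_index_page()) != InvalidBlockNumber)
        printf("candidate index page %u (caller must re-check it is still free)\n", blkno);
    return 0;
}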

src/backend/commands/vacuum.c

Lines changed: 33 additions & 9 deletions
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.250 2003/02/24 00:57:17 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.251 2003/03/04 21:51:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -336,6 +336,13 @@ vacuum(VacuumStmt *vacstmt)
     */
    StartTransactionCommand(true);

+   /*
+    * If it was a database-wide VACUUM, print FSM usage statistics
+    * (we don't make you be superuser to see these).
+    */
+   if (vacstmt->relation == NULL)
+       PrintFreeSpaceMapStatistics(elevel);
+
    /*
     * If we completed a database-wide VACUUM without skipping any
     * relations, update the database's pg_database row with info
@@ -2781,31 +2788,48 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
               BlockNumber rel_pages)
 {
    int nPages = fraged_pages->num_pages;
-   int i;
+   VacPage *pagedesc = fraged_pages->pagedesc;
+   Size threshold;
    PageFreeSpaceInfo *pageSpaces;
+   int outPages;
+   int i;
+
+   /*
+    * We only report pages with free space at least equal to the average
+    * request size --- this avoids cluttering FSM with uselessly-small bits
+    * of space. Although FSM would discard pages with little free space
+    * anyway, it's important to do this prefiltering because (a) it reduces
+    * the time spent holding the FSM lock in RecordRelationFreeSpace, and
+    * (b) FSM uses the number of pages reported as a statistic for guiding
+    * space management. If we didn't threshold our reports the same way
+    * vacuumlazy.c does, we'd be skewing that statistic.
+    */
+   threshold = GetAvgFSMRequestSize(&onerel->rd_node);

    /* +1 to avoid palloc(0) */
    pageSpaces = (PageFreeSpaceInfo *)
        palloc((nPages + 1) * sizeof(PageFreeSpaceInfo));
+   outPages = 0;

    for (i = 0; i < nPages; i++)
    {
-       pageSpaces[i].blkno = fraged_pages->pagedesc[i]->blkno;
-       pageSpaces[i].avail = fraged_pages->pagedesc[i]->free;
-
        /*
        * fraged_pages may contain entries for pages that we later
        * decided to truncate from the relation; don't enter them into
        * the free space map!
        */
-       if (pageSpaces[i].blkno >= rel_pages)
-       {
-           nPages = i;
+       if (pagedesc[i]->blkno >= rel_pages)
            break;
+
+       if (pagedesc[i]->free >= threshold)
+       {
+           pageSpaces[outPages].blkno = pagedesc[i]->blkno;
+           pageSpaces[outPages].avail = pagedesc[i]->free;
+           outPages++;
        }
    }

-   MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces);
+   RecordRelationFreeSpace(&onerel->rd_node, outPages, pageSpaces);

    pfree(pageSpaces);
 }

src/backend/commands/vacuumlazy.c

Lines changed: 28 additions & 19 deletions
@@ -31,7 +31,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.26 2003/02/24 00:57:17 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.27 2003/03/04 21:51:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,21 +51,11 @@
 /*
  * Space/time tradeoff parameters: do these need to be user-tunable?
  *
- * A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten
- * immediately, and not even passed to the free space map. Removing the
- * uselessly small entries early saves cycles, and in particular reduces
- * the amount of time we spend holding the FSM lock when we finally call
- * MultiRecordFreeSpace. Since the FSM will ignore pages below its own
- * runtime threshold anyway, there's no point in making this really small.
- * XXX Is it worth trying to measure average tuple size, and using that to
- * set the threshold? Problem is we don't know average tuple size very
- * accurately for the first few pages...
- *
  * To consider truncating the relation, we want there to be at least
- * relsize / REL_TRUNCATE_FRACTION potentially-freeable pages.
+ * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
+ * is less) potentially-freeable pages.
  */
-#define PAGE_SPACE_THRESHOLD   ((Size) (BLCKSZ / 32))
-
+#define REL_TRUNCATE_MINIMUM   1000
 #define REL_TRUNCATE_FRACTION  16

 /* MAX_TUPLES_PER_PAGE can be a conservative upper limit */
@@ -78,6 +68,7 @@ typedef struct LVRelStats
    BlockNumber rel_pages;
    double rel_tuples;
    BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
+   Size threshold;             /* minimum interesting free space */
    /* List of TIDs of tuples we intend to delete */
    /* NB: this list is ordered by TID address */
    int num_dead_tuples;        /* current # of entries */
@@ -149,6 +140,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)

    vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

+   /* Set threshold for interesting free space = average request size */
+   /* XXX should we scale it up or down? Adjust vacuum.c too, if so */
+   vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
+
    /* Open all indexes of the relation */
    vac_open_indexes(onerel, &nindexes, &Irel);
    hasindex = (nindexes > 0);
@@ -166,7 +161,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
     * number of pages. Otherwise, the time taken isn't worth it.
     */
    possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
-   if (possibly_freeable > vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
+   if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
+       possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
        lazy_truncate_heap(onerel, vacrelstats);

    /* Update shared free space map with final free space info */
@@ -943,8 +939,21 @@ lazy_record_free_space(LVRelStats *vacrelstats,
    PageFreeSpaceInfo *pageSpaces;
    int n;

-   /* Ignore pages with little free space */
-   if (avail < PAGE_SPACE_THRESHOLD)
+   /*
+    * A page with less than stats->threshold free space will be forgotten
+    * immediately, and never passed to the free space map. Removing the
+    * uselessly small entries early saves cycles, and in particular reduces
+    * the amount of time we spend holding the FSM lock when we finally call
+    * RecordRelationFreeSpace. Since the FSM will probably drop pages with
+    * little free space anyway, there's no point in making this really small.
+    *
+    * XXX Is it worth trying to measure average tuple size, and using that to
+    * adjust the threshold? Would be worthwhile if FSM has no stats yet
+    * for this relation. But changing the threshold as we scan the rel
+    * might lead to bizarre behavior, too. Also, it's probably better if
+    * vacuum.c has the same thresholding behavior as we do here.
+    */
+   if (avail < vacrelstats->threshold)
        return;

    /* Copy pointers to local variables for notational simplicity */
@@ -1079,13 +1088,13 @@ lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
    int nPages = vacrelstats->num_free_pages;

    /*
-    * Sort data into order, as required by MultiRecordFreeSpace.
+    * Sort data into order, as required by RecordRelationFreeSpace.
     */
    if (nPages > 1)
        qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
              vac_cmp_page_spaces);

-   MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces);
+   RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
 }

 /*
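
The commit message also notes that each relation's page entries are now kept in one flat, sorted array instead of a list of chunks, which is what makes binary search practical in lookup_fsm_page_entry(). The sketch below shows only that lookup idea over a simplified entry type; the real function's arguments and the FSM's internal layout are not part of the hunks shown on this page.

#include <stdio.h>

typedef unsigned int BlockNumber;

typedef struct
{
    BlockNumber blkno;      /* page number within the relation */
    unsigned    avail;      /* free bytes recorded for that page */
} FsmPageEntry;             /* simplified stand-in for the real entry type */

/* Return the index of the entry for blkno, or -1 if absent (entries sorted by blkno). */
static int
lookup_fsm_page_entry_sketch(const FsmPageEntry *entries, int n, BlockNumber blkno)
{
    int     low = 0;
    int     high = n - 1;

    while (low <= high)
    {
        int     mid = low + (high - low) / 2;

        if (entries[mid].blkno == blkno)
            return mid;
        if (entries[mid].blkno < blkno)
            low = mid + 1;
        else
            high = mid - 1;
    }
    return -1;
}

int
main(void)
{
    FsmPageEntry entries[] = {{3, 512}, {10, 4096}, {57, 1024}};

    printf("block 10 -> entry %d\n", lookup_fsm_page_entry_sketch(entries, 3, 10));
    printf("block 11 -> entry %d\n", lookup_fsm_page_entry_sketch(entries, 3, 11));
    return 0;
}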