Skip to content

Commit e6858e6

Browse files
committed
Measure the number of all-visible pages for use in index-only scan costing.
Add a column pg_class.relallvisible to remember the number of pages that were all-visible according to the visibility map as of the last VACUUM (or ANALYZE, or some other operations that update pg_class.relpages). Use relallvisible/relpages, instead of an arbitrary constant, to estimate how many heap page fetches can be avoided during an index-only scan.

This is pretty primitive and will no doubt see refinements once we've acquired more field experience with the index-only scan mechanism, but it's way better than using a constant.

Note: I had to adjust an underspecified query in the window.sql regression test, because it was changing answers when the plan changed to use an index-only scan. Some of the adjacent tests perhaps should be adjusted as well, but I didn't do that here.
1 parent dea95c7 commit e6858e6

File tree

22 files changed

+246
-72
lines changed

22 files changed

+246
-72
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,6 +1654,19 @@
16541654
</entry>
16551655
</row>
16561656

1657+
<row>
1658+
<entry><structfield>relallvisible</structfield></entry>
1659+
<entry><type>int4</type></entry>
1660+
<entry></entry>
1661+
<entry>
1662+
Number of pages that are marked all-visible in the table's
1663+
visibility map. This is only an estimate used by the
1664+
planner. It is updated by <command>VACUUM</command>,
1665+
<command>ANALYZE</command>, and a few DDL commands such as
1666+
<command>CREATE INDEX</command>.
1667+
</entry>
1668+
</row>
1669+
16571670
<row>
16581671
<entry><structfield>reltoastrelid</structfield></entry>
16591672
<entry><type>oid</type></entry>

src/backend/access/hash/hash.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS)
5555
IndexBuildResult *result;
5656
BlockNumber relpages;
5757
double reltuples;
58+
double allvisfrac;
5859
uint32 num_buckets;
5960
HashBuildState buildstate;
6061

@@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS)
6768
RelationGetRelationName(index));
6869

6970
/* Estimate the number of rows currently present in the table */
70-
estimate_rel_size(heap, NULL, &relpages, &reltuples);
71+
estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
7172

7273
/* Initialize the hash index metadata page and initial buckets */
7374
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);

src/backend/access/heap/visibilitymap.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
* visibilitymap_pin_ok - check whether correct map page is already pinned
1717
* visibilitymap_set - set a bit in a previously pinned page
1818
* visibilitymap_test - test if a bit is set
19+
* visibilitymap_count - count number of bits set in visibility map
20+
* visibilitymap_truncate - truncate the visibility map
1921
*
2022
* NOTES
2123
*
@@ -110,6 +112,26 @@
110112
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
111113
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
112114

115+
/* table for fast counting of set bits */
116+
static const uint8 number_of_ones[256] = {
117+
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
118+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
119+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
120+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
121+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
122+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
123+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
124+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
125+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
126+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
127+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
128+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
129+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
130+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
131+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
132+
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
133+
};
134+
113135
/* prototypes for internal routines */
114136
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
115137
static void vm_extend(Relation rel, BlockNumber nvmblocks);
@@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
307329
return result;
308330
}
309331

332+
/*
333+
* visibilitymap_count - count number of bits set in visibility map
334+
*
335+
* Note: we ignore the possibility of race conditions when the table is being
336+
* extended concurrently with the call. New pages added to the table aren't
337+
* going to be marked all-visible, so they won't affect the result.
338+
*/
339+
BlockNumber
340+
visibilitymap_count(Relation rel)
341+
{
342+
BlockNumber result = 0;
343+
BlockNumber mapBlock;
344+
345+
for (mapBlock = 0; ; mapBlock++)
346+
{
347+
Buffer mapBuffer;
348+
unsigned char *map;
349+
int i;
350+
351+
/*
352+
* Read till we fall off the end of the map. We assume that any
353+
* extra bytes in the last page are zeroed, so we don't bother
354+
* excluding them from the count.
355+
*/
356+
mapBuffer = vm_readbuf(rel, mapBlock, false);
357+
if (!BufferIsValid(mapBuffer))
358+
break;
359+
360+
/*
361+
* We choose not to lock the page, since the result is going to be
362+
* immediately stale anyway if anyone is concurrently setting or
363+
* clearing bits, and we only really need an approximate value.
364+
*/
365+
map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
366+
367+
for (i = 0; i < MAPSIZE; i++)
368+
{
369+
result += number_of_ones[map[i]];
370+
}
371+
372+
ReleaseBuffer(mapBuffer);
373+
}
374+
375+
return result;
376+
}
377+
310378
/*
311379
* visibilitymap_truncate - truncate the visibility map
312380
*

src/backend/catalog/heap.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc,
772772
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
773773
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
774774
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
775+
values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
775776
values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
776777
values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid);
777778
values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
@@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc,
845846
/* The relation is real, but as yet empty */
846847
new_rel_reltup->relpages = 0;
847848
new_rel_reltup->reltuples = 0;
849+
new_rel_reltup->relallvisible = 0;
848850
break;
849851
case RELKIND_SEQUENCE:
850852
/* Sequences always have a known size */
851853
new_rel_reltup->relpages = 1;
852854
new_rel_reltup->reltuples = 1;
855+
new_rel_reltup->relallvisible = 0;
853856
break;
854857
default:
855858
/* Views, etc, have no disk storage */
856859
new_rel_reltup->relpages = 0;
857860
new_rel_reltup->reltuples = 0;
861+
new_rel_reltup->relallvisible = 0;
858862
break;
859863
}
860864

src/backend/catalog/index.c

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "access/relscan.h"
2727
#include "access/sysattr.h"
2828
#include "access/transam.h"
29+
#include "access/visibilitymap.h"
2930
#include "access/xact.h"
3031
#include "bootstrap/bootstrap.h"
3132
#include "catalog/catalog.h"
@@ -1059,7 +1060,7 @@ index_create(Relation heapRelation,
10591060
true,
10601061
isprimary,
10611062
InvalidOid,
1062-
heapRelation->rd_rel->reltuples);
1063+
-1.0);
10631064
/* Make the above update visible */
10641065
CommandCounterIncrement();
10651066
}
@@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation,
12251226
true,
12261227
true,
12271228
InvalidOid,
1228-
heapRelation->rd_rel->reltuples);
1229+
-1.0);
12291230

12301231
/*
12311232
* If needed, mark the index as primary and/or deferred in pg_index.
@@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo,
15331534
* isprimary: if true, set relhaspkey true; else no change
15341535
* reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
15351536
* else no change
1536-
* reltuples: set reltuples to this value
1537+
* reltuples: if >= 0, set reltuples to this value; else no change
15371538
*
1538-
* relpages is also updated (using RelationGetNumberOfBlocks()).
1539+
* If reltuples >= 0, relpages and relallvisible are also updated (using
1540+
* RelationGetNumberOfBlocks() and visibilitymap_count()).
15391541
*
15401542
* NOTE: an important side-effect of this operation is that an SI invalidation
15411543
* message is sent out to all backends --- including me --- causing relcache
@@ -1550,7 +1552,6 @@ index_update_stats(Relation rel,
15501552
bool hasindex, bool isprimary,
15511553
Oid reltoastidxid, double reltuples)
15521554
{
1553-
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
15541555
Oid relid = RelationGetRelid(rel);
15551556
Relation pg_class;
15561557
HeapTuple tuple;
@@ -1586,9 +1587,11 @@ index_update_stats(Relation rel,
15861587
* It is safe to use a non-transactional update even though our
15871588
* transaction could still fail before committing. Setting relhasindex
15881589
* true is safe even if there are no indexes (VACUUM will eventually fix
1589-
* it), likewise for relhaspkey. And of course the relpages and reltuples
1590-
* counts are correct (or at least more so than the old values)
1591-
* regardless.
1590+
* it), likewise for relhaspkey. And of course the new relpages and
1591+
* reltuples counts are correct regardless. However, we don't want to
1592+
* change relpages (or relallvisible) if the caller isn't providing an
1593+
* updated reltuples count, because that would bollix the
1594+
* reltuples/relpages ratio which is what's really important.
15921595
*/
15931596

15941597
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
@@ -1650,15 +1653,32 @@ index_update_stats(Relation rel,
16501653
dirty = true;
16511654
}
16521655
}
1653-
if (rd_rel->reltuples != (float4) reltuples)
1654-
{
1655-
rd_rel->reltuples = (float4) reltuples;
1656-
dirty = true;
1657-
}
1658-
if (rd_rel->relpages != (int32) relpages)
1656+
1657+
if (reltuples >= 0)
16591658
{
1660-
rd_rel->relpages = (int32) relpages;
1661-
dirty = true;
1659+
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1660+
BlockNumber relallvisible;
1661+
1662+
if (rd_rel->relkind != RELKIND_INDEX)
1663+
relallvisible = visibilitymap_count(rel);
1664+
else /* don't bother for indexes */
1665+
relallvisible = 0;
1666+
1667+
if (rd_rel->relpages != (int32) relpages)
1668+
{
1669+
rd_rel->relpages = (int32) relpages;
1670+
dirty = true;
1671+
}
1672+
if (rd_rel->reltuples != (float4) reltuples)
1673+
{
1674+
rd_rel->reltuples = (float4) reltuples;
1675+
dirty = true;
1676+
}
1677+
if (rd_rel->relallvisible != (int32) relallvisible)
1678+
{
1679+
rd_rel->relallvisible = (int32) relallvisible;
1680+
dirty = true;
1681+
}
16621682
}
16631683

16641684
/*

src/backend/commands/analyze.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "access/transam.h"
2020
#include "access/tupconvert.h"
2121
#include "access/tuptoaster.h"
22+
#include "access/visibilitymap.h"
2223
#include "access/xact.h"
2324
#include "catalog/index.h"
2425
#include "catalog/indexing.h"
@@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
534535
if (!inh)
535536
vac_update_relstats(onerel,
536537
RelationGetNumberOfBlocks(onerel),
537-
totalrows, hasindex, InvalidTransactionId);
538+
totalrows,
539+
visibilitymap_count(onerel),
540+
hasindex,
541+
InvalidTransactionId);
538542

539543
/*
540544
* Same for indexes. Vacuum always scans all indexes, so if we're part of
@@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
551555
totalindexrows = ceil(thisdata->tupleFract * totalrows);
552556
vac_update_relstats(Irel[ind],
553557
RelationGetNumberOfBlocks(Irel[ind]),
554-
totalindexrows, false, InvalidTransactionId);
558+
totalindexrows,
559+
0,
560+
false,
561+
InvalidTransactionId);
555562
}
556563
}
557564

src/backend/commands/cluster.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
12051205
{
12061206
int4 swap_pages;
12071207
float4 swap_tuples;
1208+
int4 swap_allvisible;
12081209

12091210
swap_pages = relform1->relpages;
12101211
relform1->relpages = relform2->relpages;
@@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
12131214
swap_tuples = relform1->reltuples;
12141215
relform1->reltuples = relform2->reltuples;
12151216
relform2->reltuples = swap_tuples;
1217+
1218+
swap_allvisible = relform1->relallvisible;
1219+
relform1->relallvisible = relform2->relallvisible;
1220+
relform2->relallvisible = swap_allvisible;
12161221
}
12171222

12181223
/*

src/backend/commands/vacuum.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
569569
void
570570
vac_update_relstats(Relation relation,
571571
BlockNumber num_pages, double num_tuples,
572+
BlockNumber num_all_visible_pages,
572573
bool hasindex, TransactionId frozenxid)
573574
{
574575
Oid relid = RelationGetRelid(relation);
@@ -599,6 +600,11 @@ vac_update_relstats(Relation relation,
599600
pgcform->reltuples = (float4) num_tuples;
600601
dirty = true;
601602
}
603+
if (pgcform->relallvisible != (int32) num_all_visible_pages)
604+
{
605+
pgcform->relallvisible = (int32) num_all_visible_pages;
606+
dirty = true;
607+
}
602608
if (pgcform->relhasindex != hasindex)
603609
{
604610
pgcform->relhasindex = hasindex;

src/backend/commands/vacuumlazy.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
158158
TransactionId freezeTableLimit;
159159
BlockNumber new_rel_pages;
160160
double new_rel_tuples;
161+
BlockNumber new_rel_allvisible;
161162
TransactionId new_frozen_xid;
162163

163164
/* measure elapsed time iff autovacuum logging requires it */
@@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
222223
* density") with nonzero relpages and reltuples=0 (which means "zero
223224
* tuple density") unless there's some actual evidence for the latter.
224225
*
226+
* We do update relallvisible even in the corner case, since if the
227+
* table is all-visible we'd definitely like to know that. But clamp
228+
* the value to be not more than what we're setting relpages to.
229+
*
225230
* Also, don't change relfrozenxid if we skipped any pages, since then
226231
* we don't know for certain that all tuples have a newer xmin.
227232
*/
@@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
233238
new_rel_tuples = vacrelstats->old_rel_tuples;
234239
}
235240

241+
new_rel_allvisible = visibilitymap_count(onerel);
242+
if (new_rel_allvisible > new_rel_pages)
243+
new_rel_allvisible = new_rel_pages;
244+
236245
new_frozen_xid = FreezeLimit;
237246
if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
238247
new_frozen_xid = InvalidTransactionId;
239248

240249
vac_update_relstats(onerel,
241-
new_rel_pages, new_rel_tuples,
250+
new_rel_pages,
251+
new_rel_tuples,
252+
new_rel_allvisible,
242253
vacrelstats->hasindex,
243254
new_frozen_xid);
244255

@@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel,
10631074
*/
10641075
if (!stats->estimated_count)
10651076
vac_update_relstats(indrel,
1066-
stats->num_pages, stats->num_index_tuples,
1067-
false, InvalidTransactionId);
1077+
stats->num_pages,
1078+
stats->num_index_tuples,
1079+
0,
1080+
false,
1081+
InvalidTransactionId);
10681082

10691083
ereport(elevel,
10701084
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
17431743
WRITE_NODE_FIELD(indexlist);
17441744
WRITE_UINT_FIELD(pages);
17451745
WRITE_FLOAT_FIELD(tuples, "%.0f");
1746+
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
17461747
WRITE_NODE_FIELD(subplan);
17471748
WRITE_NODE_FIELD(subroot);
17481749
WRITE_NODE_FIELD(baserestrictinfo);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy