Skip to content

Commit 696d784

Browse files
committed
tableam: Move heap specific logic from estimate_rel_size below tableam.
This just moves the table/matview[/toast] determination of relation size to a callback, and uses a copy of the existing logic to implement that callback for heap. It probably would make sense to also move the index specific logic into a callback, so the metapage handling (and probably more) can be index specific. But that's a separate task. Author: Andres Freund Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
1 parent 737a292 commit 696d784

File tree

4 files changed

+174
-44
lines changed

4 files changed

+174
-44
lines changed

src/backend/access/heap/heapam_handler.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
*/
2020
#include "postgres.h"
2121

22+
#include <math.h>
23+
2224
#include "miscadmin.h"
2325

2426
#include "access/genam.h"
@@ -33,6 +35,7 @@
3335
#include "catalog/storage_xlog.h"
3436
#include "commands/progress.h"
3537
#include "executor/executor.h"
38+
#include "optimizer/plancat.h"
3639
#include "pgstat.h"
3740
#include "storage/bufmgr.h"
3841
#include "storage/bufpage.h"
@@ -1870,6 +1873,114 @@ reform_and_rewrite_tuple(HeapTuple tuple,
18701873
}
18711874

18721875

1876+
/* ------------------------------------------------------------------------
1877+
* Planner related callbacks for the heap AM
1878+
* ------------------------------------------------------------------------
1879+
*/
1880+
1881+
static void
1882+
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
1883+
BlockNumber *pages, double *tuples,
1884+
double *allvisfrac)
1885+
{
1886+
BlockNumber curpages;
1887+
BlockNumber relpages;
1888+
double reltuples;
1889+
BlockNumber relallvisible;
1890+
double density;
1891+
1892+
/* it has storage, ok to call the smgr */
1893+
curpages = RelationGetNumberOfBlocks(rel);
1894+
1895+
/* coerce values in pg_class to more desirable types */
1896+
relpages = (BlockNumber) rel->rd_rel->relpages;
1897+
reltuples = (double) rel->rd_rel->reltuples;
1898+
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1899+
1900+
/*
1901+
* HACK: if the relation has never yet been vacuumed, use a minimum size
1902+
* estimate of 10 pages. The idea here is to avoid assuming a
1903+
* newly-created table is really small, even if it currently is, because
1904+
* that may not be true once some data gets loaded into it. Once a vacuum
1905+
* or analyze cycle has been done on it, it's more reasonable to believe
1906+
* the size is somewhat stable.
1907+
*
1908+
* (Note that this is only an issue if the plan gets cached and used again
1909+
* after the table has been filled. What we're trying to avoid is using a
1910+
* nestloop-type plan on a table that has grown substantially since the
1911+
* plan was made. Normally, autovacuum/autoanalyze will occur once enough
1912+
* inserts have happened and cause cached-plan invalidation; but that
1913+
* doesn't happen instantaneously, and it won't happen at all for cases
1914+
* such as temporary tables.)
1915+
*
1916+
* We approximate "never vacuumed" by "has relpages = 0", which means this
1917+
* will also fire on genuinely empty relations. Not great, but
1918+
* fortunately that's a seldom-seen case in the real world, and it
1919+
* shouldn't degrade the quality of the plan too much anyway to err in
1920+
* this direction.
1921+
*
1922+
* If the table has inheritance children, we don't apply this heuristic.
1923+
* Totally empty parent tables are quite common, so we should be willing
1924+
* to believe that they are empty.
1925+
*/
1926+
if (curpages < 10 &&
1927+
relpages == 0 &&
1928+
!rel->rd_rel->relhassubclass)
1929+
curpages = 10;
1930+
1931+
/* report estimated # pages */
1932+
*pages = curpages;
1933+
/* quick exit if rel is clearly empty */
1934+
if (curpages == 0)
1935+
{
1936+
*tuples = 0;
1937+
*allvisfrac = 0;
1938+
return;
1939+
}
1940+
1941+
/* estimate number of tuples from previous tuple density */
1942+
if (relpages > 0)
1943+
density = reltuples / (double) relpages;
1944+
else
1945+
{
1946+
/*
1947+
* When we have no data because the relation was truncated, estimate
1948+
* tuple width from attribute datatypes. We assume here that the
1949+
* pages are completely full, which is OK for tables (since they've
1950+
* presumably not been VACUUMed yet) but is probably an overestimate
1951+
* for indexes. Fortunately get_relation_info() can clamp the
1952+
* overestimate to the parent table's size.
1953+
*
1954+
* Note: this code intentionally disregards alignment considerations,
1955+
* because (a) that would be gilding the lily considering how crude
1956+
* the estimate is, and (b) it creates platform dependencies in the
1957+
* default plans which are kind of a headache for regression testing.
1958+
*/
1959+
int32 tuple_width;
1960+
1961+
tuple_width = get_rel_data_width(rel, attr_widths);
1962+
tuple_width += MAXALIGN(SizeofHeapTupleHeader);
1963+
tuple_width += sizeof(ItemIdData);
1964+
/* note: integer division is intentional here */
1965+
density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
1966+
}
1967+
*tuples = rint(density * (double) curpages);
1968+
1969+
/*
1970+
* We use relallvisible as-is, rather than scaling it up like we do for
1971+
* the pages and tuples counts, on the theory that any pages added since
1972+
* the last VACUUM are most likely not marked all-visible. But costsize.c
1973+
* wants it converted to a fraction.
1974+
*/
1975+
if (relallvisible == 0 || curpages <= 0)
1976+
*allvisfrac = 0;
1977+
else if ((double) relallvisible >= curpages)
1978+
*allvisfrac = 1;
1979+
else
1980+
*allvisfrac = (double) relallvisible / curpages;
1981+
}
1982+
1983+
18731984
/* ------------------------------------------------------------------------
18741985
* Definition of the heap table access method.
18751986
* ------------------------------------------------------------------------
@@ -1915,6 +2026,8 @@ static const TableAmRoutine heapam_methods = {
19152026
.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
19162027
.index_build_range_scan = heapam_index_build_range_scan,
19172028
.index_validate_scan = heapam_index_validate_scan,
2029+
2030+
.relation_estimate_size = heapam_estimate_rel_size,
19182031
};
19192032

19202033

src/backend/optimizer/util/plancat.c

Lines changed: 25 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "access/genam.h"
2121
#include "access/htup_details.h"
2222
#include "access/nbtree.h"
23+
#include "access/tableam.h"
2324
#include "access/sysattr.h"
2425
#include "access/table.h"
2526
#include "access/transam.h"
@@ -64,7 +65,6 @@ static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
6465
Relation relation, bool inhparent);
6566
static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
6667
List *idxExprs);
67-
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
6868
static List *get_relation_constraints(PlannerInfo *root,
6969
Oid relationObjectId, RelOptInfo *rel,
7070
bool include_notnull);
@@ -948,47 +948,26 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
948948
switch (rel->rd_rel->relkind)
949949
{
950950
case RELKIND_RELATION:
951-
case RELKIND_INDEX:
952951
case RELKIND_MATVIEW:
953952
case RELKIND_TOASTVALUE:
954-
/* it has storage, ok to call the smgr */
955-
curpages = RelationGetNumberOfBlocks(rel);
953+
table_relation_estimate_size(rel, attr_widths, pages, tuples,
954+
allvisfrac);
955+
break;
956+
957+
case RELKIND_INDEX:
956958

957959
/*
958-
* HACK: if the relation has never yet been vacuumed, use a
959-
* minimum size estimate of 10 pages. The idea here is to avoid
960-
* assuming a newly-created table is really small, even if it
961-
* currently is, because that may not be true once some data gets
962-
* loaded into it. Once a vacuum or analyze cycle has been done
963-
* on it, it's more reasonable to believe the size is somewhat
964-
* stable.
965-
*
966-
* (Note that this is only an issue if the plan gets cached and
967-
* used again after the table has been filled. What we're trying
968-
* to avoid is using a nestloop-type plan on a table that has
969-
* grown substantially since the plan was made. Normally,
970-
* autovacuum/autoanalyze will occur once enough inserts have
971-
* happened and cause cached-plan invalidation; but that doesn't
972-
* happen instantaneously, and it won't happen at all for cases
973-
* such as temporary tables.)
974-
*
975-
* We approximate "never vacuumed" by "has relpages = 0", which
976-
* means this will also fire on genuinely empty relations. Not
977-
* great, but fortunately that's a seldom-seen case in the real
978-
* world, and it shouldn't degrade the quality of the plan too
979-
* much anyway to err in this direction.
980-
*
981-
* There are two exceptions wherein we don't apply this heuristic.
982-
* One is if the table has inheritance children. Totally empty
983-
* parent tables are quite common, so we should be willing to
984-
* believe that they are empty. Also, we don't apply the 10-page
985-
* minimum to indexes.
960+
* XXX: It'd probably be good to move this into a callback, since
961+
* individual index types know, e.g., whether they have a metapage.
986962
*/
987-
if (curpages < 10 &&
988-
rel->rd_rel->relpages == 0 &&
989-
!rel->rd_rel->relhassubclass &&
990-
rel->rd_rel->relkind != RELKIND_INDEX)
991-
curpages = 10;
963+
964+
/* it has storage, ok to call the smgr */
965+
curpages = RelationGetNumberOfBlocks(rel);
966+
967+
/* coerce values in pg_class to more desirable types */
968+
relpages = (BlockNumber) rel->rd_rel->relpages;
969+
reltuples = (double) rel->rd_rel->reltuples;
970+
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
992971

993972
/* report estimated # pages */
994973
*pages = curpages;
@@ -1005,13 +984,12 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
1005984
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1006985

1007986
/*
1008-
* If it's an index, discount the metapage while estimating the
1009-
* number of tuples. This is a kluge because it assumes more than
1010-
* it ought to about index structure. Currently it's OK for
1011-
* btree, hash, and GIN indexes but suspect for GiST indexes.
987+
* Discount the metapage while estimating the number of tuples.
988+
* This is a kluge because it assumes more than it ought to about
989+
* index structure. Currently it's OK for btree, hash, and GIN
990+
* indexes but suspect for GiST indexes.
1012991
*/
1013-
if (rel->rd_rel->relkind == RELKIND_INDEX &&
1014-
relpages > 0)
992+
if (relpages > 0)
1015993
{
1016994
curpages--;
1017995
relpages--;
@@ -1036,6 +1014,8 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10361014
* considering how crude the estimate is, and (b) it creates
10371015
* platform dependencies in the default plans which are kind
10381016
* of a headache for regression testing.
1017+
*
1018+
* XXX: Should this logic be more index specific?
10391019
*/
10401020
int32 tuple_width;
10411021

@@ -1060,6 +1040,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10601040
else
10611041
*allvisfrac = (double) relallvisible / curpages;
10621042
break;
1043+
10631044
case RELKIND_SEQUENCE:
10641045
/* Sequences always have a known size */
10651046
*pages = 1;
@@ -1095,7 +1076,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10951076
* since they might be mostly NULLs, treating them as zero-width is not
10961077
* necessarily the wrong thing anyway.
10971078
*/
1098-
static int32
1079+
int32
10991080
get_rel_data_width(Relation rel, int32 *attr_widths)
11001081
{
11011082
int32 tuple_width = 0;

src/include/access/tableam.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,22 @@ typedef struct TableAmRoutine
491491
Snapshot snapshot,
492492
struct ValidateIndexState *state);
493493

494+
495+
/* ------------------------------------------------------------------------
496+
* Planner related functions.
497+
* ------------------------------------------------------------------------
498+
*/
499+
500+
/*
501+
* See table_relation_estimate_size().
502+
*
503+
* While block oriented, it shouldn't be too hard for an AM that
504+
* doesn't internally use blocks to convert into a usable representation.
505+
*/
506+
void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
507+
BlockNumber *pages, double *tuples,
508+
double *allvisfrac);
509+
494510
} TableAmRoutine;
495511

496512

@@ -1286,6 +1302,25 @@ table_index_validate_scan(Relation heap_rel,
12861302
}
12871303

12881304

1305+
/* ----------------------------------------------------------------------------
1306+
* Planner related functionality
1307+
* ----------------------------------------------------------------------------
1308+
*/
1309+
1310+
/*
1311+
* Estimate the current size of the relation, as an AM specific workhorse for
1312+
* estimate_rel_size(). Look there for an explanation of the parameters.
1313+
*/
1314+
static inline void
1315+
table_relation_estimate_size(Relation rel, int32 *attr_widths,
1316+
BlockNumber *pages, double *tuples,
1317+
double *allvisfrac)
1318+
{
1319+
rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1320+
allvisfrac);
1321+
}
1322+
1323+
12891324
/* ----------------------------------------------------------------------------
12901325
* Functions to make modifications a bit simpler.
12911326
* ----------------------------------------------------------------------------

src/include/optimizer/plancat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extern List *infer_arbiter_indexes(PlannerInfo *root);
3333
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
3434
BlockNumber *pages, double *tuples, double *allvisfrac);
3535

36+
extern int32 get_rel_data_width(Relation rel, int32 *attr_widths);
3637
extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
3738

3839
extern bool relation_excluded_by_constraints(PlannerInfo *root,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy