Skip to content

Commit 32ea236

Browse files
committed
Improve the IndexVacuumInfo/IndexBulkDeleteResult API to allow somewhat sane behavior in cases where we don't know the heap tuple count accurately; in particular partial vacuum, but this also makes the API a bit more useful for ANALYZE. This patch adds "estimated_count" flags to both structs so that an approximate count can be flagged as such, and adjusts the logic so that approximate counts are not used for updating pg_class.reltuples.

This fixes my previous complaint that VACUUM was putting ridiculous values into pg_class.reltuples for indexes. The actual impact of that bug is limited, because the planner only pays attention to reltuples for an index if the index is partial; which probably explains why beta testers hadn't noticed a degradation in plan quality from it. But it needs to be fixed.

The whole thing is a bit messy and should be redesigned in future, because reltuples now has the potential to drift quite far away from reality when a long period elapses with no non-partial vacuums. But this is as good as it's going to get for 8.4.
1 parent 4334695 commit 32ea236

File tree

10 files changed: +81 additions, -39 deletions

src/backend/access/gin/ginvacuum.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.29 2009/06/06 22:13:50 tgl Exp $
1212
*-------------------------------------------------------------------------
1313
*/
1414

@@ -741,6 +741,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
741741
* tell how many distinct heap entries are referenced by a GIN index.
742742
*/
743743
stats->num_index_tuples = info->num_heap_tuples;
744+
stats->estimated_count = info->estimated_count;
744745

745746
/*
746747
* If vacuum full, we already have exclusive lock on the index. Otherwise,

src/backend/access/gist/gistvacuum.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.44 2009/06/06 22:13:50 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -524,8 +524,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
524524
{
525525
stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
526526
/* use heap's tuple count */
527-
Assert(info->num_heap_tuples >= 0);
528527
stats->std.num_index_tuples = info->num_heap_tuples;
528+
stats->std.estimated_count = info->estimated_count;
529529

530530
/*
531531
* XXX the above is wrong if index is partial. Would it be OK to just
@@ -679,6 +679,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
679679
if (stats == NULL)
680680
stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
681681
/* we'll re-count the tuples each time */
682+
stats->std.estimated_count = false;
682683
stats->std.num_index_tuples = 0;
683684

684685
stack = (GistBDItem *) palloc0(sizeof(GistBDItem));

src/backend/access/hash/hash.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.110 2009/05/05 19:36:32 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.111 2009/06/06 22:13:50 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -610,6 +610,8 @@ hashbulkdelete(PG_FUNCTION_ARGS)
610610
/*
611611
* Otherwise, our count is untrustworthy since we may have
612612
* double-scanned tuples in split buckets. Proceed by dead-reckoning.
613+
* (Note: we still return estimated_count = false, because using this
614+
* count is better than not updating reltuples at all.)
613615
*/
614616
if (metap->hashm_ntuples > tuples_removed)
615617
metap->hashm_ntuples -= tuples_removed;
@@ -623,6 +625,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
623625
/* return statistics */
624626
if (stats == NULL)
625627
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
628+
stats->estimated_count = false;
626629
stats->num_index_tuples = num_index_tuples;
627630
stats->tuples_removed += tuples_removed;
628631
/* hashvacuumcleanup will fill in num_pages */

src/backend/access/nbtree/nbtree.c

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.169 2009/05/05 19:36:32 tgl Exp $
15+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.170 2009/06/06 22:13:51 tgl Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -579,10 +579,11 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
579579
/*
580580
* During a non-FULL vacuum it's quite possible for us to be fooled by
581581
* concurrent page splits into double-counting some index tuples, so
582-
* disbelieve any total that exceeds the underlying heap's count. (We
583-
* can't check this during btbulkdelete.)
582+
* disbelieve any total that exceeds the underlying heap's count ...
583+
* if we know that accurately. Otherwise this might just make matters
584+
* worse.
584585
*/
585-
if (!info->vacuum_full)
586+
if (!info->vacuum_full && !info->estimated_count)
586587
{
587588
if (stats->num_index_tuples > info->num_heap_tuples)
588589
stats->num_index_tuples = info->num_heap_tuples;
@@ -618,6 +619,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
618619
* Reset counts that will be incremented during the scan; needed in case
619620
* of multiple scans during a single VACUUM command
620621
*/
622+
stats->estimated_count = false;
621623
stats->num_index_tuples = 0;
622624
stats->pages_deleted = 0;
623625

src/backend/catalog/index.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.316 2009/05/31 20:55:37 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.317 2009/06/06 22:13:51 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1938,8 +1938,9 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
19381938
ivinfo.index = indexRelation;
19391939
ivinfo.vacuum_full = false;
19401940
ivinfo.analyze_only = false;
1941+
ivinfo.estimated_count = true;
19411942
ivinfo.message_level = DEBUG2;
1942-
ivinfo.num_heap_tuples = -1;
1943+
ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
19431944
ivinfo.strategy = NULL;
19441945

19451946
state.tuplesort = tuplesort_begin_datum(TIDOID,

src/backend/commands/analyze.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.137 2009/05/19 08:30:00 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.138 2009/06/06 22:13:51 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -498,8 +498,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
498498
ivinfo.index = Irel[ind];
499499
ivinfo.vacuum_full = false;
500500
ivinfo.analyze_only = true;
501+
ivinfo.estimated_count = true;
501502
ivinfo.message_level = elevel;
502-
ivinfo.num_heap_tuples = -1; /* not known for sure */
503+
ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
503504
ivinfo.strategy = vac_strategy;
504505

505506
stats = index_vacuum_cleanup(&ivinfo, NULL);

src/backend/commands/vacuum.c

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
*
1414
*
1515
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.387 2009/03/31 22:12:48 tgl Exp $
16+
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.388 2009/06/06 22:13:51 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -3389,6 +3389,7 @@ scan_index(Relation indrel, double num_tuples)
33893389
ivinfo.index = indrel;
33903390
ivinfo.vacuum_full = true;
33913391
ivinfo.analyze_only = false;
3392+
ivinfo.estimated_count = false;
33923393
ivinfo.message_level = elevel;
33933394
ivinfo.num_heap_tuples = num_tuples;
33943395
ivinfo.strategy = vac_strategy;
@@ -3398,10 +3399,14 @@ scan_index(Relation indrel, double num_tuples)
33983399
if (!stats)
33993400
return;
34003401

3401-
/* now update statistics in pg_class */
3402-
vac_update_relstats(indrel,
3403-
stats->num_pages, stats->num_index_tuples,
3404-
false, InvalidTransactionId);
3402+
/*
3403+
* Now update statistics in pg_class, but only if the index says the
3404+
* count is accurate.
3405+
*/
3406+
if (!stats->estimated_count)
3407+
vac_update_relstats(indrel,
3408+
stats->num_pages, stats->num_index_tuples,
3409+
false, InvalidTransactionId);
34053410

34063411
ereport(elevel,
34073412
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
@@ -3417,7 +3422,8 @@ scan_index(Relation indrel, double num_tuples)
34173422
* Check for tuple count mismatch. If the index is partial, then it's OK
34183423
* for it to have fewer tuples than the heap; else we got trouble.
34193424
*/
3420-
if (stats->num_index_tuples != num_tuples)
3425+
if (!stats->estimated_count &&
3426+
stats->num_index_tuples != num_tuples)
34213427
{
34223428
if (stats->num_index_tuples > num_tuples ||
34233429
!vac_is_partial_index(indrel))
@@ -3456,6 +3462,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34563462
ivinfo.index = indrel;
34573463
ivinfo.vacuum_full = true;
34583464
ivinfo.analyze_only = false;
3465+
ivinfo.estimated_count = false;
34593466
ivinfo.message_level = elevel;
34603467
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
34613468
ivinfo.strategy = vac_strategy;
@@ -3469,10 +3476,14 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34693476
if (!stats)
34703477
return;
34713478

3472-
/* now update statistics in pg_class */
3473-
vac_update_relstats(indrel,
3474-
stats->num_pages, stats->num_index_tuples,
3475-
false, InvalidTransactionId);
3479+
/*
3480+
* Now update statistics in pg_class, but only if the index says the
3481+
* count is accurate.
3482+
*/
3483+
if (!stats->estimated_count)
3484+
vac_update_relstats(indrel,
3485+
stats->num_pages, stats->num_index_tuples,
3486+
false, InvalidTransactionId);
34763487

34773488
ereport(elevel,
34783489
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
@@ -3490,7 +3501,8 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34903501
* Check for tuple count mismatch. If the index is partial, then it's OK
34913502
* for it to have fewer tuples than the heap; else we got trouble.
34923503
*/
3493-
if (stats->num_index_tuples != num_tuples + keep_tuples)
3504+
if (!stats->estimated_count &&
3505+
stats->num_index_tuples != num_tuples + keep_tuples)
34943506
{
34953507
if (stats->num_index_tuples > num_tuples + keep_tuples ||
34963508
!vac_is_partial_index(indrel))

src/backend/commands/vacuumlazy.c

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
*
3030
*
3131
* IDENTIFICATION
32-
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.119 2009/03/24 20:17:14 tgl Exp $
32+
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.120 2009/06/06 22:13:51 tgl Exp $
3333
*
3434
*-------------------------------------------------------------------------
3535
*/
@@ -84,9 +84,11 @@ typedef struct LVRelStats
8484
{
8585
/* hasindex = true means two-pass strategy; false means one-pass */
8686
bool hasindex;
87+
bool scanned_all; /* have we scanned all pages (this far)? */
8788
/* Overall statistics about rel */
8889
BlockNumber rel_pages;
89-
double rel_tuples;
90+
double old_rel_tuples; /* previous value of pg_class.reltuples */
91+
double rel_tuples; /* counts only tuples on scanned pages */
9092
BlockNumber pages_removed;
9193
double tuples_deleted;
9294
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
@@ -96,7 +98,6 @@ typedef struct LVRelStats
9698
int max_dead_tuples; /* # slots allocated in array */
9799
ItemPointer dead_tuples; /* array of ItemPointerData */
98100
int num_index_scans;
99-
bool scanned_all; /* have we scanned all pages (this far)? */
100101
} LVRelStats;
101102

102103

@@ -174,8 +175,9 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
174175

175176
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
176177

177-
vacrelstats->num_index_scans = 0;
178178
vacrelstats->scanned_all = true; /* will be cleared if we skip a page */
179+
vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
180+
vacrelstats->num_index_scans = 0;
179181

180182
/* Open all indexes of the relation */
181183
vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
@@ -876,9 +878,9 @@ lazy_vacuum_index(Relation indrel,
876878
ivinfo.index = indrel;
877879
ivinfo.vacuum_full = false;
878880
ivinfo.analyze_only = false;
881+
ivinfo.estimated_count = true;
879882
ivinfo.message_level = elevel;
880-
/* We don't yet know rel_tuples, so pass -1 */
881-
ivinfo.num_heap_tuples = -1;
883+
ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
882884
ivinfo.strategy = vac_strategy;
883885

884886
/* Do bulk deletion */
@@ -908,19 +910,25 @@ lazy_cleanup_index(Relation indrel,
908910
ivinfo.index = indrel;
909911
ivinfo.vacuum_full = false;
910912
ivinfo.analyze_only = false;
913+
ivinfo.estimated_count = !vacrelstats->scanned_all;
911914
ivinfo.message_level = elevel;
912-
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
915+
/* use rel_tuples only if we scanned all pages, else fall back */
916+
ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
913917
ivinfo.strategy = vac_strategy;
914918

915919
stats = index_vacuum_cleanup(&ivinfo, stats);
916920

917921
if (!stats)
918922
return;
919923

920-
/* now update statistics in pg_class */
921-
vac_update_relstats(indrel,
922-
stats->num_pages, stats->num_index_tuples,
923-
false, InvalidTransactionId);
924+
/*
925+
* Now update statistics in pg_class, but only if the index says the
926+
* count is accurate.
927+
*/
928+
if (!stats->estimated_count)
929+
vac_update_relstats(indrel,
930+
stats->num_pages, stats->num_index_tuples,
931+
false, InvalidTransactionId);
924932

925933
ereport(elevel,
926934
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",

src/backend/postmaster/pgstat.c

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2001-2009, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.187 2009/01/01 17:23:46 momjian Exp $
16+
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.188 2009/06/06 22:13:51 tgl Exp $
1717
* ----------
1818
*/
1919
#include "postgres.h"
@@ -3774,6 +3774,13 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
37743774
{
37753775
if (msg->m_scanned_all)
37763776
tabentry->last_anl_tuples = msg->m_tuples;
3777+
else
3778+
{
3779+
/* last_anl_tuples must never exceed n_live_tuples+n_dead_tuples */
3780+
tabentry->last_anl_tuples = Min(tabentry->last_anl_tuples,
3781+
tabentry->n_live_tuples);
3782+
}
3783+
37773784
if (msg->m_autovacuum)
37783785
tabentry->autovac_analyze_timestamp = msg->m_vacuumtime;
37793786
else

src/include/access/genam.h

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.76 2009/03/24 20:17:14 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.77 2009/06/06 22:13:52 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -34,14 +34,17 @@ typedef struct IndexBuildResult
3434
/*
3535
* Struct for input arguments passed to ambulkdelete and amvacuumcleanup
3636
*
37-
* Note that num_heap_tuples will not be valid during ambulkdelete,
38-
* only amvacuumcleanup.
37+
* num_heap_tuples is accurate only when estimated_count is false;
38+
* otherwise it's just an estimate (currently, the estimate is the
39+
* prior value of the relation's pg_class.reltuples field). It will
40+
* always just be an estimate during ambulkdelete.
3941
*/
4042
typedef struct IndexVacuumInfo
4143
{
4244
Relation index; /* the index being vacuumed */
4345
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
4446
bool analyze_only; /* ANALYZE (without any actual vacuum) */
47+
bool estimated_count; /* num_heap_tuples is an estimate */
4548
int message_level; /* ereport level for progress messages */
4649
double num_heap_tuples; /* tuples remaining in heap */
4750
BufferAccessStrategy strategy; /* access strategy for reads */
@@ -60,12 +63,15 @@ typedef struct IndexVacuumInfo
6063
*
6164
* Note: pages_removed is the amount by which the index physically shrank,
6265
* if any (ie the change in its total size on disk). pages_deleted and
63-
* pages_free refer to free space within the index file.
66+
* pages_free refer to free space within the index file. Some index AMs
67+
* may compute num_index_tuples by reference to num_heap_tuples, in which
68+
* case they should copy the estimated_count field from IndexVacuumInfo.
6469
*/
6570
typedef struct IndexBulkDeleteResult
6671
{
6772
BlockNumber num_pages; /* pages remaining in index */
6873
BlockNumber pages_removed; /* # removed during vacuum operation */
74+
bool estimated_count; /* num_index_tuples is an estimate */
6975
double num_index_tuples; /* tuples remaining */
7076
double tuples_removed; /* # removed during vacuum operation */
7177
BlockNumber pages_deleted; /* # unused pages in index */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy