Skip to content

Commit 14f84cd

Browse files
committed
Store -1 in attdisbursion to signal 'no duplicates in column'.
Centralize att_disbursion readout logic.
1 parent 5af4b04 commit 14f84cd

File tree

3 files changed: 43 additions and 102 deletions.

src/backend/commands/vacuum.c

Lines changed: 36 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.117 1999/08/08 17:13:10 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.118 1999/08/09 03:16:47 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -2346,36 +2346,46 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
23462346
}
23472347
else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
23482348
{
2349-
/* looks like we have a unique-key attribute */
2350-
double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
2351-
2352-
selratio = 1.0 / total;
2353-
}
2354-
else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
2355-
{
2356-
/* exact result when there are just 1 or 2 values... */
2357-
double min_cnt_d = stats->min_cnt,
2358-
max_cnt_d = stats->max_cnt,
2359-
null_cnt_d = stats->null_cnt;
2360-
double total = ((double) stats->nonnull_cnt) + null_cnt_d;
2361-
2362-
selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
2349+
/* looks like we have a unique-key attribute ---
2350+
* flag this with special -1.0 flag value.
2351+
*
2352+
* The correct disbursion is 1.0/numberOfRows, but since
2353+
* the relation row count can get updated without
2354+
* recomputing disbursion, we want to store a "symbolic"
2355+
* value and figure 1.0/numberOfRows on the fly.
2356+
*/
2357+
selratio = -1;
23632358
}
23642359
else
23652360
{
2366-
double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
2367-
double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
2361+
if (VacAttrStatsLtGtValid(stats) &&
2362+
stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
2363+
{
2364+
/* exact result when there are just 1 or 2 values... */
2365+
double min_cnt_d = stats->min_cnt,
2366+
max_cnt_d = stats->max_cnt,
2367+
null_cnt_d = stats->null_cnt;
2368+
double total = ((double) stats->nonnull_cnt) + null_cnt_d;
23682369

2369-
/*
2370-
* we assume count of other values are 20% of best
2371-
* count in table
2372-
*/
2373-
selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
2370+
selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
2371+
}
2372+
else
2373+
{
2374+
double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
2375+
double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
2376+
2377+
/*
2378+
* we assume count of other values are 20% of best
2379+
* count in table
2380+
*/
2381+
selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
2382+
}
2383+
/* Make sure calculated values are in-range */
2384+
if (selratio < 0.0)
2385+
selratio = 0.0;
2386+
else if (selratio > 1.0)
2387+
selratio = 1.0;
23742388
}
2375-
if (selratio < 0.0)
2376-
selratio = 0.0;
2377-
else if (selratio > 1.0)
2378-
selratio = 1.0;
23792389
attp->attdisbursion = selratio;
23802390

23812391
/*

src/backend/optimizer/path/joinpath.c

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.43 1999/08/06 04:00:15 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.44 1999/08/09 03:16:43 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,7 +23,7 @@
2323
#include "optimizer/pathnode.h"
2424
#include "optimizer/paths.h"
2525
#include "parser/parsetree.h"
26-
#include "utils/syscache.h"
26+
#include "utils/lsyscache.h"
2727

2828
static Path *best_innerjoin(List *join_paths, List *outer_relid);
2929
static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
@@ -586,7 +586,6 @@ hash_inner_and_outer(Query *root,
586586

587587
/*
588588
* Estimate disbursion of the specified Var
589-
* Generate some kind of estimate, no matter what...
590589
*
591590
* We use a default of 0.1 if we can't figure out anything better.
592591
* This will typically discourage use of a hash rather strongly,
@@ -598,24 +597,11 @@ static Cost
598597
estimate_disbursion(Query *root, Var *var)
599598
{
600599
Oid relid;
601-
HeapTuple atp;
602-
double disbursion;
603600

604601
if (! IsA(var, Var))
605602
return 0.1;
606603

607604
relid = getrelid(var->varno, root->rtable);
608605

609-
atp = SearchSysCacheTuple(ATTNUM,
610-
ObjectIdGetDatum(relid),
611-
Int16GetDatum(var->varattno),
612-
0, 0);
613-
if (! HeapTupleIsValid(atp))
614-
return 0.1;
615-
616-
disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
617-
if (disbursion > 0.0)
618-
return disbursion;
619-
620-
return 0.1;
606+
return (Cost) get_attdisbursion(relid, var->varattno, 0.1);
621607
}

src/backend/utils/adt/selfuncs.c

Lines changed: 4 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.37 1999/08/02 02:05:41 tgl Exp $
13+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.38 1999/08/09 03:16:45 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -52,7 +52,6 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
5252
Datum *commonval,
5353
Datum *loval,
5454
Datum *hival);
55-
static double getattdisbursion(Oid relid, AttrNumber attnum);
5655

5756

5857
/*
@@ -172,7 +171,7 @@ eqsel(Oid opid,
172171
/* No VACUUM ANALYZE stats available, so make a guess using
173172
* the disbursion stat (if we have that, which is unlikely...)
174173
*/
175-
selec = getattdisbursion(relid, attno);
174+
selec = get_attdisbursion(relid, attno, 0.01);
176175
}
177176

178177
*result = (float64data) selec;
@@ -374,8 +373,8 @@ eqjoinsel(Oid opid,
374373
*result = 0.1;
375374
else
376375
{
377-
num1 = getattdisbursion(relid1, attno1);
378-
num2 = getattdisbursion(relid2, attno2);
376+
num1 = get_attdisbursion(relid1, attno1, 0.01);
377+
num2 = get_attdisbursion(relid2, attno2, 0.01);
379378
max = (num1 > num2) ? num1 : num2;
380379
if (max <= 0)
381380
*result = 1.0;
@@ -675,60 +674,6 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod,
675674
return true;
676675
}
677676

678-
/*
679-
* getattdisbursion
680-
* Retrieve the disbursion statistic for an attribute,
681-
* or produce an estimate if no info is available.
682-
*/
683-
static double
684-
getattdisbursion(Oid relid, AttrNumber attnum)
685-
{
686-
HeapTuple atp;
687-
double disbursion;
688-
int32 ntuples;
689-
690-
atp = SearchSysCacheTuple(ATTNUM,
691-
ObjectIdGetDatum(relid),
692-
Int16GetDatum(attnum),
693-
0, 0);
694-
if (!HeapTupleIsValid(atp))
695-
{
696-
/* this should not happen */
697-
elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
698-
relid, attnum);
699-
return 0.1;
700-
}
701-
702-
disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
703-
if (disbursion > 0.0)
704-
return disbursion;
705-
706-
/* VACUUM ANALYZE has not stored a disbursion statistic for us.
707-
* Produce an estimate = 1/numtuples. This may produce
708-
* unreasonably small estimates for large tables, so limit
709-
* the estimate to no less than 0.01.
710-
*/
711-
atp = SearchSysCacheTuple(RELOID,
712-
ObjectIdGetDatum(relid),
713-
0, 0, 0);
714-
if (!HeapTupleIsValid(atp))
715-
{
716-
/* this should not happen */
717-
elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
718-
return 0.1;
719-
}
720-
721-
ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
722-
723-
if (ntuples > 0)
724-
disbursion = 1.0 / (double) ntuples;
725-
726-
if (disbursion < 0.01)
727-
disbursion = 0.01;
728-
729-
return disbursion;
730-
}
731-
732677
float64
733678
btreesel(Oid operatorObjectId,
734679
Oid indrelid,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy