Skip to content

Commit 6f6d863

Browse files
committed
Create a type-specific typanalyze routine for tsvector, which collects stats on the most common individual lexemes in place of the mostly-useless default behavior of counting duplicate tsvectors. Future work: create selectivity estimation functions that actually do something with these stats. (Some other things we ought to look at doing: using the Lossy Counting algorithm in compute_minimal_stats, and using the element-counting idea for stats on regular arrays.)

Jan Urbanski
1 parent 6816577 commit 6f6d863

File tree

11 files changed

+467
-41
lines changed

11 files changed

+467
-41
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.167 2008/07/11 07:02:43 petere Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.168 2008/07/14 00:51:45 tgl Exp $ -->
22
<!--
33
Documentation of the system catalogs, directed toward PostgreSQL developers
44
-->
@@ -6516,6 +6516,8 @@
65166516
<entry>
65176517
A list of the most common values in the column. (NULL if
65186518
no values seem to be more common than any others.)
6519+
For some datatypes such as <type>tsvector</>, this is a list of
6520+
the most common element values rather than values of the type itself.
65196521
</entry>
65206522
</row>
65216523

@@ -6524,10 +6526,10 @@
65246526
<entry><type>real[]</type></entry>
65256527
<entry></entry>
65266528
<entry>
6527-
A list of the frequencies of the most common values,
6529+
A list of the frequencies of the most common values or elements,
65286530
i.e., number of occurrences of each divided by total number of rows.
65296531
(NULL when <structfield>most_common_vals</structfield> is.)
6530-
</entry>
6532+
</entry>
65316533
</row>
65326534

65336535
<row>

src/backend/catalog/system_views.sql

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* Copyright (c) 1996-2008, PostgreSQL Global Development Group
55
*
6-
* $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.52 2008/05/15 00:17:39 tgl Exp $
6+
* $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.53 2008/07/14 00:51:45 tgl Exp $
77
*/
88

99
CREATE VIEW pg_roles AS
@@ -110,30 +110,30 @@ CREATE VIEW pg_stats AS
110110
stanullfrac AS null_frac,
111111
stawidth AS avg_width,
112112
stadistinct AS n_distinct,
113-
CASE 1
114-
WHEN stakind1 THEN stavalues1
115-
WHEN stakind2 THEN stavalues2
116-
WHEN stakind3 THEN stavalues3
117-
WHEN stakind4 THEN stavalues4
118-
END AS most_common_vals,
119-
CASE 1
120-
WHEN stakind1 THEN stanumbers1
121-
WHEN stakind2 THEN stanumbers2
122-
WHEN stakind3 THEN stanumbers3
123-
WHEN stakind4 THEN stanumbers4
124-
END AS most_common_freqs,
125-
CASE 2
126-
WHEN stakind1 THEN stavalues1
127-
WHEN stakind2 THEN stavalues2
128-
WHEN stakind3 THEN stavalues3
129-
WHEN stakind4 THEN stavalues4
130-
END AS histogram_bounds,
131-
CASE 3
132-
WHEN stakind1 THEN stanumbers1[1]
133-
WHEN stakind2 THEN stanumbers2[1]
134-
WHEN stakind3 THEN stanumbers3[1]
135-
WHEN stakind4 THEN stanumbers4[1]
136-
END AS correlation
113+
CASE
114+
WHEN stakind1 IN (1, 4) THEN stavalues1
115+
WHEN stakind2 IN (1, 4) THEN stavalues2
116+
WHEN stakind3 IN (1, 4) THEN stavalues3
117+
WHEN stakind4 IN (1, 4) THEN stavalues4
118+
END AS most_common_vals,
119+
CASE
120+
WHEN stakind1 IN (1, 4) THEN stanumbers1
121+
WHEN stakind2 IN (1, 4) THEN stanumbers2
122+
WHEN stakind3 IN (1, 4) THEN stanumbers3
123+
WHEN stakind4 IN (1, 4) THEN stanumbers4
124+
END AS most_common_freqs,
125+
CASE
126+
WHEN stakind1 = 2 THEN stavalues1
127+
WHEN stakind2 = 2 THEN stavalues2
128+
WHEN stakind3 = 2 THEN stavalues3
129+
WHEN stakind4 = 2 THEN stavalues4
130+
END AS histogram_bounds,
131+
CASE
132+
WHEN stakind1 = 3 THEN stanumbers1[1]
133+
WHEN stakind2 = 3 THEN stanumbers2[1]
134+
WHEN stakind3 = 3 THEN stanumbers3[1]
135+
WHEN stakind4 = 3 THEN stanumbers4[1]
136+
END AS correlation
137137
FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
138138
JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
139139
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)

src/backend/tsearch/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# Copyright (c) 2006-2008, PostgreSQL Global Development Group
66
#
7-
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.6 2008/02/19 10:30:08 petere Exp $
7+
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.7 2008/07/14 00:51:45 tgl Exp $
88
#
99
#-------------------------------------------------------------------------
1010
subdir = src/backend/tsearch
@@ -19,7 +19,7 @@ DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
1919
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
2020
dict_simple.o dict_synonym.o dict_thesaurus.o \
2121
dict_ispell.o regis.o spell.o \
22-
to_tsany.o ts_utils.o
22+
to_tsany.o ts_typanalyze.o ts_utils.o
2323

2424
include $(top_srcdir)/src/backend/common.mk
2525

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy