Skip to content

Commit f6d208d

Browse files
TABLESAMPLE, SQL Standard and extensible
Add a TABLESAMPLE clause to SELECT statements that allows user to specify random BERNOULLI sampling or block level SYSTEM sampling. Implementation allows for extensible sampling functions to be written, using a standard API. Basic version follows SQLStandard exactly. Usable concrete use cases for the sampling API follow in later commits. Petr Jelinek Reviewed by Michael Paquier and Simon Riggs
1 parent 11a83bb commit f6d208d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2756
-40
lines changed

contrib/file_fdw/file_fdw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
10971097
* Found a suitable tuple, so save it, replacing one old tuple
10981098
* at random
10991099
*/
1100-
int k = (int) (targrows * sampler_random_fract());
1100+
int k = (int) (targrows * sampler_random_fract(rstate.randstate));
11011101

11021102
Assert(k >= 0 && k < targrows);
11031103
heap_freetuple(rows[k]);

contrib/postgres_fdw/postgres_fdw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2557,7 +2557,7 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
25572557
if (astate->rowstoskip <= 0)
25582558
{
25592559
/* Choose a random reservoir element to replace. */
2560-
pos = (int) (targrows * sampler_random_fract());
2560+
pos = (int) (targrows * sampler_random_fract(astate->rstate.randstate));
25612561
Assert(pos >= 0 && pos < targrows);
25622562
heap_freetuple(astate->rows[pos]);
25632563
}

doc/src/sgml/catalogs.sgml

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,11 @@
278278
<entry>planner statistics</entry>
279279
</row>
280280

281+
<row>
282+
<entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
283+
<entry>table sampling methods</entry>
284+
</row>
285+
281286
<row>
282287
<entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
283288
<entry>tablespaces within this database cluster</entry>
@@ -6127,6 +6132,121 @@
61276132
</sect1>
61286133

61296134

6135+
<sect1 id="catalog-pg-tablesample-method">
6136+
<title><structname>pg_tabesample_method</structname></title>
6137+
6138+
<indexterm zone="catalog-pg-tablesample-method">
6139+
<primary>pg_am</primary>
6140+
</indexterm>
6141+
6142+
<para>
6143+
The catalog <structname>pg_tablesample_method</structname> stores
6144+
information about table sampling methods which can be used in
6145+
<command>TABLESAMPLE</command> clause of a <command>SELECT</command>
6146+
statement.
6147+
</para>
6148+
6149+
<table>
6150+
<title><structname>pg_tablesample_method</> Columns</title>
6151+
6152+
<tgroup cols="4">
6153+
<thead>
6154+
<row>
6155+
<entry>Name</entry>
6156+
<entry>Type</entry>
6157+
<entry>References</entry>
6158+
<entry>Description</entry>
6159+
</row>
6160+
</thead>
6161+
<tbody>
6162+
6163+
<row>
6164+
<entry><structfield>oid</structfield></entry>
6165+
<entry><type>oid</type></entry>
6166+
<entry></entry>
6167+
<entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
6168+
</row>
6169+
6170+
<row>
6171+
<entry><structfield>tsmname</structfield></entry>
6172+
<entry><type>name</type></entry>
6173+
<entry></entry>
6174+
<entry>Name of the sampling method</entry>
6175+
</row>
6176+
6177+
<row>
6178+
<entry><structfield>tsmseqscan</structfield></entry>
6179+
<entry><type>bool</type></entry>
6180+
<entry></entry>
6181+
<entry>If true, the sampling method scans the whole table sequentially.
6182+
</entry>
6183+
</row>
6184+
6185+
<row>
6186+
<entry><structfield>tsmpagemode</structfield></entry>
6187+
<entry><type>bool</type></entry>
6188+
<entry></entry>
6189+
<entry>If true, the sampling method always reads the pages completely.
6190+
</entry>
6191+
</row>
6192+
6193+
<row>
6194+
<entry><structfield>tsminit</structfield></entry>
6195+
<entry><type>regproc</type></entry>
6196+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6197+
<entry><quote>Initialize the sampling scan</quote> function</entry>
6198+
</row>
6199+
6200+
<row>
6201+
<entry><structfield>tsmnextblock</structfield></entry>
6202+
<entry><type>regproc</type></entry>
6203+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6204+
<entry><quote>Get next block number</quote> function</entry>
6205+
</row>
6206+
6207+
<row>
6208+
<entry><structfield>tsmnexttuple</structfield></entry>
6209+
<entry><type>regproc</type></entry>
6210+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6211+
<entry><quote>Get next tuple offset</quote> function</entry>
6212+
</row>
6213+
6214+
<row>
6215+
<entry><structfield>tsmexaminetuple</structfield></entry>
6216+
<entry><type>regproc</type></entry>
6217+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6218+
<entry>Function which examines the tuple contents and decides if to
6219+
return it, or zero if none</entry>
6220+
</row>
6221+
6222+
<row>
6223+
<entry><structfield>tsmend</structfield></entry>
6224+
<entry><type>regproc</type></entry>
6225+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6226+
<entry><quote>End the sampling scan</quote> function</entry>
6227+
</row>
6228+
6229+
<row>
6230+
<entry><structfield>tsmreset</structfield></entry>
6231+
<entry><type>regproc</type></entry>
6232+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6233+
<entry><quote>Restart the state of sampling scan</quote> function</entry>
6234+
</row>
6235+
6236+
<row>
6237+
<entry><structfield>tsmcost</structfield></entry>
6238+
<entry><type>regproc</type></entry>
6239+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
6240+
<entry>Costing function</entry>
6241+
</row>
6242+
6243+
</tbody>
6244+
</tgroup>
6245+
</table>
6246+
6247+
</sect1>
6248+
6249+
61306250
<sect1 id="catalog-pg-tablespace">
61316251
<title><structname>pg_tablespace</structname></title>
61326252

doc/src/sgml/ref/select.sgml

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
4949

5050
<phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
5151

52-
[ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
52+
[ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
5353
[ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
5454
<replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
5555
[ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
@@ -316,6 +316,50 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
316316
</listitem>
317317
</varlistentry>
318318

319+
<varlistentry>
320+
<term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
321+
<listitem>
322+
<para>
323+
Table sample clause after
324+
<replaceable class="parameter">table_name</replaceable> indicates that
325+
a <replaceable class="parameter">sampling_method</replaceable> should
326+
be used to retrieve subset of rows in the table.
327+
The <replaceable class="parameter">sampling_method</replaceable> can be
328+
any sampling method installed in the database. There are currently two
329+
sampling methods available in the standard
330+
<productname>PostgreSQL</productname> distribution:
331+
<itemizedlist>
332+
<listitem>
333+
<para><literal>SYSTEM</literal></para>
334+
</listitem>
335+
<listitem>
336+
<para><literal>BERNOULLI</literal></para>
337+
</listitem>
338+
</itemizedlist>
339+
Both of these sampling methods currently accept only single argument
340+
which is the percent (floating point from 0 to 100) of the rows to
341+
be returned.
342+
The <literal>SYSTEM</literal> sampling method does block level
343+
sampling with each block having the same chance of being selected and
344+
returns all rows from each selected block.
345+
The <literal>BERNOULLI</literal> scans whole table and returns
346+
individual rows with equal probability. Additional sampling methods
347+
may be installed in the database via extensions.
348+
</para>
349+
<para>
350+
The optional parameter <literal>REPEATABLE</literal> uses the seed
351+
parameter, which can be a number or expression producing a number, as
352+
a random seed for sampling. Note that subsequent commands may return
353+
different results even if same <literal>REPEATABLE</literal> clause was
354+
specified. This happens because <acronym>DML</acronym> statements and
355+
maintenance operations such as <command>VACUUM</> may affect physical
356+
distribution of data. The <function>setseed()</> function will not
357+
affect the sampling result when the <literal>REPEATABLE</literal>
358+
parameter is used.
359+
</para>
360+
</listitem>
361+
</varlistentry>
362+
319363
<varlistentry>
320364
<term><replaceable class="parameter">alias</replaceable></term>
321365
<listitem>
@@ -1927,5 +1971,20 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
19271971
<literal>ROWS FROM( ... )</> is an extension of the SQL standard.
19281972
</para>
19291973
</refsect2>
1974+
1975+
<refsect2>
1976+
<title><literal>TABLESAMPLE</literal> clause</title>
1977+
1978+
<para>
1979+
The <literal>TABLESAMPLE</> clause is currently accepted only on physical
1980+
relations and materialized views.
1981+
</para>
1982+
1983+
<para>
1984+
Additional modules allow you to install custom sampling methods and use
1985+
them instead of the SQL standard methods.
1986+
</para>
1987+
</refsect2>
1988+
19301989
</refsect1>
19311990
</refentry>

src/backend/access/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ subdir = src/backend/access
88
top_builddir = ../../..
99
include $(top_builddir)/src/Makefile.global
1010

11-
SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist transam
11+
SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \
12+
tablesample transam
1213

1314
include $(top_srcdir)/src/backend/common.mk

src/backend/access/heap/heapam.c

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,9 @@ bool synchronize_seqscans = true;
8080
static HeapScanDesc heap_beginscan_internal(Relation relation,
8181
Snapshot snapshot,
8282
int nkeys, ScanKey key,
83-
bool allow_strat, bool allow_sync,
84-
bool is_bitmapscan, bool temp_snap);
83+
bool allow_strat, bool allow_sync, bool allow_pagemode,
84+
bool is_bitmapscan, bool is_samplescan,
85+
bool temp_snap);
8586
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
8687
TransactionId xid, CommandId cid, int options);
8788
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -294,9 +295,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
294295

295296
/*
296297
* Currently, we don't have a stats counter for bitmap heap scans (but the
297-
* underlying bitmap index scans will be counted).
298+
* underlying bitmap index scans will be counted) or sample scans (we only
299+
* update stats for tuple fetches there)
298300
*/
299-
if (!scan->rs_bitmapscan)
301+
if (!scan->rs_bitmapscan && !scan->rs_samplescan)
300302
pgstat_count_heap_scan(scan->rs_rd);
301303
}
302304

@@ -315,7 +317,7 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
315317
* In page-at-a-time mode it performs additional work, namely determining
316318
* which tuples on the page are visible.
317319
*/
318-
static void
320+
void
319321
heapgetpage(HeapScanDesc scan, BlockNumber page)
320322
{
321323
Buffer buffer;
@@ -1310,14 +1312,17 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
13101312
* HeapScanDesc for a bitmap heap scan. Although that scan technology is
13111313
* really quite unlike a standard seqscan, there is just enough commonality
13121314
* to make it worth using the same data structure.
1315+
*
1316+
* heap_beginscan_samplingscan is alternate entry point for setting up a
1317+
* HeapScanDesc for a TABLESAMPLE scan.
13131318
* ----------------
13141319
*/
13151320
HeapScanDesc
13161321
heap_beginscan(Relation relation, Snapshot snapshot,
13171322
int nkeys, ScanKey key)
13181323
{
13191324
return heap_beginscan_internal(relation, snapshot, nkeys, key,
1320-
true, true, false, false);
1325+
true, true, true, false, false, false);
13211326
}
13221327

13231328
HeapScanDesc
@@ -1327,7 +1332,7 @@ heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
13271332
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
13281333

13291334
return heap_beginscan_internal(relation, snapshot, nkeys, key,
1330-
true, true, false, true);
1335+
true, true, true, false, false, true);
13311336
}
13321337

13331338
HeapScanDesc
@@ -1336,22 +1341,33 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot,
13361341
bool allow_strat, bool allow_sync)
13371342
{
13381343
return heap_beginscan_internal(relation, snapshot, nkeys, key,
1339-
allow_strat, allow_sync, false, false);
1344+
allow_strat, allow_sync, true,
1345+
false, false, false);
13401346
}
13411347

13421348
HeapScanDesc
13431349
heap_beginscan_bm(Relation relation, Snapshot snapshot,
13441350
int nkeys, ScanKey key)
13451351
{
13461352
return heap_beginscan_internal(relation, snapshot, nkeys, key,
1347-
false, false, true, false);
1353+
false, false, true, true, false, false);
1354+
}
1355+
1356+
HeapScanDesc
1357+
heap_beginscan_sampling(Relation relation, Snapshot snapshot,
1358+
int nkeys, ScanKey key,
1359+
bool allow_strat, bool allow_pagemode)
1360+
{
1361+
return heap_beginscan_internal(relation, snapshot, nkeys, key,
1362+
allow_strat, false, allow_pagemode,
1363+
false, true, false);
13481364
}
13491365

13501366
static HeapScanDesc
13511367
heap_beginscan_internal(Relation relation, Snapshot snapshot,
13521368
int nkeys, ScanKey key,
1353-
bool allow_strat, bool allow_sync,
1354-
bool is_bitmapscan, bool temp_snap)
1369+
bool allow_strat, bool allow_sync, bool allow_pagemode,
1370+
bool is_bitmapscan, bool is_samplescan, bool temp_snap)
13551371
{
13561372
HeapScanDesc scan;
13571373

@@ -1373,6 +1389,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
13731389
scan->rs_snapshot = snapshot;
13741390
scan->rs_nkeys = nkeys;
13751391
scan->rs_bitmapscan = is_bitmapscan;
1392+
scan->rs_samplescan = is_samplescan;
13761393
scan->rs_strategy = NULL; /* set in initscan */
13771394
scan->rs_allow_strat = allow_strat;
13781395
scan->rs_allow_sync = allow_sync;
@@ -1381,7 +1398,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
13811398
/*
13821399
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
13831400
*/
1384-
scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
1401+
scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
13851402

13861403
/*
13871404
* For a seqscan in a serializable transaction, acquire a predicate lock
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#-------------------------------------------------------------------------
2+
#
3+
# Makefile--
4+
# Makefile for utils/tablesample
5+
#
6+
# IDENTIFICATION
7+
# src/backend/utils/tablesample/Makefile
8+
#
9+
#-------------------------------------------------------------------------
10+
11+
subdir = src/backend/access/tablesample
12+
top_builddir = ../../../..
13+
include $(top_builddir)/src/Makefile.global
14+
15+
OBJS = tablesample.o system.o bernoulli.o
16+
17+
include $(top_srcdir)/src/backend/common.mk

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy