Skip to content

Commit 147e372

Browse files
committed
tableam: Avoid relying on relation size to determine validity of tids.
Instead add a tableam callback to do so. To avoid adding per-validation overhead, pass a scan to tuple_tid_valid. In heap's case we'd otherwise have incurred a RelationGetNumberOfBlocks() call for each tid - which would have added noticeable overhead to nodeTidscan.c. Author: Andres Freund Reviewed-By: Ashwin Agrawal Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de
1 parent 7f44ede commit 147e372

File tree

7 files changed

+129
-57
lines changed

7 files changed

+129
-57
lines changed

src/backend/access/heap/heapam.c

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,37 +1654,29 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
16541654
/*
16551655
* heap_get_latest_tid - get the latest tid of a specified tuple
16561656
*
1657-
* Actually, this gets the latest version that is visible according to
1658-
* the passed snapshot. You can pass SnapshotDirty to get the very latest,
1657+
* Actually, this gets the latest version that is visible according to the
1658+
* scan's snapshot. Create a scan using SnapshotDirty to get the very latest,
16591659
* possibly uncommitted version.
16601660
*
16611661
* *tid is both an input and an output parameter: it is updated to
16621662
* show the latest version of the row. Note that it will not be changed
16631663
* if no version of the row passes the snapshot test.
16641664
*/
16651665
void
1666-
heap_get_latest_tid(Relation relation,
1667-
Snapshot snapshot,
1666+
heap_get_latest_tid(TableScanDesc sscan,
16681667
ItemPointer tid)
16691668
{
1670-
BlockNumber blk;
1669+
Relation relation = sscan->rs_rd;
1670+
Snapshot snapshot = sscan->rs_snapshot;
16711671
ItemPointerData ctid;
16721672
TransactionId priorXmax;
16731673

1674-
/* this is to avoid Assert failures on bad input */
1675-
if (!ItemPointerIsValid(tid))
1676-
return;
1677-
16781674
/*
1679-
* Since this can be called with user-supplied TID, don't trust the input
1680-
* too much. (RelationGetNumberOfBlocks is an expensive check, so we
1681-
* don't check t_ctid links again this way. Note that it would not do to
1682-
* call it just once and save the result, either.)
1675+
* table_get_latest_tid verified that the passed in tid is valid. Assume
1676+
* that t_ctid links are valid however - there shouldn't be invalid ones
1677+
* in the table.
16831678
*/
1684-
blk = ItemPointerGetBlockNumber(tid);
1685-
if (blk >= RelationGetNumberOfBlocks(relation))
1686-
elog(ERROR, "block number %u is out of range for relation \"%s\"",
1687-
blk, RelationGetRelationName(relation));
1679+
Assert(ItemPointerIsValid(tid));
16881680

16891681
/*
16901682
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we

src/backend/access/heap/heapam_handler.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,15 @@ heapam_fetch_row_version(Relation relation,
204204
return false;
205205
}
206206

207+
static bool
208+
heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
209+
{
210+
HeapScanDesc hscan = (HeapScanDesc) scan;
211+
212+
return ItemPointerIsValid(tid) &&
213+
ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
214+
}
215+
207216
static bool
208217
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
209218
Snapshot snapshot)
@@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
25682577

25692578
.tuple_fetch_row_version = heapam_fetch_row_version,
25702579
.tuple_get_latest_tid = heap_get_latest_tid,
2580+
.tuple_tid_valid = heapam_tuple_tid_valid,
25712581
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
25722582
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
25732583

src/backend/access/table/tableam.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
213213
}
214214

215215

216+
/* ------------------------------------------------------------------------
217+
* Functions for non-modifying operations on individual tuples
218+
* ------------------------------------------------------------------------
219+
*/
220+
221+
void
222+
table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
223+
{
224+
Relation rel = scan->rs_rd;
225+
const TableAmRoutine *tableam = rel->rd_tableam;
226+
227+
/*
228+
* Since this can be called with user-supplied TID, don't trust the input
229+
* too much.
230+
*/
231+
if (!tableam->tuple_tid_valid(scan, tid))
232+
ereport(ERROR,
233+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
234+
errmsg("tid (%u, %u) is not valid for relation for relation \"%s\"",
235+
ItemPointerGetBlockNumberNoCheck(tid),
236+
ItemPointerGetOffsetNumberNoCheck(tid),
237+
RelationGetRelationName(rel))));
238+
239+
return tableam->tuple_get_latest_tid(scan, tid);
240+
}
241+
242+
216243
/* ----------------------------------------------------------------------------
217244
* Functions to make modifications a bit simpler.
218245
* ----------------------------------------------------------------------------

src/backend/executor/nodeTidscan.c

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,23 @@ static void
129129
TidListEval(TidScanState *tidstate)
130130
{
131131
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
132-
BlockNumber nblocks;
132+
TableScanDesc scan;
133133
ItemPointerData *tidList;
134134
int numAllocTids;
135135
int numTids;
136136
ListCell *l;
137137

138138
/*
139-
* We silently discard any TIDs that are out of range at the time of scan
140-
* start. (Since we hold at least AccessShareLock on the table, it won't
141-
* be possible for someone to truncate away the blocks we intend to
142-
* visit.)
139+
* Start scan on-demand - initializing a scan isn't free (e.g. heap stats
140+
* the size of the table), so it makes sense to delay that until needed -
141+
* the node might never get executed.
143142
*/
144-
nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
143+
if (tidstate->ss.ss_currentScanDesc == NULL)
144+
tidstate->ss.ss_currentScanDesc =
145+
table_beginscan(tidstate->ss.ss_currentRelation,
146+
tidstate->ss.ps.state->es_snapshot,
147+
0, NULL);
148+
scan = tidstate->ss.ss_currentScanDesc;
145149

146150
/*
147151
* We initialize the array with enough slots for the case that all quals
@@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
165169
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
166170
econtext,
167171
&isNull));
168-
if (!isNull &&
169-
ItemPointerIsValid(itemptr) &&
170-
ItemPointerGetBlockNumber(itemptr) < nblocks)
172+
if (isNull)
173+
continue;
174+
175+
/*
176+
* We silently discard any TIDs that the AM considers invalid
177+
* (E.g. for heap, they could be out of range at the time of scan
178+
* start. Since we hold at least AccessShareLock on the table, it
179+
* won't be possible for someone to truncate away the blocks we
180+
* intend to visit.).
181+
*/
182+
if (!table_tuple_tid_valid(scan, itemptr))
183+
continue;
184+
185+
if (numTids >= numAllocTids)
171186
{
172-
if (numTids >= numAllocTids)
173-
{
174-
numAllocTids *= 2;
175-
tidList = (ItemPointerData *)
176-
repalloc(tidList,
177-
numAllocTids * sizeof(ItemPointerData));
178-
}
179-
tidList[numTids++] = *itemptr;
187+
numAllocTids *= 2;
188+
tidList = (ItemPointerData *)
189+
repalloc(tidList,
190+
numAllocTids * sizeof(ItemPointerData));
180191
}
192+
tidList[numTids++] = *itemptr;
181193
}
182194
else if (tidexpr->exprstate && tidexpr->isarray)
183195
{
@@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
206218
}
207219
for (i = 0; i < ndatums; i++)
208220
{
209-
if (!ipnulls[i])
210-
{
211-
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
212-
if (ItemPointerIsValid(itemptr) &&
213-
ItemPointerGetBlockNumber(itemptr) < nblocks)
214-
tidList[numTids++] = *itemptr;
215-
}
221+
if (ipnulls[i])
222+
continue;
223+
224+
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
225+
226+
if (!table_tuple_tid_valid(scan, itemptr))
227+
continue;
228+
229+
tidList[numTids++] = *itemptr;
216230
}
217231
pfree(ipdatums);
218232
pfree(ipnulls);
@@ -306,6 +320,7 @@ TidNext(TidScanState *node)
306320
EState *estate;
307321
ScanDirection direction;
308322
Snapshot snapshot;
323+
TableScanDesc scan;
309324
Relation heapRelation;
310325
TupleTableSlot *slot;
311326
ItemPointerData *tidList;
@@ -327,6 +342,7 @@ TidNext(TidScanState *node)
327342
if (node->tss_TidList == NULL)
328343
TidListEval(node);
329344

345+
scan = node->ss.ss_currentScanDesc;
330346
tidList = node->tss_TidList;
331347
numTids = node->tss_NumTids;
332348

@@ -365,7 +381,7 @@ TidNext(TidScanState *node)
365381
* current according to our snapshot.
366382
*/
367383
if (node->tss_isCurrentOf)
368-
table_get_latest_tid(heapRelation, snapshot, &tid);
384+
table_get_latest_tid(scan, &tid);
369385

370386
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
371387
return slot;
@@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
442458
node->tss_NumTids = 0;
443459
node->tss_TidPtr = -1;
444460

461+
/* not really necessary, but seems good form */
462+
if (node->ss.ss_currentScanDesc)
463+
table_rescan(node->ss.ss_currentScanDesc, NULL);
464+
445465
ExecScanReScan(&node->ss);
446466
}
447467

@@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
455475
void
456476
ExecEndTidScan(TidScanState *node)
457477
{
478+
if (node->ss.ss_currentScanDesc)
479+
table_endscan(node->ss.ss_currentScanDesc);
480+
458481
/*
459482
* Free the exprcontext
460483
*/

src/backend/utils/adt/tid.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
358358
Relation rel;
359359
AclResult aclresult;
360360
Snapshot snapshot;
361+
TableScanDesc scan;
361362

362363
result = (ItemPointer) palloc(sizeof(ItemPointerData));
363364
if (!reloid)
@@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
380381
ItemPointerCopy(tid, result);
381382

382383
snapshot = RegisterSnapshot(GetLatestSnapshot());
383-
table_get_latest_tid(rel, snapshot, result);
384+
scan = table_beginscan(rel, snapshot, 0, NULL);
385+
table_get_latest_tid(scan, result);
386+
table_endscan(scan);
384387
UnregisterSnapshot(snapshot);
385388

386389
table_close(rel, AccessShareLock);
@@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
398401
Relation rel;
399402
AclResult aclresult;
400403
Snapshot snapshot;
404+
TableScanDesc scan;
401405

402406
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
403407
rel = table_openrv(relrv, AccessShareLock);
@@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
415419
ItemPointerCopy(tid, result);
416420

417421
snapshot = RegisterSnapshot(GetLatestSnapshot());
418-
table_get_latest_tid(rel, snapshot, result);
422+
scan = table_beginscan(rel, snapshot, 0, NULL);
423+
table_get_latest_tid(scan, result);
424+
table_endscan(scan);
419425
UnregisterSnapshot(snapshot);
420426

421427
table_close(rel, AccessShareLock);

src/include/access/heapam.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
134134
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
135135
bool *all_dead, bool first_call);
136136

137-
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
138-
ItemPointer tid);
137+
extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
139138
extern void setLastTid(const ItemPointer tid);
140139

141140
extern BulkInsertState GetBulkInsertState(void);

src/include/access/tableam.h

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -308,12 +308,17 @@ typedef struct TableAmRoutine
308308
Snapshot snapshot,
309309
TupleTableSlot *slot);
310310

311+
/*
312+
* Is tid valid for a scan of this relation.
313+
*/
314+
bool (*tuple_tid_valid) (TableScanDesc scan,
315+
ItemPointer tid);
316+
311317
/*
312318
* Return the latest version of the tuple at `tid`, by updating `tid` to
313319
* point at the newest version.
314320
*/
315-
void (*tuple_get_latest_tid) (Relation rel,
316-
Snapshot snapshot,
321+
void (*tuple_get_latest_tid) (TableScanDesc scan,
317322
ItemPointer tid);
318323

319324
/*
@@ -548,10 +553,10 @@ typedef struct TableAmRoutine
548553
/*
549554
* See table_relation_size().
550555
*
551-
* Note that currently a few callers use the MAIN_FORKNUM size to vet the
552-
* validity of tids (e.g. nodeTidscans.c), and others use it to figure out
553-
* the range of potentially interesting blocks (brin, analyze). The
554-
* abstraction around this will need to be improved in the near future.
556+
* Note that currently a few callers use the MAIN_FORKNUM size to figure
557+
* out the range of potentially interesting blocks (brin, analyze). It's
558+
* probable that we'll need to revise the interface for those at some
559+
* point.
555560
*/
556561
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
557562

@@ -986,15 +991,25 @@ table_fetch_row_version(Relation rel,
986991
}
987992

988993
/*
989-
* Return the latest version of the tuple at `tid`, by updating `tid` to
990-
* point at the newest version.
994+
* Verify that `tid` is a potentially valid tuple identifier. That doesn't
995+
* mean that the pointed to row needs to exist or be visible, but that
996+
* attempting to fetch the row (e.g. with table_get_latest_tid() or
997+
* table_fetch_row_version()) should not error out if called with that tid.
998+
*
999+
* `scan` needs to have been started via table_beginscan().
9911000
*/
992-
static inline void
993-
table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
1001+
static inline bool
1002+
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
9941003
{
995-
rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
1004+
return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
9961005
}
9971006

1007+
/*
1008+
* Return the latest version of the tuple at `tid`, by updating `tid` to
1009+
* point at the newest version.
1010+
*/
1011+
extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
1012+
9981013
/*
9991014
* Return true iff tuple in slot satisfies the snapshot.
10001015
*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy