Skip to content

Commit 85e2ced

Browse files
committed
Improve bulk-insert performance by keeping the current target buffer pinned
(but not locked, as that would risk deadlocks). Also, make it work in a small ring of buffers to avoid having bulk inserts trash the whole buffer arena. Robert Haas, after an idea of Simon Riggs'.
1 parent cdc197c commit 85e2ced

File tree

12 files changed

+201
-82
lines changed

12 files changed

+201
-82
lines changed

src/backend/access/heap/heapam.c

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
17991799
}
18001800

18011801

1802+
/*
1803+
* GetBulkInsertState - prepare status object for a bulk insert
1804+
*/
1805+
BulkInsertState
1806+
GetBulkInsertState(void)
1807+
{
1808+
BulkInsertState bistate;
1809+
1810+
bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1811+
bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1812+
bistate->current_buf = InvalidBuffer;
1813+
return bistate;
1814+
}
1815+
1816+
/*
1817+
* FreeBulkInsertState - clean up after finishing a bulk insert
1818+
*/
1819+
void
1820+
FreeBulkInsertState(BulkInsertState bistate)
1821+
{
1822+
if (bistate->current_buf != InvalidBuffer)
1823+
ReleaseBuffer(bistate->current_buf);
1824+
FreeAccessStrategy(bistate->strategy);
1825+
pfree(bistate);
1826+
}
1827+
1828+
18021829
/*
18031830
* heap_insert - insert tuple into a heap
18041831
*
18051832
* The new tuple is stamped with current transaction ID and the specified
18061833
* command ID.
18071834
*
1808-
* If use_wal is false, the new tuple is not logged in WAL, even for a
1809-
* non-temp relation. Safe usage of this behavior requires that we arrange
1810-
* that all new tuples go into new pages not containing any tuples from other
1811-
* transactions, and that the relation gets fsync'd before commit.
1812-
* (See also heap_sync() comments)
1835+
* If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
1836+
* logged in WAL, even for a non-temp relation. Safe usage of this behavior
1837+
* requires that we arrange that all new tuples go into new pages not
1838+
* containing any tuples from other transactions, and that the relation gets
1839+
* fsync'd before commit. (See also heap_sync() comments)
1840+
*
1841+
* The HEAP_INSERT_SKIP_FSM option is passed directly to
1842+
* RelationGetBufferForTuple; see that function for more info.
18131843
*
1814-
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
1815-
* more info.
1844+
* Note that these options will be applied when inserting into the heap's
1845+
* TOAST table, too, if the tuple requires any out-of-line data.
18161846
*
1817-
* Note that use_wal and use_fsm will be applied when inserting into the
1818-
* heap's TOAST table, too, if the tuple requires any out-of-line data.
1847+
* The BulkInsertState object (if any; bistate can be NULL for default
1848+
* behavior) is also just passed through to RelationGetBufferForTuple.
18191849
*
18201850
* The return value is the OID assigned to the tuple (either here or by the
18211851
* caller), or InvalidOid if no OID. The header fields of *tup are updated
@@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
18251855
*/
18261856
Oid
18271857
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
1828-
bool use_wal, bool use_fsm)
1858+
int options, BulkInsertState bistate)
18291859
{
18301860
TransactionId xid = GetCurrentTransactionId();
18311861
HeapTuple heaptup;
@@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
18771907
heaptup = tup;
18781908
}
18791909
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
1880-
heaptup = toast_insert_or_update(relation, tup, NULL,
1881-
use_wal, use_fsm);
1910+
heaptup = toast_insert_or_update(relation, tup, NULL, options);
18821911
else
18831912
heaptup = tup;
18841913

18851914
/* Find buffer to insert this tuple into */
18861915
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1887-
InvalidBuffer, use_fsm);
1916+
InvalidBuffer, options, bistate);
18881917

18891918
/* NO EREPORT(ERROR) from here till changes are logged */
18901919
START_CRIT_SECTION();
@@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
19051934
MarkBufferDirty(buffer);
19061935

19071936
/* XLOG stuff */
1908-
if (use_wal && !relation->rd_istemp)
1937+
if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp)
19091938
{
19101939
xl_heap_insert xlrec;
19111940
xl_heap_header xlhdr;
@@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
20002029
Oid
20012030
simple_heap_insert(Relation relation, HeapTuple tup)
20022031
{
2003-
return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
2032+
return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
20042033
}
20052034

20062035
/*
@@ -2595,8 +2624,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
25952624
if (need_toast)
25962625
{
25972626
/* Note we always use WAL and FSM during updates */
2598-
heaptup = toast_insert_or_update(relation, newtup, &oldtup,
2599-
true, true);
2627+
heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0);
26002628
newtupsize = MAXALIGN(heaptup->t_len);
26012629
}
26022630
else
@@ -2623,7 +2651,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
26232651
{
26242652
/* Assume there's no chance to put heaptup on same page. */
26252653
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2626-
buffer, true);
2654+
buffer, 0, NULL);
26272655
}
26282656
else
26292657
{
@@ -2640,7 +2668,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
26402668
*/
26412669
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
26422670
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2643-
buffer, true);
2671+
buffer, 0, NULL);
26442672
}
26452673
else
26462674
{

src/backend/access/heap/hio.c

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515

1616
#include "postgres.h"
1717

18+
#include "access/heapam.h"
1819
#include "access/hio.h"
1920
#include "storage/bufmgr.h"
2021
#include "storage/freespace.h"
@@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
5657
((HeapTupleHeader) item)->t_ctid = tuple->t_self;
5758
}
5859

60+
/*
61+
* Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
62+
*/
63+
static Buffer
64+
ReadBufferBI(Relation relation, BlockNumber targetBlock,
65+
BulkInsertState bistate)
66+
{
67+
Buffer buffer;
68+
69+
/* If not bulk-insert, exactly like ReadBuffer */
70+
if (!bistate)
71+
return ReadBuffer(relation, targetBlock);
72+
73+
/* If we have the desired block already pinned, re-pin and return it */
74+
if (bistate->current_buf != InvalidBuffer)
75+
{
76+
if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
77+
{
78+
IncrBufferRefCount(bistate->current_buf);
79+
return bistate->current_buf;
80+
}
81+
/* ... else drop the old buffer */
82+
ReleaseBuffer(bistate->current_buf);
83+
bistate->current_buf = InvalidBuffer;
84+
}
85+
86+
/* Perform a read using the buffer strategy */
87+
buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
88+
RBM_NORMAL, bistate->strategy);
89+
90+
/* Save the selected block as target for future inserts */
91+
IncrBufferRefCount(buffer);
92+
bistate->current_buf = buffer;
93+
94+
return buffer;
95+
}
96+
5997
/*
6098
* RelationGetBufferForTuple
6199
*
@@ -80,20 +118,26 @@ RelationPutHeapTuple(Relation relation,
80118
* happen if space is freed in that page after heap_update finds there's not
81119
* enough there). In that case, the page will be pinned and locked only once.
82120
*
83-
* If use_fsm is true (the normal case), we use FSM to help us find free
84-
* space. If use_fsm is false, we always append a new empty page to the
85-
* end of the relation if the tuple won't fit on the current target page.
121+
* We normally use FSM to help us find free space. However,
122+
* if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
123+
* the end of the relation if the tuple won't fit on the current target page.
86124
* This can save some cycles when we know the relation is new and doesn't
87125
* contain useful amounts of free space.
88126
*
89-
* The use_fsm = false case is also useful for non-WAL-logged additions to a
127+
* HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
90128
* relation, if the caller holds exclusive lock and is careful to invalidate
91129
* relation->rd_targblock before the first insertion --- that ensures that
92130
* all insertions will occur into newly added pages and not be intermixed
93131
* with tuples from other transactions. That way, a crash can't risk losing
94132
* any committed data of other transactions. (See heap_insert's comments
95133
* for additional constraints needed for safe usage of this behavior.)
96134
*
135+
* The caller can also provide a BulkInsertState object to optimize many
136+
* insertions into the same relation. This keeps a pin on the current
137+
* insertion target page (to save pin/unpin cycles) and also passes a
138+
* BULKWRITE buffer selection strategy object to the buffer manager.
139+
* Passing NULL for bistate selects the default behavior.
140+
*
97141
* We always try to avoid filling existing pages further than the fillfactor.
98142
* This is OK since this routine is not consulted when updating a tuple and
99143
* keeping it on the same page, which is the scenario fillfactor is meant
@@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
104148
*/
105149
Buffer
106150
RelationGetBufferForTuple(Relation relation, Size len,
107-
Buffer otherBuffer, bool use_fsm)
151+
Buffer otherBuffer, int options,
152+
struct BulkInsertStateData *bistate)
108153
{
154+
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
109155
Buffer buffer = InvalidBuffer;
110156
Page page;
111157
Size pageFreeSpace,
@@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
116162

117163
len = MAXALIGN(len); /* be conservative */
118164

165+
/* Bulk insert is not supported for updates, only inserts. */
166+
Assert(otherBuffer == InvalidBuffer || !bistate);
167+
119168
/*
120169
* If we're gonna fail for oversize tuple, do it right away
121170
*/
@@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
137186

138187
/*
139188
* We first try to put the tuple on the same page we last inserted a tuple
140-
* on, as cached in the relcache entry. If that doesn't work, we ask the
141-
* shared Free Space Map to locate a suitable page. Since the FSM's info
142-
* might be out of date, we have to be prepared to loop around and retry
143-
* multiple times. (To insure this isn't an infinite loop, we must update
144-
* the FSM with the correct amount of free space on each page that proves
145-
* not to be suitable.) If the FSM has no record of a page with enough
146-
* free space, we give up and extend the relation.
189+
* on, as cached in the BulkInsertState or relcache entry. If that
190+
* doesn't work, we ask the Free Space Map to locate a suitable page.
191+
* Since the FSM's info might be out of date, we have to be prepared to
192+
* loop around and retry multiple times. (To ensure this isn't an infinite
193+
* loop, we must update the FSM with the correct amount of free space on
194+
* each page that proves not to be suitable.) If the FSM has no record of
195+
* a page with enough free space, we give up and extend the relation.
147196
*
148197
* When use_fsm is false, we either put the tuple onto the existing target
149198
* page or extend the relation.
150199
*/
151-
if (len + saveFreeSpace <= MaxHeapTupleSize)
152-
targetBlock = relation->rd_targblock;
153-
else
200+
if (len + saveFreeSpace > MaxHeapTupleSize)
154201
{
155-
/* can't fit, don't screw up FSM request tracking by trying */
202+
/* can't fit, don't bother asking FSM */
156203
targetBlock = InvalidBlockNumber;
157204
use_fsm = false;
158205
}
206+
else if (bistate && bistate->current_buf != InvalidBuffer)
207+
targetBlock = BufferGetBlockNumber(bistate->current_buf);
208+
else
209+
targetBlock = relation->rd_targblock;
159210

160211
if (targetBlock == InvalidBlockNumber && use_fsm)
161212
{
@@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
189240
if (otherBuffer == InvalidBuffer)
190241
{
191242
/* easy case */
192-
buffer = ReadBuffer(relation, targetBlock);
243+
buffer = ReadBufferBI(relation, targetBlock, bistate);
193244
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
194245
}
195246
else if (otherBlock == targetBlock)
@@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
274325
* it worth keeping an accurate file length in shared memory someplace,
275326
* rather than relying on the kernel to do it for us?
276327
*/
277-
buffer = ReadBuffer(relation, P_NEW);
328+
buffer = ReadBufferBI(relation, P_NEW, bistate);
278329

279330
/*
280331
* We can be certain that locking the otherBuffer first is OK, since it

src/backend/access/heap/rewriteheap.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@
9696
* Portions Copyright (c) 1994-5, Regents of the University of California
9797
*
9898
* IDENTIFICATION
99-
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
99+
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $
100100
*
101101
*-------------------------------------------------------------------------
102102
*/
@@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
575575
}
576576
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
577577
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
578-
state->rs_use_wal, false);
578+
HEAP_INSERT_SKIP_FSM |
579+
(state->rs_use_wal ?
580+
0 : HEAP_INSERT_SKIP_WAL));
579581
else
580582
heaptup = tup;
581583

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy