Skip to content

Commit d25f519

Browse files
committed
tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.
This moves the responsibility for:
- creating the storage necessary for a relation, including creating a new relfilenode for a relation with existing storage
- non-transactional truncation of a relation
- VACUUM FULL / CLUSTER's rewrite of a table

below tableam.

This is fairly straightforward, with a bit of complexity smattered in to move the computation of xid / multixid horizons below the AM, as they don't make sense for every table AM.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
1 parent 7e69323 commit d25f519

File tree

13 files changed

+856
-579
lines changed

13 files changed

+856
-579
lines changed

src/backend/access/heap/heapam_handler.c

Lines changed: 451 additions & 0 deletions
Large diffs are not rendered by default.

src/backend/bootstrap/bootparse.y

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ Boot_CreateStmt:
209209

210210
if ($4)
211211
{
212+
TransactionId relfrozenxid;
213+
MultiXactId relminmxid;
214+
212215
if (boot_reldesc)
213216
{
214217
elog(DEBUG4, "create bootstrap: warning, open relation exists, closing first");
@@ -226,7 +229,9 @@ Boot_CreateStmt:
226229
RELPERSISTENCE_PERMANENT,
227230
shared_relation,
228231
mapped_relation,
229-
true);
232+
true,
233+
&relfrozenxid,
234+
&relminmxid);
230235
elog(DEBUG4, "bootstrap relation created");
231236
}
232237
else

src/backend/catalog/heap.c

Lines changed: 49 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "access/relation.h"
3636
#include "access/sysattr.h"
3737
#include "access/table.h"
38+
#include "access/tableam.h"
3839
#include "access/transam.h"
3940
#include "access/xact.h"
4041
#include "access/xlog.h"
@@ -98,6 +99,8 @@ static void AddNewRelationTuple(Relation pg_class_desc,
9899
Oid reloftype,
99100
Oid relowner,
100101
char relkind,
102+
TransactionId relfrozenxid,
103+
TransactionId relminmxid,
101104
Datum relacl,
102105
Datum reloptions);
103106
static ObjectAddress AddNewRelationType(const char *typeName,
@@ -300,7 +303,9 @@ heap_create(const char *relname,
300303
char relpersistence,
301304
bool shared_relation,
302305
bool mapped_relation,
303-
bool allow_system_table_mods)
306+
bool allow_system_table_mods,
307+
TransactionId *relfrozenxid,
308+
MultiXactId *relminmxid)
304309
{
305310
bool create_storage;
306311
Relation rel;
@@ -327,6 +332,9 @@ heap_create(const char *relname,
327332
get_namespace_name(relnamespace), relname),
328333
errdetail("System catalog modifications are currently disallowed.")));
329334

335+
*relfrozenxid = InvalidTransactionId;
336+
*relminmxid = InvalidMultiXactId;
337+
330338
/* Handle reltablespace for specific relkinds. */
331339
switch (relkind)
332340
{
@@ -400,13 +408,36 @@ heap_create(const char *relname,
400408
/*
401409
* Have the storage manager create the relation's disk file, if needed.
402410
*
403-
* We only create the main fork here, other forks will be created on
404-
* demand.
411+
* For relations the callback creates both the main and the init fork, for
412+
* indexes only the main fork is created. The other forks will be created
413+
* on demand.
405414
*/
406415
if (create_storage)
407416
{
408417
RelationOpenSmgr(rel);
409-
RelationCreateStorage(rel->rd_node, relpersistence);
418+
419+
switch (rel->rd_rel->relkind)
420+
{
421+
case RELKIND_VIEW:
422+
case RELKIND_COMPOSITE_TYPE:
423+
case RELKIND_FOREIGN_TABLE:
424+
case RELKIND_PARTITIONED_TABLE:
425+
case RELKIND_PARTITIONED_INDEX:
426+
Assert(false);
427+
break;
428+
429+
case RELKIND_INDEX:
430+
case RELKIND_SEQUENCE:
431+
RelationCreateStorage(rel->rd_node, relpersistence);
432+
break;
433+
434+
case RELKIND_RELATION:
435+
case RELKIND_TOASTVALUE:
436+
case RELKIND_MATVIEW:
437+
table_relation_set_new_filenode(rel, relpersistence,
438+
relfrozenxid, relminmxid);
439+
break;
440+
}
410441
}
411442

412443
return rel;
@@ -892,6 +923,8 @@ AddNewRelationTuple(Relation pg_class_desc,
892923
Oid reloftype,
893924
Oid relowner,
894925
char relkind,
926+
TransactionId relfrozenxid,
927+
TransactionId relminmxid,
895928
Datum relacl,
896929
Datum reloptions)
897930
{
@@ -928,40 +961,8 @@ AddNewRelationTuple(Relation pg_class_desc,
928961
break;
929962
}
930963

931-
/* Initialize relfrozenxid and relminmxid */
932-
if (relkind == RELKIND_RELATION ||
933-
relkind == RELKIND_MATVIEW ||
934-
relkind == RELKIND_TOASTVALUE)
935-
{
936-
/*
937-
* Initialize to the minimum XID that could put tuples in the table.
938-
* We know that no xacts older than RecentXmin are still running, so
939-
* that will do.
940-
*/
941-
new_rel_reltup->relfrozenxid = RecentXmin;
942-
943-
/*
944-
* Similarly, initialize the minimum Multixact to the first value that
945-
* could possibly be stored in tuples in the table. Running
946-
* transactions could reuse values from their local cache, so we are
947-
* careful to consider all currently running multis.
948-
*
949-
* XXX this could be refined further, but is it worth the hassle?
950-
*/
951-
new_rel_reltup->relminmxid = GetOldestMultiXactId();
952-
}
953-
else
954-
{
955-
/*
956-
* Other relation types will not contain XIDs, so set relfrozenxid to
957-
* InvalidTransactionId. (Note: a sequence does contain a tuple, but
958-
* we force its xmin to be FrozenTransactionId always; see
959-
* commands/sequence.c.)
960-
*/
961-
new_rel_reltup->relfrozenxid = InvalidTransactionId;
962-
new_rel_reltup->relminmxid = InvalidMultiXactId;
963-
}
964-
964+
new_rel_reltup->relfrozenxid = relfrozenxid;
965+
new_rel_reltup->relminmxid = relminmxid;
965966
new_rel_reltup->relowner = relowner;
966967
new_rel_reltup->reltype = new_type_oid;
967968
new_rel_reltup->reloftype = reloftype;
@@ -1089,6 +1090,8 @@ heap_create_with_catalog(const char *relname,
10891090
Oid new_type_oid;
10901091
ObjectAddress new_type_addr;
10911092
Oid new_array_oid = InvalidOid;
1093+
TransactionId relfrozenxid;
1094+
MultiXactId relminmxid;
10921095

10931096
pg_class_desc = table_open(RelationRelationId, RowExclusiveLock);
10941097

@@ -1220,7 +1223,9 @@ heap_create_with_catalog(const char *relname,
12201223
relpersistence,
12211224
shared_relation,
12221225
mapped_relation,
1223-
allow_system_table_mods);
1226+
allow_system_table_mods,
1227+
&relfrozenxid,
1228+
&relminmxid);
12241229

12251230
Assert(relid == RelationGetRelid(new_rel_desc));
12261231

@@ -1319,6 +1324,8 @@ heap_create_with_catalog(const char *relname,
13191324
reloftypeid,
13201325
ownerid,
13211326
relkind,
1327+
relfrozenxid,
1328+
relminmxid,
13221329
PointerGetDatum(relacl),
13231330
reloptions);
13241331

@@ -1407,14 +1414,6 @@ heap_create_with_catalog(const char *relname,
14071414
if (oncommit != ONCOMMIT_NOOP)
14081415
register_on_commit_action(relid, oncommit);
14091416

1410-
/*
1411-
* Unlogged objects need an init fork, except for partitioned tables which
1412-
* have no storage at all.
1413-
*/
1414-
if (relpersistence == RELPERSISTENCE_UNLOGGED &&
1415-
relkind != RELKIND_PARTITIONED_TABLE)
1416-
heap_create_init_fork(new_rel_desc);
1417-
14181417
/*
14191418
* ok, the relation has been cataloged, so close our relations and return
14201419
* the OID of the newly created relation.
@@ -1425,27 +1424,6 @@ heap_create_with_catalog(const char *relname,
14251424
return relid;
14261425
}
14271426

1428-
/*
1429-
* Set up an init fork for an unlogged table so that it can be correctly
1430-
* reinitialized on restart. An immediate sync is required even if the
1431-
* page has been logged, because the write did not go through
1432-
* shared_buffers and therefore a concurrent checkpoint may have moved
1433-
* the redo pointer past our xlog record. Recovery may as well remove it
1434-
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
1435-
* record. Therefore, logging is necessary even if wal_level=minimal.
1436-
*/
1437-
void
1438-
heap_create_init_fork(Relation rel)
1439-
{
1440-
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
1441-
rel->rd_rel->relkind == RELKIND_MATVIEW ||
1442-
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
1443-
RelationOpenSmgr(rel);
1444-
smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
1445-
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
1446-
smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
1447-
}
1448-
14491427
/*
14501428
* RelationRemoveInheritance
14511429
*
@@ -3168,8 +3146,8 @@ heap_truncate_one_rel(Relation rel)
31683146
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
31693147
return;
31703148

3171-
/* Truncate the actual file (and discard buffers) */
3172-
RelationTruncate(rel, 0);
3149+
/* Truncate the underlying relation */
3150+
table_relation_nontransactional_truncate(rel);
31733151

31743152
/* If the relation has indexes, truncate the indexes too */
31753153
RelationTruncateIndexes(rel);
@@ -3180,7 +3158,7 @@ heap_truncate_one_rel(Relation rel)
31803158
{
31813159
Relation toastrel = table_open(toastrelid, AccessExclusiveLock);
31823160

3183-
RelationTruncate(toastrel, 0);
3161+
table_relation_nontransactional_truncate(toastrel);
31843162
RelationTruncateIndexes(toastrel);
31853163
/* keep the lock... */
31863164
table_close(toastrel, NoLock);

src/backend/catalog/index.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,8 @@ index_create(Relation heapRelation,
739739
bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
740740
bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
741741
char relkind;
742+
TransactionId relfrozenxid;
743+
MultiXactId relminmxid;
742744

743745
/* constraint flags can only be set when a constraint is requested */
744746
Assert((constr_flags == 0) ||
@@ -899,8 +901,12 @@ index_create(Relation heapRelation,
899901
relpersistence,
900902
shared_relation,
901903
mapped_relation,
902-
allow_system_table_mods);
904+
allow_system_table_mods,
905+
&relfrozenxid,
906+
&relminmxid);
903907

908+
Assert(relfrozenxid == InvalidTransactionId);
909+
Assert(relminmxid == InvalidMultiXactId);
904910
Assert(indexRelationId == RelationGetRelid(indexRelation));
905911

906912
/*
@@ -2850,8 +2856,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
28502856
}
28512857

28522858
/* We'll build a new physical relation for the index */
2853-
RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
2854-
InvalidMultiXactId);
2859+
RelationSetNewRelfilenode(iRel, persistence);
28552860

28562861
/* Initialize the index and rebuild */
28572862
/* Note: we do not need to re-establish pkey setting */

src/backend/catalog/storage.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#include "postgres.h"
2121

22+
#include "miscadmin.h"
23+
2224
#include "access/visibilitymap.h"
2325
#include "access/xact.h"
2426
#include "access/xlog.h"
@@ -290,6 +292,92 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
290292
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
291293
}
292294

295+
/*
296+
* Copy a fork's data, block by block.
*
* Every block of fork forkNum in src is read with smgrread, checked with
* PageIsVerified, WAL-logged with log_newpage when use_wal is set, passed
* to PageSetChecksumInplace, and then written to the same block number of
* dst with smgrextend. An ERROR with ERRCODE_DATA_CORRUPTED is reported
* for a source block that fails PageIsVerified. After the loop,
* smgrimmedsync is called on dst's fork when the relation is permanent or
* when the fork is the init fork of an unlogged relation.
297+
*/
298+
void
299+
RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
300+
ForkNumber forkNum, char relpersistence)
301+
{
302+
PGAlignedBlock buf;
303+
Page page;
304+
bool use_wal;
305+
bool copying_initfork;
306+
BlockNumber nblocks;
307+
BlockNumber blkno;
308+
309+
page = (Page) buf.data;
310+
311+
/*
312+
* The init fork for an unlogged relation in many respects has to be
313+
* treated the same as a normal relation: changes need to be WAL-logged and
314+
* it needs to be synced to disk.
315+
*/
316+
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
317+
forkNum == INIT_FORKNUM;
318+
319+
/*
320+
* We need to log the copied data in WAL iff WAL archiving/streaming is
321+
* enabled AND it's a permanent relation or an unlogged relation's init fork.
322+
*/
323+
use_wal = XLogIsNeeded() &&
324+
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
325+
326+
nblocks = smgrnblocks(src, forkNum);
327+
328+
for (blkno = 0; blkno < nblocks; blkno++)
329+
{
330+
/* If we got a cancel signal during the copy of the data, quit */
331+
CHECK_FOR_INTERRUPTS();
332+
333+
smgrread(src, forkNum, blkno, buf.data);
334+
335+
if (!PageIsVerified(page, blkno))
336+
ereport(ERROR,
337+
(errcode(ERRCODE_DATA_CORRUPTED),
338+
errmsg("invalid page in block %u of relation %s",
339+
blkno,
340+
relpathbackend(src->smgr_rnode.node,
341+
src->smgr_rnode.backend,
342+
forkNum))));
343+
344+
/*
345+
* WAL-log the copied page. Unfortunately we don't know what kind of a
346+
* page this is, so we have to log the full page including any unused
347+
* space.
348+
*/
349+
if (use_wal)
350+
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
351+
352+
PageSetChecksumInplace(page, blkno);
353+
354+
/*
355+
* Now write the page. We say isTemp = true even if it's not a temp
356+
* rel, because there's no need for smgr to schedule an fsync for this
357+
* write; we'll do it ourselves below.
358+
*/
359+
smgrextend(dst, forkNum, blkno, buf.data, true);
360+
}
361+
362+
/*
363+
* If the rel is permanent, or this is the init fork of an unlogged rel, we
364+
* must fsync the copied fork before commit. (For a temp rel, or any other
365+
* fork of an unlogged rel, we don't care since the data will be gone after
366+
* a crash anyway.)
367+
*
368+
* It's obvious that we must do this when not WAL-logging the copy. It's
369+
* less obvious that we have to do it even if we did WAL-log the copied
370+
* pages. The reason is that since we're copying outside shared buffers, a
371+
* CHECKPOINT occurring during the copy has no way to flush the previously
372+
* written data to disk (indeed it won't know the new rel even exists). A
373+
* crash later on would replay WAL from the checkpoint, therefore it
374+
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
375+
* here, they might still not be on disk when the crash occurs.
376+
*/
377+
if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
378+
smgrimmedsync(dst, forkNum);
379+
}
380+
293381
/*
294382
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
295383
*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy