diff --git a/.gitignore b/.gitignore index b46b4ef..4990aa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ .deps *.so *.o -ptrack--2.0.sql Dockerfile - diff --git a/Makefile b/Makefile index 8544f90..ba9ce1d 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,11 @@ MODULE_big = ptrack OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES) -EXTENSION = ptrack -EXTVERSION = 2.1 -DATA = ptrack.sql ptrack--2.0--2.1.sql -DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "ptrack - block-level incremental backup engine" -EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql +EXTENSION = ptrack +EXTVERSION = 2.2 +DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql TAP_TESTS = 1 @@ -22,13 +20,3 @@ top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - -$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql - cat $^ > $@ - -# temp-install: EXTRA_INSTALL=contrib/ptrack - -# check-tap: temp-install -# $(prove_check) - -# check: check-tap diff --git a/README.md b/README.md index 57a7c5c..39ea00b 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,8 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz * ptrack_version() — returns ptrack version string. * ptrack_init_lsn() — returns LSN of the last ptrack map initialization. - * ptrack_get_pagemapset('LSN') — returns a set of changed data files with bitmaps of changed blocks since specified LSN. + * ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since specified `start_lsn`. + * ptrack_get_change_stat(start_lsn pg_lsn) — returns statistic of changes (number of files, pages and size in MB) since specified `start_lsn`. Usage example: @@ -102,6 +103,10 @@ Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSIO * Do `ALTER EXTENSION 'ptrack' UPDATE;`. * Restart your server. +#### Upgrading from 2.1.* to 2.2.*: + +Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in the `ptrack.map` using pre 2.2 versions of `ptrack` is incompatible with newer versions. After extension upgrade and server restart old `ptrack.map` will be discarded with `WARNING` and initialized from the scratch. + ## Limitations 1. You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time. diff --git a/engine.c b/engine.c index 35cc14c..89217a9 100644 --- a/engine.c +++ b/engine.c @@ -156,6 +156,8 @@ ptrackMapInit(void) sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); +ptrack_map_reinit: + /* Remove old PTRACK_MMAP_PATH file, if exists */ if (ptrack_file_exists(ptrack_mmap_path)) durable_unlink(ptrack_mmap_path, LOG); @@ -175,18 +177,15 @@ ptrackMapInit(void) if (stat(ptrack_path, &stat_buf) == 0) { copy_file(ptrack_path, ptrack_mmap_path); - is_new_map = false; /* flag to check checksum */ + is_new_map = false; /* flag to check map file format and checksum */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); } else - { /* Create new file for PTRACK_MMAP_PATH */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); - } + + if (ptrack_fd < 0) + elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); #ifdef WIN32 { @@ -227,7 +226,20 @@ ptrackMapInit(void) elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path); /* Check ptrack version inside old ptrack map */ - /* No-op for now, but may be used for future compatibility checks */ + if (ptrack_map->version_num != PTRACK_VERSION_NUM) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d", + ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM), + errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path))); + + /* Clean up everything and try again */ + ptrackCleanFilesAndMap(); + + is_new_map = true; + goto ptrack_map_reinit; + } /* Check CRC */ INIT_CRC32C(crc); @@ -378,7 +390,7 @@ ptrackCheckpoint(void) /* * We are writing ptrack map values to file, but we want to simply map it * into the memory with mmap after a crash/restart. That way, we have to - * write values taking into account all paddings/allignments. + * write values taking into account all paddings/alignments. * * Write both magic and varsion_num at once. */ @@ -435,7 +447,7 @@ ptrackCheckpoint(void) * going to overflow. */ /* - * We should not have any allignment issues here, since sizeof() + * We should not have any alignment issues here, since sizeof() * takes into account all paddings for us. */ ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); @@ -446,7 +458,7 @@ ptrackCheckpoint(void) } } - /* Write if anythig left */ + /* Write if anything left */ if ((i + 1) % PTRACK_BUF_SIZE != 0) { size_t writesz = sizeof(pg_atomic_uint64) * j; @@ -641,48 +653,56 @@ void ptrack_mark_block(RelFileNodeBackend smgr_rnode, ForkNumber forknum, BlockNumber blocknum) { + PtBlockId bid; size_t hash; + size_t slot1; + size_t slot2; XLogRecPtr new_lsn; - PtBlockId bid; /* * We use pg_atomic_uint64 here only for alignment purposes, because - * pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build. + * pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build. */ pg_atomic_uint64 old_lsn; pg_atomic_uint64 old_init_lsn; - if (ptrack_map_size != 0 && (ptrack_map != NULL) && - smgr_rnode.backend == InvalidBackendId) /* do not track temporary - * relations */ - { - bid.relnode = smgr_rnode.node; - bid.forknum = forknum; - bid.blocknum = blocknum; - hash = BID_HASH_FUNC(bid); - - if (RecoveryInProgress()) - new_lsn = GetXLogReplayRecPtr(NULL); - else - new_lsn = GetXLogInsertRecPtr(); - - old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]); + if (ptrack_map_size == 0 + || ptrack_map == NULL + || smgr_rnode.backend != InvalidBackendId) /* do not track temporary + * relations */ + return; - /* Atomically assign new init LSN value */ - old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + bid.relnode = smgr_rnode.node; + bid.forknum = forknum; + bid.blocknum = blocknum; - if (old_init_lsn.value == InvalidXLogRecPtr) - { - elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); + hash = BID_HASH_FUNC(bid); + slot1 = hash % PtrackContentNblocks; + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; - while (old_init_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); - } + if (RecoveryInProgress()) + new_lsn = GetXLogReplayRecPtr(NULL); + else + new_lsn = GetXLogInsertRecPtr(); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn); + /* Atomically assign new init LSN value */ + old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + if (old_init_lsn.value == InvalidXLogRecPtr) + { + elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); - /* Atomically assign new LSN value */ - while (old_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn)); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash])); + while (old_init_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); } + + /* Atomically assign new LSN value to the first slot */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn)); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1])); + + /* And to the second */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn)); } diff --git a/engine.h b/engine.h index 34cf15f..e46f803 100644 --- a/engine.h +++ b/engine.h @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr { /* * Three magic bytes (+ \0) to be sure, that we are reading ptrack.map - * with a right PtrackMapHdr strucutre. + * with a right PtrackMapHdr structure. */ char magic[PTRACK_MAGIC_SIZE]; @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr typedef PtrackMapHdr * PtrackMap; -/* TODO: check MAXALIGN usage below */ /* Number of elements in ptrack map (LSN array) */ #define PtrackContentNblocks \ ((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64)) @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap; /* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */ #define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c)) -/* Map block address 'bid' to map slot */ +/* Block address 'bid' to hash. To get slot position in map should be divided + * with '% PtrackContentNblocks' */ #define BID_HASH_FUNC(bid) \ - (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks) + (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0))) /* * Per process pointer to shared ptrack_map diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql new file mode 100644 index 0000000..b09c15e --- /dev/null +++ b/ptrack--2.1--2.2.sql @@ -0,0 +1,35 @@ +/* ptrack/ptrack--2.1--2.2.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file.\ quit + +DROP FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn); +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) +RETURNS TABLE (path text, + pagecount bigint, + pagemap bytea) +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn) + RETURNS TABLE ( + files bigint, + pages numeric, + "size, MB" numeric + ) AS +$func$ +DECLARE +block_size bigint; +BEGIN + block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); + + RETURN QUERY + SELECT changed_files, + changed_pages, + block_size * changed_pages / (1024.0 * 1024) + FROM + (SELECT count(path) AS changed_files, + sum(pagecount) AS changed_pages + FROM ptrack_get_pagemapset(start_lsn)) s; +END +$func$ LANGUAGE plpgsql; diff --git a/ptrack.sql b/ptrack--2.1.sql similarity index 94% rename from ptrack.sql rename to ptrack--2.1.sql index 80ae927..c963964 100644 --- a/ptrack.sql +++ b/ptrack--2.1.sql @@ -1,3 +1,5 @@ +/* ptrack/ptrack--2.1.sql */ + -- Complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION ptrack" to load this file. \quit diff --git a/ptrack.c b/ptrack.c index d897ecf..40630e7 100644 --- a/ptrack.c +++ b/ptrack.c @@ -137,7 +137,7 @@ _PG_fini(void) /* * Ptrack follow up for copydir() routine. It parses database OID - * and tablespace OID from path string. We do not need to recoursively + * and tablespace OID from path string. We do not need to recursively * walk subdirs here, copydir() will do it for us if needed. */ static void @@ -420,11 +420,11 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset); Datum ptrack_get_pagemapset(PG_FUNCTION_ARGS) { + PtScanCtx *ctx; FuncCallContext *funcctx; - PtScanCtx *ctx; MemoryContext oldcontext; - XLogRecPtr update_lsn; datapagemap_t pagemap; + int64 pagecount = 0; char gather_path[MAXPGPATH]; /* Exit immediately if there is no map */ @@ -445,12 +445,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) /* Make tuple descriptor */ #if PG_VERSION_NUM >= 120000 - tupdesc = CreateTemplateTupleDesc(2); + tupdesc = CreateTemplateTupleDesc(3); #else - tupdesc = CreateTemplateTupleDesc(2, false); + tupdesc = CreateTemplateTupleDesc(3, false); #endif TupleDescInitEntry(tupdesc, (AttrNumber) 1, "path", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagemap", BYTEAOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagecount", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pagemap", BYTEAOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = ctx; @@ -486,14 +487,20 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) while (true) { + size_t hash; + size_t slot1; + size_t slot2; + XLogRecPtr update_lsn1; + XLogRecPtr update_lsn2; + /* Stop traversal if there are no more segments */ if (ctx->bid.blocknum > ctx->relsize) { /* We completed a segment and there is a bitmap to return */ if (pagemap.bitmap != NULL) { - Datum values[2]; - bool nulls[2] = {false}; + Datum values[3]; + bool nulls[3] = {false}; char pathname[MAXPGPATH]; bytea *result = NULL; Size result_sz = pagemap.bitmapsize + VARHDRSZ; @@ -507,11 +514,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) strcpy(pathname, ctx->relpath); values[0] = CStringGetTextDatum(pathname); - values[1] = PointerGetDatum(result); + values[1] = Int64GetDatum(pagecount); + values[2] = PointerGetDatum(result); pfree(pagemap.bitmap); pagemap.bitmap = NULL; pagemap.bitmapsize = 0; + pagecount = 0; htup = heap_form_tuple(funcctx->tuple_desc, values, nulls); if (htup) @@ -525,16 +534,34 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) } } - update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]); + hash = BID_HASH_FUNC(ctx->bid); + slot1 = hash % PtrackContentNblocks; + + update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); - if (update_lsn != InvalidXLogRecPtr) - elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s", - (uint32) (update_lsn >> 32), (uint32) update_lsn, + if (update_lsn1 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, ctx->bid.blocknum, ctx->relpath); - /* Block has been changed since specified LSN. Mark it in the bitmap */ - if (update_lsn >= ctx->lsn) - datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + /* Only probe the second slot if the first one is marked */ + if (update_lsn1 >= ctx->lsn) + { + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; + update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + + if (update_lsn2 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn2, + ctx->bid.blocknum, ctx->relpath); + + /* Block has been changed since specified LSN. Mark it in the bitmap */ + if (update_lsn2 >= ctx->lsn) + { + pagecount += 1; + datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + } + } ctx->bid.blocknum += 1; } diff --git a/ptrack.control b/ptrack.control index d2d8792..ec0af9d 100644 --- a/ptrack.control +++ b/ptrack.control @@ -1,5 +1,5 @@ # ptrack extension comment = 'block-level incremental backup engine' -default_version = '2.1' +default_version = '2.2' module_pathname = '$libdir/ptrack' relocatable = true diff --git a/ptrack.h b/ptrack.h index 7e6b6e5..d205115 100644 --- a/ptrack.h +++ b/ptrack.h @@ -22,9 +22,9 @@ #include "utils/relcache.h" /* Ptrack version as a string */ -#define PTRACK_VERSION "2.1" +#define PTRACK_VERSION "2.2" /* Ptrack version as a number */ -#define PTRACK_VERSION_NUM 210 +#define PTRACK_VERSION_NUM 220 /* * Structure identifying block on the disk. diff --git a/t/001_basic.pl b/t/001_basic.pl index 1abc788..bac81f2 100644 --- a/t/001_basic.pl +++ b/t/001_basic.pl @@ -10,7 +10,7 @@ use TestLib; use Test::More; -plan tests => 23; +plan tests => 24; my $node; my $res; @@ -115,6 +115,10 @@ qr/$rel_oid/, 'ptrack pagemapset should contain new relation oid'); +# Check change stats +$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')"); +is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); + # We should be able to change ptrack map size (but loose all changes) $node->append_conf( 'postgresql.conf', q{ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy