Skip to content

Commit 05b555d

Browse files
committed
Fix tar files emitted by pg_dump and pg_basebackup to be POSIX conformant.
Both programs got the "magic" string wrong, causing standard-conforming tar implementations to believe the output was just legacy tar format without any POSIX extensions. This doesn't actually matter that much, especially since pg_dump failed to fill the POSIX fields anyway, but still there is little point in emitting tar format if we can't be compliant with the standard. In addition, pg_dump failed to write the EOF marker correctly (there should be 2 blocks of zeroes not just one), pg_basebackup put the numeric group ID in the wrong place, and both programs had a pretty brain-dead idea of how to compute the checksum. Fix all that and improve the comments a bit.

pg_restore is modified to accept either the correct POSIX-compliant "magic" string or the previous value. This part of the change will need to be back-patched to avoid an unnecessary compatibility break when a previous version tries to read tar-format output from 9.3 pg_dump.

Brian Weaver and Tom Lane
1 parent edc9109 commit 05b555d

File tree

4 files changed

+110
-69
lines changed

4 files changed

+110
-69
lines changed

doc/src/sgml/protocol.sgml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are:
17591759
After the second regular result set, one or more CopyResponse results
17601760
will be sent, one for PGDATA and one for each additional tablespace other
17611761
than <literal>pg_default</> and <literal>pg_global</>. The data in
1762-
the CopyResponse results will be a tar format (using ustar00
1763-
extensions) dump of the tablespace contents. After the tar data is
1764-
complete, a final ordinary result set will be sent.
1762+
the CopyResponse results will be a tar format (following the
1763+
<quote>ustar interchange format</> specified in the POSIX 1003.1-2008
1764+
standard) dump of the tablespace contents, except that the two trailing
1765+
blocks of zeroes specified in the standard are omitted.
1766+
After the tar data is complete, a final ordinary result set will be sent.
17651767
</para>
17661768

17671769
<para>

src/backend/replication/basebackup.c

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ sendFileWithContent(const char *filename, const char *content)
568568

569569
/*
570570
* Include all files from the given directory in the output tar stream. If
571-
* 'sizeonly' is true, we just calculate a total length and return ig, without
571+
* 'sizeonly' is true, we just calculate a total length and return it, without
572572
* actually sending anything.
573573
*/
574574
static int64
@@ -763,11 +763,16 @@ _tarChecksum(char *header)
763763
int i,
764764
sum;
765765

766-
sum = 0;
766+
/*
767+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
768+
* treating the bytes as unsigned, and treating the checksum field (at
769+
* offset 148) as though it contained 8 spaces.
770+
*/
771+
sum = 8 * ' '; /* presumed value for checksum field */
767772
for (i = 0; i < 512; i++)
768773
if (i < 148 || i >= 156)
769774
sum += 0xFF & header[i];
770-
return sum + 256; /* Assume 8 blanks in checksum field */
775+
return sum;
771776
}
772777

773778
/* Given the member, write the TAR header & send the file */
@@ -846,9 +851,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
846851
struct stat * statbuf)
847852
{
848853
char h[512];
849-
int lastSum = 0;
850-
int sum;
851854

855+
/*
856+
* Note: most of the fields in a tar header are not supposed to be
857+
* null-terminated. We use sprintf, which will write a null after the
858+
* required bytes; that null goes into the first byte of the next field.
859+
* This is okay as long as we fill the fields in order.
860+
*/
852861
memset(h, 0, sizeof(h));
853862

854863
/* Name 100 */
@@ -860,8 +869,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
860869
* indicated in the tar format by adding a slash at the end of the
861870
* name, the same as for regular directories.
862871
*/
863-
h[strlen(filename)] = '/';
864-
h[strlen(filename) + 1] = '\0';
872+
int flen = strlen(filename);
873+
874+
flen = Min(flen, 99);
875+
h[flen] = '/';
876+
h[flen + 1] = '\0';
865877
}
866878

867879
/* Mode 8 */
@@ -871,9 +883,9 @@ _tarWriteHeader(const char *filename, const char *linktarget,
871883
sprintf(&h[108], "%07o ", statbuf->st_uid);
872884

873885
/* Group 8 */
874-
sprintf(&h[117], "%07o ", statbuf->st_gid);
886+
sprintf(&h[116], "%07o ", statbuf->st_gid);
875887

876-
/* File size 12 - 11 digits, 1 space, no NUL */
888+
/* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
877889
if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
878890
/* Symbolic link or directory has size zero */
879891
print_val(&h[124], 0, 8, 11);
@@ -884,13 +896,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
884896
/* Mod Time 12 */
885897
sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
886898

887-
/* Checksum 8 */
888-
sprintf(&h[148], "%06o ", lastSum);
899+
/* Checksum 8 cannot be calculated until we've filled all other fields */
889900

890901
if (linktarget != NULL)
891902
{
892903
/* Type - Symbolic link */
893904
sprintf(&h[156], "2");
905+
/* Link Name 100 */
894906
sprintf(&h[157], "%.99s", linktarget);
895907
}
896908
else if (S_ISDIR(statbuf->st_mode))
@@ -900,10 +912,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
900912
/* Type - regular file */
901913
sprintf(&h[156], "0");
902914

903-
/* Link tag 100 (NULL) */
915+
/* Magic 6 */
916+
sprintf(&h[257], "ustar");
904917

905-
/* Magic 6 + Version 2 */
906-
sprintf(&h[257], "ustar00");
918+
/* Version 2 */
919+
sprintf(&h[263], "00");
907920

908921
/* User 32 */
909922
/* XXX: Do we need to care about setting correct username? */
@@ -913,17 +926,21 @@ _tarWriteHeader(const char *filename, const char *linktarget,
913926
/* XXX: Do we need to care about setting correct group name? */
914927
sprintf(&h[297], "%.31s", "postgres");
915928

916-
/* Maj Dev 8 */
917-
sprintf(&h[329], "%6o ", 0);
929+
/* Major Dev 8 */
930+
sprintf(&h[329], "%07o ", 0);
918931

919-
/* Min Dev 8 */
920-
sprintf(&h[337], "%6o ", 0);
932+
/* Minor Dev 8 */
933+
sprintf(&h[337], "%07o ", 0);
921934

922-
while ((sum = _tarChecksum(h)) != lastSum)
923-
{
924-
sprintf(&h[148], "%06o ", sum);
925-
lastSum = sum;
926-
}
935+
/* Prefix 155 - not used, leave as nulls */
936+
937+
/*
938+
* We mustn't overwrite the next field while inserting the checksum.
939+
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
940+
* 6 digits, a space, and a null, which is legal per POSIX.
941+
*/
942+
sprintf(&h[148], "%06o ", _tarChecksum(h));
927943

944+
/* Now send the completed header. */
928945
pq_putmessage('d', h, 512);
929946
}

src/bin/pg_dump/pg_backup_tar.c

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH)
882882

883883
tarClose(AH, th);
884884

885-
/* Add a block of NULLs since it's de-rigeur. */
886-
for (i = 0; i < 512; i++)
885+
/*
886+
* EOF marker for tar files is two blocks of NULLs.
887+
*/
888+
for (i = 0; i < 512 * 2; i++)
887889
{
888890
if (fputc(0, ctx->tarFH) == EOF)
889891
exit_horribly(modulename,
@@ -1032,11 +1034,16 @@ _tarChecksum(char *header)
10321034
int i,
10331035
sum;
10341036

1035-
sum = 0;
1037+
/*
1038+
* Per POSIX, the checksum is the simple sum of all bytes in the header,
1039+
* treating the bytes as unsigned, and treating the checksum field (at
1040+
* offset 148) as though it contained 8 spaces.
1041+
*/
1042+
sum = 8 * ' '; /* presumed value for checksum field */
10361043
for (i = 0; i < 512; i++)
10371044
if (i < 148 || i >= 156)
10381045
sum += 0xFF & header[i];
1039-
return sum + 256; /* Assume 8 blanks in checksum field */
1046+
return sum;
10401047
}
10411048

10421049
bool
@@ -1050,11 +1057,15 @@ isValidTarHeader(char *header)
10501057
if (sum != chk)
10511058
return false;
10521059

1053-
/* POSIX format */
1054-
if (strncmp(&header[257], "ustar00", 7) == 0)
1060+
/* POSIX tar format */
1061+
if (memcmp(&header[257], "ustar\0", 6) == 0 &&
1062+
memcmp(&header[263], "00", 2) == 0)
10551063
return true;
1056-
/* older format */
1057-
if (strncmp(&header[257], "ustar ", 7) == 0)
1064+
/* GNU tar format */
1065+
if (memcmp(&header[257], "ustar \0", 8) == 0)
1066+
return true;
1067+
/* not-quite-POSIX format written by pre-9.3 pg_dump */
1068+
if (memcmp(&header[257], "ustar00\0", 8) == 0)
10581069
return true;
10591070

10601071
return false;
@@ -1329,63 +1340,71 @@ static void
13291340
_tarWriteHeader(TAR_MEMBER *th)
13301341
{
13311342
char h[512];
1332-
int lastSum = 0;
1333-
int sum;
13341343

1344+
/*
1345+
* Note: most of the fields in a tar header are not supposed to be
1346+
* null-terminated. We use sprintf, which will write a null after the
1347+
* required bytes; that null goes into the first byte of the next field.
1348+
* This is okay as long as we fill the fields in order.
1349+
*/
13351350
memset(h, 0, sizeof(h));
13361351

13371352
/* Name 100 */
13381353
sprintf(&h[0], "%.99s", th->targetFile);
13391354

13401355
/* Mode 8 */
1341-
sprintf(&h[100], "100600 ");
1356+
sprintf(&h[100], "0000600 ");
13421357

13431358
/* User ID 8 */
1344-
sprintf(&h[108], "004000 ");
1359+
sprintf(&h[108], "0004000 ");
13451360

13461361
/* Group 8 */
1347-
sprintf(&h[116], "002000 ");
1362+
sprintf(&h[116], "0002000 ");
13481363

1349-
/* File size 12 - 11 digits, 1 space, no NUL */
1364+
/* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
13501365
print_val(&h[124], th->fileLen, 8, 11);
13511366
sprintf(&h[135], " ");
13521367

13531368
/* Mod Time 12 */
13541369
sprintf(&h[136], "%011o ", (int) time(NULL));
13551370

1356-
/* Checksum 8 */
1357-
sprintf(&h[148], "%06o ", lastSum);
1371+
/* Checksum 8 cannot be calculated until we've filled all other fields */
13581372

13591373
/* Type - regular file */
13601374
sprintf(&h[156], "0");
13611375

1362-
/* Link tag 100 (NULL) */
1376+
/* Link Name 100 (leave as nulls) */
13631377

1364-
/* Magic 6 + Version 2 */
1365-
sprintf(&h[257], "ustar00");
1378+
/* Magic 6 */
1379+
sprintf(&h[257], "ustar");
1380+
1381+
/* Version 2 */
1382+
sprintf(&h[263], "00");
13661383

1367-
#if 0
13681384
/* User 32 */
1369-
sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do
1370-
* I need to? */
1385+
/* XXX: Do we need to care about setting correct username? */
1386+
sprintf(&h[265], "%.31s", "postgres");
13711387

13721388
/* Group 32 */
1373-
sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I
1374-
* need to? */
1389+
/* XXX: Do we need to care about setting correct group name? */
1390+
sprintf(&h[297], "%.31s", "postgres");
13751391

1376-
/* Maj Dev 8 */
1377-
sprintf(&h[329], "%6o ", 0);
1392+
/* Major Dev 8 */
1393+
sprintf(&h[329], "%07o ", 0);
13781394

1379-
/* Min Dev 8 */
1380-
sprintf(&h[337], "%6o ", 0);
1381-
#endif
1395+
/* Minor Dev 8 */
1396+
sprintf(&h[337], "%07o ", 0);
13821397

1383-
while ((sum = _tarChecksum(h)) != lastSum)
1384-
{
1385-
sprintf(&h[148], "%06o ", sum);
1386-
lastSum = sum;
1387-
}
1398+
/* Prefix 155 - not used, leave as nulls */
1399+
1400+
/*
1401+
* We mustn't overwrite the next field while inserting the checksum.
1402+
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
1403+
* 6 digits, a space, and a null, which is legal per POSIX.
1404+
*/
1405+
sprintf(&h[148], "%06o ", _tarChecksum(h));
13881406

1407+
/* Now write the completed header. */
13891408
if (fwrite(h, 1, 512, th->tarFH) != 512)
13901409
exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno));
13911410
}

src/bin/pg_dump/pg_backup_tar.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,31 @@
11
/*
22
* src/bin/pg_dump/pg_backup_tar.h
33
*
4-
* TAR Header
4+
* TAR Header (see "ustar interchange format" in POSIX 1003.1)
55
*
66
* Offset Length Contents
77
* 0 100 bytes File name ('\0' terminated, 99 maximum length)
88
* 100 8 bytes File mode (in octal ascii)
99
* 108 8 bytes User ID (in octal ascii)
1010
* 116 8 bytes Group ID (in octal ascii)
11-
* 124 12 bytes File size (s) (in octal ascii)
12-
* 136 12 bytes Modify time (in octal ascii)
11+
* 124 12 bytes File size (in octal ascii)
12+
* 136 12 bytes Modify time (Unix timestamp in octal ascii)
1313
* 148 8 bytes Header checksum (in octal ascii)
14-
* 156 1 bytes Link flag
15-
* 157 100 bytes Linkname ('\0' terminated, 99 maximum length)
16-
* 257 8 bytes Magic ("ustar \0")
14+
* 156 1 bytes Type flag (see below)
15+
* 157 100 bytes Linkname, if symlink ('\0' terminated, 99 maximum length)
16+
* 257 6 bytes Magic ("ustar\0")
17+
* 263 2 bytes Version ("00")
1718
* 265 32 bytes User name ('\0' terminated, 31 maximum length)
1819
* 297 32 bytes Group name ('\0' terminated, 31 maximum length)
1920
* 329 8 bytes Major device ID (in octal ascii)
2021
* 337 8 bytes Minor device ID (in octal ascii)
21-
* 345 167 bytes Padding
22-
* 512 (s+p)bytes File contents (s+p) := (((s) + 511) & ~511), round up to 512 bytes
22+
* 345 155 bytes File name prefix (not used in our implementation)
23+
* 500 12 bytes Padding
24+
*
25+
* 512 (s+p)bytes File contents, padded out to 512-byte boundary
2326
*/
2427

25-
/* The linkflag defines the type of file */
28+
/* The type flag defines the type of file */
2629
#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compatible */
2730
#define LF_NORMAL '0' /* Normal disk file */
2831
#define LF_LINK '1' /* Link to previously dumped file */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy