
Commit 9e257a1

Add parallel pg_dump option.
New infrastructure is added which creates a set number of workers (threads on Windows, forked processes on Unix). Jobs are then handed out to these workers by the master process as needed. pg_restore is adjusted to use this new infrastructure in place of the old setup which created a new worker for each step on the fly.

Parallel dumps acquire a snapshot clone in order to stay consistent, if available.

The parallel option is selected by the -j / --jobs command line parameter of pg_dump.

Joachim Wieland, lightly editorialized by Andrew Dunstan.
1 parent 3b91fe1 commit 9e257a1

22 files changed, +2765 -819 lines
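The master/worker dispatch the commit message describes can be sketched in miniature. This is a hypothetical, heavily simplified illustration, not pg_dump's actual scheduler: it uses fixed round-robin hand-out over pipes instead of "next idle worker" dispatch, covers only the Unix fork() path (the commit also has a Windows thread path), and squares a job id as a stand-in for dumping one table.

```c
/*
 * Toy sketch of the new infrastructure's shape: a master forks a set
 * number of workers up front and hands jobs to them over pipes, rather
 * than forking a new worker for each step on the fly.
 */
#include <sys/wait.h>
#include <unistd.h>

long
run_pool(int nworkers, int njobs)
{
    int     towork[nworkers][2];    /* master -> worker: job ids */
    int     result[2];              /* workers -> master: results */
    long    sum = 0;
    int     done;

    if (pipe(result) != 0)
        return -1;

    for (int w = 0; w < nworkers; w++)
    {
        if (pipe(towork[w]) != 0)
            return -1;
        if (fork() == 0)
        {
            int     job;

            /* Close write ends we don't use, so EOFs propagate. */
            for (int j = 0; j <= w; j++)
                close(towork[j][1]);
            close(result[0]);
            /* Worker: process job ids until the master closes our pipe. */
            while (read(towork[w][0], &job, sizeof(job)) == sizeof(job))
            {
                done = job * job;   /* stand-in for "dump one table" */
                write(result[1], &done, sizeof(done));
            }
            _exit(0);
        }
        close(towork[w][0]);
    }
    close(result[1]);               /* master keeps only the read end */

    /* Master: hand out jobs, then close the pipes to signal "no more". */
    for (int job = 0; job < njobs; job++)
        write(towork[job % nworkers][1], &job, sizeof(job));
    for (int w = 0; w < nworkers; w++)
        close(towork[w][1]);

    /* Collect results until every worker has exited. */
    while (read(result[0], &done, sizeof(done)) == sizeof(done))
        sum += done;
    close(result[0]);
    while (wait(NULL) > 0)
        ;
    return sum;
}
```

The design point mirrored here is that the worker pool is created once and reused, and that end-of-work is signaled simply by closing the job pipes.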

doc/src/sgml/backup.sgml

Lines changed: 18 additions & 0 deletions
@@ -310,6 +310,24 @@ pg_restore -d <replaceable class="parameter">dbname</replaceable> <replaceable c
     with one of the other two approaches.
    </para>
 
+   <formalpara>
+    <title>Use <application>pg_dump</>'s parallel dump feature.</title>
+    <para>
+     To speed up the dump of a large database, you can use
+     <application>pg_dump</application>'s parallel mode. This will dump
+     multiple tables at the same time. You can control the degree of
+     parallelism with the <command>-j</command> parameter. Parallel dumps
+     are only supported for the "directory" archive format.
+
+<programlisting>
+pg_dump -j <replaceable class="parameter">num</replaceable> -F d -f <replaceable class="parameter">out.dir</replaceable> <replaceable class="parameter">dbname</replaceable>
+</programlisting>
+
+     You can use <command>pg_restore -j</command> to restore a dump in parallel.
+     This will work for any archive of either the "custom" or the "directory"
+     archive mode, whether or not it has been created with <command>pg_dump -j</command>.
+    </para>
+   </formalpara>
   </sect2>
  </sect1>

doc/src/sgml/perform.sgml

Lines changed: 9 additions & 0 deletions
@@ -1433,6 +1433,15 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
     base backup.
    </para>
   </listitem>
+  <listitem>
+   <para>
+    Experiment with the parallel dump and restore modes of both
+    <application>pg_dump</> and <application>pg_restore</> and find the
+    optimal number of concurrent jobs to use. Dumping and restoring in
+    parallel by means of the <option>-j</> option should give you
+    significantly higher performance than the serial mode.
+   </para>
+  </listitem>
   <listitem>
    <para>
     Consider whether the whole dump should be restored as a single

doc/src/sgml/ref/pg_dump.sgml

Lines changed: 84 additions & 5 deletions
@@ -73,10 +73,12 @@ PostgreSQL documentation
    transfer mechanism. <application>pg_dump</application> can be used to
    backup an entire database, then <application>pg_restore</application>
    can be used to examine the archive and/or select which parts of the
-   database are to be restored. The most flexible output file format is
-   the <quote>custom</quote> format (<option>-Fc</option>). It allows
-   for selection and reordering of all archived items, and is compressed
-   by default.
+   database are to be restored. The most flexible output file formats are
+   the <quote>custom</quote> format (<option>-Fc</option>) and the
+   <quote>directory</quote> format (<option>-Fd</option>). They allow
+   for selection and reordering of all archived items, support parallel
+   restoration, and are compressed by default. The <quote>directory</quote>
+   format is the only format that supports parallel dumps.
   </para>
 
   <para>
@@ -251,7 +253,8 @@ PostgreSQL documentation
        can read. A directory format archive can be manipulated with
        standard Unix tools; for example, files in an uncompressed archive
        can be compressed with the <application>gzip</application> tool.
-       This format is compressed by default.
+       This format is compressed by default and also supports parallel
+       dumps.
       </para>
      </listitem>
     </varlistentry>
@@ -285,6 +288,62 @@ PostgreSQL documentation
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><option>-j <replaceable class="parameter">njobs</replaceable></></term>
+     <term><option>--jobs=<replaceable class="parameter">njobs</replaceable></></term>
+     <listitem>
+      <para>
+       Run the dump in parallel by dumping <replaceable class="parameter">njobs</replaceable>
+       tables simultaneously. This option reduces the time of the dump but it also
+       increases the load on the database server. You can only use this option with the
+       directory output format because this is the only output format where multiple processes
+       can write their data at the same time.
+      </para>
+      <para>
+       <application>pg_dump</> will open <replaceable class="parameter">njobs</replaceable>
+       + 1 connections to the database, so make sure your <xref linkend="guc-max-connections">
+       setting is high enough to accommodate all connections.
+      </para>
+      <para>
+       Requesting exclusive locks on database objects while running a parallel dump could
+       cause the dump to fail. The reason is that the <application>pg_dump</> master process
+       requests shared locks on the objects that the worker processes are going to dump later,
+       in order to make sure that nobody deletes them while the dump is running.
+       If another client then requests an exclusive lock on a table, that lock will not be
+       granted but will be queued waiting for the shared lock of the master process to be
+       released. Consequently any other access to the table will not be granted either and
+       will queue after the exclusive lock request. This includes the worker process trying
+       to dump the table. Without any precautions this would be a classic deadlock situation.
+       To detect this conflict, the <application>pg_dump</> worker process requests another
+       shared lock using the <literal>NOWAIT</> option. If the worker process is not granted
+       this shared lock, somebody else must have requested an exclusive lock in the meantime
+       and there is no way to continue with the dump, so <application>pg_dump</> has no choice
+       but to abort the dump.
+      </para>
+      <para>
+       For a consistent backup, the database server needs to support synchronized snapshots,
+       a feature that was introduced in <productname>PostgreSQL</productname> 9.2. With this
+       feature, database clients can ensure they see the same dataset even though they use
+       different connections. <command>pg_dump -j</command> uses multiple database
+       connections; it connects to the database once with the master process and
+       once again for each worker job. Without the synchronized snapshot feature, the
+       different worker jobs wouldn't be guaranteed to see the same data in each connection,
+       which could lead to an inconsistent backup.
+      </para>
+      <para>
+       If you want to run a parallel dump of a pre-9.2 server, you need to make sure that the
+       database content doesn't change between the time the master connects to the
+       database and the time the last worker job has connected to the database. The easiest
+       way to do this is to halt any data modifying processes (DDL and DML) accessing the
+       database before starting the backup. You also need to specify the
+       <option>--no-synchronized-snapshots</option> parameter when running
+       <command>pg_dump -j</command> against a pre-9.2 <productname>PostgreSQL</productname>
+       server.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><option>-n <replaceable class="parameter">schema</replaceable></option></term>
      <term><option>--schema=<replaceable class="parameter">schema</replaceable></option></term>
@@ -690,6 +749,17 @@ PostgreSQL documentation
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><option>--no-synchronized-snapshots</></term>
+     <listitem>
+      <para>
+       This option allows running <command>pg_dump -j</> against a pre-9.2
+       server; see the documentation of the <option>-j</option> parameter
+       for more details.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><option>--no-tablespaces</option></term>
      <listitem>
@@ -1082,6 +1152,15 @@ CREATE DATABASE foo WITH TEMPLATE template0;
 </screen>
   </para>
 
+  <para>
+   To dump a database into a directory-format archive in parallel with
+   5 worker jobs:
+
+<screen>
+<prompt>$</prompt> <userinput>pg_dump -Fd mydb -j 5 -f dumpdir</userinput>
+</screen>
+  </para>
+
   <para>
    To reload an archive file into a (freshly created) database named
   <literal>newdb</>:

src/bin/pg_dump/Makefile

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
 
 OBJS=	pg_backup_archiver.o pg_backup_db.o pg_backup_custom.o \
-	pg_backup_null.o pg_backup_tar.o \
+	pg_backup_null.o pg_backup_tar.o parallel.o \
 	pg_backup_directory.o dumputils.o compress_io.o $(WIN32RES)
 
 KEYWRDOBJS = keywords.o kwlookup.o

src/bin/pg_dump/compress_io.c

Lines changed: 10 additions & 0 deletions
@@ -54,6 +54,7 @@
 
 #include "compress_io.h"
 #include "dumputils.h"
+#include "parallel.h"
 
 /*----------------------
  * Compressor API
@@ -182,6 +183,9 @@ size_t
 WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
 				   const void *data, size_t dLen)
 {
+	/* Are we aborting? */
+	checkAborting(AH);
+
 	switch (cs->comprAlg)
 	{
 		case COMPR_ALG_LIBZ:
@@ -351,6 +355,9 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
 	/* no minimal chunk size for zlib */
 	while ((cnt = readF(AH, &buf, &buflen)))
 	{
+		/* Are we aborting? */
+		checkAborting(AH);
+
 		zp->next_in = (void *) buf;
 		zp->avail_in = cnt;
 
@@ -411,6 +418,9 @@ ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF)
 
 	while ((cnt = readF(AH, &buf, &buflen)))
 	{
+		/* Are we aborting? */
+		checkAborting(AH);
+
 		ahwrite(buf, 1, cnt, AH);
 	}
 
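The `checkAborting(AH)` calls inserted above poll for a pending abort so that long compression and copy loops stop promptly instead of running to completion. The polling pattern itself can be shown in a self-contained sketch; the names and the flag mechanism here are illustrative stand-ins, not pg_dump's (whose checkAborting() exits rather than returning a status).

```c
#include <signal.h>

/* Set asynchronously (e.g. by a signal handler, or by the master
 * telling a worker to quit) when the run should be abandoned. */
static volatile sig_atomic_t wantAbort = 0;

/* Illustrative stand-in for checkAborting(): report instead of exit. */
static int
check_aborting(void)
{
    return wantAbort != 0;
}

/*
 * A long-running loop that polls the flag once per chunk, like the
 * read/write loops in compress_io.c. The abort_after parameter lets
 * us simulate the flag being raised mid-run; pass 0 for "never".
 */
long
process_chunks(int nchunks, int abort_after)
{
    long    done = 0;

    for (int i = 0; i < nchunks; i++)
    {
        /* Are we aborting? */
        if (check_aborting())
            break;
        done++;                     /* stand-in for one compress/write step */
        if (abort_after > 0 && done == abort_after)
            wantAbort = 1;          /* simulate an abort request arriving */
    }
    return done;
}
```

Checking once per chunk keeps the overhead negligible while bounding the latency of an abort to a single chunk's worth of work.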
src/bin/pg_dump/dumputils.c

Lines changed: 73 additions & 13 deletions
@@ -38,6 +38,7 @@ static struct
 } on_exit_nicely_list[MAX_ON_EXIT_NICELY];
 
 static int	on_exit_nicely_index;
+void		(*on_exit_msg_func) (const char *modulename, const char *fmt, va_list ap) = vwrite_msg;
 
 #define supports_grant_options(version) ((version) >= 70400)
 
@@ -48,11 +49,21 @@ static bool parseAclItem(const char *item, const char *type,
 static char *copyAclUserName(PQExpBuffer output, char *input);
 static void AddAcl(PQExpBuffer aclbuf, const char *keyword,
 	   const char *subname);
+static PQExpBuffer getThreadLocalPQExpBuffer(void);
 
 #ifdef WIN32
+static void shutdown_parallel_dump_utils(int code, void *unused);
 static bool parallel_init_done = false;
 static DWORD tls_index;
 static DWORD mainThreadId;
+
+static void
+shutdown_parallel_dump_utils(int code, void *unused)
+{
+	/* Call the cleanup function only from the main thread */
+	if (mainThreadId == GetCurrentThreadId())
+		WSACleanup();
+}
 #endif
 
 void
@@ -61,23 +72,29 @@ init_parallel_dump_utils(void)
 #ifdef WIN32
 	if (!parallel_init_done)
 	{
+		WSADATA		wsaData;
+		int			err;
+
 		tls_index = TlsAlloc();
-		parallel_init_done = true;
 		mainThreadId = GetCurrentThreadId();
+		err = WSAStartup(MAKEWORD(2, 2), &wsaData);
+		if (err != 0)
+		{
+			fprintf(stderr, _("WSAStartup failed: %d\n"), err);
+			exit_nicely(1);
+		}
+		on_exit_nicely(shutdown_parallel_dump_utils, NULL);
+		parallel_init_done = true;
 	}
 #endif
 }
 
 /*
- * Quotes input string if it's not a legitimate SQL identifier as-is.
- *
- * Note that the returned string must be used before calling fmtId again,
- * since we re-use the same return buffer each time. Non-reentrant but
- * reduces memory leakage. (On Windows the memory leakage will be one buffer
- * per thread, which is at least better than one per call).
+ * Non-reentrant but reduces memory leakage. (On Windows the memory leakage
+ * will be one buffer per thread, which is at least better than one per call).
  */
-const char *
-fmtId(const char *rawid)
+static PQExpBuffer
+getThreadLocalPQExpBuffer(void)
 {
 	/*
 	 * The Tls code goes awry if we use a static var, so we provide for both
@@ -86,9 +103,6 @@ fmtId(const char *rawid)
 	static PQExpBuffer s_id_return = NULL;
 	PQExpBuffer id_return;
 
-	const char *cp;
-	bool		need_quotes = false;
-
 #ifdef WIN32
 	if (parallel_init_done)
 		id_return = (PQExpBuffer) TlsGetValue(tls_index);	/* 0 when not set */
@@ -118,6 +132,23 @@
 
 	}
 
+	return id_return;
+}
+
+/*
+ * Quotes input string if it's not a legitimate SQL identifier as-is.
+ *
+ * Note that the returned string must be used before calling fmtId again,
+ * since we re-use the same return buffer each time.
+ */
+const char *
+fmtId(const char *rawid)
+{
+	PQExpBuffer id_return = getThreadLocalPQExpBuffer();
+
+	const char *cp;
+	bool		need_quotes = false;
+
 	/*
 	 * These checks need to match the identifier production in scan.l. Don't
 	 * use islower() etc.
@@ -185,6 +216,35 @@ fmtId(const char *rawid)
 	return id_return->data;
 }
 
+/*
+ * fmtQualifiedId - convert a qualified name to the proper format for
+ * the source database.
+ *
+ * Like fmtId, use the result before calling again.
+ *
+ * Since we call fmtId and it also uses getThreadLocalPQExpBuffer() we cannot
+ * use it until we're finished with calling fmtId().
+ */
+const char *
+fmtQualifiedId(int remoteVersion, const char *schema, const char *id)
+{
+	PQExpBuffer id_return;
+	PQExpBuffer lcl_pqexp = createPQExpBuffer();
+
+	/* Suppress schema name if fetching from pre-7.3 DB */
+	if (remoteVersion >= 70300 && schema && *schema)
+	{
+		appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
+	}
+	appendPQExpBuffer(lcl_pqexp, "%s", fmtId(id));
+
+	id_return = getThreadLocalPQExpBuffer();
+
+	appendPQExpBuffer(id_return, "%s", lcl_pqexp->data);
+	destroyPQExpBuffer(lcl_pqexp);
+
+	return id_return->data;
+}
 
 /*
  * Convert a string value to an SQL string literal and append it to
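The refactoring above moves fmtId()'s per-thread static buffer into getThreadLocalPQExpBuffer(), using TlsAlloc()/TlsGetValue() on Windows (where the parallel workers are threads; on Unix they are separate processes and need none of this). The same idea can be sketched with POSIX thread-specific data — a hypothetical analogue of the pattern, not the patch's code:

```c
#include <pthread.h>
#include <stdlib.h>

static pthread_key_t buf_key;
static pthread_once_t key_once = PTHREAD_ONCE_INIT;

static void
make_key(void)
{
    /* free() runs at thread exit, so the "leak" is one buffer per thread */
    pthread_key_create(&buf_key, free);
}

/*
 * Return this thread's private scratch buffer, allocating it on first
 * use. Repeated calls from one thread return the same storage, which is
 * why a result must be consumed before the next call -- the same
 * contract fmtId() documents.
 */
char *
get_thread_local_buffer(void)
{
    char   *buf;

    pthread_once(&key_once, make_key);
    buf = pthread_getspecific(buf_key);
    if (buf == NULL)
    {
        buf = malloc(256);
        pthread_setspecific(buf_key, buf);
    }
    return buf;
}
```

The trade-off is the one the patch's comment names: the function stays non-reentrant within a thread, but threads can no longer clobber each other's results, and the leakage is bounded at one buffer per thread.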
@@ -1315,7 +1375,7 @@ exit_horribly(const char *modulename, const char *fmt,...)
 	va_list		ap;
 
 	va_start(ap, fmt);
-	vwrite_msg(modulename, fmt, ap);
+	on_exit_msg_func(modulename, fmt, ap);
 	va_end(ap);
 
 	exit_nicely(1);
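The exit_horribly() change routes error output through the new on_exit_msg_func pointer (default vwrite_msg), so parallel code can later substitute its own handler without touching any call site. The hook pattern in an illustrative stand-alone form — the pointer-swap idea is the commit's; every other name here is made up:

```c
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

/* Default sink: print to stderr, loosely like vwrite_msg(). */
static void
default_msg(const char *modulename, const char *fmt, va_list ap)
{
    if (modulename)
        fprintf(stderr, "%s: ", modulename);
    vfprintf(stderr, fmt, ap);
}

/* Every error path goes through this pointer; parallel workers can
 * swap in their own handler without changing the call sites. */
static void (*msg_hook) (const char *modulename, const char *fmt, va_list ap) = default_msg;

static char captured[128];

/* Replacement handler that captures the message instead of printing,
 * standing in for "forward the message to the master process". */
static void
capture_msg(const char *modulename, const char *fmt, va_list ap)
{
    (void) modulename;
    vsnprintf(captured, sizeof(captured), fmt, ap);
}

/* Analogue of exit_horribly(), minus the actual exit. */
static void
report(const char *modulename, const char *fmt,...)
{
    va_list ap;

    va_start(ap, fmt);
    msg_hook(modulename, fmt, ap);
    va_end(ap);
}

/* Demonstrate swapping the hook, as a parallel worker would. */
const char *
demo_swap_hook(void)
{
    msg_hook = capture_msg;
    report("pg_dump", "worker %d failed", 7);
    return captured;
}
```

Keeping the indirection in one global pointer means a worker only has to assign it once at startup; no caller of the reporting function needs to know whether it is running in the master or a worker.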
