Content-Length: 36582 | pFad | http://github.com/postgres/postgres/pull/4.patch

thub.com From 541e4632af0ee0cb991f3e4f54f341ad94b8d870 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 13:32:19 -0700 Subject: [PATCH 1/9] Add "decompressor" option to `file_fdw` Parse the new option and validate the file it references actually exists and is executable. --- contrib/file_fdw/file_fdw.c | 52 +++++++++++++++++++------ contrib/file_fdw/output/file_fdw.source | 2 +- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index c5c797c1a4c76..929525ae6edd1 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -68,6 +68,7 @@ static const struct FileFdwOption valid_options[] = { {"escape", ForeignTableRelationId}, {"null", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, + {"decompressor", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, /* @@ -186,6 +187,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; + char *decompressor = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; @@ -243,9 +245,9 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and force_not_null, since ProcessCopyOptions - * won't accept them. (force_not_null only comes in a boolean - * per-column flavor here.) + * Separate out filename, decompressor, and force_not_null, since + * ProcessCopyOptions won't accept them. (force_not_null only comes in + * a boolean per-column flavor here.) 
*/ if (strcmp(def->defname, "filename") == 0) { @@ -255,6 +257,14 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + else if (strcmp(def->defname, "decompressor") == 0) + { + if (decompressor) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + decompressor = defGetString(def); + } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) @@ -274,13 +284,28 @@ file_fdw_validator(PG_FUNCTION_ARGS) */ ProcessCopyOptions(NULL, true, other_options); - /* - * Filename option is required for file_fdw foreign tables. - */ - if (catalog == ForeignTableRelationId && filename == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), - errmsg("filename is required for file_fdw foreign tables"))); + if (catalog == ForeignTableRelationId) + { + /* + * Filename option is required for file_fdw foreign tables. + */ + if (filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), + errmsg("filename is required for file_fdw foreign tables"))); + + + /* + * Decompressors must be executable. + */ + if (decompressor && !access(decompressor, R_OK | X_OK)) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("decompressor must be readable/executable \"%s\": %m", + decompressor))); + } + } PG_RETURN_VOID(); } @@ -338,7 +363,7 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename. + * Separate out the filename and decompressor. 
*/ *filename = NULL; prev = NULL; @@ -352,6 +377,11 @@ fileGetOptions(Oid foreigntableid, options = list_delete_cell(options, lc, prev); break; } + else if (strcmp(def->defname, "decompressor") == 0) + { + options = list_delete_cell(options, lc, prev); + break; + } prev = lc; } diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 4f90baebd6b09..ff797f02c2fb6 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -123,7 +123,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding, decompressor -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b From 5a06bef4614fd99efb5d17496c6627f8705398cc Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 14:03:35 -0700 Subject: [PATCH 2/9] Populate decompressor in `fileGetOptions` Certain callers will need this, so provide it if found. 
--- contrib/file_fdw/file_fdw.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 929525ae6edd1..da6016f0d0b8e 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -138,7 +138,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, char **decompressor, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -330,12 +330,12 @@ is_valid_option(const char *option, Oid context) /* * Fetch the options for a file_fdw foreign table. * - * We have to separate out "filename" from the other options because - * it must not appear in the options list passed to the core COPY code. + * We have to separate out "filename" and "decompressor" from the other options + * because they must not appear in the options passed to the core COPY code. */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, char **decompressor, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -379,6 +379,7 @@ fileGetOptions(Oid foreigntableid, } else if (strcmp(def->defname, "decompressor") == 0) { + *decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); break; } @@ -463,14 +464,15 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; + char *decompressor; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. 
*/ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + fileGetOptions(foreigntableid, &fdw_private->filename, &decompressor, + &fdw_private->options); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -567,11 +569,12 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + char *decompressor; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &decompressor, &options); ExplainPropertyText("Foreign File", filename, es); @@ -595,6 +598,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + char *decompressor; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -607,7 +611,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &decompressor, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -720,11 +724,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + char *decompressor; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &decompressor, &options); /* * Get size of the file. 
(XXX if we fail here, would it be better to just @@ -1006,6 +1011,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + char *decompressor; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -1020,7 +1026,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &decompressor, &options); /* * Create CopyState from FDW options. From c50688aff63492ea2e398adfd0696f4c05363cdd Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 15:42:45 -0700 Subject: [PATCH 3/9] Switch to storing full program/args; pass it along I was planning to concatenate the program name and file name before each `BeginCopyFrom` invocation, but it seems better to do it in the function that parses options. It's not being done yet but this sets up all the callers to expect it. --- contrib/file_fdw/file_fdw.c | 40 +++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index da6016f0d0b8e..34c074aad7ff8 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -96,6 +96,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ + char *program; /* program/args to use if using compression */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -464,14 +465,14 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; - char *decompressor; + char *program; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. 
*/ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, &fdw_private->filename, &decompressor, + fileGetOptions(foreigntableid, &fdw_private->filename, &program, &fdw_private->options); baserel->fdw_private = (void *) fdw_private; @@ -569,15 +570,18 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; - char *decompressor; + char *program; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &decompressor, &options); + &filename, &program, &options); ExplainPropertyText("Foreign File", filename, es); + if (program != NULL) + ExplainPropertyText("Foreign Program", program, es); + /* Suppress file size if we're not showing cost details */ if (es->costs) { @@ -598,7 +602,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; - char *decompressor; + char *program; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -611,7 +615,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &decompressor, &options); + &filename, &program, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -621,8 +625,8 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) * as to match the expected ScanTupleSlot signature. */ cstate = BeginCopyFrom(node->ss.ss_currentRelation, - filename, - false, + (program != NULL) ? 
program : filename, + (program != NULL), NIL, options); @@ -632,6 +636,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) */ festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); festate->filename = filename; + festate->program = program; festate->options = options; festate->cstate = cstate; @@ -690,12 +695,16 @@ static void fileReScanForeignScan(ForeignScanState *node) { FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + char *filename_or_program; EndCopyFrom(festate->cstate); + filename_or_program = + (festate->program != NULL) ? festate->program : festate->filename; + festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation, - festate->filename, - false, + filename_or_program, + (festate->program != NULL), NIL, festate->options); } @@ -724,12 +733,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; - char *decompressor; + char *program; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &decompressor, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &program, &options); /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -1011,7 +1020,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; - char *decompressor; + char *program; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -1026,12 +1035,13 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &decompressor, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options); /* * Create CopyState from FDW options. */ - cstate = BeginCopyFrom(onerel, filename, false, NIL, options); + cstate = BeginCopyFrom(onerel, (program != NULL) ? 
program : filename, + (program != NULL), NIL, options); /* * Use per-tuple memory context to prevent leak of memory used to read From 427cfc854ad0efc7e867a9ef6f9879d57773d2f3 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 17:26:41 -0700 Subject: [PATCH 4/9] Parse decompressor option and build program This requires escaping the filename. I went with wrapping it in single quotes and replacing single quotes with "'\''" whenever they occur. This may not be entirely appropriate for Windows installs, but this is a good-enough solution for now. See: http://stackoverflow.com/a/3669819 --- contrib/file_fdw/file_fdw.c | 48 +++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 34c074aad7ff8..a5e44b8a3fe5b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -139,7 +139,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, char **decompressor, List **other_options); + char **filename, char **program, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -332,11 +332,13 @@ is_valid_option(const char *option, Oid context) * Fetch the options for a file_fdw foreign table. * * We have to separate out "filename" and "decompressor" from the other options - * because they must not appear in the options passed to the core COPY code. + * because they must not appear in the options passed to the core COPY code. If + * a decompressor is present, a string consisting of it concatenated to the + * escaped file name is stored at `program`. 
*/ static void fileGetOptions(Oid foreigntableid, - char **filename, char **decompressor, List **other_options) + char **filename, char **program, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -345,6 +347,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; + char *decompressor = NULL; + char *write_ptr, *token, *input, *read_ptr; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -380,7 +385,7 @@ fileGetOptions(Oid foreigntableid, } else if (strcmp(def->defname, "decompressor") == 0) { - *decompressor = defGetString(def); + decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); break; } @@ -394,6 +399,41 @@ fileGetOptions(Oid foreigntableid, if (*filename == NULL) elog(ERROR, "filename is required for file_fdw foreign tables"); + /* + * Set up the decompressor if present. + */ + if (decompressor != NULL) + { + /* + * We will escape the filename by wrapping it in single quotes. To deal + * with single quotes in the name itself, we will replace all single + * quotes with the string "'\''", which is four characters long. Strings + * of only single quotes will need four times as much space, plus the + * room for the quotes, a space, and a null terminator. 
+ */ + *program = palloc0( + (strlen(decompressor) + (4 * strlen(filename)) + 4) + * sizeof(char)); + + write_ptr = stpcpy(program, decompressor); + write_ptr = stpcpy(write_ptr, " '"); + + /* We're mutating filename so copy it */ + input = read_ptr = pstrdup(filename); + + write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); + + while ((token = strsep(&read_ptr, "'")) != NULL) + { + write_ptr = stpcpy(write_ptr, "'\\''"); + write_ptr = stpcpy(write_ptr, token); + } + + stpcpy(write_ptr, "'"); + + pfree(input); + } + *other_options = options; } From a5102c9712d105ba008f7b958d1753279b06959f Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 18:29:11 -0700 Subject: [PATCH 5/9] Bugfixes for decompressor file_fdw option Found some issues here and there. --- contrib/file_fdw/file_fdw.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index a5e44b8a3fe5b..bb491500633da 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -299,7 +299,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) /* * Decompressors must be executable. */ - if (decompressor && !access(decompressor, R_OK | X_OK)) + if (decompressor && (access(decompressor, R_OK | X_OK) != 0)) { ereport(ERROR, (errcode_for_file_access(), @@ -350,6 +350,9 @@ fileGetOptions(Oid foreigntableid, char *decompressor = NULL; char *write_ptr, *token, *input, *read_ptr; + bool gzip_found, filename_found; + gzip_found = filename_found = FALSE; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. 
@@ -381,14 +384,17 @@ fileGetOptions(Oid foreigntableid, { *filename = defGetString(def); options = list_delete_cell(options, lc, prev); - break; + filename_found = TRUE; } else if (strcmp(def->defname, "decompressor") == 0) { decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); - break; + gzip_found = TRUE; } + + if (filename_found && gzip_found) break; + prev = lc; } @@ -412,14 +418,14 @@ fileGetOptions(Oid foreigntableid, * room for the quotes, a space, and a null terminator. */ *program = palloc0( - (strlen(decompressor) + (4 * strlen(filename)) + 4) + (strlen(decompressor) + (4 * strlen(*filename)) + 4) * sizeof(char)); - write_ptr = stpcpy(program, decompressor); + write_ptr = stpcpy(*program, decompressor); write_ptr = stpcpy(write_ptr, " '"); /* We're mutating filename so copy it */ - input = read_ptr = pstrdup(filename); + input = read_ptr = pstrdup(*filename); write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); From 51fa91adf1479767c674611d44f9aef9679637e5 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 12:16:31 -0700 Subject: [PATCH 6/9] Fix population of program, null list issue Turns out it's unsafe to modify a list while iterating over it, since the delete method actually frees the node (and possibly the list, too!) rather than just updating the next/prev pointers. --- contrib/file_fdw/file_fdw.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index bb491500633da..f1ee3e6653e3b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -347,12 +347,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; - char *decompressor = NULL; + char *decompressor; char *write_ptr, *token, *input, *read_ptr; - bool gzip_found, filename_found; - gzip_found = filename_found = FALSE; - /* * Extract options from FDW objects. 
We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -372,7 +369,7 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename and decompressor. + * Separate out the filename. */ *filename = NULL; prev = NULL; @@ -384,17 +381,29 @@ fileGetOptions(Oid foreigntableid, { *filename = defGetString(def); options = list_delete_cell(options, lc, prev); - filename_found = TRUE; + break; } - else if (strcmp(def->defname, "decompressor") == 0) + + prev = lc; + } + + /* + * Separate out the decompressor, which will be used to calculate program. + */ + decompressor = NULL; + *program = NULL; + prev = NULL; + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "decompressor") == 0) { decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); - gzip_found = TRUE; + break; } - if (filename_found && gzip_found) break; - prev = lc; } From 04039740057fdaab6577eadabcd8dd9b70ec71b8 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 12:45:18 -0700 Subject: [PATCH 7/9] Add rudimentary `EXPLAIN` support for programs The compression guess is really only used for finding out the foreign relation size if no `ANALYZE` has yet been performed. --- contrib/file_fdw/file_fdw.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index f1ee3e6653e3b..65a5bf9d7c889 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -46,6 +46,9 @@ struct FileFdwOption Oid optcontext; /* Oid of catalog in which option may appear */ }; +/* Totally made-up compression ratio */ +static const double program_compression_ratio = 2.7708899835032f; + /* * Valid options for file_fdw. * These options are based on the options for COPY FROM command. 
@@ -85,6 +88,7 @@ static const struct FileFdwOption valid_options[] = { typedef struct FileFdwPlanState { char *filename; /* file to read */ + bool is_program; /* whether a program is used to read the file */ List *options; /* merged COPY options, excluding filename */ BlockNumber pages; /* estimate of file's physical size */ double ntuples; /* estimate of number of rows in file */ @@ -96,7 +100,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ - char *program; /* program/args to use if using compression */ + char *program; /* optional program to use to read file */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -529,6 +533,7 @@ fileGetForeignRelSize(PlannerInfo *root, fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); fileGetOptions(foreigntableid, &fdw_private->filename, &program, &fdw_private->options); + fdw_private->is_program = (program != NULL); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -635,7 +640,11 @@ fileExplainForeignScan(ForeignScanState *node, ExplainState *es) ExplainPropertyText("Foreign File", filename, es); if (program != NULL) + { ExplainPropertyText("Foreign Program", program, es); + ExplainPropertyFloat("Foreign Program Compression Est.", + program_compression_ratio, 4, es); + } /* Suppress file size if we're not showing cost details */ if (es->costs) @@ -999,6 +1008,8 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, MAXALIGN(sizeof(HeapTupleHeaderData)); ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width); + if (fdw_private->is_program) + ntuples *= program_compression_ratio; } fdw_private->ntuples = ntuples; From 32ab3e2388a5ca69a4a57f2e0217557bcc2311d8 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 13:24:51 -0700 Subject: [PATCH 8/9] Add decompressor tests to file_fdw suite Duplicates the 
agg.csv-based tests, but using a decompressor. Includes a Perl-based decompressor since the codebase already depends on Perl and I didn't want to hardcode a path to the gunzip executable. --- contrib/file_fdw/data/agg.csv.gz | Bin 0 -> 69 bytes contrib/file_fdw/input/file_fdw.source | 15 ++++++++ contrib/file_fdw/output/file_fdw.source | 47 +++++++++++++++++++++++- contrib/file_fdw/scripts/gunzip.pl | 10 +++++ doc/src/sgml/file-fdw.sgml | 12 ++++++ 5 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 contrib/file_fdw/data/agg.csv.gz create mode 100755 contrib/file_fdw/scripts/gunzip.pl diff --git a/contrib/file_fdw/data/agg.csv.gz b/contrib/file_fdw/data/agg.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..83773d7f3c83f9b5cc8f98f7a0fe58f894315c81 GIT binary patch literal 69 zcmV-L0J{GliwFp;&Ad_o17T-pE@N|c01e795&$p+1i;>7%#k2QbN_!;bX#oXu=G(3 b7{Az9bJB6E|E&u=kg;g5} literal 0 HcmV?d00001 diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index f7fd28d44d7be..4185b97cb8d00 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -72,6 +72,11 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -97,7 +102,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv_gz ORDER BY a; SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; +SELECT * FROM 
agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR @@ -111,6 +118,14 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + -- tableoid SELECT tableoid::regclass, b FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index ff797f02c2fb6..ca1547cdc1c15 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -88,6 +88,11 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -140,6 +145,14 @@ SELECT * FROM agg_csv ORDER BY a; 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; a | b | a | b -----+---------+-----+--------- @@ -148,6 +161,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; 100 | 99.097 | 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + -- error context report tests SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" @@ -173,6 +194,29 @@ EXECUTE st(100); 100 | 99.097 (1 row) 
+DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; + Foreign Scan on public.agg_csv_gz + Output: a, b + Foreign File: @abs_srcdir@/data/agg.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + DEALLOCATE st; -- tableoid SELECT tableoid::regclass, b FROM agg_csv; @@ -243,13 +287,14 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 8 other objects +NOTICE: drop cascades to 9 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv +drop cascades to foreign table agg_csv_gz drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/scripts/gunzip.pl b/contrib/file_fdw/scripts/gunzip.pl new file mode 100755 index 0000000000000..14b1e6d15182c --- /dev/null +++ b/contrib/file_fdw/scripts/gunzip.pl @@ -0,0 +1,10 @@ +#!/usr/bin/perl + +# Decompress the gzipped file at the path specified by the ARGV[0] +# Usage: gunzip.pl /path/to/compressed/file.gz + +use strict; + +use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; + +gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n"; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 9385b26d34d51..4bfdca0aaafdf 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -32,6 +32,18 @@ + + decompressor + + + + Specifies an external program to be used to decompress the file. 
Such + programs should accept the filename as an argument and decompress data to + stdout. The program must be readable and executable by the server process. + + + + format From b4f5ceef55999769142c3a87ae3cd2654d21419c Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 13:36:40 -0700 Subject: [PATCH 9/9] Add filename escaping test for file_fdw It's OK to have single quotes in filenames. --- contrib/file_fdw/data/it's_ok.csv.gz | Bin 0 -> 69 bytes contrib/file_fdw/input/file_fdw.source | 9 +++++++++ contrib/file_fdw/output/file_fdw.source | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 contrib/file_fdw/data/it's_ok.csv.gz diff --git a/contrib/file_fdw/data/it's_ok.csv.gz b/contrib/file_fdw/data/it's_ok.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..83773d7f3c83f9b5cc8f98f7a0fe58f894315c81 GIT binary patch literal 69 zcmV-L0J{GliwFp;&Ad_o17T-pE@N|c01e795&$p+1i;>7%#k2QbN_!;bX#oXu=G(3 b7{Az9bJB6E|E&u=kg;g5} literal 0 HcmV?d00001 diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index 4185b97cb8d00..7980619b6ecc8 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -77,6 +77,11 @@ CREATE FOREIGN TABLE agg_csv_gz ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -126,6 +131,10 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; +\t off + -- tableoid SELECT tableoid::regclass, b 
FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index ca1547cdc1c15..c08d7bd19be44 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -93,6 +93,11 @@ CREATE FOREIGN TABLE agg_csv_gz ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -218,6 +223,15 @@ EXECUTE st(100); (1 row) DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; + Foreign Scan on public.it_is_ok + Output: a, b + Foreign File: @abs_srcdir@/data/it's_ok.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/it'\''s_ok.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off -- tableoid SELECT tableoid::regclass, b FROM agg_csv; tableoid | b @@ -287,7 +301,7 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 10 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser @@ -295,6 +309,7 @@ drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv drop cascades to foreign table agg_csv_gz +drop cascades to foreign table it_is_ok drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user;








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/postgres/postgres/pull/4.patch

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy