From 541e4632af0ee0cb991f3e4f54f341ad94b8d870 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 13:32:19 -0700 Subject: [PATCH 1/9] Add "decompressor" option to `file_fdw` Parse the new option and validate that the file it references actually exists and is executable. --- contrib/file_fdw/file_fdw.c | 52 +++++++++++++++++++------ contrib/file_fdw/output/file_fdw.source | 2 +- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index c5c797c1a4c76..929525ae6edd1 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -68,6 +68,7 @@ static const struct FileFdwOption valid_options[] = { {"escape", ForeignTableRelationId}, {"null", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, + {"decompressor", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, /* @@ -186,6 +187,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; + char *decompressor = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; @@ -243,9 +245,9 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and force_not_null, since ProcessCopyOptions - * won't accept them. (force_not_null only comes in a boolean - * per-column flavor here.) + * Separate out filename, decompressor, and force_not_null, since + * ProcessCopyOptions won't accept them. (force_not_null only comes in + * a boolean per-column flavor here.) 
*/ if (strcmp(def->defname, "filename") == 0) { @@ -255,6 +257,14 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + else if (strcmp(def->defname, "decompressor") == 0) + { + if (decompressor) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + decompressor = defGetString(def); + } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) @@ -274,13 +284,28 @@ file_fdw_validator(PG_FUNCTION_ARGS) */ ProcessCopyOptions(NULL, true, other_options); - /* - * Filename option is required for file_fdw foreign tables. - */ - if (catalog == ForeignTableRelationId && filename == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), - errmsg("filename is required for file_fdw foreign tables"))); + if (catalog == ForeignTableRelationId) + { + /* + * Filename option is required for file_fdw foreign tables. + */ + if (filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), + errmsg("filename is required for file_fdw foreign tables"))); + + + /* + * Decompressors must be executable. + */ + if (decompressor && !access(decompressor, R_OK | X_OK)) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("decompressor must be readable/executable \"%s\": %m", + decompressor))); + } + } PG_RETURN_VOID(); } @@ -338,7 +363,7 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename. + * Separate out the filename and decompressor. 
*/ *filename = NULL; prev = NULL; @@ -352,6 +377,11 @@ fileGetOptions(Oid foreigntableid, options = list_delete_cell(options, lc, prev); break; } + else if (strcmp(def->defname, "decompressor") == 0) + { + options = list_delete_cell(options, lc, prev); + break; + } prev = lc; } diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 4f90baebd6b09..ff797f02c2fb6 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -123,7 +123,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding, decompressor -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b From 5a06bef4614fd99efb5d17496c6627f8705398cc Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 14:03:35 -0700 Subject: [PATCH 2/9] Populate decompressor in `fileGetOptions` Certain callers will need this, so provide it if found. 
--- contrib/file_fdw/file_fdw.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 929525ae6edd1..da6016f0d0b8e 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -138,7 +138,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, char **decompressor, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -330,12 +330,12 @@ is_valid_option(const char *option, Oid context) /* * Fetch the options for a file_fdw foreign table. * - * We have to separate out "filename" from the other options because - * it must not appear in the options list passed to the core COPY code. + * We have to separate out "filename" and "decompressor" from the other options + * because they must not appear in the options passed to the core COPY code. */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, char **decompressor, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -379,6 +379,7 @@ fileGetOptions(Oid foreigntableid, } else if (strcmp(def->defname, "decompressor") == 0) { + *decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); break; } @@ -463,14 +464,15 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; + char *decompressor; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. 
*/ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + fileGetOptions(foreigntableid, &fdw_private->filename, &decompressor, + &fdw_private->options); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -567,11 +569,12 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + char *decompressor; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &decompressor, &options); ExplainPropertyText("Foreign File", filename, es); @@ -595,6 +598,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + char *decompressor; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -607,7 +611,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &decompressor, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -720,11 +724,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + char *decompressor; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &decompressor, &options); /* * Get size of the file. 
(XXX if we fail here, would it be better to just @@ -1006,6 +1011,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + char *decompressor; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -1020,7 +1026,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &decompressor, &options); /* * Create CopyState from FDW options. From c50688aff63492ea2e398adfd0696f4c05363cdd Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 15:42:45 -0700 Subject: [PATCH 3/9] Switch to storing full program/args; pass it along I was planning to concatenate the program name and file name before each `BeginCopyFrom` invocation, but it seems better to do it in the function that parses options. It's not being done yet but this sets up all the callers to expect it. --- contrib/file_fdw/file_fdw.c | 40 +++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index da6016f0d0b8e..34c074aad7ff8 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -96,6 +96,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ + char *program; /* program/args to use if using compression */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -464,14 +465,14 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; - char *decompressor; + char *program; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. 
*/ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, &fdw_private->filename, &decompressor, + fileGetOptions(foreigntableid, &fdw_private->filename, &program, &fdw_private->options); baserel->fdw_private = (void *) fdw_private; @@ -569,15 +570,18 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; - char *decompressor; + char *program; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &decompressor, &options); + &filename, &program, &options); ExplainPropertyText("Foreign File", filename, es); + if (program != NULL) + ExplainPropertyText("Foreign Program", program, es); + /* Suppress file size if we're not showing cost details */ if (es->costs) { @@ -598,7 +602,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; - char *decompressor; + char *program; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -611,7 +615,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &decompressor, &options); + &filename, &program, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -621,8 +625,8 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) * as to match the expected ScanTupleSlot signature. */ cstate = BeginCopyFrom(node->ss.ss_currentRelation, - filename, - false, + (program != NULL) ? 
program : filename, + (program != NULL), NIL, options); @@ -632,6 +636,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) */ festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); festate->filename = filename; + festate->program = program; festate->options = options; festate->cstate = cstate; @@ -690,12 +695,16 @@ static void fileReScanForeignScan(ForeignScanState *node) { FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + char *filename_or_program; EndCopyFrom(festate->cstate); + filename_or_program = + (festate->program != NULL) ? festate->program : festate->filename; + festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation, - festate->filename, - false, + filename_or_program, + (festate->program != NULL), NIL, festate->options); } @@ -724,12 +733,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; - char *decompressor; + char *program; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &decompressor, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &program, &options); /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -1011,7 +1020,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; - char *decompressor; + char *program; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -1026,12 +1035,13 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &decompressor, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options); /* * Create CopyState from FDW options. */ - cstate = BeginCopyFrom(onerel, filename, false, NIL, options); + cstate = BeginCopyFrom(onerel, (program != NULL) ? 
program : filename, + (program != NULL), NIL, options); /* * Use per-tuple memory context to prevent leak of memory used to read From 427cfc854ad0efc7e867a9ef6f9879d57773d2f3 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 17:26:41 -0700 Subject: [PATCH 4/9] Parse decompressor option and build program This requires escaping the filename. I went with wrapping it in single quotes and replacing single quotes with "'\''" whenever they occur. This may not be entirely appropriate for Windows installs, but this is a good-enough solution for now. See: http://stackoverflow.com/a/3669819 --- contrib/file_fdw/file_fdw.c | 48 +++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 34c074aad7ff8..a5e44b8a3fe5b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -139,7 +139,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, char **decompressor, List **other_options); + char **filename, char **program, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -332,11 +332,13 @@ is_valid_option(const char *option, Oid context) * Fetch the options for a file_fdw foreign table. * * We have to separate out "filename" and "decompressor" from the other options - * because they must not appear in the options passed to the core COPY code. + * because they must not appear in the options passed to the core COPY code. If + * a decompressor is present, a string consisting of it concatenated to the + * escaped file name is stored at `program`. 
*/ static void fileGetOptions(Oid foreigntableid, - char **filename, char **decompressor, List **other_options) + char **filename, char **program, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -345,6 +347,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; + char *decompressor = NULL; + char *write_ptr, *token, *input, *read_ptr; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -380,7 +385,7 @@ fileGetOptions(Oid foreigntableid, } else if (strcmp(def->defname, "decompressor") == 0) { - *decompressor = defGetString(def); + decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); break; } @@ -394,6 +399,41 @@ fileGetOptions(Oid foreigntableid, if (*filename == NULL) elog(ERROR, "filename is required for file_fdw foreign tables"); + /* + * Set up the decompressor if present. + */ + if (decompressor != NULL) + { + /* + * We will escape the filename by wrapping it in single quotes. To deal + * with single quotes in the name itself, we will replace all single + * quotes with the string "'\''", which is four characters long. Strings + * of only single quotes will need four times as much space, plus the + * room for the quotes, a space, and a null terminator. 
+ */ + *program = palloc0( + (strlen(decompressor) + (4 * strlen(filename)) + 4) + * sizeof(char)); + + write_ptr = stpcpy(program, decompressor); + write_ptr = stpcpy(write_ptr, " '"); + + /* We're mutating filename so copy it */ + input = read_ptr = pstrdup(filename); + + write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); + + while ((token = strsep(&read_ptr, "'")) != NULL) + { + write_ptr = stpcpy(write_ptr, "'\\''"); + write_ptr = stpcpy(write_ptr, token); + } + + stpcpy(write_ptr, "'"); + + pfree(input); + } + *other_options = options; } From a5102c9712d105ba008f7b958d1753279b06959f Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 30 Dec 2013 18:29:11 -0700 Subject: [PATCH 5/9] Bugfixes for decompressor file_fdw option Found some issues here and there. --- contrib/file_fdw/file_fdw.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index a5e44b8a3fe5b..bb491500633da 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -299,7 +299,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) /* * Decompressors must be executable. */ - if (decompressor && !access(decompressor, R_OK | X_OK)) + if (decompressor && (access(decompressor, R_OK | X_OK) != 0)) { ereport(ERROR, (errcode_for_file_access(), @@ -350,6 +350,9 @@ fileGetOptions(Oid foreigntableid, char *decompressor = NULL; char *write_ptr, *token, *input, *read_ptr; + bool gzip_found, filename_found; + gzip_found = filename_found = FALSE; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. 
@@ -381,14 +384,17 @@ fileGetOptions(Oid foreigntableid, { *filename = defGetString(def); options = list_delete_cell(options, lc, prev); - break; + filename_found = TRUE; } else if (strcmp(def->defname, "decompressor") == 0) { decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); - break; + gzip_found = TRUE; } + + if (filename_found && gzip_found) break; + prev = lc; } @@ -412,14 +418,14 @@ fileGetOptions(Oid foreigntableid, * room for the quotes, a space, and a null terminator. */ *program = palloc0( - (strlen(decompressor) + (4 * strlen(filename)) + 4) + (strlen(decompressor) + (4 * strlen(*filename)) + 4) * sizeof(char)); - write_ptr = stpcpy(program, decompressor); + write_ptr = stpcpy(*program, decompressor); write_ptr = stpcpy(write_ptr, " '"); /* We're mutating filename so copy it */ - input = read_ptr = pstrdup(filename); + input = read_ptr = pstrdup(*filename); write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); From 51fa91adf1479767c674611d44f9aef9679637e5 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 12:16:31 -0700 Subject: [PATCH 6/9] Fix population of program, null list issue Turns out it's unsafe to modify a list while iterating over it, since the delete method actually frees the node (and possibly the list, too!) rather than just updating the next/prev pointers. --- contrib/file_fdw/file_fdw.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index bb491500633da..f1ee3e6653e3b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -347,12 +347,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; - char *decompressor = NULL; + char *decompressor; char *write_ptr, *token, *input, *read_ptr; - bool gzip_found, filename_found; - gzip_found = filename_found = FALSE; - /* * Extract options from FDW objects. 
We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -372,7 +369,7 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename and decompressor. + * Separate out the filename. */ *filename = NULL; prev = NULL; @@ -384,17 +381,29 @@ fileGetOptions(Oid foreigntableid, { *filename = defGetString(def); options = list_delete_cell(options, lc, prev); - filename_found = TRUE; + break; } - else if (strcmp(def->defname, "decompressor") == 0) + + prev = lc; + } + + /* + * Separate out the decompressor, which will be used to calculate program. + */ + decompressor = NULL; + *program = NULL; + prev = NULL; + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "decompressor") == 0) { decompressor = defGetString(def); options = list_delete_cell(options, lc, prev); - gzip_found = TRUE; + break; } - if (filename_found && gzip_found) break; - prev = lc; } From 04039740057fdaab6577eadabcd8dd9b70ec71b8 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 12:45:18 -0700 Subject: [PATCH 7/9] Add rudimentary `EXPLAIN` support for programs The compression guess is really only used for finding out the foreign relation size if no `ANALYZE` has yet been performed. --- contrib/file_fdw/file_fdw.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index f1ee3e6653e3b..65a5bf9d7c889 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -46,6 +46,9 @@ struct FileFdwOption Oid optcontext; /* Oid of catalog in which option may appear */ }; +/* Totally made-up compression ratio */ +static const double program_compression_ratio = 2.7708899835032f; + /* * Valid options for file_fdw. * These options are based on the options for COPY FROM command. 
@@ -85,6 +88,7 @@ static const struct FileFdwOption valid_options[] = { typedef struct FileFdwPlanState { char *filename; /* file to read */ + bool is_program; /* whether a program is used to read the file */ List *options; /* merged COPY options, excluding filename */ BlockNumber pages; /* estimate of file's physical size */ double ntuples; /* estimate of number of rows in file */ @@ -96,7 +100,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ - char *program; /* program/args to use if using compression */ + char *program; /* optional program to use to read file */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -529,6 +533,7 @@ fileGetForeignRelSize(PlannerInfo *root, fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); fileGetOptions(foreigntableid, &fdw_private->filename, &program, &fdw_private->options); + fdw_private->is_program = (program != NULL); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -635,7 +640,11 @@ fileExplainForeignScan(ForeignScanState *node, ExplainState *es) ExplainPropertyText("Foreign File", filename, es); if (program != NULL) + { ExplainPropertyText("Foreign Program", program, es); + ExplainPropertyFloat("Foreign Program Compression Est.", + program_compression_ratio, 4, es); + } /* Suppress file size if we're not showing cost details */ if (es->costs) @@ -999,6 +1008,8 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, MAXALIGN(sizeof(HeapTupleHeaderData)); ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width); + if (fdw_private->is_program) + ntuples *= program_compression_ratio; } fdw_private->ntuples = ntuples; From 32ab3e2388a5ca69a4a57f2e0217557bcc2311d8 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 13:24:51 -0700 Subject: [PATCH 8/9] Add decompressor tests to file_fdw suite Duplicates the 
agg.csv-based tests, but using a decompressor. Includes a Perl-based decompressor since the codebase already depends on Perl and I didn't want to hardcode a path to the gunzip executable. --- contrib/file_fdw/data/agg.csv.gz | Bin 0 -> 69 bytes contrib/file_fdw/input/file_fdw.source | 15 ++++++++ contrib/file_fdw/output/file_fdw.source | 47 +++++++++++++++++++++++- contrib/file_fdw/scripts/gunzip.pl | 10 +++++ doc/src/sgml/file-fdw.sgml | 12 ++++++ 5 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 contrib/file_fdw/data/agg.csv.gz create mode 100755 contrib/file_fdw/scripts/gunzip.pl diff --git a/contrib/file_fdw/data/agg.csv.gz b/contrib/file_fdw/data/agg.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..83773d7f3c83f9b5cc8f98f7a0fe58f894315c81 GIT binary patch literal 69 zcmV-L0J{GliwFp;&Ad_o17T-pE@N|c01e795&$p+1i;>7%#k2QbN_!;bX#oXu=G(3 b7{Az9bJB6E|E&u=kg;g5} literal 0 HcmV?d00001 diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index f7fd28d44d7be..4185b97cb8d00 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -72,6 +72,11 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -97,7 +102,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv_gz ORDER BY a; SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; +SELECT * FROM 
agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR @@ -111,6 +118,14 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + -- tableoid SELECT tableoid::regclass, b FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index ff797f02c2fb6..ca1547cdc1c15 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -88,6 +88,11 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -140,6 +145,14 @@ SELECT * FROM agg_csv ORDER BY a; 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; a | b | a | b -----+---------+-----+--------- @@ -148,6 +161,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; 100 | 99.097 | 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + -- error context report tests SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" @@ -173,6 +194,29 @@ EXECUTE st(100); 100 | 99.097 (1 row) 
+DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; + Foreign Scan on public.agg_csv_gz + Output: a, b + Foreign File: @abs_srcdir@/data/agg.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + DEALLOCATE st; -- tableoid SELECT tableoid::regclass, b FROM agg_csv; @@ -243,13 +287,14 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 8 other objects +NOTICE: drop cascades to 9 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv +drop cascades to foreign table agg_csv_gz drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/scripts/gunzip.pl b/contrib/file_fdw/scripts/gunzip.pl new file mode 100755 index 0000000000000..14b1e6d15182c --- /dev/null +++ b/contrib/file_fdw/scripts/gunzip.pl @@ -0,0 +1,10 @@ +#!/usr/bin/perl + +# Decompress the gzipped file at the path specified by the ARGV[0] +# Usage: gunzip.pl /path/to/compressed/file.gz + +use strict; + +use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; + +gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n"; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 9385b26d34d51..4bfdca0aaafdf 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -32,6 +32,18 @@ + + decompressor + + + + Specifies an external program to be used to decompress the file. 
Such + programs should accept the filename as an argument and decompress data to + stdout. The program must be readable and executable by the server process. + + + + format From b4f5ceef55999769142c3a87ae3cd2654d21419c Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 31 Dec 2013 13:36:40 -0700 Subject: [PATCH 9/9] Add filename escaping test for file_fdw It's OK to have single quotes in filenames. --- contrib/file_fdw/data/it's_ok.csv.gz | Bin 0 -> 69 bytes contrib/file_fdw/input/file_fdw.source | 9 +++++++++ contrib/file_fdw/output/file_fdw.source | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 contrib/file_fdw/data/it's_ok.csv.gz diff --git a/contrib/file_fdw/data/it's_ok.csv.gz b/contrib/file_fdw/data/it's_ok.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..83773d7f3c83f9b5cc8f98f7a0fe58f894315c81 GIT binary patch literal 69 zcmV-L0J{GliwFp;&Ad_o17T-pE@N|c01e795&$p+1i;>7%#k2QbN_!;bX#oXu=G(3 b7{Az9bJB6E|E&u=kg;g5} literal 0 HcmV?d00001 diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index 4185b97cb8d00..7980619b6ecc8 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -77,6 +77,11 @@ CREATE FOREIGN TABLE agg_csv_gz ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -126,6 +131,10 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; +\t off + -- tableoid SELECT tableoid::regclass, b 
FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index ca1547cdc1c15..c08d7bd19be44 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -93,6 +93,11 @@ CREATE FOREIGN TABLE agg_csv_gz ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -218,6 +223,15 @@ EXECUTE st(100); (1 row) DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; + Foreign Scan on public.it_is_ok + Output: a, b + Foreign File: @abs_srcdir@/data/it's_ok.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/it'\''s_ok.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off -- tableoid SELECT tableoid::regclass, b FROM agg_csv; tableoid | b @@ -287,7 +301,7 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 10 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser @@ -295,6 +309,7 @@ drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv drop cascades to foreign table agg_csv_gz +drop cascades to foreign table it_is_ok drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy