Content-Length: 19450 | pFad | http://github.com/postgres/postgres/pull/4.diff

thub.com diff --git a/contrib/file_fdw/data/agg.csv.gz b/contrib/file_fdw/data/agg.csv.gz new file mode 100644 index 0000000000000..83773d7f3c83f Binary files /dev/null and b/contrib/file_fdw/data/agg.csv.gz differ diff --git a/contrib/file_fdw/data/it's_ok.csv.gz b/contrib/file_fdw/data/it's_ok.csv.gz new file mode 100644 index 0000000000000..83773d7f3c83f Binary files /dev/null and b/contrib/file_fdw/data/it's_ok.csv.gz differ diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index c5c797c1a4c76..65a5bf9d7c889 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -46,6 +46,9 @@ struct FileFdwOption Oid optcontext; /* Oid of catalog in which option may appear */ }; +/* Totally made-up compression ratio */ +static const double program_compression_ratio = 2.7708899835032f; + /* * Valid options for file_fdw. * These options are based on the options for COPY FROM command. @@ -68,6 +71,7 @@ static const struct FileFdwOption valid_options[] = { {"escape", ForeignTableRelationId}, {"null", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, + {"decompressor", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, /* @@ -84,6 +88,7 @@ static const struct FileFdwOption valid_options[] = { typedef struct FileFdwPlanState { char *filename; /* file to read */ + bool is_program; /* whether a program is used to read the file */ List *options; /* merged COPY options, excluding filename */ BlockNumber pages; /* estimate of file's physical size */ double ntuples; /* estimate of number of rows in file */ @@ -95,6 +100,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ + char *program; /* optional program to use to read file */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -137,7 +143,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, char **program, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -186,6 +192,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; + char *decompressor = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; @@ -243,9 +250,9 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and force_not_null, since ProcessCopyOptions - * won't accept them. (force_not_null only comes in a boolean - * per-column flavor here.) + * Separate out filename, decompressor, and force_not_null, since + * ProcessCopyOptions won't accept them. (force_not_null only comes in + * a boolean per-column flavor here.) */ if (strcmp(def->defname, "filename") == 0) { @@ -255,6 +262,14 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + else if (strcmp(def->defname, "decompressor") == 0) + { + if (decompressor) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + decompressor = defGetString(def); + } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) @@ -274,13 +289,28 @@ file_fdw_validator(PG_FUNCTION_ARGS) */ ProcessCopyOptions(NULL, true, other_options); - /* - * Filename option is required for file_fdw foreign tables. - */ - if (catalog == ForeignTableRelationId && filename == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), - errmsg("filename is required for file_fdw foreign tables"))); + if (catalog == ForeignTableRelationId) + { + /* + * Filename option is required for file_fdw foreign tables. + */ + if (filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), + errmsg("filename is required for file_fdw foreign tables"))); + + + /* + * Decompressors must be executable. + */ + if (decompressor && (access(decompressor, R_OK | X_OK) != 0)) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("decompressor must be readable/executable \"%s\": %m", + decompressor))); + } + } PG_RETURN_VOID(); } @@ -305,12 +335,14 @@ is_valid_option(const char *option, Oid context) /* * Fetch the options for a file_fdw foreign table. * - * We have to separate out "filename" from the other options because - * it must not appear in the options list passed to the core COPY code. + * We have to separate out "filename" and "decompressor" from the other options + * because they must not appear in the options passed to the core COPY code. If + * a decompressor is present, a string consisting of it concatenated to the + * escaped file name is stored at `program`. */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, char **program, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -319,6 +351,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; + char *decompressor; + char *write_ptr, *token, *input, *read_ptr; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -352,6 +387,27 @@ fileGetOptions(Oid foreigntableid, options = list_delete_cell(options, lc, prev); break; } + + prev = lc; + } + + /* + * Separate out the decompressor, which will be used to calculate program. + */ + decompressor = NULL; + *program = NULL; + prev = NULL; + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "decompressor") == 0) + { + decompressor = defGetString(def); + options = list_delete_cell(options, lc, prev); + break; + } + prev = lc; } @@ -362,6 +418,41 @@ fileGetOptions(Oid foreigntableid, if (*filename == NULL) elog(ERROR, "filename is required for file_fdw foreign tables"); + /* + * Set up the decompressor if present. + */ + if (decompressor != NULL) + { + /* + * We will escape the filename by wrapping it in single quotes. To deal + * with single quotes in the name itself, we will replace all single + * quotes with the string "'\''", which is four characters long. Strings + * of only single quotes will need four times as much space, plus the + * room for the quotes, a space, and a null terminator. + */ + *program = palloc0( + (strlen(decompressor) + (4 * strlen(*filename)) + 4) + * sizeof(char)); + + write_ptr = stpcpy(*program, decompressor); + write_ptr = stpcpy(write_ptr, " '"); + + /* We're mutating filename so copy it */ + input = read_ptr = pstrdup(*filename); + + write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); + + while ((token = strsep(&read_ptr, "'")) != NULL) + { + write_ptr = stpcpy(write_ptr, "'\\''"); + write_ptr = stpcpy(write_ptr, token); + } + + stpcpy(write_ptr, "'"); + + pfree(input); + } + *other_options = options; } @@ -433,14 +524,16 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; + char *program; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. */ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + fileGetOptions(foreigntableid, &fdw_private->filename, &program, + &fdw_private->options); + fdw_private->is_program = (program != NULL); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -537,14 +630,22 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + char *program; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &program, &options); ExplainPropertyText("Foreign File", filename, es); + if (program != NULL) + { + ExplainPropertyText("Foreign Program", program, es); + ExplainPropertyFloat("Foreign Program Compression Est.", + program_compression_ratio, 4, es); + } + /* Suppress file size if we're not showing cost details */ if (es->costs) { @@ -565,6 +666,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + char *program; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -577,7 +679,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &program, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -587,8 +689,8 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) * as to match the expected ScanTupleSlot signature. */ cstate = BeginCopyFrom(node->ss.ss_currentRelation, - filename, - false, + (program != NULL) ? program : filename, + (program != NULL), NIL, options); @@ -598,6 +700,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) */ festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); festate->filename = filename; + festate->program = program; festate->options = options; festate->cstate = cstate; @@ -656,12 +759,16 @@ static void fileReScanForeignScan(ForeignScanState *node) { FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + char *filename_or_program; EndCopyFrom(festate->cstate); + filename_or_program = + (festate->program != NULL) ? festate->program : festate->filename; + festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation, - festate->filename, - false, + filename_or_program, + (festate->program != NULL), NIL, festate->options); } @@ -690,11 +797,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + char *program; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &program, &options); /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -900,6 +1008,8 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, MAXALIGN(sizeof(HeapTupleHeaderData)); ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width); + if (fdw_private->is_program) + ntuples *= program_compression_ratio; } fdw_private->ntuples = ntuples; @@ -976,6 +1086,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + char *program; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -990,12 +1101,13 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options); /* * Create CopyState from FDW options. */ - cstate = BeginCopyFrom(onerel, filename, false, NIL, options); + cstate = BeginCopyFrom(onerel, (program != NULL) ? program : filename, + (program != NULL), NIL, options); /* * Use per-tuple memory context to prevent leak of memory used to read diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index f7fd28d44d7be..7980619b6ecc8 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -72,6 +72,16 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -97,7 +107,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv_gz ORDER BY a; SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR @@ -111,6 +123,18 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; +\t off + -- tableoid SELECT tableoid::regclass, b FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 4f90baebd6b09..c08d7bd19be44 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -88,6 +88,16 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -123,7 +133,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding, decompressor -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b @@ -140,6 +150,14 @@ SELECT * FROM agg_csv ORDER BY a; 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; a | b | a | b -----+---------+-----+--------- @@ -148,6 +166,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; 100 | 99.097 | 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + -- error context report tests SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" @@ -174,6 +200,38 @@ EXECUTE st(100); (1 row) DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; + Foreign Scan on public.agg_csv_gz + Output: a, b + Foreign File: @abs_srcdir@/data/agg.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; + Foreign Scan on public.it_is_ok + Output: a, b + Foreign File: @abs_srcdir@/data/it's_ok.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/it'\''s_ok.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off -- tableoid SELECT tableoid::regclass, b FROM agg_csv; tableoid | b @@ -243,13 +301,15 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 8 other objects +NOTICE: drop cascades to 10 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv +drop cascades to foreign table agg_csv_gz +drop cascades to foreign table it_is_ok drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/scripts/gunzip.pl b/contrib/file_fdw/scripts/gunzip.pl new file mode 100755 index 0000000000000..14b1e6d15182c --- /dev/null +++ b/contrib/file_fdw/scripts/gunzip.pl @@ -0,0 +1,10 @@ +#!/usr/bin/perl + +# Decompress the gzipped file at the path specified by the ARGV[0] +# Usage: gunzip.pl /path/to/compressed/file.gz + +use strict; + +use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; + +gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n"; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 9385b26d34d51..4bfdca0aaafdf 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -32,6 +32,18 @@ + + decompressor + + + + Specifies an external program to be used to decompress the file. Such + programs should accept the filename as an argument and decompress data to + stdout. The program must be readable and executable by the server process. + + + + format








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/postgres/postgres/pull/4.diff

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy