diff --git a/contrib/file_fdw/data/agg.csv.gz b/contrib/file_fdw/data/agg.csv.gz new file mode 100644 index 0000000000000..83773d7f3c83f Binary files /dev/null and b/contrib/file_fdw/data/agg.csv.gz differ diff --git a/contrib/file_fdw/data/it's_ok.csv.gz b/contrib/file_fdw/data/it's_ok.csv.gz new file mode 100644 index 0000000000000..83773d7f3c83f Binary files /dev/null and b/contrib/file_fdw/data/it's_ok.csv.gz differ diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index c5c797c1a4c76..65a5bf9d7c889 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -46,6 +46,9 @@ struct FileFdwOption Oid optcontext; /* Oid of catalog in which option may appear */ }; +/* Totally made-up compression ratio */ +static const double program_compression_ratio = 2.7708899835032f; + /* * Valid options for file_fdw. * These options are based on the options for COPY FROM command. @@ -68,6 +71,7 @@ static const struct FileFdwOption valid_options[] = { {"escape", ForeignTableRelationId}, {"null", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, + {"decompressor", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, /* @@ -84,6 +88,7 @@ static const struct FileFdwOption valid_options[] = { typedef struct FileFdwPlanState { char *filename; /* file to read */ + bool is_program; /* whether a program is used to read the file */ List *options; /* merged COPY options, excluding filename */ BlockNumber pages; /* estimate of file's physical size */ double ntuples; /* estimate of number of rows in file */ @@ -95,6 +100,7 @@ typedef struct FileFdwPlanState typedef struct FileFdwExecutionState { char *filename; /* file to read */ + char *program; /* optional program to use to read file */ List *options; /* merged COPY options, excluding filename */ CopyState cstate; /* state of reading file */ } FileFdwExecutionState; @@ -137,7 +143,7 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, char **program, List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, @@ -186,6 +192,7 @@ file_fdw_validator(PG_FUNCTION_ARGS) List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; + char *decompressor = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; @@ -243,9 +250,9 @@ file_fdw_validator(PG_FUNCTION_ARGS) } /* - * Separate out filename and force_not_null, since ProcessCopyOptions - * won't accept them. (force_not_null only comes in a boolean - * per-column flavor here.) + * Separate out filename, decompressor, and force_not_null, since + * ProcessCopyOptions won't accept them. (force_not_null only comes in + * a boolean per-column flavor here.) */ if (strcmp(def->defname, "filename") == 0) { @@ -255,6 +262,14 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + else if (strcmp(def->defname, "decompressor") == 0) + { + if (decompressor) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + decompressor = defGetString(def); + } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) @@ -274,13 +289,28 @@ file_fdw_validator(PG_FUNCTION_ARGS) */ ProcessCopyOptions(NULL, true, other_options); - /* - * Filename option is required for file_fdw foreign tables. - */ - if (catalog == ForeignTableRelationId && filename == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), - errmsg("filename is required for file_fdw foreign tables"))); + if (catalog == ForeignTableRelationId) + { + /* + * Filename option is required for file_fdw foreign tables. + */ + if (filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), + errmsg("filename is required for file_fdw foreign tables"))); + + + /* + * Decompressors must be executable. + */ + if (decompressor && (access(decompressor, R_OK | X_OK) != 0)) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("decompressor must be readable/executable \"%s\": %m", + decompressor))); + } + } PG_RETURN_VOID(); } @@ -305,12 +335,14 @@ is_valid_option(const char *option, Oid context) /* * Fetch the options for a file_fdw foreign table. * - * We have to separate out "filename" from the other options because - * it must not appear in the options list passed to the core COPY code. + * We have to separate out "filename" and "decompressor" from the other options + * because they must not appear in the options passed to the core COPY code. If + * a decompressor is present, a string consisting of it concatenated to the + * escaped file name is stored at `program`. */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, char **program, List **other_options) { ForeignTable *table; ForeignServer *server; @@ -319,6 +351,9 @@ fileGetOptions(Oid foreigntableid, ListCell *lc, *prev; + char *decompressor; + char *write_ptr, *token, *input, *read_ptr; + /* * Extract options from FDW objects. We ignore user mappings because * file_fdw doesn't have any options that can be specified there. @@ -352,6 +387,27 @@ fileGetOptions(Oid foreigntableid, options = list_delete_cell(options, lc, prev); break; } + + prev = lc; + } + + /* + * Separate out the decompressor, which will be used to calculate program. + */ + decompressor = NULL; + *program = NULL; + prev = NULL; + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "decompressor") == 0) + { + decompressor = defGetString(def); + options = list_delete_cell(options, lc, prev); + break; + } + prev = lc; } @@ -362,6 +418,41 @@ fileGetOptions(Oid foreigntableid, if (*filename == NULL) elog(ERROR, "filename is required for file_fdw foreign tables"); + /* + * Set up the decompressor if present. + */ + if (decompressor != NULL) + { + /* + * We will escape the filename by wrapping it in single quotes. To deal + * with single quotes in the name itself, we will replace all single + * quotes with the string "'\''", which is four characters long. Strings + * of only single quotes will need four times as much space, plus the + * room for the quotes, a space, and a null terminator. + */ + *program = palloc0( + (strlen(decompressor) + (4 * strlen(*filename)) + 4) + * sizeof(char)); + + write_ptr = stpcpy(*program, decompressor); + write_ptr = stpcpy(write_ptr, " '"); + + /* We're mutating filename so copy it */ + input = read_ptr = pstrdup(*filename); + + write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'")); + + while ((token = strsep(&read_ptr, "'")) != NULL) + { + write_ptr = stpcpy(write_ptr, "'\\''"); + write_ptr = stpcpy(write_ptr, token); + } + + stpcpy(write_ptr, "'"); + + pfree(input); + } + *other_options = options; } @@ -433,14 +524,16 @@ fileGetForeignRelSize(PlannerInfo *root, Oid foreigntableid) { FileFdwPlanState *fdw_private; + char *program; /* * Fetch options. We only need filename at this point, but we might as * well get everything and not need to re-fetch it later in planning. */ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); - fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + fileGetOptions(foreigntableid, &fdw_private->filename, &program, + &fdw_private->options); + fdw_private->is_program = (program != NULL); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -537,14 +630,22 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + char *program; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &program, &options); ExplainPropertyText("Foreign File", filename, es); + if (program != NULL) + { + ExplainPropertyText("Foreign Program", program, es); + ExplainPropertyFloat("Foreign Program Compression Est.", + program_compression_ratio, 4, es); + } + /* Suppress file size if we're not showing cost details */ if (es->costs) { @@ -565,6 +666,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + char *program; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -577,7 +679,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, &program, &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -587,8 +689,8 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) * as to match the expected ScanTupleSlot signature. */ cstate = BeginCopyFrom(node->ss.ss_currentRelation, - filename, - false, + (program != NULL) ? program : filename, + (program != NULL), NIL, options); @@ -598,6 +700,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) */ festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); festate->filename = filename; + festate->program = program; festate->options = options; festate->cstate = cstate; @@ -656,12 +759,16 @@ static void fileReScanForeignScan(ForeignScanState *node) { FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + char *filename_or_program; EndCopyFrom(festate->cstate); + filename_or_program = + (festate->program != NULL) ? festate->program : festate->filename; + festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation, - festate->filename, - false, + filename_or_program, + (festate->program != NULL), NIL, festate->options); } @@ -690,11 +797,12 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + char *program; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), &filename, &program, &options); /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -900,6 +1008,8 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, MAXALIGN(sizeof(HeapTupleHeaderData)); ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width); + if (fdw_private->is_program) + ntuples *= program_compression_ratio; } fdw_private->ntuples = ntuples; @@ -976,6 +1086,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + char *program; List *options; CopyState cstate; ErrorContextCallback errcallback; @@ -990,12 +1101,13 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options); /* * Create CopyState from FDW options. */ - cstate = BeginCopyFrom(onerel, filename, false, NIL, options); + cstate = BeginCopyFrom(onerel, (program != NULL) ? program : filename, + (program != NULL), NIL, options); /* * Use per-tuple memory context to prevent leak of memory used to read diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index f7fd28d44d7be..7980619b6ecc8 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -72,6 +72,16 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -97,7 +107,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv_gz ORDER BY a; SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR @@ -111,6 +123,18 @@ EXECUTE st(100); EXECUTE st(100); DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; +\t off + -- tableoid SELECT tableoid::regclass, b FROM agg_csv; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 4f90baebd6b09..c08d7bd19be44 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -88,6 +88,16 @@ CREATE FOREIGN TABLE agg_csv ( b float4 ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_csv_gz ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); +CREATE FOREIGN TABLE it_is_ok ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl'); CREATE FOREIGN TABLE agg_bad ( a int2, b float4 @@ -123,7 +133,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding, decompressor -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b @@ -140,6 +150,14 @@ SELECT * FROM agg_csv ORDER BY a; 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; a | b | a | b -----+---------+-----+--------- @@ -148,6 +166,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; 100 | 99.097 | 100 | 99.097 (3 rows) +SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + -- error context report tests SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" @@ -174,6 +200,38 @@ EXECUTE st(100); (1 row) DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz; + Foreign Scan on public.agg_csv_gz + Output: a, b + Foreign File: @abs_srcdir@/data/agg.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +DEALLOCATE st; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok; + Foreign Scan on public.it_is_ok + Output: a, b + Foreign File: @abs_srcdir@/data/it's_ok.csv.gz + Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/it'\''s_ok.csv.gz' + Foreign Program Compression Est.: 2.7709 + +\t off -- tableoid SELECT tableoid::regclass, b FROM agg_csv; tableoid | b @@ -243,13 +301,15 @@ SET ROLE file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 8 other objects +NOTICE: drop cascades to 10 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for file_fdw_user drop cascades to user mapping for file_fdw_superuser drop cascades to user mapping for no_priv_user drop cascades to foreign table agg_text drop cascades to foreign table agg_csv +drop cascades to foreign table agg_csv_gz +drop cascades to foreign table it_is_ok drop cascades to foreign table agg_bad drop cascades to foreign table text_csv DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/scripts/gunzip.pl b/contrib/file_fdw/scripts/gunzip.pl new file mode 100755 index 0000000000000..14b1e6d15182c --- /dev/null +++ b/contrib/file_fdw/scripts/gunzip.pl @@ -0,0 +1,10 @@ +#!/usr/bin/perl + +# Decompress the gzipped file at the path specified by the ARGV[0] +# Usage: gunzip.pl /path/to/compressed/file.gz + +use strict; + +use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; + +gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n"; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 9385b26d34d51..4bfdca0aaafdf 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -32,6 +32,18 @@ + + decompressor + + + + Specifies an external program to be used to decompress the file. Such + programs should accept the filename as an argument and decompress data to + stdout. The program must be readable and executable by the server process. + + + + format pFad - Phonifier reborn Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved. Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages. Alternative Proxies:Alternative ProxypFad ProxypFad v3 ProxypFad v4 Proxy
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy