Skip to content

Commit 03db44e

Browse files
committed
Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file().
One of the usages of the binary version is to read files in a different encoding from the server encoding. Dimitri Fontaine and Itagaki Takahiro.
1 parent 16b5e08 commit 03db44e

File tree

5 files changed

+139
-25
lines changed

5 files changed

+139
-25
lines changed

doc/src/sgml/func.sgml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
1444914449
</row>
1445014450
<row>
1445114451
<entry>
14452-
<literal><function>pg_read_file(<parameter>filename</> <type>text</>, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>)</function></literal>
14452+
<literal><function>pg_read_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
1445314453
</entry>
1445414454
<entry><type>text</type></entry>
1445514455
<entry>Return the contents of a text file</entry>
1445614456
</row>
14457+
<row>
14458+
<entry>
14459+
<literal><function>pg_read_binary_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
14460+
</entry>
14461+
<entry><type>bytea</type></entry>
14462+
<entry>Return the contents of a file</entry>
14463+
</row>
1445714464
<row>
1445814465
<entry>
1445914466
<literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
@@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
1448214489
at the given <parameter>offset</>, returning at most <parameter>length</>
1448314490
bytes (less if the end of file is reached first). If <parameter>offset</>
1448414491
is negative, it is relative to the end of the file.
14492+
When the <parameter>offset</> and <parameter>length</> parameters are omitted,
14493+
the whole file is returned.
14494+
The part of the file that is read must be valid text in the server encoding.
14495+
</para>
14496+
14497+
<indexterm>
14498+
<primary>pg_read_binary_file</primary>
14499+
</indexterm>
14500+
<para>
14501+
<function>pg_read_binary_file</> returns part of a file as like as
14502+
<function>pg_read_file</>, but the result is a bytea value.
14503+
One of the usages is to read a file in the specified encoding combined with
14504+
<function>convert_from</> function:
14505+
<programlisting>
14506+
SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
14507+
</programlisting>
1448514508
</para>
1448614509

1448714510
<indexterm>

src/backend/utils/adt/genfile.c

Lines changed: 105 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,14 @@ convert_and_check_filename(text *arg)
8080

8181

8282
/*
83-
* Read a section of a file, returning it as text
83+
* Read a section of a file, returning it as bytea
84+
*
85+
* We read the whole of the file when bytes_to_read is negative.
8486
*/
85-
Datum
86-
pg_read_file(PG_FUNCTION_ARGS)
87+
static bytea *
88+
read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read)
8789
{
88-
text *filename_t = PG_GETARG_TEXT_P(0);
89-
int64 seek_offset = PG_GETARG_INT64(1);
90-
int64 bytes_to_read = PG_GETARG_INT64(2);
91-
char *buf;
90+
bytea *buf;
9291
size_t nbytes;
9392
FILE *file;
9493
char *filename;
@@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS)
10099

101100
filename = convert_and_check_filename(filename_t);
102101

102+
if (bytes_to_read < 0)
103+
{
104+
if (seek_offset < 0)
105+
bytes_to_read = -seek_offset;
106+
else
107+
{
108+
struct stat fst;
109+
110+
if (stat(filename, &fst) < 0)
111+
ereport(ERROR,
112+
(errcode_for_file_access(),
113+
errmsg("could not stat file \"%s\": %m", filename)));
114+
115+
bytes_to_read = fst.st_size - seek_offset;
116+
}
117+
}
118+
119+
/* not sure why anyone thought that int64 length was a good idea */
120+
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
121+
ereport(ERROR,
122+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
123+
errmsg("requested length too large")));
124+
103125
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
104126
ereport(ERROR,
105127
(errcode_for_file_access(),
@@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS)
112134
(errcode_for_file_access(),
113135
errmsg("could not seek in file \"%s\": %m", filename)));
114136

115-
if (bytes_to_read < 0)
116-
ereport(ERROR,
117-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118-
errmsg("requested length cannot be negative")));
119-
120-
/* not sure why anyone thought that int64 length was a good idea */
121-
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
122-
ereport(ERROR,
123-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
124-
errmsg("requested length too large")));
125-
126-
buf = palloc((Size) bytes_to_read + VARHDRSZ);
137+
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
127138

128139
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
129140

@@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS)
132143
(errcode_for_file_access(),
133144
errmsg("could not read file \"%s\": %m", filename)));
134145

135-
/* Make sure the input is valid */
136-
pg_verifymbstr(VARDATA(buf), nbytes, false);
137-
138146
SET_VARSIZE(buf, nbytes + VARHDRSZ);
139147

140148
FreeFile(file);
141149
pfree(filename);
142150

143-
PG_RETURN_TEXT_P(buf);
151+
return buf;
152+
}
153+
154+
/*
155+
* Like read_binary_file, but also verify that the contents are validly
156+
* encoded in the database encoding.
157+
*/
158+
static text *
159+
read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read)
160+
{
161+
bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read);
162+
163+
/* Make sure the input is valid */
164+
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
165+
166+
/* OK, we can cast it as text safely */
167+
return (text *) buf;
168+
}
169+
170+
/*
171+
* Read a section of a file, returning it as text
172+
*/
173+
Datum
174+
pg_read_file(PG_FUNCTION_ARGS)
175+
{
176+
text *filename_t = PG_GETARG_TEXT_P(0);
177+
int64 seek_offset = PG_GETARG_INT64(1);
178+
int64 bytes_to_read = PG_GETARG_INT64(2);
179+
180+
if (bytes_to_read < 0)
181+
ereport(ERROR,
182+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
183+
errmsg("requested length cannot be negative")));
184+
185+
PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read));
186+
}
187+
188+
/*
189+
* Read the whole of a file, returning it as text
190+
*/
191+
Datum
192+
pg_read_file_all(PG_FUNCTION_ARGS)
193+
{
194+
text *filename_t = PG_GETARG_TEXT_P(0);
195+
196+
PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1));
197+
}
198+
199+
/*
200+
* Read a section of a file, returning it as bytea
201+
*/
202+
Datum
203+
pg_read_binary_file(PG_FUNCTION_ARGS)
204+
{
205+
text *filename_t = PG_GETARG_TEXT_P(0);
206+
int64 seek_offset = PG_GETARG_INT64(1);
207+
int64 bytes_to_read = PG_GETARG_INT64(2);
208+
209+
if (bytes_to_read < 0)
210+
ereport(ERROR,
211+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
212+
errmsg("requested length cannot be negative")));
213+
214+
PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read));
215+
}
216+
217+
/*
218+
* Read the whole of a file, returning it as bytea
219+
*/
220+
Datum
221+
pg_read_binary_file_all(PG_FUNCTION_ARGS)
222+
{
223+
text *filename_t = PG_GETARG_TEXT_P(0);
224+
225+
PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1));
144226
}
145227

146228
/*

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201012131
56+
#define CATALOG_VERSION_NO 201012161
5757

5858
#endif

src/include/catalog/pg_proc.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249
34033403
DESCR("return file information");
34043404
DATA(insert OID = 2624 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
34053405
DESCR("read text from a file");
3406+
DATA(insert OID = 3826 ( pg_read_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ ));
3407+
DESCR("read text from a file");
3408+
DATA(insert OID = 3827 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ ));
3409+
DESCR("read bytea from a file");
3410+
DATA(insert OID = 3828 ( pg_read_binary_file PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ ));
3411+
DESCR("read bytea from a file");
34063412
DATA(insert OID = 2625 ( pg_ls_dir PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
34073413
DESCR("list all files in a directory");
34083414
DATA(insert OID = 2626 ( pg_sleep PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));

src/include/utils/builtins.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
442442
/* genfile.c */
443443
extern Datum pg_stat_file(PG_FUNCTION_ARGS);
444444
extern Datum pg_read_file(PG_FUNCTION_ARGS);
445+
extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
446+
extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
447+
extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
445448
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
446449

447450
/* misc.c */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy