Skip to content

Commit 83b6131

Browse files
committed
Improve behavior of tsearch_readline(), and remove t_readline().
Commit fbeb9da, which added the tsearch_readline APIs, left t_readline() in place as a compatibility measure. But that function has been unused and deprecated for twelve years now, so that seems like enough time to remove it. Doing so, and merging t_readline's code into tsearch_readline, aids in making several useful improvements: * The hard-wired 4K limit on line length in tsearch data files is removed, by using a StringInfo buffer instead of a fixed-size buffer. * We can buy back the per-line palloc/pfree added by 3ea7e95 in the common case where encoding conversion is not required. * We no longer need a separate pg_verify_mbstr call, as that functionality was folded into encoding conversion some time ago. (We could have done some of this stuff while keeping t_readline as a separate API, but there seems little point, since there's no reason for anyone to still be using t_readline directly.) Discussion: https://postgr.es/m/48A4FA71-524E-41B9-953A-FD04EF36E2E7@yesql.se
1 parent aca7484 commit 83b6131

File tree

3 files changed

+32
-58
lines changed

3 files changed

+32
-58
lines changed

src/backend/tsearch/dict_thesaurus.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
286286
(errcode(ERRCODE_CONFIG_FILE_ERROR),
287287
errmsg("unexpected end of line")));
288288

289-
/*
290-
* Note: currently, tsearch_readline can't return lines exceeding 4KB,
291-
* so overflow of the word counts is impossible. But that may not
292-
* always be true, so let's check.
293-
*/
294289
if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
295290
ereport(ERROR,
296291
(errcode(ERRCODE_CONFIG_FILE_ERROR),

src/backend/tsearch/ts_locale.c

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "postgres.h"
1515

1616
#include "catalog/pg_collation.h"
17+
#include "common/string.h"
1718
#include "storage/fd.h"
1819
#include "tsearch/ts_locale.h"
1920
#include "tsearch/ts_public.h"
@@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
128129
return false;
129130
stp->filename = filename;
130131
stp->lineno = 0;
132+
initStringInfo(&stp->buf);
131133
stp->curline = NULL;
132134
/* Setup error traceback support for ereport() */
133135
stp->cb.callback = tsearch_readline_callback;
@@ -145,31 +147,43 @@ tsearch_readline_begin(tsearch_readline_state *stp,
145147
char *
146148
tsearch_readline(tsearch_readline_state *stp)
147149
{
148-
char *result;
150+
char *recoded;
149151

150152
/* Advance line number to use in error reports */
151153
stp->lineno++;
152154

153155
/* Clear curline, it's no longer relevant */
154156
if (stp->curline)
155157
{
156-
pfree(stp->curline);
158+
if (stp->curline != stp->buf.data)
159+
pfree(stp->curline);
157160
stp->curline = NULL;
158161
}
159162

160163
/* Collect next line, if there is one */
161-
result = t_readline(stp->fp);
162-
if (!result)
164+
if (!pg_get_line_buf(stp->fp, &stp->buf))
163165
return NULL;
164166

167+
/* Validate the input as UTF-8, then convert to DB encoding if needed */
168+
recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
169+
170+
/* Save the correctly-encoded string for possible error reports */
171+
stp->curline = recoded; /* might be equal to buf.data */
172+
165173
/*
166-
* Save a copy of the line for possible use in error reports. (We cannot
167-
* just save "result", since it's likely to get pfree'd at some point by
168-
* the caller; an error after that would try to access freed data.)
174+
* We always return a freshly pstrdup'd string. This is clearly necessary
175+
* if pg_any_to_server() returned buf.data, and we need a second copy even
176+
* if encoding conversion did occur. The caller is entitled to pfree the
177+
* returned string at any time, which would leave curline pointing to
178+
* recycled storage, causing problems if an error occurs after that point.
179+
* (It's preferable to return the result of pstrdup instead of the output
180+
* of pg_any_to_server, because the conversion result tends to be
181+
* over-allocated. Since callers might save the result string directly
182+
* into a long-lived dictionary structure, we don't want it to be a larger
183+
* palloc chunk than necessary. We'll reclaim the conversion result on
184+
* the next call.)
169185
*/
170-
stp->curline = pstrdup(result);
171-
172-
return result;
186+
return pstrdup(recoded);
173187
}
174188

175189
/*
@@ -181,11 +195,13 @@ tsearch_readline_end(tsearch_readline_state *stp)
181195
/* Suppress use of curline in any error reported below */
182196
if (stp->curline)
183197
{
184-
pfree(stp->curline);
198+
if (stp->curline != stp->buf.data)
199+
pfree(stp->curline);
185200
stp->curline = NULL;
186201
}
187202

188203
/* Release other resources */
204+
pfree(stp->buf.data);
189205
FreeFile(stp->fp);
190206

191207
/* Pop the error context stack */
@@ -203,8 +219,7 @@ tsearch_readline_callback(void *arg)
203219

204220
/*
205221
* We can't include the text of the config line for errors that occur
206-
* during t_readline() itself. This is only partly a consequence of our
207-
* arms-length use of that routine: the major cause of such errors is
222+
* during tsearch_readline() itself. The major cause of such errors is
208223
* encoding violations, and we daren't try to print error messages
209224
* containing badly-encoded data.
210225
*/
@@ -220,43 +235,6 @@ tsearch_readline_callback(void *arg)
220235
}
221236

222237

223-
/*
224-
* Read the next line from a tsearch data file (expected to be in UTF-8), and
225-
* convert it to database encoding if needed. The returned string is palloc'd.
226-
* NULL return means EOF.
227-
*
228-
* Note: direct use of this function is now deprecated. Go through
229-
* tsearch_readline() to provide better error reporting.
230-
*/
231-
char *
232-
t_readline(FILE *fp)
233-
{
234-
int len;
235-
char *recoded;
236-
char buf[4096]; /* lines must not be longer than this */
237-
238-
if (fgets(buf, sizeof(buf), fp) == NULL)
239-
return NULL;
240-
241-
len = strlen(buf);
242-
243-
/* Make sure the input is valid UTF-8 */
244-
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
245-
246-
/* And convert */
247-
recoded = pg_any_to_server(buf, len, PG_UTF8);
248-
if (recoded == buf)
249-
{
250-
/*
251-
* conversion didn't pstrdup, so we must. We can use the length of the
252-
* original string, because no conversion was done.
253-
*/
254-
recoded = pnstrdup(recoded, len);
255-
}
256-
257-
return recoded;
258-
}
259-
260238
/*
261239
* lowerstr --- fold null-terminated string to lower case
262240
*

src/include/tsearch/ts_locale.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ctype.h>
1616
#include <limits.h>
1717

18+
#include "lib/stringinfo.h"
1819
#include "mb/pg_wchar.h"
1920
#include "utils/pg_locale.h"
2021

@@ -33,7 +34,9 @@ typedef struct
3334
FILE *fp;
3435
const char *filename;
3536
int lineno;
36-
char *curline;
37+
StringInfoData buf; /* current input line, in UTF-8 */
38+
char *curline; /* current input line, in DB's encoding */
39+
/* curline may be NULL, or equal to buf.data, or a palloc'd string */
3740
ErrorContextCallback cb;
3841
} tsearch_readline_state;
3942

@@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
5760
extern char *tsearch_readline(tsearch_readline_state *stp);
5861
extern void tsearch_readline_end(tsearch_readline_state *stp);
5962

60-
extern char *t_readline(FILE *fp);
61-
6263
#endif /* __TSLOCALE_H__ */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy