Skip to content

Commit 04a2c7f

Browse files
committed
Improve make_tsvector() to handle empty input, and simplify its callers.
It seemed a bit silly that each caller of make_tsvector() was laboriously special-casing the situation where no lexemes were found, when it would be easy and much more bulletproof to have make_tsvector() handle that case itself.
1 parent b4c6d31 commit 04a2c7f

File tree

2 files changed

+28
-61
lines changed

2 files changed

+28
-61
lines changed

src/backend/tsearch/to_tsany.c

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l)
149149

150150
/*
151151
* make value of tsvector, given parsed text
152+
*
153+
* Note: frees prs->words and subsidiary data.
152154
*/
153155
TSVector
154156
make_tsvector(ParsedText *prs)
@@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs)
162164
char *str;
163165
int stroff;
164166

165-
prs->curwords = uniqueWORD(prs->words, prs->curwords);
167+
/* Merge duplicate words */
168+
if (prs->curwords > 0)
169+
prs->curwords = uniqueWORD(prs->words, prs->curwords);
170+
171+
/* Determine space needed */
166172
for (i = 0; i < prs->curwords; i++)
167173
{
168174
lenstr += prs->words[i].len;
@@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs)
217223
ptr->haspos = 0;
218224
ptr++;
219225
}
220-
pfree(prs->words);
226+
227+
if (prs->words)
228+
pfree(prs->words);
229+
221230
return in;
222231
}
223232

@@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
231240

232241
prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's
233242
* number */
234-
if (prs.lenwords == 0)
243+
if (prs.lenwords < 2)
235244
prs.lenwords = 2;
236245
prs.curwords = 0;
237246
prs.pos = 0;
238247
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
239248

240249
parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
250+
241251
PG_FREE_IF_COPY(in, 1);
242252

243-
if (prs.curwords)
244-
out = make_tsvector(&prs);
245-
else
246-
{
247-
pfree(prs.words);
248-
out = palloc(CALCDATASIZE(0, 0));
249-
SET_VARSIZE(out, CALCDATASIZE(0, 0));
250-
out->size = 0;
251-
}
253+
out = make_tsvector(&prs);
252254

253-
PG_RETURN_POINTER(out);
255+
PG_RETURN_TSVECTOR(out);
254256
}
255257

256258
Datum
@@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
281283

282284
iterate_jsonb_string_values(jb, &state, add_to_tsvector);
283285

284-
if (prs.curwords > 0)
285-
result = make_tsvector(&prs);
286-
else
287-
{
288-
/*
289-
* There weren't any string elements in jsonb, so we need to return an
290-
* empty vector
291-
*/
292-
result = palloc(CALCDATASIZE(0, 0));
293-
SET_VARSIZE(result, CALCDATASIZE(0, 0));
294-
result->size = 0;
295-
}
296-
297286
PG_FREE_IF_COPY(jb, 1);
298287

288+
result = make_tsvector(&prs);
289+
299290
PG_RETURN_TSVECTOR(result);
300291
}
301292

@@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
327318

328319
iterate_json_string_values(json, &state, add_to_tsvector);
329320

330-
if (prs.curwords > 0)
331-
result = make_tsvector(&prs);
332-
else
333-
{
334-
/*
335-
* There weren't any string elements in json, so we need to return an
336-
* empty vector
337-
*/
338-
result = palloc(CALCDATASIZE(0, 0));
339-
SET_VARSIZE(result, CALCDATASIZE(0, 0));
340-
result->size = 0;
341-
}
342-
343321
PG_FREE_IF_COPY(json, 1);
344322

323+
result = make_tsvector(&prs);
324+
345325
PG_RETURN_TSVECTOR(result);
346326
}
347327

src/backend/utils/adt/tsvector_op.c

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
25792579
}
25802580

25812581
/* make tsvector value */
2582-
if (prs.curwords)
2583-
{
2584-
datum = PointerGetDatum(make_tsvector(&prs));
2585-
isnull = false;
2586-
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2587-
1, &tsvector_attr_num,
2588-
&datum, &isnull);
2589-
pfree(DatumGetPointer(datum));
2590-
}
2591-
else
2592-
{
2593-
TSVector out = palloc(CALCDATASIZE(0, 0));
2594-
2595-
SET_VARSIZE(out, CALCDATASIZE(0, 0));
2596-
out->size = 0;
2597-
datum = PointerGetDatum(out);
2598-
isnull = false;
2599-
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2600-
1, &tsvector_attr_num,
2601-
&datum, &isnull);
2602-
pfree(prs.words);
2603-
}
2582+
datum = TSVectorGetDatum(make_tsvector(&prs));
2583+
isnull = false;
2584+
2585+
/* and insert it into tuple */
2586+
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2587+
1, &tsvector_attr_num,
2588+
&datum, &isnull);
2589+
2590+
pfree(DatumGetPointer(datum));
26042591

26052592
return PointerGetDatum(rettuple);
26062593
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy