Skip to content

Commit 029dea8

Browse files
committed
Fix ts_headline() edge cases for empty query and empty search text.
tsquery's GETQUERY() macro is only safe to apply to a tsquery that is known non-empty; otherwise it gives a pointer to garbage. Before commit 5a617d7, ts_headline() avoided this pitfall, but only in a very indirect, nonobvious way. (hlCover could not reach its TS_execute call, because if the query contains no lexemes then hlFirstIndex would surely return -1.) After that commit, it fell into the trap, resulting in weird errors such as "unrecognized operator" and/or valgrind complaints. In HEAD, fix this by not calling TS_execute_locations() at all for an empty query. In the back branches, add a defensive check to hlCover() --- that's not fixing any live bug, but I judge the code a bit too fragile as-is.

Also, both mark_hl_fragments() and mark_hl_words() were careless about the possibility of empty search text: in the cases where no match has been found, they'd end up telling mark_fragment() to mark from word indexes 0 to 0 inclusive, even when there is no word 0. This is harmless since we over-allocated the prs->words array, but it does annoy valgrind. Fix so that the end index is -1 and thus mark_fragment() will do nothing in such cases.

Bottom line is that this fixes a live bug in HEAD, but in the back branches it's only getting rid of a valgrind nitpick. Back-patch anyway.

Per report from Alexander Lakhin.

Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com
1 parent 18103b7 commit 029dea8

File tree

3 files changed

+41
-7
lines changed

3 files changed

+41
-7
lines changed

src/backend/tsearch/wparser_def.c

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2417,7 +2417,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations,
24172417
/* show the first min_words words if we have not marked anything */
24182418
if (num_f <= 0)
24192419
{
2420-
startpos = endpos = curlen = 0;
2420+
startpos = curlen = 0;
2421+
endpos = -1;
24212422
for (i = 0; i < prs->curwords && curlen < min_words; i++)
24222423
{
24232424
if (!NONWORDTOKEN(prs->words[i].type))
@@ -2571,7 +2572,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations,
25712572
if (bestlen < 0)
25722573
{
25732574
curlen = 0;
2574-
pose = 0;
2575+
pose = -1;
25752576
for (i = 0; i < prs->curwords && curlen < min_words; i++)
25762577
{
25772578
if (!NONWORDTOKEN(prs->words[i].type))
@@ -2601,7 +2602,6 @@ prsd_headline(PG_FUNCTION_ARGS)
26012602
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
26022603
List *prsoptions = (List *) PG_GETARG_POINTER(1);
26032604
TSQuery query = PG_GETARG_TSQUERY(2);
2604-
hlCheck ch;
26052605
List *locations;
26062606

26072607
/* default option values: */
@@ -2671,10 +2671,17 @@ prsd_headline(PG_FUNCTION_ARGS)
26712671
}
26722672

26732673
/* Locate words and phrases matching the query */
2674-
ch.words = prs->words;
2675-
ch.len = prs->curwords;
2676-
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
2677-
checkcondition_HL);
2674+
if (query->size > 0)
2675+
{
2676+
hlCheck ch;
2677+
2678+
ch.words = prs->words;
2679+
ch.len = prs->curwords;
2680+
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
2681+
checkcondition_HL);
2682+
}
2683+
else
2684+
locations = NIL; /* empty query matches nothing */
26782685

26792686
/* Apply appropriate headline selector */
26802687
if (max_fragments == 0)

src/test/regress/expected/tsearch.out

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2127,6 +2127,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
21272127
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
21282128
(1 row)
21292129

2130+
-- Edge cases with empty query
2131+
SELECT ts_headline('english',
2132+
'', ''::tsquery);
2133+
NOTICE: text-search query doesn't contain lexemes: ""
2134+
LINE 2: '', ''::tsquery);
2135+
^
2136+
ts_headline
2137+
-------------
2138+
2139+
(1 row)
2140+
2141+
SELECT ts_headline('english',
2142+
'foo bar', ''::tsquery);
2143+
NOTICE: text-search query doesn't contain lexemes: ""
2144+
LINE 2: 'foo bar', ''::tsquery);
2145+
^
2146+
ts_headline
2147+
-------------
2148+
foo bar
2149+
(1 row)
2150+
21302151
--Rewrite sub system
21312152
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
21322153
\set ECHO none

src/test/regress/sql/tsearch.sql

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -640,6 +640,12 @@ SELECT ts_headline('english',
640640
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
641641
'MaxFragments=100, MaxWords=100, MinWords=1');
642642

643+
-- Edge cases with empty query
644+
SELECT ts_headline('english',
645+
'', ''::tsquery);
646+
SELECT ts_headline('english',
647+
'foo bar', ''::tsquery);
648+
643649
--Rewrite sub system
644650

645651
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy