Skip to content

Commit bb14050

Browse files
committed
Phrase full text search.
This patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery. The on-disk and binary in/out formats of tsquery are backward compatible. It has two side effects: - the sort order of tsquery changes, so users who have a btree index over tsquery should reindex it - tsquery output contains fewer parentheses, so it becomes more readable Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov Reviewers: Alexander Korotkov, Artur Zakirov
1 parent 015e889 commit bb14050

30 files changed

+2536
-444
lines changed

contrib/tsearch2/expected/tsearch2.out

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
278278
(1 row)
279279

280280
SELECT '1|(2|(4|(5|6)))'::tsquery;
281-
tsquery
282-
-----------------------------------------
283-
'1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
281+
tsquery
282+
-----------------------------
283+
'1' | '2' | '4' | '5' | '6'
284284
(1 row)
285285

286286
SELECT '1|2|4|5|6'::tsquery;
287-
tsquery
288-
-----------------------------------------
289-
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
287+
tsquery
288+
-----------------------------
289+
'1' | '2' | '4' | '5' | '6'
290290
(1 row)
291291

292292
SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
340340
select 'a | f' < 'b & c'::tsquery;
341341
?column?
342342
----------
343-
t
343+
f
344344
(1 row)
345345

346346
select 'a | ff' < 'b & c'::tsquery;
@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york';
443443

444444
set enable_seqscan=on;
445445
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
446-
rewrite
447-
----------------------------------------------------------------------------------
448-
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
446+
rewrite
447+
------------------------------------------------------------------------------
448+
'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
449449
(1 row)
450450

451451
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
461461
(1 row)
462462

463463
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
464-
rewrite
465-
-------------------------------------------------------------------------------------
466-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
464+
rewrite
465+
---------------------------------------------------------------------------------
466+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
467467
(1 row)
468468

469469
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
479479
(1 row)
480480

481481
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
482-
rewrite
483-
-------------------------------------------------------------------------------------
484-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
482+
rewrite
483+
---------------------------------------------------------------------------------
484+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
485485
(1 row)
486486

487487
select keyword from test_tsquery where keyword @> 'new';
@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
520520
(1 row)
521521

522522
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
523-
rewrite
524-
-------------------------------------------------------------------------------------
525-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
523+
rewrite
524+
---------------------------------------------------------------------------------
525+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
526526
(1 row)
527527

528528
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
538538
(1 row)
539539

540540
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
541-
rewrite
542-
-------------------------------------------------------------------------------------
543-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
541+
rewrite
542+
---------------------------------------------------------------------------------
543+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
544544
(1 row)
545545

546546
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
581581
(1 row)
582582

583583
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
584-
rewrite
585-
-------------------------------------------------------------------------------------
586-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
584+
rewrite
585+
---------------------------------------------------------------------------------
586+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
587587
(1 row)
588588

589589
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
599599
(1 row)
600600

601601
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
602-
rewrite
603-
-------------------------------------------------------------------------------------
604-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
602+
rewrite
603+
---------------------------------------------------------------------------------
604+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
605605
(1 row)
606606

607607
set enable_seqscan='on';

doc/src/sgml/datatype.sgml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
39243924
<para>
39253925
A <type>tsquery</type> value stores lexemes that are to be
39263926
searched for, and combines them honoring the Boolean operators
3927-
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3928-
<literal>!</> (NOT). Parentheses can be used to enforce grouping
3927+
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3928+
<literal>!</> (NOT), as well as the phrase search operator
3929+
<literal>&lt;-&gt;</> (FOLLOWED BY). Parentheses can be used to enforce grouping
39293930
of the operators:
39303931

39313932
<programlisting>
@@ -3946,8 +3947,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
39463947
</programlisting>
39473948

39483949
In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
3949-
and <literal>&amp;</literal> (AND) binds more tightly than
3950-
<literal>|</literal> (OR).
3950+
and <literal>&amp;</literal> (AND) and <literal>&lt;-&gt;</literal> (FOLLOWED BY)
3951+
both bind more tightly than <literal>|</literal> (OR).
39513952
</para>
39523953

39533954
<para>

doc/src/sgml/func.sgml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9127,6 +9127,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
91279127
<entry><literal>!! 'cat'::tsquery</literal></entry>
91289128
<entry><literal>!'cat'</literal></entry>
91299129
</row>
9130+
<row>
9131+
<entry> <literal>&lt;-&gt;</literal> </entry>
9132+
<entry><type>tsquery</> followed by <type>tsquery</></entry>
9133+
<entry><literal>to_tsquery('fat') &lt;-&gt; to_tsquery('rat')</literal></entry>
9134+
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
9135+
</row>
91309136
<row>
91319137
<entry> <literal>@&gt;</literal> </entry>
91329138
<entry><type>tsquery</> contains another ?</entry>
@@ -9219,6 +9225,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92199225
<entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry>
92209226
<entry><literal>'fat' &amp; 'rat'</literal></entry>
92219227
</row>
9228+
<row>
9229+
<entry>
9230+
<indexterm>
9231+
<primary>phraseto_tsquery</primary>
9232+
</indexterm>
9233+
<literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
9234+
</entry>
9235+
<entry><type>tsquery</type></entry>
9236+
<entry>produce <type>tsquery</> that searches for a phrase, ignoring punctuation</entry>
9237+
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
9238+
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
9239+
</row>
92229240
<row>
92239241
<entry>
92249242
<indexterm>
@@ -9421,6 +9439,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
94219439
<entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry>
94229440
<entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry>
94239441
</row>
9442+
<row>
9443+
<entry>
9444+
<indexterm>
9445+
<primary>tsquery_phrase</primary>
9446+
</indexterm>
9447+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
9448+
</entry>
9449+
<entry><type>tsquery</type></entry>
9450+
<entry>implementation of <literal>&lt;-&gt;</> (FOLLOWED BY) operator</entry>
9451+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
9452+
<entry><literal>'fat' &lt;-&gt; 'cat'</literal></entry>
9453+
</row>
9454+
<row>
9455+
<entry>
9456+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
9457+
</entry>
9458+
<entry><type>tsquery</type></entry>
9459+
<entry>phrase-concatenate with distance</entry>
9460+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
9461+
<entry><literal>'fat' &lt;10&gt; 'cat'</literal></entry>
9462+
</row>
94249463
<row>
94259464
<entry>
94269465
<indexterm>

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy