Skip to content

Commit 710d90d

Browse files
committed
Add prefix operator for TEXT type.
The prefix operator along with SP-GiST indexes can be used as an alternative for LIKE 'word%' commands and it doesn't have a limitation of string/prefix length as B-Tree has. Bump catalog version Author: Ildus Kurbangaliev with some editorization by me Review by: Arthur Zakirov, Alexander Korotkov, and me Discussion: https://www.postgresql.org/message-id/flat/20180202180327.222b04b3@wp.localdomain
1 parent 4ab2999 commit 710d90d

File tree

14 files changed

+189
-9
lines changed

14 files changed

+189
-9
lines changed

doc/src/sgml/func.sgml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,21 @@
22742274
<entry><literal>ph</literal></entry>
22752275
</row>
22762276

2277+
<row>
2278+
<entry>
2279+
<indexterm>
2280+
<primary>starts_with</primary>
2281+
</indexterm>
2282+
<literal><function>starts_with(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
2283+
</entry>
2284+
<entry><type>bool</type></entry>
2285+
<entry>
2286+
Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
2287+
</entry>
2288+
<entry><literal>starts_with('alphabet', 'alph')</literal></entry>
2289+
<entry><literal>t</literal></entry>
2290+
</row>
2291+
22772292
<row>
22782293
<entry>
22792294
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
40334048
ILIKE</function>, respectively. All of these operators are
40344049
<productname>PostgreSQL</productname>-specific.
40354050
</para>
4051+
4052+
<para>
4053+
There is also the prefix operator <literal>^@</literal> and corresponding
4054+
<function>starts_with</function> function which covers cases when only
4055+
searching by beginning of the string is needed.
4056+
</para>
40364057
</sect2>
40374058

40384059

doc/src/sgml/spgist.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
<literal>~&lt;~</literal>
162162
<literal>~&gt;=~</literal>
163163
<literal>~&gt;~</literal>
164+
<literal>^@</literal>
164165
</entry>
165166
</row>
166167
<row>

src/backend/access/spgist/spgtextproc.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@
6767
*/
6868
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
6969

70+
/*
71+
* Strategy for collation aware operator on text is equal to btree strategy
72+
* plus value of 10.
73+
*
74+
* Current collation aware strategies and their corresponding btree strategies:
75+
* 11 BTLessStrategyNumber
76+
* 12 BTLessEqualStrategyNumber
77+
* 14 BTGreaterEqualStrategyNumber
78+
* 15 BTGreaterStrategyNumber
79+
*/
80+
#define SPG_STRATEGY_ADDITION (10)
81+
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
82+
&& (s) != RTPrefixStrategyNumber)
83+
7084
/* Struct for sorting values in picksplit */
7185
typedef struct spgNodePtr
7286
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
496510
* well end with a partial multibyte character, so that applying
497511
* any encoding-sensitive test to it would be risky anyhow.)
498512
*/
499-
if (strategy > 10)
513+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
500514
{
501515
if (collate_is_c)
502-
strategy -= 10;
516+
strategy -= SPG_STRATEGY_ADDITION;
503517
else
504518
continue;
505519
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
526540
if (r < 0)
527541
res = false;
528542
break;
543+
case RTPrefixStrategyNumber:
544+
if (r != 0)
545+
res = false;
546+
break;
529547
default:
530548
elog(ERROR, "unrecognized strategy number: %d",
531549
in->scankeys[j].sk_strategy);
@@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
605623
int queryLen = VARSIZE_ANY_EXHDR(query);
606624
int r;
607625

608-
if (strategy > 10)
626+
if (strategy == RTPrefixStrategyNumber)
627+
{
628+
/*
629+
* if level >= length of query then reconstrValue is began with
630+
* query (prefix) string and we don't need to check it again.
631+
*/
632+
633+
res = (level >= queryLen) ||
634+
DatumGetBool(DirectFunctionCall2(text_starts_with,
635+
out->leafValue, PointerGetDatum(query)));
636+
637+
if (!res) /* no need to consider remaining conditions */
638+
break;
639+
640+
continue;
641+
}
642+
643+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
609644
{
610645
/* Collation-aware comparison */
611-
strategy -= 10;
646+
strategy -= SPG_STRATEGY_ADDITION;
612647

613648
/* If asserts enabled, verify encoding of reconstructed string */
614649
Assert(pg_verifymbstr(fullValue, fullLen, false));

src/backend/utils/adt/selfuncs.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS)
14881488
}
14891489

14901490
/*
1491+
* prefixsel - selectivity of prefix operator
1492+
*/
1493+
Datum
1494+
prefixsel(PG_FUNCTION_ARGS)
1495+
{
1496+
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
1497+
}
1498+
1499+
/*
1500+
*
14911501
* iclikesel - Selectivity of ILIKE pattern match.
14921502
*/
14931503
Datum
@@ -2906,6 +2916,15 @@ likejoinsel(PG_FUNCTION_ARGS)
29062916
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
29072917
}
29082918

2919+
/*
2920+
* prefixjoinsel - Join selectivity of prefix operator
2921+
*/
2922+
Datum
2923+
prefixjoinsel(PG_FUNCTION_ARGS)
2924+
{
2925+
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
2926+
}
2927+
29092928
/*
29102929
* iclikejoinsel - Join selectivity of ILIKE pattern match.
29112930
*/
@@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
59475966
result = regex_fixed_prefix(patt, true, collation,
59485967
prefix, rest_selec);
59495968
break;
5969+
case Pattern_Type_Prefix:
5970+
/* Prefix type work is trivial. */
5971+
result = Pattern_Prefix_Partial;
5972+
*rest_selec = 1.0; /* all */
5973+
*prefix = makeConst(patt->consttype,
5974+
patt->consttypmod,
5975+
patt->constcollid,
5976+
patt->constlen,
5977+
datumCopy(patt->constvalue,
5978+
patt->constbyval,
5979+
patt->constlen),
5980+
patt->constisnull,
5981+
patt->constbyval);
5982+
break;
59505983
default:
59515984
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
59525985
result = Pattern_Prefix_None; /* keep compiler quiet */

src/backend/utils/adt/varlena.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
17611761
PG_RETURN_BOOL(result);
17621762
}
17631763

1764+
Datum
1765+
text_starts_with(PG_FUNCTION_ARGS)
1766+
{
1767+
Datum arg1 = PG_GETARG_DATUM(0);
1768+
Datum arg2 = PG_GETARG_DATUM(1);
1769+
bool result;
1770+
Size len1,
1771+
len2;
1772+
1773+
len1 = toast_raw_datum_size(arg1);
1774+
len2 = toast_raw_datum_size(arg2);
1775+
if (len2 > len1)
1776+
result = false;
1777+
else
1778+
{
1779+
text *targ1 = DatumGetTextPP(arg1);
1780+
text *targ2 = DatumGetTextPP(arg2);
1781+
1782+
result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1783+
VARSIZE_ANY_EXHDR(targ2)) == 0);
1784+
1785+
PG_FREE_IF_COPY(targ1, 0);
1786+
PG_FREE_IF_COPY(targ2, 1);
1787+
}
1788+
1789+
PG_RETURN_BOOL(result);
1790+
}
1791+
17641792
Datum
17651793
bttextcmp(PG_FUNCTION_ARGS)
17661794
{

src/include/access/stratnum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
6868
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
6969
#define RTSuperStrategyNumber 26 /* for inet << */
7070
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
71+
#define RTPrefixStrategyNumber 28 /* for text ^@ */
7172

72-
#define RTMaxStrategyNumber 27
73+
#define RTMaxStrategyNumber 28
7374

7475

7576
#endif /* STRATNUM_H */

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201803311
56+
#define CATALOG_VERSION_NO 201804031
5757

5858
#endif

src/include/catalog/pg_amop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,7 @@ DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
799799
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
800800
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
801801
DATA(insert ( 4017 25 25 15 s 666 4000 0 ));
802+
DATA(insert ( 4017 25 25 28 s 3877 4000 0 ));
802803

803804
/*
804805
* btree jsonb_ops

src/include/catalog/pg_operator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ DESCR("less than");
134134
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
135135
DESCR("equal");
136136
#define TextEqualOperator 98
137+
DATA(insert OID = 3877 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 starts_with prefixsel prefixjoinsel ));
138+
DESCR("starts with");
137139

138140
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
139141
DESCR("append element onto end of array");

src/include/catalog/pg_proc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
209209
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
210210
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
211211
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
212+
DATA(insert OID = 3696 ( starts_with PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_starts_with _null_ _null_ _null_ ));
212213
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
213214
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
214215
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2584,6 +2585,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
25842585
DESCR("join selectivity of NOT LIKE");
25852586
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
25862587
DESCR("join selectivity of case-insensitive regex non-match");
2588+
DATA(insert OID = 3437 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
2589+
DESCR("restriction selectivity of exact prefix");
2590+
DATA(insert OID = 3438 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
2591+
DESCR("join selectivity of exact prefix");
25872592

25882593
/* Aggregate-related functions */
25892594
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));

src/include/utils/selfuncs.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,11 @@ typedef struct VariableStatData
8787

8888
typedef enum
8989
{
90-
Pattern_Type_Like, Pattern_Type_Like_IC,
91-
Pattern_Type_Regex, Pattern_Type_Regex_IC
90+
Pattern_Type_Like,
91+
Pattern_Type_Like_IC,
92+
Pattern_Type_Regex,
93+
Pattern_Type_Regex_IC,
94+
Pattern_Type_Prefix
9295
} Pattern_Type;
9396

9497
typedef enum

src/test/regress/expected/create_index.out

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
372372
48
373373
(1 row)
374374

375+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
376+
count
377+
-------
378+
2
379+
(1 row)
380+
375381
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
376382
f1
377383
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
11821188
48
11831189
(1 row)
11841190

1191+
EXPLAIN (COSTS OFF)
1192+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
1193+
QUERY PLAN
1194+
------------------------------------------------------------
1195+
Aggregate
1196+
-> Index Only Scan using sp_radix_ind on radix_text_tbl
1197+
Index Cond: (t ^@ 'Worth'::text)
1198+
(3 rows)
1199+
1200+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
1201+
count
1202+
-------
1203+
2
1204+
(1 row)
1205+
11851206
EXPLAIN (COSTS OFF)
11861207
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
11871208
QUERY PLAN
@@ -1763,6 +1784,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
17631784
48
17641785
(1 row)
17651786

1787+
EXPLAIN (COSTS OFF)
1788+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
1789+
QUERY PLAN
1790+
------------------------------------------------
1791+
Aggregate
1792+
-> Bitmap Heap Scan on radix_text_tbl
1793+
Recheck Cond: (t ^@ 'Worth'::text)
1794+
-> Bitmap Index Scan on sp_radix_ind
1795+
Index Cond: (t ^@ 'Worth'::text)
1796+
(5 rows)
1797+
1798+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
1799+
count
1800+
-------
1801+
2
1802+
(1 row)
1803+
17661804
RESET enable_seqscan;
17671805
RESET enable_indexscan;
17681806
RESET enable_bitmapscan;

src/test/regress/expected/opr_sanity.out

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ sha224(bytea)
718718
sha256(bytea)
719719
sha384(bytea)
720720
sha512(bytea)
721+
starts_with(text,text)
721722
macaddr8_eq(macaddr8,macaddr8)
722723
macaddr8_lt(macaddr8,macaddr8)
723724
macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
18871888
4000 | 25 | <<=
18881889
4000 | 26 | >>
18891890
4000 | 27 | >>=
1890-
(121 rows)
1891+
4000 | 28 | ^@
1892+
(122 rows)
18911893

18921894
-- Check that all opclass search operators have selectivity estimators.
18931895
-- This is not absolutely required, but it seems a reasonable thing

src/test/regress/sql/create_index.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
224224

225225
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
226226

227+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
228+
227229
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
228230

229231
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
441443
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
442444
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
443445

446+
EXPLAIN (COSTS OFF)
447+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
448+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
449+
444450
EXPLAIN (COSTS OFF)
445451
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
446452
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
@@ -578,6 +584,10 @@ EXPLAIN (COSTS OFF)
578584
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
579585
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
580586

587+
EXPLAIN (COSTS OFF)
588+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
589+
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
590+
581591
RESET enable_seqscan;
582592
RESET enable_indexscan;
583593
RESET enable_bitmapscan;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy