Skip to content

Commit 710d90d

Browse files
committed
Add prefix operator for TEXT type.
The prefix operator, together with SP-GiST indexes, can be used as an alternative to LIKE 'word%' queries, and it does not have the string/prefix length limitation that B-Tree has. Bump catalog version. Author: Ildus Kurbangaliev, with some editing by me. Reviewed by: Arthur Zakirov, Alexander Korotkov, and me. Discussion: https://www.postgresql.org/message-id/flat/20180202180327.222b04b3@wp.localdomain
1 parent 4ab2999 commit 710d90d

File tree

14 files changed

+189
-9
lines changed

14 files changed

+189
-9
lines changed

doc/src/sgml/func.sgml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,21 @@
22742274
<entry><literal>ph</literal></entry>
22752275
</row>
22762276

2277+
<row>
2278+
<entry>
2279+
<indexterm>
2280+
<primary>starts_with</primary>
2281+
</indexterm>
2282+
<literal><function>starts_with(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
2283+
</entry>
2284+
<entry><type>bool</type></entry>
2285+
<entry>
2286+
Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
2287+
</entry>
2288+
<entry><literal>starts_with('alphabet', 'alph')</literal></entry>
2289+
<entry><literal>t</literal></entry>
2290+
</row>
2291+
22772292
<row>
22782293
<entry>
22792294
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
40334048
ILIKE</function>, respectively. All of these operators are
40344049
<productname>PostgreSQL</productname>-specific.
40354050
</para>
4051+
4052+
<para>
4053+
There is also the prefix operator <literal>^@</literal> and the corresponding
4054+
<function>starts_with</function> function, which cover the case where only
4055+
matching against the beginning of a string is needed.
4056+
</para>
40364057
</sect2>
40374058

40384059

doc/src/sgml/spgist.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
<literal>~&lt;~</literal>
162162
<literal>~&gt;=~</literal>
163163
<literal>~&gt;~</literal>
164+
<literal>^@</literal>
164165
</entry>
165166
</row>
166167
<row>

src/backend/access/spgist/spgtextproc.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@
6767
*/
6868
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
6969

70+
/*
71+
* The strategy number of a collation-aware operator on text equals the
72+
* corresponding btree strategy number plus SPG_STRATEGY_ADDITION (10).
73+
*
74+
* Current collation-aware strategies and their corresponding btree strategies:
75+
* 11 BTLessStrategyNumber
76+
* 12 BTLessEqualStrategyNumber
77+
* 14 BTGreaterEqualStrategyNumber
78+
* 15 BTGreaterStrategyNumber
79+
*
* RTPrefixStrategyNumber is also numerically greater than
* SPG_STRATEGY_ADDITION, but it is not a "btree strategy plus 10" value, so
* SPG_IS_COLLATION_AWARE_STRATEGY() explicitly excludes it.
*/
80+
#define SPG_STRATEGY_ADDITION (10)
81+
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
82+
&& (s) != RTPrefixStrategyNumber)
83+
7084
/* Struct for sorting values in picksplit */
7185
typedef struct spgNodePtr
7286
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
496510
* well end with a partial multibyte character, so that applying
497511
* any encoding-sensitive test to it would be risky anyhow.)
498512
*/
499-
if (strategy > 10)
513+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
500514
{
501515
if (collate_is_c)
502-
strategy -= 10;
516+
strategy -= SPG_STRATEGY_ADDITION;
503517
else
504518
continue;
505519
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
526540
if (r < 0)
527541
res = false;
528542
break;
543+
case RTPrefixStrategyNumber:
544+
if (r != 0)
545+
res = false;
546+
break;
529547
default:
530548
elog(ERROR, "unrecognized strategy number: %d",
531549
in->scankeys[j].sk_strategy);
@@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
605623
int queryLen = VARSIZE_ANY_EXHDR(query);
606624
int r;
607625

608-
if (strategy > 10)
626+
if (strategy == RTPrefixStrategyNumber)
627+
{
628+
/*
629+
* If level >= length of query, then reconstrValue must begin with the
630+
* query (prefix) string, so we don't need to check it again.
631+
*/
632+
633+
res = (level >= queryLen) ||
634+
DatumGetBool(DirectFunctionCall2(text_starts_with,
635+
out->leafValue, PointerGetDatum(query)));
636+
637+
if (!res) /* no need to consider remaining conditions */
638+
break;
639+
640+
continue;
641+
}
642+
643+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
609644
{
610645
/* Collation-aware comparison */
611-
strategy -= 10;
646+
strategy -= SPG_STRATEGY_ADDITION;
612647

613648
/* If asserts enabled, verify encoding of reconstructed string */
614649
Assert(pg_verifymbstr(fullValue, fullLen, false));

src/backend/utils/adt/selfuncs.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS)
14881488
}
14891489

14901490
/*
1491+
* prefixsel - Restriction selectivity of the prefix operator (^@).
1492+
*
* Thin wrapper: delegates the estimate to patternsel() with
* Pattern_Type_Prefix and returns its result as a float8.
*/
1493+
Datum
1494+
prefixsel(PG_FUNCTION_ARGS)
1495+
{
1496+
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
1497+
}
1498+
1499+
/*
1500+
*
14911501
* iclikesel - Selectivity of ILIKE pattern match.
14921502
*/
14931503
Datum
@@ -2906,6 +2916,15 @@ likejoinsel(PG_FUNCTION_ARGS)
29062916
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
29072917
}
29082918

2919+
/*
2920+
* prefixjoinsel - Join selectivity of the prefix operator (^@).
2921+
*
* Thin wrapper: delegates the estimate to patternjoinsel() with
* Pattern_Type_Prefix and returns its result as a float8.
*/
2922+
Datum
2923+
prefixjoinsel(PG_FUNCTION_ARGS)
2924+
{
2925+
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
2926+
}
2927+
29092928
/*
29102929
* iclikejoinsel - Join selectivity of ILIKE pattern match.
29112930
*/
@@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
59475966
result = regex_fixed_prefix(patt, true, collation,
59485967
prefix, rest_selec);
59495968
break;
5969+
case Pattern_Type_Prefix:
5970+
/* Prefix type work is trivial. */
5971+
result = Pattern_Prefix_Partial;
5972+
*rest_selec = 1.0; /* all */
5973+
*prefix = makeConst(patt->consttype,
5974+
patt->consttypmod,
5975+
patt->constcollid,
5976+
patt->constlen,
5977+
datumCopy(patt->constvalue,
5978+
patt->constbyval,
5979+
patt->constlen),
5980+
patt->constisnull,
5981+
patt->constbyval);
5982+
break;
59505983
default:
59515984
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
59525985
result = Pattern_Prefix_None; /* keep compiler quiet */

src/backend/utils/adt/varlena.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
17611761
PG_RETURN_BOOL(result);
17621762
}
17631763

1764+
/*
* text_starts_with - test whether the first text argument begins with the
* second one.
*
* Argument 0 is the string, argument 1 is the candidate prefix.  Returns a
* boolean Datum: true when the leading bytes of the string are identical to
* the bytes of the prefix.  The comparison is a plain memcmp(), so it is
* bytewise; no collation- or encoding-aware logic is applied here.  An empty
* prefix yields true, because memcmp() over zero bytes reports equality.
*/
Datum
1765+
text_starts_with(PG_FUNCTION_ARGS)
1766+
{
1767+
Datum arg1 = PG_GETARG_DATUM(0);
1768+
Datum arg2 = PG_GETARG_DATUM(1);
1769+
bool result;
1770+
Size len1,
1771+
len2;
1772+
1773+
/*
* Compare the sizes first: a prefix longer than the string can never
* match, and in that case neither argument is converted with
* DatumGetTextPP().
* NOTE(review): presumably toast_raw_datum_size() obtains the size
* without fetching the full value -- confirm.
*/
len1 = toast_raw_datum_size(arg1);
1774+
len2 = toast_raw_datum_size(arg2);
1775+
if (len2 > len1)
1776+
result = false;
1777+
else
1778+
{
1779+
text *targ1 = DatumGetTextPP(arg1);
1780+
text *targ2 = DatumGetTextPP(arg2);
1781+
1782+
/*
* Compare only as many bytes as the prefix holds.
* NOTE(review): the size check above is presumably what keeps this
* read within the bounds of targ1 -- confirm that
* toast_raw_datum_size() and VARSIZE_ANY_EXHDR() measure consistently.
*/
result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1783+
VARSIZE_ANY_EXHDR(targ2)) == 0);
1784+
1785+
/* Release any copies DatumGetTextPP() had to make of the arguments. */
PG_FREE_IF_COPY(targ1, 0);
1786+
PG_FREE_IF_COPY(targ2, 1);
1787+
}
1788+
1789+
PG_RETURN_BOOL(result);
1790+
}
1791+
17641792
Datum
17651793
bttextcmp(PG_FUNCTION_ARGS)
17661794
{

src/include/access/stratnum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
6868
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
6969
#define RTSuperStrategyNumber 26 /* for inet << */
7070
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
71+
#define RTPrefixStrategyNumber 28 /* for text ^@ */
7172

72-
#define RTMaxStrategyNumber 27
73+
#define RTMaxStrategyNumber 28
7374

7475

7576
#endif /* STRATNUM_H */

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201803311
56+
#define CATALOG_VERSION_NO 201804031
5757

5858
#endif

src/include/catalog/pg_amop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,7 @@ DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
799799
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
800800
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
801801
DATA(insert ( 4017 25 25 15 s 666 4000 0 ));
802+
DATA(insert ( 4017 25 25 28 s 3877 4000 0 ));
802803

803804
/*
804805
* btree jsonb_ops

src/include/catalog/pg_operator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ DESCR("less than");
134134
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
135135
DESCR("equal");
136136
#define TextEqualOperator 98
137+
DATA(insert OID = 3877 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 starts_with prefixsel prefixjoinsel ));
138+
DESCR("starts with");
137139

138140
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
139141
DESCR("append element onto end of array");

src/include/catalog/pg_proc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
209209
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
210210
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
211211
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
212+
DATA(insert OID = 3696 ( starts_with PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_starts_with _null_ _null_ _null_ ));
212213
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
213214
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
214215
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2584,6 +2585,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
25842585
DESCR("join selectivity of NOT LIKE");
25852586
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
25862587
DESCR("join selectivity of case-insensitive regex non-match");
2588+
DATA(insert OID = 3437 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
2589+
DESCR("restriction selectivity of exact prefix");
2590+
DATA(insert OID = 3438 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
2591+
DESCR("join selectivity of exact prefix");
25872592

25882593
/* Aggregate-related functions */
25892594
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy