Skip to content

Commit d8f3752

Browse files
committed
Generate double-sided LIKE indexquals that work even in weird locales,
by continuing to increment the rightmost character until we get a string that is demonstrably greater than the pattern prefix.
1 parent: 5f68d5c · commit: d8f3752

File tree

1 file changed

+163
-45
lines changed

1 file changed

+163
-45
lines changed

src/backend/optimizer/path/indxpath.c

Lines changed: 163 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.74 1999/12/31 03:41:03 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.75 1999/12/31 05:38:25 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -23,6 +23,7 @@
2323
#include "catalog/pg_amop.h"
2424
#include "catalog/pg_operator.h"
2525
#include "executor/executor.h"
26+
#include "mb/pg_wchar.h"
2627
#include "nodes/makefuncs.h"
2728
#include "nodes/nodeFuncs.h"
2829
#include "optimizer/clauses.h"
@@ -92,7 +93,12 @@ static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive,
9293
char **prefix);
9394
static List *prefix_quals(Var *leftop, Oid expr_op,
9495
char *prefix, Prefix_Status pstatus);
96+
static char *make_greater_string(const char * str, Oid datatype);
9597
static Oid find_operator(const char * opname, Oid datatype);
98+
static Datum string_to_datum(const char * str, Oid datatype);
99+
static Const *string_to_const(const char * str, Oid datatype);
100+
static bool string_lessthan(const char * str1, const char * str2,
101+
Oid datatype);
96102

97103

98104
/*
@@ -1653,31 +1659,31 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
16531659
case OID_TEXT_REGEXEQ_OP:
16541660
case OID_TEXT_ICREGEXEQ_OP:
16551661
if (! op_class(find_operator(">=", TEXTOID), opclass, relam) ||
1656-
! op_class(find_operator("<=", TEXTOID), opclass, relam))
1662+
! op_class(find_operator("<", TEXTOID), opclass, relam))
16571663
isIndexable = false;
16581664
break;
16591665

16601666
case OID_BPCHAR_LIKE_OP:
16611667
case OID_BPCHAR_REGEXEQ_OP:
16621668
case OID_BPCHAR_ICREGEXEQ_OP:
16631669
if (! op_class(find_operator(">=", BPCHAROID), opclass, relam) ||
1664-
! op_class(find_operator("<=", BPCHAROID), opclass, relam))
1670+
! op_class(find_operator("<", BPCHAROID), opclass, relam))
16651671
isIndexable = false;
16661672
break;
16671673

16681674
case OID_VARCHAR_LIKE_OP:
16691675
case OID_VARCHAR_REGEXEQ_OP:
16701676
case OID_VARCHAR_ICREGEXEQ_OP:
16711677
if (! op_class(find_operator(">=", VARCHAROID), opclass, relam) ||
1672-
! op_class(find_operator("<=", VARCHAROID), opclass, relam))
1678+
! op_class(find_operator("<", VARCHAROID), opclass, relam))
16731679
isIndexable = false;
16741680
break;
16751681

16761682
case OID_NAME_LIKE_OP:
16771683
case OID_NAME_REGEXEQ_OP:
16781684
case OID_NAME_ICREGEXEQ_OP:
16791685
if (! op_class(find_operator(">=", NAMEOID), opclass, relam) ||
1680-
! op_class(find_operator("<=", NAMEOID), opclass, relam))
1686+
! op_class(find_operator("<", NAMEOID), opclass, relam))
16811687
isIndexable = false;
16821688
break;
16831689
}
@@ -1774,7 +1780,7 @@ expand_indexqual_conditions(List *indexquals)
17741780

17751781
/*
17761782
* Extract the fixed prefix, if any, for a LIKE pattern.
1777-
* *prefix is set to a palloc'd prefix string with 1 spare byte,
1783+
* *prefix is set to a palloc'd prefix string,
17781784
* or to NULL if no fixed prefix exists for the pattern.
17791785
* The return value distinguishes no fixed prefix, a partial prefix,
17801786
* or an exact-match-only pattern.
@@ -1786,7 +1792,7 @@ like_fixed_prefix(char *patt, char **prefix)
17861792
int pos,
17871793
match_pos;
17881794

1789-
*prefix = match = palloc(strlen(patt)+2);
1795+
*prefix = match = palloc(strlen(patt)+1);
17901796
match_pos = 0;
17911797

17921798
for (pos = 0; patt[pos]; pos++)
@@ -1823,7 +1829,7 @@ like_fixed_prefix(char *patt, char **prefix)
18231829

18241830
/*
18251831
* Extract the fixed prefix, if any, for a regex pattern.
1826-
* *prefix is set to a palloc'd prefix string with 1 spare byte,
1832+
* *prefix is set to a palloc'd prefix string,
18271833
* or to NULL if no fixed prefix exists for the pattern.
18281834
* The return value distinguishes no fixed prefix, a partial prefix,
18291835
* or an exact-match-only pattern.
@@ -1858,7 +1864,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive,
18581864
}
18591865

18601866
/* OK, allocate space for pattern */
1861-
*prefix = match = palloc(strlen(patt)+2);
1867+
*prefix = match = palloc(strlen(patt)+1);
18621868
match_pos = 0;
18631869

18641870
/* note start at pos 1 to skip leading ^ */
@@ -1906,11 +1912,10 @@ prefix_quals(Var *leftop, Oid expr_op,
19061912
List *result;
19071913
Oid datatype;
19081914
Oid oproid;
1909-
void *conval;
19101915
Const *con;
19111916
Oper *op;
19121917
Expr *expr;
1913-
int prefixlen;
1918+
char *greaterstr;
19141919

19151920
Assert(pstatus != Prefix_None);
19161921

@@ -1953,14 +1958,7 @@ prefix_quals(Var *leftop, Oid expr_op,
19531958
oproid = find_operator("=", datatype);
19541959
if (oproid == InvalidOid)
19551960
elog(ERROR, "prefix_quals: no = operator for type %u", datatype);
1956-
/* Note: we cheat a little by assuming that textin() will do for
1957-
* bpchar and varchar constants too...
1958-
*/
1959-
conval = (datatype == NAMEOID) ?
1960-
(void*) namein(prefix) : (void*) textin(prefix);
1961-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
1962-
PointerGetDatum(conval),
1963-
false, false, false, false);
1961+
con = string_to_const(prefix, datatype);
19641962
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
19651963
expr = make_opclause(op, leftop, (Var *) con);
19661964
result = lcons(expr, NIL);
@@ -1975,43 +1973,92 @@ prefix_quals(Var *leftop, Oid expr_op,
19751973
oproid = find_operator(">=", datatype);
19761974
if (oproid == InvalidOid)
19771975
elog(ERROR, "prefix_quals: no >= operator for type %u", datatype);
1978-
conval = (datatype == NAMEOID) ?
1979-
(void*) namein(prefix) : (void*) textin(prefix);
1980-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
1981-
PointerGetDatum(conval),
1982-
false, false, false, false);
1976+
con = string_to_const(prefix, datatype);
19831977
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
19841978
expr = make_opclause(op, leftop, (Var *) con);
19851979
result = lcons(expr, NIL);
19861980

19871981
/*
1988-
* In ASCII locale we say "x <= prefix\377". This does not
1989-
* work for non-ASCII collation orders, and it's not really
1990-
* right even for ASCII. FIX ME!
1991-
* Note we assume the passed prefix string is workspace with
1992-
* an extra byte, as created by the xxx_fixed_prefix routines above.
1982+
* If we can create a string larger than the prefix, say "x < greaterstr".
19931983
*/
1994-
#ifndef USE_LOCALE
1995-
prefixlen = strlen(prefix);
1996-
prefix[prefixlen] = '\377';
1997-
prefix[prefixlen+1] = '\0';
1984+
greaterstr = make_greater_string(prefix, datatype);
1985+
if (greaterstr)
1986+
{
1987+
oproid = find_operator("<", datatype);
1988+
if (oproid == InvalidOid)
1989+
elog(ERROR, "prefix_quals: no < operator for type %u", datatype);
1990+
con = string_to_const(greaterstr, datatype);
1991+
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
1992+
expr = make_opclause(op, leftop, (Var *) con);
1993+
result = lappend(result, expr);
1994+
pfree(greaterstr);
1995+
}
19981996

1999-
oproid = find_operator("<=", datatype);
2000-
if (oproid == InvalidOid)
2001-
elog(ERROR, "prefix_quals: no <= operator for type %u", datatype);
2002-
conval = (datatype == NAMEOID) ?
2003-
(void*) namein(prefix) : (void*) textin(prefix);
2004-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
2005-
PointerGetDatum(conval),
2006-
false, false, false, false);
2007-
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
2008-
expr = make_opclause(op, leftop, (Var *) con);
2009-
result = lappend(result, expr);
1997+
return result;
1998+
}
1999+
2000+
/*
2001+
* Try to generate a string greater than the given string or any string it is
2002+
* a prefix of. If successful, return a palloc'd string; else return NULL.
2003+
*
2004+
* To work correctly in non-ASCII locales with weird collation orders,
2005+
* we cannot simply increment "foo" to "fop" --- we have to check whether
2006+
* we actually produced a string greater than the given one. If not,
2007+
* increment the righthand byte again and repeat. If we max out the righthand
2008+
* byte, truncate off the last character and start incrementing the next.
2009+
* For example, if "z" were the last character in the sort order, then we
2010+
* could produce "foo" as a string greater than "fonz".
2011+
*
2012+
* This could be rather slow in the worst case, but in most cases we won't
2013+
* have to try more than one or two strings before succeeding.
2014+
*
2015+
* XXX in a sufficiently weird locale, this might produce incorrect results?
2016+
* For example, in German I believe "ss" is treated specially --- if we are
2017+
* given "foos" and return "foot", will this actually be greater than "fooss"?
2018+
*/
2019+
static char *
2020+
make_greater_string(const char * str, Oid datatype)
2021+
{
2022+
char *workstr;
2023+
int len;
2024+
2025+
/* Make a modifiable copy, which will be our return value if successful */
2026+
workstr = pstrdup((char *) str);
2027+
2028+
while ((len = strlen(workstr)) > 0)
2029+
{
2030+
unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
2031+
2032+
/*
2033+
* Try to generate a larger string by incrementing the last byte.
2034+
*/
2035+
while (*lastchar < (unsigned char) 255)
2036+
{
2037+
(*lastchar)++;
2038+
if (string_lessthan(str, workstr, datatype))
2039+
return workstr; /* Success! */
2040+
}
2041+
/*
2042+
* Truncate off the last character, which might be more than 1 byte
2043+
* in MULTIBYTE case.
2044+
*/
2045+
#ifdef MULTIBYTE
2046+
len = pg_mbcliplen((const unsigned char *) workstr, len, len-1);
2047+
workstr[len] = '\0';
2048+
#else
2049+
*lastchar = '\0';
20102050
#endif
2051+
}
20112052

2012-
return result;
2053+
/* Failed... */
2054+
pfree(workstr);
2055+
return NULL;
20132056
}
20142057

2058+
/*
2059+
* Handy subroutines for match_special_index_operator() and friends.
2060+
*/
2061+
20152062
/* See if there is a binary op of the given name for the given datatype */
20162063
static Oid
20172064
find_operator(const char * opname, Oid datatype)
@@ -2027,3 +2074,74 @@ find_operator(const char * opname, Oid datatype)
20272074
return InvalidOid;
20282075
return optup->t_data->t_oid;
20292076
}
2077+
2078+
/*
2079+
* Generate a Datum of the appropriate type from a C string.
2080+
* Note that all of the supported types are pass-by-ref, so the
2081+
* returned value should be pfree'd if no longer needed.
2082+
*/
2083+
static Datum
2084+
string_to_datum(const char * str, Oid datatype)
2085+
{
2086+
/* We cheat a little by assuming that textin() will do for
2087+
* bpchar and varchar constants too...
2088+
*/
2089+
if (datatype == NAMEOID)
2090+
return PointerGetDatum(namein((char *) str));
2091+
else
2092+
return PointerGetDatum(textin((char *) str));
2093+
}
2094+
2095+
/*
2096+
* Generate a Const node of the appropriate type from a C string.
2097+
*/
2098+
static Const *
2099+
string_to_const(const char * str, Oid datatype)
2100+
{
2101+
Datum conval = string_to_datum(str, datatype);
2102+
2103+
return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
2104+
conval, false, false, false, false);
2105+
}
2106+
2107+
/*
2108+
* Test whether two strings are "<" according to the rules of the given
2109+
* datatype. We do this the hard way, ie, actually calling the type's
2110+
* "<" operator function, to ensure we get the right result...
2111+
*/
2112+
static bool
2113+
string_lessthan(const char * str1, const char * str2, Oid datatype)
2114+
{
2115+
Datum datum1 = string_to_datum(str1, datatype);
2116+
Datum datum2 = string_to_datum(str2, datatype);
2117+
bool result;
2118+
2119+
switch (datatype)
2120+
{
2121+
case TEXTOID:
2122+
result = text_lt((text *) datum1, (text *) datum2);
2123+
break;
2124+
2125+
case BPCHAROID:
2126+
result = bpcharlt((char *) datum1, (char *) datum2);
2127+
break;
2128+
2129+
case VARCHAROID:
2130+
result = varcharlt((char *) datum1, (char *) datum2);
2131+
break;
2132+
2133+
case NAMEOID:
2134+
result = namelt((NameData *) datum1, (NameData *) datum2);
2135+
break;
2136+
2137+
default:
2138+
elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
2139+
result = false;
2140+
break;
2141+
}
2142+
2143+
pfree(DatumGetPointer(datum1));
2144+
pfree(DatumGetPointer(datum2));
2145+
2146+
return result;
2147+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy