Skip to content

Commit 03c811a

Browse files
committed
Fix planner's test for case-foldable characters in ILIKE with ICU.
As coded, the ICU-collation path in pattern_char_isalpha() failed to consider regular ASCII letters to be case-varying. This led to like_fixed_prefix treating too much of an ILIKE pattern as being a fixed prefix, so that indexscans derived from an ILIKE clause might miss entries that they should find.

Per bug #15892 from James Inform. This is an oversight in the original ICU patch (commit eccfef8), so back-patch to v10 where that came in.

Discussion: https://postgr.es/m/15892-e5d2bea3e8a04a1b@postgresql.org
1 parent 3c92658 commit 03c811a

File tree

3 files changed

+50
-5
lines changed

3 files changed

+50
-5
lines changed

src/backend/utils/adt/like_support.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1437,8 +1437,9 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
14371437
* Check whether char is a letter (and, hence, subject to case-folding)
14381438
*
14391439
* In multibyte character sets or with ICU, we can't use isalpha, and it does
1440-
* not seem worth trying to convert to wchar_t to use iswalpha. Instead, just
1441-
* assume any multibyte char is potentially case-varying.
1440+
* not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1441+
* Instead, just assume any non-ASCII char is potentially case-varying, and
1442+
* hard-wire knowledge of which ASCII chars are letters.
14421443
*/
14431444
static int
14441445
pattern_char_isalpha(char c, bool is_multibyte,
@@ -1449,7 +1450,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
14491450
else if (is_multibyte && IS_HIGHBIT_SET(c))
14501451
return true;
14511452
else if (locale && locale->provider == COLLPROVIDER_ICU)
1452-
return IS_HIGHBIT_SET(c) ? true : false;
1453+
return IS_HIGHBIT_SET(c) ||
1454+
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
14531455
#ifdef HAVE_LOCALE_T
14541456
else if (locale && locale->provider == COLLPROVIDER_LIBC)
14551457
return isalpha_l((unsigned char) c, locale->info.lt);

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,38 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
983983
collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_tests.collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX")
984984
(4 rows)
985985

986+
set enable_seqscan = off;
987+
explain (costs off)
988+
select * from collate_test1 where b ilike 'abc';
989+
QUERY PLAN
990+
-------------------------------
991+
Seq Scan on collate_test1
992+
Filter: (b ~~* 'abc'::text)
993+
(2 rows)
994+
995+
select * from collate_test1 where b ilike 'abc';
996+
a | b
997+
---+-----
998+
1 | abc
999+
4 | ABC
1000+
(2 rows)
1001+
1002+
explain (costs off)
1003+
select * from collate_test1 where b ilike 'ABC';
1004+
QUERY PLAN
1005+
-------------------------------
1006+
Seq Scan on collate_test1
1007+
Filter: (b ~~* 'ABC'::text)
1008+
(2 rows)
1009+
1010+
select * from collate_test1 where b ilike 'ABC';
1011+
a | b
1012+
---+-----
1013+
1 | abc
1014+
4 | ABC
1015+
(2 rows)
1016+
1017+
reset enable_seqscan;
9861018
-- schema manipulation commands
9871019
CREATE ROLE regress_test_role;
9881020
CREATE SCHEMA test_schema;
@@ -1867,8 +1899,9 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
18671899
(1 row)
18681900

18691901
-- cleanup
1902+
RESET search_path;
18701903
SET client_min_messages TO warning;
18711904
DROP SCHEMA collate_tests CASCADE;
1872-
RESET search_path;
1905+
RESET client_min_messages;
18731906
-- leave a collation for pg_upgrade test
18741907
CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,15 @@ CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
341341

342342
SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
343343

344+
set enable_seqscan = off;
345+
explain (costs off)
346+
select * from collate_test1 where b ilike 'abc';
347+
select * from collate_test1 where b ilike 'abc';
348+
explain (costs off)
349+
select * from collate_test1 where b ilike 'ABC';
350+
select * from collate_test1 where b ilike 'ABC';
351+
reset enable_seqscan;
352+
344353

345354
-- schema manipulation commands
346355

@@ -712,9 +721,10 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
712721

713722

714723
-- cleanup
724+
RESET search_path;
715725
SET client_min_messages TO warning;
716726
DROP SCHEMA collate_tests CASCADE;
717-
RESET search_path;
727+
RESET client_min_messages;
718728

719729
-- leave a collation for pg_upgrade test
720730
CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy