Skip to content

Commit bb6bbc3

Browse files
committed
Defend against non-ASCII letters in fuzzystrmatch code. The functions
still don't behave very sanely for multibyte encodings, but at least they won't be indexing off the ends of static arrays.
1 parent c35eb15 commit bb6bbc3

File tree

1 file changed

+28
-10
lines changed

1 file changed

+28
-10
lines changed

contrib/fuzzystrmatch/fuzzystrmatch.c

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Joe Conway <mail@joeconway.com>
77
*
8-
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.c,v 1.28 2009/01/01 17:23:32 momjian Exp $
8+
* $PostgreSQL: pgsql/contrib/fuzzystrmatch/fuzzystrmatch.c,v 1.29 2009/04/07 15:53:54 tgl Exp $
99
* Copyright (c) 2001-2009, PostgreSQL Global Development Group
1010
* ALL RIGHTS RESERVED;
1111
*
@@ -74,7 +74,15 @@ static void _soundex(const char *instr, char *outstr);
7474
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
7575
static const char *soundex_table = "01230120022455012623010202";
7676

77-
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
77+
/*
 * soundex_code: map a single character to its soundex code.
 *
 * The character is upper-cased with toupper().  If the result lies in
 * 'A'..'Z', the corresponding entry of soundex_table is returned.  Any
 * other character is returned as-is (after upper-casing), so the table
 * is never indexed outside its 26 entries.
 */
static char
78+
soundex_code(char letter)
79+
{
80+
letter = toupper((unsigned char) letter);
81+
/* Defend against non-ASCII letters: only 'A'..'Z' may index soundex_table */
82+
if (letter >= 'A' && letter <= 'Z')
83+
return soundex_table[letter - 'A'];
84+
return letter;
85+
}
7886

7987

8088
/*
@@ -143,27 +151,37 @@ static int _metaphone(char *word, int max_phonemes, char **phoned_word);
143151

144152
/*-- Character encoding array & accessing macros --*/
145153
/* Stolen directly out of the book... */
146-
char _codes[26] = {
154+
/*
 * Per-letter flag table, indexed by (upper-cased letter - 'A').
 * Each entry is a bitmask tested by the accessor macros below:
 *	 1	vowel (isvowel)
 *	 2	passed through unchanged (NOCHANGE)
 *	 4	forms a diphthong when preceding H (AFFECTH)
 *	 8	makes C and G soft (MAKESOFT)
 *	16	prevents GH from becoming F (NOGHTOF)
 * Note that the entry for 'l' also has bit 2 set.
 */
static const char _codes[26] = {
147155
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
148156
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
149157
};
150158

159+
/*
 * getcode: look up the _codes flag bitmask for a character.
 *
 * Returns the _codes entry when c is alphabetic according to isalpha()
 * and its upper-cased form lies in 'A'..'Z'.  Returns 0 (no flags set)
 * for every other character, so _codes is never indexed outside its
 * 26 entries.
 */
static int
160+
getcode(char c)
161+
{
162+
if (isalpha((unsigned char) c))
163+
{
164+
c = toupper((unsigned char) c);
165+
/* Defend against non-ASCII letters: only 'A'..'Z' may index _codes */
166+
if (c >= 'A' && c <= 'Z')
167+
return _codes[c - 'A'];
168+
}
169+
return 0;
170+
}
151171

152-
#define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0)
153-
154-
#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
172+
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
155173

156174
/* These letters are passed through unchanged */
157-
#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */
175+
#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
158176

159177
/* These form diphthongs when preceding H */
160-
#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
178+
#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
161179

162180
/* These make C and G soft */
163-
#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
181+
#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
164182

165183
/* These prevent GH from becoming F */
166-
#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
184+
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
167185

168186

169187
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy