Skip to content

Commit cb4ea99

Browse files
committed
Improve support of multibyte encodings in:
- tsvector_(in|out)
- tsquery_(in|out)
- to_tsvector
- to_tsquery, plainto_tsquery
- 'simple' dictionary
1 parent ec0baf9 commit cb4ea99

File tree

19 files changed: +263 additions, -146 deletions (lines changed)

contrib/tsearch2/dict.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ void sortstoplist(StopList * s);
1414
void freestoplist(StopList * s);
1515
void readstoplist(text *in, StopList * s);
1616
bool searchstoplist(StopList * s, char *key);
17-
char *lowerstr(char *str);
1817

1918
typedef struct
2019
{

contrib/tsearch2/dict_ex.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
#include "dict.h"
88
#include "common.h"
9+
#include "ts_locale.h"
910

1011
typedef struct
1112
{

contrib/tsearch2/dict_ispell.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include "dict.h"
1010
#include "common.h"
1111
#include "ispell/spell.h"
12+
#include "ts_locale.h"
1213

1314
typedef struct
1415
{

contrib/tsearch2/dict_snowball.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "snowball/header.h"
1111
#include "snowball/english_stem.h"
1212
#include "snowball/russian_stem.h"
13+
#include "ts_locale.h"
1314

1415
typedef struct
1516
{

contrib/tsearch2/dict_syn.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
#include "dict.h"
1010
#include "common.h"
11+
#include "ts_locale.h"
1112

1213
#define SYNBUFLEN 4096
1314
typedef struct

contrib/tsearch2/gendict/dict_snowball.c.IN

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "common.h"
1313
#include "snowball/header.h"
1414
#include "subinclude.h"
15+
#include "ts_locale.h"
1516

1617
typedef struct {
1718
struct SN_env *z;

contrib/tsearch2/gendict/dict_tmpl.c.IN

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "common.h"
1313

1414
#include "subinclude.h"
15+
#include "ts_locale.h"
1516

1617
HASINIT typedef struct {
1718
HASINIT StopList stoplist;

contrib/tsearch2/ispell/spell.c

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
#include "postgres.h"
77

88
#include "spell.h"
9+
#include "ts_locale.h"
910

1011
#define MAX_NORM 1024
1112
#define MAXNORMLEN 256
@@ -30,18 +31,6 @@ cmpspellaffix(const void *s1, const void *s2)
3031
return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
3132
}
3233

33-
static void
34-
strlower(char *str)
35-
{
36-
unsigned char *ptr = (unsigned char *) str;
37-
38-
while (*ptr)
39-
{
40-
*ptr = tolower(*ptr);
41-
ptr++;
42-
}
43-
}
44-
4534
static char *
4635
strnduplicate(char *s, int len)
4736
{
@@ -175,7 +164,7 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
175164
}
176165
else
177166
flag = "";
178-
strlower(str);
167+
lowerstr(str);
179168
/* Dont load words if first letter is not required */
180169
/* It allows to optimize loading at search time */
181170
s = str;
@@ -385,7 +374,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
385374
*s = 0;
386375
if (!*str)
387376
continue;
388-
strlower(str);
377+
lowerstr(str);
389378
strcpy(mask, "");
390379
strcpy(find, "");
391380
strcpy(repl, "");
@@ -851,7 +840,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
851840

852841
if (wrdlen > MAXNORMLEN)
853842
return NULL;
854-
strlower(word);
843+
lowerstr(word);
855844
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
856845
*cur = NULL;
857846

contrib/tsearch2/prs_dcfg.c

Lines changed: 34 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
#include "dict.h"
1010
#include "common.h"
11+
#include "ts_locale.h"
1112

1213
#define CS_WAITKEY 0
1314
#define CS_INKEY 1
@@ -30,11 +31,11 @@ nstrdup(char *ptr, int len)
3031
cptr = ptr = res;
3132
while (*ptr)
3233
{
33-
if (*ptr == '\\')
34+
if (t_iseq(ptr, '\\'))
3435
ptr++;
35-
*cptr = *ptr;
36-
ptr++;
37-
cptr++;
36+
COPYCHAR( cptr, ptr );
37+
cptr+=pg_mblen(ptr);
38+
ptr+=pg_mblen(ptr);
3839
}
3940
*cptr = '\0';
4041

@@ -52,9 +53,9 @@ parse_cfgdict(text *in, Map ** m)
5253

5354
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
5455
{
55-
if (*ptr == ',')
56+
if ( t_iseq(ptr, ',') )
5657
num++;
57-
ptr++;
58+
ptr+=pg_mblen(ptr);
5859
}
5960

6061
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
@@ -64,93 +65,93 @@ parse_cfgdict(text *in, Map ** m)
6465
{
6566
if (state == CS_WAITKEY)
6667
{
67-
if (isalpha((unsigned char) *ptr))
68+
if (t_isalpha(ptr))
6869
{
6970
begin = ptr;
7071
state = CS_INKEY;
7172
}
72-
else if (!isspace((unsigned char) *ptr))
73+
else if (!t_isspace(ptr))
7374
ereport(ERROR,
7475
(errcode(ERRCODE_SYNTAX_ERROR),
7576
errmsg("syntax error"),
76-
errdetail("Syntax error in position %d near \"%c\"",
77-
(int) (ptr - VARDATA(in)), *ptr)));
77+
errdetail("Syntax error in position %d",
78+
(int) (ptr - VARDATA(in)))));
7879
}
7980
else if (state == CS_INKEY)
8081
{
81-
if (isspace((unsigned char) *ptr))
82+
if (t_isspace(ptr))
8283
{
8384
mptr->key = nstrdup(begin, ptr - begin);
8485
state = CS_WAITEQ;
8586
}
86-
else if (*ptr == '=')
87+
else if (t_iseq(ptr,'='))
8788
{
8889
mptr->key = nstrdup(begin, ptr - begin);
8990
state = CS_WAITVALUE;
9091
}
91-
else if (!isalpha((unsigned char) *ptr))
92+
else if (!t_isalpha(ptr))
9293
ereport(ERROR,
9394
(errcode(ERRCODE_SYNTAX_ERROR),
9495
errmsg("syntax error"),
95-
errdetail("Syntax error in position %d near \"%c\"",
96-
(int) (ptr - VARDATA(in)), *ptr)));
96+
errdetail("Syntax error in position %d",
97+
(int) (ptr - VARDATA(in)))));
9798
}
9899
else if (state == CS_WAITEQ)
99100
{
100-
if (*ptr == '=')
101+
if (t_iseq(ptr, '='))
101102
state = CS_WAITVALUE;
102-
else if (!isspace((unsigned char) *ptr))
103+
else if (!t_isspace(ptr))
103104
ereport(ERROR,
104105
(errcode(ERRCODE_SYNTAX_ERROR),
105106
errmsg("syntax error"),
106-
errdetail("Syntax error in position %d near \"%c\"",
107-
(int) (ptr - VARDATA(in)), *ptr)));
107+
errdetail("Syntax error in position %d",
108+
(int) (ptr - VARDATA(in)))));
108109
}
109110
else if (state == CS_WAITVALUE)
110111
{
111-
if (*ptr == '"')
112+
if (t_iseq(ptr, '"'))
112113
{
113114
begin = ptr + 1;
114115
state = CS_INVALUE;
115116
}
116-
else if (!isspace((unsigned char) *ptr))
117+
else if (!t_isspace(ptr))
117118
{
118119
begin = ptr;
119120
state = CS_IN2VALUE;
120121
}
121122
}
122123
else if (state == CS_INVALUE)
123124
{
124-
if (*ptr == '"')
125+
if (t_iseq(ptr, '"'))
125126
{
126127
mptr->value = nstrdup(begin, ptr - begin);
127128
mptr++;
128129
state = CS_WAITDELIM;
129130
}
130-
else if (*ptr == '\\')
131+
else if (t_iseq(ptr, '\\'))
131132
state = CS_INESC;
132133
}
133134
else if (state == CS_IN2VALUE)
134135
{
135-
if (isspace((unsigned char) *ptr) || *ptr == ',')
136+
if (t_isspace(ptr) || t_iseq(ptr, ','))
136137
{
137138
mptr->value = nstrdup(begin, ptr - begin);
138139
mptr++;
139-
state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
140+
state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
140141
}
141-
else if (*ptr == '\\')
142+
else if (t_iseq(ptr, '\\'))
142143
state = CS_INESC;
143144
}
144145
else if (state == CS_WAITDELIM)
145146
{
146-
if (*ptr == ',')
147+
if (t_iseq(ptr, ','))
147148
state = CS_WAITKEY;
148-
else if (!isspace((unsigned char) *ptr))
149+
else if (!t_isspace(ptr))
149150
ereport(ERROR,
150151
(errcode(ERRCODE_SYNTAX_ERROR),
151152
errmsg("syntax error"),
152-
errdetail("Syntax error in position %d near \"%c\"",
153-
(int) (ptr - VARDATA(in)), *ptr)));
153+
errdetail("Syntax error in position %d",
154+
(int) (ptr - VARDATA(in)))));
154155
}
155156
else if (state == CS_INESC)
156157
state = CS_INVALUE;
@@ -160,9 +161,9 @@ parse_cfgdict(text *in, Map ** m)
160161
ereport(ERROR,
161162
(errcode(ERRCODE_SYNTAX_ERROR),
162163
errmsg("bad parser state"),
163-
errdetail("%d at position %d near \"%c\"",
164-
state, (int) (ptr - VARDATA(in)), *ptr)));
165-
ptr++;
164+
errdetail("%d at position %d",
165+
state, (int) (ptr - VARDATA(in)))));
166+
ptr+=pg_mblen(ptr);
166167
}
167168

168169
if (state == CS_IN2VALUE)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy