Skip to content

Commit 7ac8a4b

Browse files
committed
Multibyte encodings support for ISpell dictionary
1 parent e3b9852 commit 7ac8a4b

File tree

5 files changed

+232
-147
lines changed

5 files changed

+232
-147
lines changed

contrib/tsearch2/ispell/regis.c

Lines changed: 80 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
11
#include <stdio.h>
22
#include <stdlib.h>
33
#include <string.h>
4-
#include <ctype.h>
54

65
#include "regis.h"
6+
#include "ts_locale.h"
77
#include "common.h"
88

9-
int
9+
bool
1010
RS_isRegis(const char *str)
1111
{
1212
unsigned char *ptr = (unsigned char *) str;
1313

1414
while (ptr && *ptr)
15-
if (isalpha(*ptr) || *ptr == '[' || *ptr == ']' || *ptr == '^')
16-
ptr++;
15+
if (t_isalpha(ptr) || t_iseq(ptr,'[') || t_iseq(ptr,']') || t_iseq(ptr, '^'))
16+
ptr+=pg_mblen(ptr);
1717
else
18-
return 0;
19-
return 1;
18+
return false;
19+
20+
return true;
2021
}
2122

2223
#define RS_IN_ONEOF 1
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
3839
return ptr;
3940
}
4041

41-
int
42-
RS_compile(Regis * r, int issuffix, const char *str)
42+
void
43+
RS_compile(Regis * r, bool issuffix, char *str)
4344
{
44-
int i,
45-
len = strlen(str);
45+
int len = strlen(str);
4646
int state = RS_IN_WAIT;
47+
char *c = (char*)str;
4748
RegisNode *ptr = NULL;
4849

4950
memset(r, 0, sizeof(Regis));
5051
r->issuffix = (issuffix) ? 1 : 0;
5152

52-
for (i = 0; i < len; i++)
53+
while(*c)
5354
{
54-
unsigned char c = *(((unsigned char *) str) + i);
55-
5655
if (state == RS_IN_WAIT)
5756
{
58-
if (isalpha(c))
57+
if (t_isalpha(c))
5958
{
6059
if (ptr)
6160
ptr = newRegisNode(ptr, len);
6261
else
6362
ptr = r->node = newRegisNode(NULL, len);
64-
ptr->data[0] = c;
63+
COPYCHAR(ptr->data, c);
6564
ptr->type = RSF_ONEOF;
66-
ptr->len = 1;
65+
ptr->len = pg_mblen(c);
6766
}
68-
else if (c == '[')
67+
else if (t_iseq(c,'['))
6968
{
7069
if (ptr)
7170
ptr = newRegisNode(ptr, len);
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
7574
state = RS_IN_ONEOF;
7675
}
7776
else
78-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
77+
ts_error(ERROR, "Error in regis: %s", str );
7978
}
8079
else if (state == RS_IN_ONEOF)
8180
{
82-
if (c == '^')
81+
if (t_iseq(c,'^'))
8382
{
8483
ptr->type = RSF_NONEOF;
8584
state = RS_IN_NONEOF;
8685
}
87-
else if (isalpha(c))
86+
else if (t_isalpha(c))
8887
{
89-
ptr->data[0] = c;
90-
ptr->len = 1;
88+
COPYCHAR(ptr->data, c);
89+
ptr->len = pg_mblen(c);
9190
state = RS_IN_ONEOF_IN;
9291
}
9392
else
94-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
93+
ts_error(ERROR, "Error in regis: %s", str);
9594
}
9695
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
9796
{
98-
if (isalpha(c))
97+
if (t_isalpha(c))
9998
{
100-
ptr->data[ptr->len] = c;
101-
ptr->len++;
99+
COPYCHAR(ptr->data+ptr->len, c);
100+
ptr->len+=pg_mblen(c);
102101
}
103-
else if (c == ']')
102+
else if (t_iseq(c,']'))
104103
state = RS_IN_WAIT;
105104
else
106-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
105+
ts_error(ERROR, "Error in regis: %s", str);
107106
}
108107
else
109-
ts_error(ERROR, "Internal error in RS_compile: %d\n", state);
108+
ts_error(ERROR, "Internal error in RS_compile: %d", state);
109+
c += pg_mblen(c);
110110
}
111111

112112
ptr = r->node;
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
115115
r->nchar++;
116116
ptr = ptr->next;
117117
}
118-
119-
return 0;
120118
}
121119

122120
void
@@ -135,51 +133,77 @@ RS_free(Regis * r)
135133
r->node = NULL;
136134
}
137135

138-
int
139-
RS_execute(Regis * r, const char *str, int len)
136+
#ifdef TS_USE_WIDE
137+
static bool
138+
mb_strchr(char *str, char *c) {
139+
int clen = pg_mblen(c), plen,i;
140+
char *ptr =str;
141+
bool res=false;
142+
143+
clen = pg_mblen(c);
144+
while( *ptr && !res) {
145+
plen = pg_mblen(ptr);
146+
if ( plen == clen ) {
147+
i=plen;
148+
res = true;
149+
while(i--)
150+
if ( *(ptr+i) != *(c+i) ) {
151+
res = false;
152+
break;
153+
}
154+
}
155+
156+
ptr += plen;
157+
}
158+
159+
return res;
160+
}
161+
#else
162+
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
163+
#endif
164+
165+
166+
bool
167+
RS_execute(Regis * r, char *str)
140168
{
141169
RegisNode *ptr = r->node;
142-
unsigned char *c;
170+
char *c = str;
171+
int len=0;
143172

144-
if (len < 0)
145-
len = strlen(str);
173+
while(*c) {
174+
len++;
175+
c += pg_mblen(c);
176+
}
146177

147178
if (len < r->nchar)
148179
return 0;
149180

150-
if (r->issuffix)
151-
c = ((unsigned char *) str) + len - r->nchar;
152-
else
153-
c = (unsigned char *) str;
181+
c = str;
182+
if (r->issuffix) {
183+
len -= r->nchar;
184+
while(len-- > 0)
185+
c += pg_mblen(c);
186+
}
187+
154188

155189
while (ptr)
156190
{
157191
switch (ptr->type)
158192
{
159193
case RSF_ONEOF:
160-
if (ptr->len == 0)
161-
{
162-
if (*c != *(ptr->data))
163-
return 0;
164-
}
165-
else if (strchr((char *) ptr->data, *c) == NULL)
166-
return 0;
194+
if ( mb_strchr((char *) ptr->data, c) != true )
195+
return false;
167196
break;
168197
case RSF_NONEOF:
169-
if (ptr->len == 0)
170-
{
171-
if (*c == *(ptr->data))
172-
return 0;
173-
}
174-
else if (strchr((char *) ptr->data, *c) != NULL)
175-
return 0;
198+
if ( mb_strchr((char *) ptr->data, c) == true )
199+
return false;
176200
break;
177201
default:
178202
ts_error(ERROR, "RS_execute: Unknown type node: %d\n", ptr->type);
179203
}
180204
ptr = ptr->next;
181-
c++;
205+
c+=pg_mblen(c);
182206
}
183207

184-
return 1;
208+
return true;
185209
}

contrib/tsearch2/ispell/regis.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ typedef struct Regis
2727
unused:15;
2828
} Regis;
2929

30-
int RS_isRegis(const char *str);
30+
bool RS_isRegis(const char *str);
3131

32-
int RS_compile(Regis * r, int issuffix, const char *str);
32+
void RS_compile(Regis * r, bool issuffix, char *str);
3333
void RS_free(Regis * r);
3434

35-
/*возвращает 1 если матчится */
36-
int RS_execute(Regis * r, const char *str, int len);
35+
/*returns true if matches */
36+
bool RS_execute(Regis * r, char *str);
3737

3838
#endif

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy