Skip to content

Commit de55c0c

Browse files
committed
1. Fix affixes with an empty replacement (AFAIK, this occurs only in Russian)
2. Optimize regex execution
1 parent 153d5d3 commit de55c0c

File tree

6 files changed

+339
-76
lines changed

6 files changed

+339
-76
lines changed

contrib/tsearch2/ispell/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.5 2003/11/29 19:51:36 pgsql Exp $
1+
# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.6 2004/06/23 11:06:11 teodor Exp $
22

33
subdir = contrib/tsearch2/ispell
44
top_builddir = ../../..
@@ -8,7 +8,7 @@ include $(top_builddir)/src/Makefile.global
88
PG_CPPFLAGS = -I$(srcdir)/.. $(CPPFLAGS)
99
override CFLAGS += $(CFLAGS_SL)
1010

11-
SUBOBJS = spell.o
11+
SUBOBJS = spell.o regis.o
1212

1313
all: SUBSYS.o
1414

contrib/tsearch2/ispell/regis.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <ctype.h>
5+
6+
#include "regis.h"
7+
#include "common.h"
8+
9+
int
10+
RS_isRegis(const char *str) {
11+
unsigned char *ptr=(unsigned char *)str;
12+
13+
while(ptr && *ptr)
14+
if ( isalpha(*ptr) || *ptr=='[' || *ptr==']' || *ptr=='^')
15+
ptr++;
16+
else
17+
return 0;
18+
return 1;
19+
}
20+
21+
/* Parser states used by RS_compile while scanning a pattern. */
/* just after '[': the next character must be '^' or the first letter of the class */
#define RS_IN_ONEOF 1
22+
/* inside "[...]" after its first letter: more letters or the closing ']' */
#define RS_IN_ONEOF_IN 2
23+
/* inside "[^...]": letters or the closing ']' */
#define RS_IN_NONEOF 3
24+
/* between items (also the initial state): a letter or '[' is expected */
#define RS_IN_WAIT 4
25+
26+
/*
 * newRegisNode
 *    Allocate one zero-filled RegisNode and, when prev is not NULL, link it
 *    in as prev->next.
 *
 * The allocation request is RNHDRSZ + len + 1 bytes.  An allocation failure
 * is reported through ts_error(ERROR, ...).
 *
 * Returns the newly allocated node.
 */
static RegisNode *
newRegisNode(RegisNode *prev, int len)
{
    size_t      nbytes = RNHDRSZ + len + 1;
    RegisNode  *node = (RegisNode *) malloc(nbytes);

    if (node == NULL)
        ts_error(ERROR, "No memory");

    /* zero-fill so that type/len/next start cleared and data is NUL-filled */
    memset(node, 0, nbytes);

    if (prev != NULL)
        prev->next = node;

    return node;
}
37+
38+
int
39+
RS_compile(Regis *r, int issuffix, const char *str) {
40+
int i,len = strlen(str);
41+
int state = RS_IN_WAIT;
42+
RegisNode *ptr=NULL;
43+
44+
memset(r,0,sizeof(Regis));
45+
r->issuffix = (issuffix) ? 1 : 0;
46+
47+
for(i=0;i<len;i++) {
48+
unsigned char c = *( ( (unsigned char*)str ) + i );
49+
if ( state == RS_IN_WAIT ) {
50+
if ( isalpha(c) ) {
51+
if ( ptr )
52+
ptr = newRegisNode(ptr,len);
53+
else
54+
ptr = r->node = newRegisNode(NULL,len);
55+
ptr->data[ 0 ] = c;
56+
ptr->type = RSF_ONEOF;
57+
ptr->len=1;
58+
} else if ( c=='[' ) {
59+
if ( ptr )
60+
ptr = newRegisNode(ptr,len);
61+
else
62+
ptr = r->node = newRegisNode(NULL,len);
63+
ptr->type = RSF_ONEOF;
64+
state=RS_IN_ONEOF;
65+
} else
66+
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
67+
} else if ( state == RS_IN_ONEOF ) {
68+
if ( c=='^' ) {
69+
ptr->type = RSF_NONEOF;
70+
state=RS_IN_NONEOF;
71+
} else if ( isalpha(c) ) {
72+
ptr->data[ 0 ] = c;
73+
ptr->len=1;
74+
state=RS_IN_ONEOF_IN;
75+
} else
76+
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
77+
} else if ( state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF ) {
78+
if ( isalpha(c) ) {
79+
ptr->data[ ptr->len ] = c;
80+
ptr->len++;
81+
} else if ( c==']' ) {
82+
state=RS_IN_WAIT;
83+
} else
84+
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
85+
} else
86+
ts_error(ERROR,"Internal error in RS_compile: %d\n", state);
87+
}
88+
89+
ptr = r->node;
90+
while(ptr) {
91+
r->nchar++;
92+
ptr=ptr->next;
93+
}
94+
95+
return 0;
96+
}
97+
98+
void
99+
RS_free(Regis *r) {
100+
RegisNode *ptr=r->node,*tmp;
101+
102+
while(ptr) {
103+
tmp=ptr->next;
104+
free(ptr);
105+
ptr = tmp;
106+
}
107+
108+
r->node = NULL;
109+
}
110+
111+
int
112+
RS_execute(Regis *r, const char *str, int len) {
113+
RegisNode *ptr=r->node;
114+
unsigned char *c;
115+
116+
if (len<0)
117+
len=strlen(str);
118+
119+
if (len<r->nchar)
120+
return 0;
121+
122+
if ( r->issuffix )
123+
c = ((unsigned char*)str) + len - r->nchar;
124+
else
125+
c = (unsigned char*)str;
126+
127+
while(ptr) {
128+
switch(ptr->type) {
129+
case RSF_ONEOF:
130+
if ( ptr->len==0 ) {
131+
if ( *c != *(ptr->data) )
132+
return 0;
133+
} else if ( strchr((char*)ptr->data, *c) == NULL )
134+
return 0;
135+
break;
136+
case RSF_NONEOF:
137+
if ( ptr->len==0 ) {
138+
if ( *c == *(ptr->data) )
139+
return 0;
140+
} else if ( strchr((char*)ptr->data, *c) != NULL )
141+
return 0;
142+
break;
143+
default:
144+
ts_error(ERROR,"RS_execute: Unknown type node: %d\n", ptr->type);
145+
}
146+
ptr=ptr->next;
147+
c++;
148+
}
149+
150+
return 1;
151+
}

contrib/tsearch2/ispell/regis.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#ifndef __REGIS_H__
2+
#define __REGIS_H__
3+
4+
#include "postgres.h"

#include <stddef.h>
5+
6+
typedef struct RegisNode {
7+
uint32
8+
type:2,
9+
len:16,
10+
unused:14;
11+
struct RegisNode *next;
12+
unsigned char data[1];
13+
} RegisNode;
14+
15+
#define RNHDRSZ (sizeof(uint32)+sizeof(void*))
16+
17+
/* Values stored in RegisNode.type */
/* the character must be one of the bytes in data[] */
#define RSF_ONEOF 1
18+
/* the character must not be any of the bytes in data[] */
#define RSF_NONEOF 2
19+
20+
typedef struct Regis {
21+
RegisNode *node;
22+
uint32
23+
issuffix:1,
24+
nchar:16,
25+
unused:15;
26+
} Regis;
27+
28+
/* Returns 1 if str contains only letters, '[', ']' and '^'; 0 otherwise. */
int RS_isRegis(const char *str);
29+
30+
/*
 * Compiles the pattern str into *r.  A nonzero issuffix makes the pattern
 * match at the end of the string instead of the start.  Returns 0.
 */
int RS_compile(Regis *r, int issuffix, const char *str);
31+
/* Frees all nodes of r and sets r->node to NULL. */
void RS_free(Regis *r);
32+
/*
 * Returns 1 if str matches the compiled pattern, 0 otherwise.
 * A negative len means the length is taken with strlen(str).
 */
33+
int RS_execute(Regis *r, const char *str, int len);
34+
#endif

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy