Skip to content

Commit 5e2707c

Browse files
committed
Snowball multibyte. It's a pity, but the snowball sources are very different for multibyte and
singlebyte encodings, so we need a separate snowball stemmer for every encoding. I hope this finalizes the multibyte support work in tsearch2, but testing is needed...
1 parent 75c4747 commit 5e2707c

File tree

8 files changed

+974
-27
lines changed

8 files changed

+974
-27
lines changed

contrib/tsearch2/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $
1+
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $
22

33
MODULE_big = tsearch2
44
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
@@ -16,7 +16,7 @@ OBJS += $(SUBDIROBJS)
1616

1717
PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
1818

19-
DATA = stopword/english.stop stopword/russian.stop
19+
DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
2020
DATA_built = tsearch2.sql untsearch2.sql
2121
DOCS = README.tsearch2
2222
REGRESS = tsearch2

contrib/tsearch2/dict_snowball.c

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "snowball/header.h"
1111
#include "snowball/english_stem.h"
1212
#include "snowball/russian_stem.h"
13+
#include "snowball/russian_stem_UTF8.h"
1314
#include "ts_locale.h"
1415

1516
typedef struct
@@ -23,8 +24,11 @@ typedef struct
2324
PG_FUNCTION_INFO_V1(snb_en_init);
2425
Datum snb_en_init(PG_FUNCTION_ARGS);
2526

26-
PG_FUNCTION_INFO_V1(snb_ru_init);
27-
Datum snb_ru_init(PG_FUNCTION_ARGS);
27+
PG_FUNCTION_INFO_V1(snb_ru_init_koi8);
28+
Datum snb_ru_init_koi8(PG_FUNCTION_ARGS);
29+
30+
PG_FUNCTION_INFO_V1(snb_ru_init_utf8);
31+
Datum snb_ru_init_utf8(PG_FUNCTION_ARGS);
2832

2933
PG_FUNCTION_INFO_V1(snb_lexize);
3034
Datum snb_lexize(PG_FUNCTION_ARGS);
@@ -64,7 +68,7 @@ snb_en_init(PG_FUNCTION_ARGS)
6468
}
6569

6670
Datum
67-
snb_ru_init(PG_FUNCTION_ARGS)
71+
snb_ru_init_koi8(PG_FUNCTION_ARGS)
6872
{
6973
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
7074

@@ -97,6 +101,40 @@ snb_ru_init(PG_FUNCTION_ARGS)
97101
PG_RETURN_POINTER(d);
98102
}
99103

104+
Datum
105+
snb_ru_init_utf8(PG_FUNCTION_ARGS)
106+
{
107+
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
108+
109+
if (!d)
110+
ereport(ERROR,
111+
(errcode(ERRCODE_OUT_OF_MEMORY),
112+
errmsg("out of memory")));
113+
memset(d, 0, sizeof(DictSnowball));
114+
d->stoplist.wordop = lowerstr;
115+
116+
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
117+
{
118+
text *in = PG_GETARG_TEXT_P(0);
119+
120+
readstoplist(in, &(d->stoplist));
121+
sortstoplist(&(d->stoplist));
122+
PG_FREE_IF_COPY(in, 0);
123+
}
124+
125+
d->z = russian_UTF_8_create_env();
126+
if (!d->z)
127+
{
128+
freestoplist(&(d->stoplist));
129+
ereport(ERROR,
130+
(errcode(ERRCODE_OUT_OF_MEMORY),
131+
errmsg("out of memory")));
132+
}
133+
d->stem = russian_UTF_8_stem;
134+
135+
PG_RETURN_POINTER(d);
136+
}
137+
100138
Datum
101139
snb_lexize(PG_FUNCTION_ARGS)
102140
{

contrib/tsearch2/expected/tsearch2.out

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,21 @@
44
--
55
\set ECHO none
66
psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"
7-
psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
8-
psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
9-
psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
10-
psql:tsearch2.sql:337: NOTICE: type "tsvector" is not yet defined
7+
psql:tsearch2.sql:158: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
8+
psql:tsearch2.sql:257: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
9+
psql:tsearch2.sql:264: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
10+
psql:tsearch2.sql:370: NOTICE: type "tsvector" is not yet defined
1111
DETAIL: Creating a shell type definition.
12-
psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell
13-
psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined
12+
psql:tsearch2.sql:375: NOTICE: argument type tsvector is only a shell
13+
psql:tsearch2.sql:429: NOTICE: type "tsquery" is not yet defined
1414
DETAIL: Creating a shell type definition.
15-
psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell
16-
psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined
15+
psql:tsearch2.sql:434: NOTICE: argument type tsquery is only a shell
16+
psql:tsearch2.sql:592: NOTICE: type "gtsvector" is not yet defined
1717
DETAIL: Creating a shell type definition.
18-
psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell
19-
psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined
18+
psql:tsearch2.sql:597: NOTICE: argument type gtsvector is only a shell
19+
psql:tsearch2.sql:1087: NOTICE: type "gtsq" is not yet defined
2020
DETAIL: Creating a shell type definition.
21-
psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell
21+
psql:tsearch2.sql:1092: NOTICE: argument type gtsq is only a shell
2222
--tsvector
2323
SELECT '1'::tsvector;
2424
tsvector

contrib/tsearch2/snowball/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.8 2005/10/18 01:30:48 tgl Exp $
1+
# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.9 2006/01/27 16:32:31 teodor Exp $
22

3-
SUBOBJS = english_stem.o api.o russian_stem.o utilities.o
3+
SUBOBJS = english_stem.o api.o russian_stem.o russian_stem_UTF8.o utilities.o
44

55
EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)
66

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy