Skip to content

Commit ecab830

Browse files
committed
Added functions to list dictionaries (shared_ispell_dicts) and stoplists (shared_ispell_stoplists). Several other minor tweaks / improvements.
1 parent 4e732cb commit ecab830

File tree

3 files changed

+229
-20
lines changed

3 files changed

+229
-20
lines changed

sql/shared_ispell--1.0.0.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@ CREATE OR REPLACE FUNCTION shared_ispell_mem_available()
2323
AS 'MODULE_PATHNAME', 'dispell_mem_available'
2424
LANGUAGE C IMMUTABLE;
2525

26+
-- Lists the dictionaries currently stored in the shared segment, one row per
-- dictionary: dictionary file, affix file, number of words, number of affixes
-- and bytes used.
--
-- Declared VOLATILE (not IMMUTABLE): the result depends on the current
-- contents of shared memory, which changes whenever a dictionary is loaded or
-- the segment is reset, so the planner must never pre-evaluate or cache it.
CREATE OR REPLACE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT)
    RETURNS SETOF record
    AS 'MODULE_PATHNAME', 'dispell_list_dicts'
    LANGUAGE C VOLATILE;
30+
31+
-- Lists the stop-word lists currently stored in the shared segment, one row
-- per list: stop file, number of words and bytes used.
--
-- Declared VOLATILE (not IMMUTABLE): the result depends on the current
-- contents of shared memory, which changes whenever a stop list is loaded or
-- the segment is reset, so the planner must never pre-evaluate or cache it.
CREATE OR REPLACE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT)
    RETURNS SETOF record
    AS 'MODULE_PATHNAME', 'dispell_list_stoplists'
    LANGUAGE C VOLATILE;
35+
2636
CREATE TEXT SEARCH TEMPLATE shared_ispell (
2737
INIT = shared_ispell_init,
2838
LEXIZE = shared_ispell_lexize

src/shared_ispell.c

Lines changed: 216 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include "storage/lwlock.h"
2121
#include "utils/timestamp.h"
2222

23+
#include "funcapi.h"
24+
2325
#include "libpq/md5.h"
2426

2527
#include "spell.h"
@@ -85,7 +87,7 @@ static SegmentInfo * segment_info = NULL;
8587

8688
static char * shalloc(int bytes);
8789

88-
static SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int bytes);
90+
static SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int bytes, int words);
8991
static SharedStopList * copyStopList(StopList * list, char * stopFile, int bytes);
9092

9193
static int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile);
@@ -230,12 +232,8 @@ void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
230232

231233
IspellDict * dict;
232234
StopList stoplist;
233-
234-
/* FIXME Maybe we could treat the stop file separately, as it does not
235-
* influence the dictionary. So the SharedIspellDict would track just
236-
* dictionary and affixes, and the stop words would be kept somewhere
237-
* else - either separately in the shared segment, or in local memory
238-
* (the list is usually small and easy pro load) */
235+
236+
/* dictionary (words and affixes) */
239237
shdict = get_shared_dict(dictFile, affFile);
240238

241239
/* init if needed */
@@ -264,7 +262,7 @@ void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
264262
}
265263

266264
/* fine, there's enough space - copy the dictionary */
267-
shdict = copyIspellDict(dict, dictFile, affFile, size);
265+
shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell);
268266

269267
elog(INFO, "shared dictionary %s.dict / %s.affix loaded, used %d B, %ld B remaining",
270268
dictFile, affFile, size, segment_info->available);
@@ -312,12 +310,16 @@ Datum dispell_lexize(PG_FUNCTION_ARGS);
312310
Datum dispell_reset(PG_FUNCTION_ARGS);
313311
Datum dispell_mem_available(PG_FUNCTION_ARGS);
314312
Datum dispell_mem_used(PG_FUNCTION_ARGS);
313+
Datum dispell_list_dicts(PG_FUNCTION_ARGS);
314+
Datum dispell_list_stoplists(PG_FUNCTION_ARGS);
315315

316316
PG_FUNCTION_INFO_V1(dispell_init);
317317
PG_FUNCTION_INFO_V1(dispell_lexize);
318318
PG_FUNCTION_INFO_V1(dispell_reset);
319319
PG_FUNCTION_INFO_V1(dispell_mem_available);
320320
PG_FUNCTION_INFO_V1(dispell_mem_used);
321+
PG_FUNCTION_INFO_V1(dispell_list_dicts);
322+
PG_FUNCTION_INFO_V1(dispell_list_stoplists);
321323

322324
Datum
323325
dispell_reset(PG_FUNCTION_ARGS)
@@ -507,9 +509,10 @@ char * shalloc(int bytes) {
507509
char * result;
508510
bytes = MAXALIGN(bytes);
509511

512+
/* This shouldn't really happen, as the init_shared_dict checks the size
513+
* prior to copy. So let's just throw error here, as something went
514+
* obviously wrong. */
510515
if (bytes > segment_info->available) {
511-
/* FIXME this should not throw error, it should rather return NULL
512-
* and reset the alloc info (this way the memory is wasted forever) */
513516
elog(ERROR, "the shared segment (shared ispell) is too small");
514517
}
515518

@@ -595,6 +598,11 @@ int sizeRegisNode(RegisNode * node) {
595598
return size;
596599
}
597600

601+
/*
 * copyRegexGuts
 *
 * Placeholder for copying the internals of a compiled regex. Nothing is
 * copied yet: the argument is ignored and NULL is always returned.
 */
static char *
copyRegexGuts(char * guts)
{
	char	   *copy = NULL;

	(void) guts;				/* not used by the placeholder */

	return copy;
}
605+
598606
static
599607
AFFIX * copyAffix(AFFIX * affix) {
600608

@@ -605,11 +613,14 @@ AFFIX * copyAffix(AFFIX * affix) {
605613
copy->find = shstrcpy(affix->find);
606614
copy->repl = shstrcpy(affix->repl);
607615

608-
if (copy->isregis) {
609-
copy->reg.regis.node = copyRegisNode(copy->reg.regis.node);
610-
} else if (! copy->issimple) {
611-
// FIXME handle the regex_t properly (copy the strings etc)
616+
if (affix->isregis) {
617+
copy->reg.regis.node = copyRegisNode(affix->reg.regis.node);
618+
} else if (! affix->issimple) {
619+
620+
/*FIXME Need to copy the regex_t properly. But would a plain copy be
621+
* safe to use by multiple processes at the same time? */
612622
elog(WARNING, "skipping regex_t");
623+
613624
}
614625

615626
return copy;
@@ -627,8 +638,11 @@ int sizeAffix(AFFIX * affix) {
627638
if (affix->isregis) {
628639
size += sizeRegisNode(affix->reg.regis.node);
629640
} else if (! affix->issimple) {
630-
// FIXME handle the regex_t properly (copy the strings etc)
641+
642+
/*FIXME Need to copy the regex_t properly. But would a plain copy be
643+
* safe to use by multiple processes at the same time? */
631644
elog(WARNING, "skipping regex_t");
645+
632646
}
633647

634648
return size;
@@ -698,7 +712,7 @@ SharedStopList * copyStopList(StopList * list, char * stopFile, int size) {
698712
copy->list.len = list->len;
699713
copy->list.stop = (char**)shalloc(sizeof(char*) * list->len);
700714
copy->stopFile = shstrcpy(stopFile);
701-
copy->bytes = size;
715+
copy->nbytes = size;
702716

703717
for (i = 0; i < list->len; i++) {
704718
copy->list.stop[i] = shstrcpy(list->stop[i]);
@@ -742,7 +756,7 @@ int countCMPDAffixes(CMPDAffix * affixes) {
742756
}
743757

744758
static
745-
SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size) {
759+
SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size, int words) {
746760

747761
int i, cnt;
748762

@@ -778,7 +792,8 @@ SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * aff
778792
memcpy(copy->flagval, dict->flagval, 255);
779793
copy->usecompound = dict->usecompound;
780794

781-
copy->bytes = size;
795+
copy->nbytes = size;
796+
copy->nwords = words;
782797

783798
return copy;
784799

@@ -812,3 +827,186 @@ int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile) {
812827
return size;
813828

814829
}
830+
831+
Datum
832+
dispell_list_dicts(PG_FUNCTION_ARGS)
833+
{
834+
FuncCallContext *funcctx;
835+
TupleDesc tupdesc;
836+
AttInMetadata *attinmeta;
837+
SharedIspellDict * dict;
838+
839+
/* init on the first call */
840+
if (SRF_IS_FIRSTCALL()) {
841+
842+
MemoryContext oldcontext;
843+
844+
funcctx = SRF_FIRSTCALL_INIT();
845+
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
846+
847+
/* get a shared lock and then the first dictionary */
848+
LWLockAcquire(segment_info->lock, LW_SHARED);
849+
funcctx->user_fctx = segment_info->dict;
850+
851+
/* Build a tuple descriptor for our result type */
852+
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
853+
ereport(ERROR,
854+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
855+
errmsg("function returning record called in context "
856+
"that cannot accept type record")));
857+
858+
/*
859+
* generate attribute metadata needed later to produce tuples from raw
860+
* C strings
861+
*/
862+
attinmeta = TupleDescGetAttInMetadata(tupdesc);
863+
funcctx->attinmeta = attinmeta;
864+
funcctx->tuple_desc = tupdesc;
865+
866+
/* switch back to the old context */
867+
MemoryContextSwitchTo(oldcontext);
868+
869+
}
870+
871+
/* init the context */
872+
funcctx = SRF_PERCALL_SETUP();
873+
874+
/* check if we have more data */
875+
if (funcctx->user_fctx != NULL)
876+
{
877+
HeapTuple tuple;
878+
Datum result;
879+
Datum values[5];
880+
bool nulls[5];
881+
882+
text *dictname, *affname;
883+
884+
dict = (SharedIspellDict*)funcctx->user_fctx;
885+
funcctx->user_fctx = dict->next;
886+
887+
memset(nulls, 0, sizeof(nulls));
888+
889+
dictname = (text *) palloc(strlen(dict->dictFile) + VARHDRSZ);
890+
affname = (text *) palloc(strlen(dict->affixFile) + VARHDRSZ);
891+
892+
SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ);
893+
SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ);
894+
895+
strcpy(VARDATA(dictname), dict->dictFile);
896+
strcpy(VARDATA(affname), dict->affixFile);
897+
898+
values[0] = PointerGetDatum(dictname);
899+
values[1] = PointerGetDatum(affname);
900+
values[2] = UInt32GetDatum(dict->nwords);
901+
values[3] = UInt32GetDatum(dict->naffixes);
902+
values[4] = UInt32GetDatum(dict->nbytes);
903+
904+
/* Build and return the tuple. */
905+
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
906+
907+
/* make the tuple into a datum */
908+
result = HeapTupleGetDatum(tuple);
909+
910+
/* Here we want to return another item: */
911+
SRF_RETURN_NEXT(funcctx, result);
912+
913+
}
914+
else
915+
{
916+
/* release the lock */
917+
LWLockRelease(segment_info->lock);
918+
919+
/* Here we are done returning items and just need to clean up: */
920+
SRF_RETURN_DONE(funcctx);
921+
}
922+
923+
}
924+
925+
Datum
926+
dispell_list_stoplists(PG_FUNCTION_ARGS)
927+
{
928+
FuncCallContext *funcctx;
929+
TupleDesc tupdesc;
930+
AttInMetadata *attinmeta;
931+
SharedStopList *stoplist;
932+
933+
/* init on the first call */
934+
if (SRF_IS_FIRSTCALL()) {
935+
936+
MemoryContext oldcontext;
937+
938+
funcctx = SRF_FIRSTCALL_INIT();
939+
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
940+
941+
/* get a shared lock and then the first stop list */
942+
LWLockAcquire(segment_info->lock, LW_SHARED);
943+
funcctx->user_fctx = segment_info->stop;
944+
945+
/* Build a tuple descriptor for our result type */
946+
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
947+
ereport(ERROR,
948+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
949+
errmsg("function returning record called in context "
950+
"that cannot accept type record")));
951+
952+
/*
953+
* generate attribute metadata needed later to produce tuples from raw
954+
* C strings
955+
*/
956+
attinmeta = TupleDescGetAttInMetadata(tupdesc);
957+
funcctx->attinmeta = attinmeta;
958+
funcctx->tuple_desc = tupdesc;
959+
960+
/* switch back to the old context */
961+
MemoryContextSwitchTo(oldcontext);
962+
963+
}
964+
965+
/* init the context */
966+
funcctx = SRF_PERCALL_SETUP();
967+
968+
/* check if we have more data */
969+
if (funcctx->user_fctx != NULL)
970+
{
971+
HeapTuple tuple;
972+
Datum result;
973+
Datum values[3];
974+
bool nulls[3];
975+
976+
text *stopname;
977+
978+
stoplist = (SharedStopList*)funcctx->user_fctx;
979+
funcctx->user_fctx = stoplist->next;
980+
981+
memset(nulls, 0, sizeof(nulls));
982+
983+
stopname = (text *) palloc(strlen(stoplist->stopFile) + VARHDRSZ);
984+
985+
SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ);
986+
987+
strcpy(VARDATA(stopname), stoplist->stopFile);
988+
989+
values[0] = PointerGetDatum(stopname);
990+
values[1] = UInt32GetDatum(stoplist->list.len);
991+
values[2] = UInt32GetDatum(stoplist->nbytes);
992+
993+
/* Build and return the tuple. */
994+
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
995+
996+
/* make the tuple into a datum */
997+
result = HeapTupleGetDatum(tuple);
998+
999+
/* Here we want to return another item: */
1000+
SRF_RETURN_NEXT(funcctx, result);
1001+
1002+
}
1003+
else
1004+
{
1005+
/* release the lock */
1006+
LWLockRelease(segment_info->lock);
1007+
1008+
/* Here we are done returning items and just need to clean up: */
1009+
SRF_RETURN_DONE(funcctx);
1010+
}
1011+
1012+
}

src/spell.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ typedef struct SharedIspellDict
2727
char * dictFile;
2828
char * affixFile;
2929

30-
size_t bytes;
30+
int nbytes;
31+
int nwords;
3132

3233
/* next dictionary in the chain */
3334
struct SharedIspellDict * next;
@@ -56,7 +57,7 @@ typedef struct SharedStopList
5657

5758
char * stopFile;
5859

59-
size_t bytes;
60+
int nbytes;
6061

6162
StopList list;
6263
struct SharedStopList * next;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy