Skip to content

Commit 46a25ce

Browse files
committed
1 Fix bug with very short words: prefix and suffix might overlap;
sorry, but the fix can't be applied to the previous version: it requires refilling tsvector... 2 Small optimization of load time for huge dictionaries 3 Use palloc instead of malloc while loading the dict file
1 parent a815a57 commit 46a25ce

File tree

2 files changed

+62
-53
lines changed

2 files changed

+62
-53
lines changed

contrib/tsearch2/ispell/spell.c

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@
1818
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
1919
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
2020

21+
static char *VoidString = "";
2122

2223
#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
2324

2425
static int
2526
cmpspell(const void *s1, const void *s2)
2627
{
27-
return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
28+
return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
2829
}
2930
static int
3031
cmpspellaffix(const void *s1, const void *s2)
3132
{
32-
return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
33+
return (strcmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag));
3334
}
3435

3536
static char *
@@ -128,18 +129,17 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
128129
if (Conf->mspell)
129130
{
130131
Conf->mspell += 1024 * 20;
131-
Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL));
132+
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL*));
132133
}
133134
else
134135
{
135136
Conf->mspell = 1024 * 20;
136-
Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
137+
Conf->Spell = (SPELL **) palloc(Conf->mspell * sizeof(SPELL*));
137138
}
138-
MEMOUT(Conf->Spell);
139139
}
140-
Conf->Spell[Conf->nspell].word = strdup(word);
141-
MEMOUT(Conf->Spell[Conf->nspell].word);
142-
strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
140+
Conf->Spell[Conf->nspell] = (SPELL*)palloc(SPELLHDRSZ + strlen(word) + 1);
141+
strcpy( Conf->Spell[Conf->nspell]->word ,word );
142+
strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16);
143143
Conf->nspell++;
144144
return (0);
145145
}
@@ -261,13 +261,13 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
261261
{
262262
Conf->Affix[Conf->naffixes].issimple = 1;
263263
Conf->Affix[Conf->naffixes].isregis = 0;
264-
Conf->Affix[Conf->naffixes].mask = strdup("");
264+
Conf->Affix[Conf->naffixes].mask = VoidString;
265265
}
266266
else if (RS_isRegis(mask))
267267
{
268268
Conf->Affix[Conf->naffixes].issimple = 0;
269269
Conf->Affix[Conf->naffixes].isregis = 1;
270-
Conf->Affix[Conf->naffixes].mask = strdup(mask);
270+
Conf->Affix[Conf->naffixes].mask = (mask && *mask) ? strdup(mask) : VoidString;
271271
}
272272
else
273273
{
@@ -287,11 +287,13 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
287287
Conf->Affix[Conf->naffixes].flag = flag;
288288
Conf->Affix[Conf->naffixes].type = type;
289289

290-
Conf->Affix[Conf->naffixes].find = strdup(find);
290+
Conf->Affix[Conf->naffixes].find = (find && *find) ? strdup(find) : VoidString;
291291
MEMOUT(Conf->Affix[Conf->naffixes].find);
292-
Conf->Affix[Conf->naffixes].repl = strdup(repl);
293-
MEMOUT(Conf->Affix[Conf->naffixes].repl);
294-
Conf->Affix[Conf->naffixes].replen = strlen(repl);
292+
if ( (Conf->Affix[Conf->naffixes].replen = strlen(repl)) > 0 ) {
293+
Conf->Affix[Conf->naffixes].repl = strdup(repl);
294+
MEMOUT(Conf->Affix[Conf->naffixes].repl);
295+
} else
296+
Conf->Affix[Conf->naffixes].repl = VoidString;
295297
Conf->naffixes++;
296298
return (0);
297299
}
@@ -506,10 +508,10 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
506508
int lownew = low;
507509

508510
for (i = low; i < high; i++)
509-
if (Conf->Spell[i].p.d.len > level && lastchar != Conf->Spell[i].word[level])
511+
if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
510512
{
511513
nchar++;
512-
lastchar = Conf->Spell[i].word[level];
514+
lastchar = Conf->Spell[i]->word[level];
513515
}
514516

515517
if (!nchar)
@@ -523,34 +525,34 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
523525

524526
lastchar = '\0';
525527
for (i = low; i < high; i++)
526-
if (Conf->Spell[i].p.d.len > level)
528+
if (Conf->Spell[i]->p.d.len > level)
527529
{
528-
if (lastchar != Conf->Spell[i].word[level])
530+
if (lastchar != Conf->Spell[i]->word[level])
529531
{
530532
if (lastchar)
531533
{
532534
data->node = mkSPNode(Conf, lownew, i, level + 1);
533535
lownew = i;
534536
data++;
535537
}
536-
lastchar = Conf->Spell[i].word[level];
538+
lastchar = Conf->Spell[i]->word[level];
537539
}
538-
data->val = ((uint8 *) (Conf->Spell[i].word))[level];
539-
if (Conf->Spell[i].p.d.len == level + 1)
540+
data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
541+
if (Conf->Spell[i]->p.d.len == level + 1)
540542
{
541-
if (data->isword && data->affix != Conf->Spell[i].p.d.affix)
543+
if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
542544
{
543545
/*
544546
* fprintf(stderr,"Word already exists: %s (affixes: '%s'
545-
* and '%s')\n", Conf->Spell[i].word,
547+
* and '%s')\n", Conf->Spell[i]->word,
546548
* Conf->AffixData[data->affix],
547-
* Conf->AffixData[Conf->Spell[i].p.d.affix] );
549+
* Conf->AffixData[Conf->Spell[i]->p.d.affix] );
548550
*/
549551
/* MergeAffix called a few times */
550-
data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
552+
data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
551553
}
552554
else
553-
data->affix = Conf->Spell[i].p.d.affix;
555+
data->affix = Conf->Spell[i]->p.d.affix;
554556
data->isword = 1;
555557
if (strchr(Conf->AffixData[data->affix], Conf->compoundcontrol))
556558
data->compoundallow = 1;
@@ -562,18 +564,16 @@ mkSPNode(IspellDict * Conf, int low, int high, int level)
562564
return rs;
563565
}
564566

565-
566-
567567
void
568568
NISortDictionary(IspellDict * Conf)
569569
{
570570
size_t i;
571571
int naffix = 3;
572572

573573
/* compress affixes */
574-
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
574+
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL*), cmpspellaffix);
575575
for (i = 1; i < Conf->nspell; i++)
576-
if (strcmp(Conf->Spell[i].p.flag, Conf->Spell[i - 1].p.flag))
576+
if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
577577
naffix++;
578578

579579
Conf->AffixData = (char **) malloc(naffix * sizeof(char *));
@@ -582,28 +582,28 @@ NISortDictionary(IspellDict * Conf)
582582
naffix = 1;
583583
Conf->AffixData[0] = strdup("");
584584
MEMOUT(Conf->AffixData[0]);
585-
Conf->AffixData[1] = strdup(Conf->Spell[0].p.flag);
585+
Conf->AffixData[1] = strdup(Conf->Spell[0]->p.flag);
586586
MEMOUT(Conf->AffixData[1]);
587-
Conf->Spell[0].p.d.affix = 1;
588-
Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
587+
Conf->Spell[0]->p.d.affix = 1;
588+
Conf->Spell[0]->p.d.len = strlen(Conf->Spell[0]->word);
589589
for (i = 1; i < Conf->nspell; i++)
590590
{
591-
if (strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]))
591+
if (strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[naffix]))
592592
{
593593
naffix++;
594-
Conf->AffixData[naffix] = strdup(Conf->Spell[i].p.flag);
594+
Conf->AffixData[naffix] = strdup(Conf->Spell[i]->p.flag);
595595
MEMOUT(Conf->AffixData[naffix]);
596596
}
597-
Conf->Spell[i].p.d.affix = naffix;
598-
Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
597+
Conf->Spell[i]->p.d.affix = naffix;
598+
Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
599599
}
600600

601-
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
601+
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL*), cmpspell);
602602
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
603603

604604
for (i = 0; i < Conf->nspell; i++)
605-
free(Conf->Spell[i].word);
606-
free(Conf->Spell);
605+
pfree(Conf->Spell[i]);
606+
pfree(Conf->Spell);
607607
Conf->Spell = NULL;
608608
}
609609

@@ -724,7 +724,6 @@ NISortAffixes(IspellDict * Conf)
724724

725725
if (Conf->naffixes > 1)
726726
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
727-
728727
Conf->CompoundAffix = ptr = (CMPDAffix *) malloc(sizeof(CMPDAffix) * Conf->naffixes);
729728
MEMOUT(Conf->CompoundAffix);
730729
ptr->affix = NULL;
@@ -803,7 +802,7 @@ FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type
803802
}
804803

805804
static char *
806-
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword)
805+
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword, int *baselen)
807806
{
808807

809808
if (flagflags & FF_COMPOUNDONLYAFX)
@@ -821,9 +820,15 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
821820
{
822821
strcpy(newword, word);
823822
strcpy(newword + len - Affix->replen, Affix->find);
823+
if ( baselen ) /* store length of non-changed part of word */
824+
*baselen = len - Affix->replen;
824825
}
825826
else
826827
{
828+
/* if prefix is a all non-chaged part's length then all word contains only prefix and suffix,
829+
so out */
830+
if ( baselen && *baselen + strlen(Affix->find) <= Affix->replen )
831+
return NULL;
827832
strcpy(newword, Affix->find);
828833
strcat(newword, word + Affix->replen);
829834
}
@@ -927,7 +932,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
927932
break;
928933
for (j = 0; j < prefix->naff; j++)
929934
{
930-
if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword))
935+
if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
931936
{
932937
/* prefix success */
933938
if (FindWord(Conf, newword, prefix->aff[j]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1))
@@ -948,14 +953,16 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
948953
*/
949954
while (snode)
950955
{
956+
int baselen=0;
957+
951958
/* find possible suffix */
952959
suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
953960
if (!suffix)
954961
break;
955962
/* foreach suffix check affix */
956963
for (i = 0; i < suffix->naff; i++)
957964
{
958-
if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword))
965+
if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
959966
{
960967
/* suffix success */
961968
if (FindWord(Conf, newword, suffix->aff[i]->flag, flag & FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM - 1))
@@ -976,7 +983,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
976983
break;
977984
for (j = 0; j < prefix->naff; j++)
978985
{
979-
if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword))
986+
if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
980987
{
981988
/* prefix success */
982989
int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
@@ -1323,15 +1330,15 @@ NIFree(IspellDict * Conf)
13231330
else
13241331
pg_regfree(&(Affix[i].reg.regex));
13251332
}
1326-
free(Affix[i].mask);
1327-
free(Affix[i].find);
1328-
free(Affix[i].repl);
1333+
if ( Affix[i].mask != VoidString ) free(Affix[i].mask);
1334+
if ( Affix[i].find != VoidString ) free(Affix[i].find);
1335+
if ( Affix[i].repl != VoidString ) free(Affix[i].repl);
13291336
}
13301337
if (Conf->Spell)
13311338
{
13321339
for (i = 0; i < Conf->nspell; i++)
1333-
free(Conf->Spell[i].word);
1334-
free(Conf->Spell);
1340+
pfree(Conf->Spell[i]->word);
1341+
pfree(Conf->Spell);
13351342
}
13361343

13371344
if (Conf->Affix)

contrib/tsearch2/ispell/spell.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ typedef struct SPNode
3232

3333
typedef struct spell_struct
3434
{
35-
char *word;
3635
union
3736
{
3837
char flag[16];
@@ -41,9 +40,12 @@ typedef struct spell_struct
4140
int affix;
4241
int len;
4342
} d;
44-
} p;
43+
} p;
44+
char word[1];
4545
} SPELL;
4646

47+
#define SPELLHDRSZ (offsetof(SPELL, word))
48+
4749
typedef struct aff_struct
4850
{
4951
uint32
@@ -106,7 +108,7 @@ typedef struct
106108

107109
int nspell;
108110
int mspell;
109-
SPELL *Spell;
111+
SPELL **Spell;
110112

111113
AffixNode *Suffix;
112114
AffixNode *Prefix;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy