Skip to content

Commit 3e5f941

Browse files
committed
Reduce the memory requirement for large ispell dictionaries.
This patch eliminates per-chunk palloc overhead for most small allocations needed in the representation of an ispell dictionary. This saves close to a factor of 2 on the current Czech ispell data. While it doesn't cover every last small allocation in the ispell code, we are at the point of diminishing returns, because about 95% of the allocations are covered already. (Patch by Pavel Stehule, rather heavily revised by Tom.)
1 parent: 9b910de; commit: 3e5f941

File tree

2 files changed

+74
-12
lines changed

2 files changed

+74
-12
lines changed

src/backend/tsearch/spell.c

Lines changed: 70 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,63 @@ NIFinishBuild(IspellDict *Conf)
5959
/* Just for cleanliness, zero the now-dangling pointers */
6060
Conf->buildCxt = NULL;
6161
Conf->Spell = NULL;
62+
Conf->firstfree = NULL;
63+
}
64+
65+
66+
/*
67+
* "Compact" palloc: allocate without extra palloc overhead.
68+
*
69+
* Since we have no need to free the ispell data items individually, there's
70+
* not much value in the per-chunk overhead normally consumed by palloc.
71+
* Getting rid of it is helpful since ispell can allocate a lot of small nodes.
72+
*
73+
* We currently pre-zero all data allocated this way, even though some of it
74+
* doesn't need that. The cpalloc and cpalloc0 macros are just documentation
75+
* to indicate which allocations actually require zeroing.
76+
*/
77+
#define COMPACT_ALLOC_CHUNK 8192 /* must be > aset.c's allocChunkLimit */
78+
#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
79+
80+
static void *
81+
compact_palloc0(IspellDict *Conf, size_t size)
82+
{
83+
void *result;
84+
85+
/* Should only be called during init */
86+
Assert(Conf->buildCxt != NULL);
87+
88+
/* No point in this for large chunks */
89+
if (size > COMPACT_MAX_REQ)
90+
return palloc0(size);
91+
92+
/* Keep everything maxaligned */
93+
size = MAXALIGN(size);
94+
95+
/* Need more space? */
96+
if (size > Conf->avail)
97+
{
98+
Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
99+
Conf->avail = COMPACT_ALLOC_CHUNK;
100+
}
101+
102+
result = (void *) Conf->firstfree;
103+
Conf->firstfree += size;
104+
Conf->avail -= size;
105+
106+
return result;
107+
}
108+
109+
#define cpalloc(size) compact_palloc0(Conf, size)
110+
#define cpalloc0(size) compact_palloc0(Conf, size)
111+
112+
static char *
113+
cpstrdup(IspellDict *Conf, const char *str)
114+
{
115+
char *res = cpalloc(strlen(str) + 1);
116+
117+
strcpy(res, str);
118+
return res;
62119
}
63120

64121

@@ -186,7 +243,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
186243
{
187244
if (Conf->mspell)
188245
{
189-
Conf->mspell += 1024 * 20;
246+
Conf->mspell *= 2;
190247
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
191248
}
192249
else
@@ -324,7 +381,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
324381
{
325382
if (Conf->maffixes)
326383
{
327-
Conf->maffixes += 16;
384+
Conf->maffixes *= 2;
328385
Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
329386
}
330387
else
@@ -389,9 +446,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
389446
Affix->flag = flag;
390447
Affix->type = type;
391448

392-
Affix->find = (find && *find) ? pstrdup(find) : VoidString;
449+
Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
393450
if ((Affix->replen = strlen(repl)) > 0)
394-
Affix->repl = pstrdup(repl);
451+
Affix->repl = cpstrdup(Conf, repl);
395452
else
396453
Affix->repl = VoidString;
397454
Conf->naffixes++;
@@ -843,8 +900,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
843900
}
844901

845902
ptr = Conf->AffixData + Conf->nAffixData;
846-
*ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
847-
1 /* space */ + 1 /* \0 */ );
903+
*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
904+
strlen(Conf->AffixData[a2]) +
905+
1 /* space */ + 1 /* \0 */ );
848906
sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
849907
ptr++;
850908
*ptr = NULL;
@@ -888,7 +946,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
888946
if (!nchar)
889947
return NULL;
890948

891-
rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
949+
rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
892950
rs->length = nchar;
893951
data = rs->data;
894952

@@ -982,7 +1040,7 @@ NISortDictionary(IspellDict *Conf)
9821040
{
9831041
curaffix++;
9841042
Assert(curaffix < naffix);
985-
Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
1043+
Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
9861044
}
9871045

9881046
Conf->Spell[i]->p.d.affix = curaffix;
@@ -1020,7 +1078,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10201078
aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
10211079
naff = 0;
10221080

1023-
rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1081+
rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
10241082
rs->length = nchar;
10251083
data = rs->data;
10261084

@@ -1036,7 +1094,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10361094
if (naff)
10371095
{
10381096
data->naff = naff;
1039-
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1097+
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
10401098
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
10411099
naff = 0;
10421100
}
@@ -1056,7 +1114,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
10561114
if (naff)
10571115
{
10581116
data->naff = naff;
1059-
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1117+
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
10601118
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
10611119
naff = 0;
10621120
}
@@ -1097,7 +1155,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
10971155
if (cnt == 0)
10981156
return;
10991157

1100-
Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
1158+
Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
11011159
Affix->data->naff = (uint32) cnt;
11021160

11031161
cnt = 0;

src/include/tsearch/dicts/spell.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,10 @@ typedef struct
161161
SPELL **Spell;
162162
int nspell; /* number of valid entries in Spell array */
163163
int mspell; /* allocated length of Spell array */
164+
165+
/* These are used to allocate "compact" data without palloc overhead */
166+
char *firstfree; /* first free address (always maxaligned) */
167+
size_t avail; /* free space remaining at firstfree */
164168
} IspellDict;
165169

166170
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy