Skip to content

Commit 21d11e7

Browse files
committed
Avoid unnecessary copying of source string when generating a cloned TParser.
For long source strings the copying results in O(N^2) behavior, and the multiplier can be significant if wide-char conversion is involved. Andres Freund, reviewed by Kevin Grittner.
1 parent a5495cd commit 21d11e7

File tree

1 file changed

+69
-5
lines changed

1 file changed

+69
-5
lines changed

src/backend/tsearch/wparser_def.c

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.25 2009/11/15 13:57:01 petere Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.26 2009/12/15 20:37:17 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -328,6 +328,46 @@ TParserInit(char *str, int len)
328328
return prs;
329329
}
330330

331+
/*
332+
* As an alternative to a full TParserInit one can create a
333+
* TParserCopy which basically is a regular TParser without a private
334+
* copy of the string - instead it uses the one from another TParser.
335+
* This is useful because at some places TParsers are created
336+
* recursively and the repeated copying around of the strings can
337+
* cause major inefficiency if the source string is long.
338+
* The new parser starts parsing at the original's current position.
339+
*
340+
* Obviously one must not close the original TParser before the copy.
341+
*/
342+
static TParser *
343+
TParserCopyInit(const TParser *orig)
344+
{
345+
TParser *prs = (TParser *) palloc0(sizeof(TParser));
346+
347+
prs->charmaxlen = orig->charmaxlen;
348+
prs->str = orig->str + orig->state->posbyte;
349+
prs->lenstr = orig->lenstr - orig->state->posbyte;
350+
351+
#ifdef USE_WIDE_UPPER_LOWER
352+
prs->usewide = orig->usewide;
353+
354+
if (orig->pgwstr)
355+
prs->pgwstr = orig->pgwstr + orig->state->poschar;
356+
if (orig->wstr)
357+
prs->wstr = orig->wstr + orig->state->poschar;
358+
#endif
359+
360+
prs->state = newTParserPosition(NULL);
361+
prs->state->state = TPS_Base;
362+
363+
#ifdef WPARSER_TRACE
364+
fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
365+
#endif
366+
367+
return prs;
368+
}
369+
370+
331371
static void
332372
TParserClose(TParser *prs)
333373
{
@@ -346,9 +386,33 @@ TParserClose(TParser *prs)
346386
pfree(prs->pgwstr);
347387
#endif
348388

389+
#ifdef WPARSER_TRACE
390+
fprintf(stderr, "closing parser");
391+
#endif
349392
pfree(prs);
350393
}
351394

395+
/*
396+
* Close a parser created with TParserCopyInit
397+
*/
398+
static void
399+
TParserCopyClose(TParser *prs)
400+
{
401+
while (prs->state)
402+
{
403+
TParserPosition *ptr = prs->state->prev;
404+
405+
pfree(prs->state);
406+
prs->state = ptr;
407+
}
408+
409+
#ifdef WPARSER_TRACE
410+
fprintf(stderr, "closing parser copy");
411+
#endif
412+
pfree(prs);
413+
}
414+
415+
352416
/*
353417
* Character-type support functions, equivalent to is* macros, but
354418
* working with any possible encodings and locales. Notes:
@@ -617,7 +681,7 @@ p_isignore(TParser *prs)
617681
static int
618682
p_ishost(TParser *prs)
619683
{
620-
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
684+
TParser *tmpprs = TParserCopyInit(prs);
621685
int res = 0;
622686

623687
tmpprs->wanthost = true;
@@ -631,15 +695,15 @@ p_ishost(TParser *prs)
631695
prs->state->charlen = tmpprs->state->charlen;
632696
res = 1;
633697
}
634-
TParserClose(tmpprs);
698+
TParserCopyClose(tmpprs);
635699

636700
return res;
637701
}
638702

639703
static int
640704
p_isURLPath(TParser *prs)
641705
{
642-
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
706+
TParser *tmpprs = TParserCopyInit(prs);
643707
int res = 0;
644708

645709
tmpprs->state = newTParserPosition(tmpprs->state);
@@ -654,7 +718,7 @@ p_isURLPath(TParser *prs)
654718
prs->state->charlen = tmpprs->state->charlen;
655719
res = 1;
656720
}
657-
TParserClose(tmpprs);
721+
TParserCopyClose(tmpprs);
658722

659723
return res;
660724
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy