Skip to content

Commit 5ec81ac

Browse files
committed
Fix nasty performance problem in tsquery_rewrite().
tsquery_rewrite() tries to find matches to subsets of AND/OR conditions; for example, in the query 'a | b | c' the substitution subquery 'a | c' should match and lead to replacement of the first and third items. That's fine, but the matching algorithm apparently takes about O(2^N) for an N-clause query (I say "apparently" because the code is also both unintelligible and uncommented). We could probably do better than that even without any extra assumptions --- but actually, we know that the subclauses are sorted, indeed are depending on that elsewhere in this very same function. So we can just scan the two lists a single time to detect matches, as though we were doing a merge join. Also do a re-flattening call (QTNTernary()) in tsquery_rewrite_query, just to make sure that the tree fits the expectations of the next search cycle. I didn't try to devise a test case for this, but I'm pretty sure that the oversight could have led to failure to match in some cases where a match would be expected. Improve comments, and also stick a CHECK_FOR_INTERRUPTS into dofindsubquery, just in case it's still too slow for somebody. Per report from Andreas Seltenreich. Back-patch to all supported branches. Discussion: <8760oasf2y.fsf@credativ.de>
1 parent 24ebc44 commit 5ec81ac

File tree

1 file changed

+110
-80
lines changed

1 file changed

+110
-80
lines changed

src/backend/utils/adt/tsquery_rewrite.c

Lines changed: 110 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -21,47 +21,43 @@
2121
#include "utils/builtins.h"
2222

2323

24-
static int
25-
addone(int *counters, int last, int total)
26-
{
27-
/* since this function recurses, it could be driven to stack overflow. */
28-
check_stack_depth();
29-
30-
counters[last]++;
31-
if (counters[last] >= total)
32-
{
33-
if (last == 0)
34-
return 0;
35-
if (addone(counters, last - 1, total - 1) == 0)
36-
return 0;
37-
counters[last] = counters[last - 1] + 1;
38-
}
39-
return 1;
40-
}
41-
4224
/*
43-
* If node is equal to ex, replace it with subs. Replacement is actually done
44-
* by returning either node or a copy of subs.
25+
* If "node" is equal to "ex", return a copy of "subs" instead.
26+
* If "ex" matches a subset of node's children, return a modified version
27+
* of "node" in which those children are replaced with a copy of "subs".
28+
* Otherwise return "node" unmodified.
29+
*
30+
* The QTN_NOCHANGE bit is set in successfully modified nodes, so that
31+
* we won't uselessly recurse into them.
32+
* Also, set *isfind true if we make a replacement.
4533
*/
4634
static QTNode *
4735
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
4836
{
37+
/* Can't match unless signature matches and node type matches. */
4938
if ((node->sign & ex->sign) != ex->sign ||
5039
node->valnode->type != ex->valnode->type)
5140
return node;
5241

42+
/* Ignore nodes marked NOCHANGE, too. */
5343
if (node->flags & QTN_NOCHANGE)
5444
return node;
5545

5646
if (node->valnode->type == QI_OPR)
5747
{
48+
/* Must be same operator. */
5849
if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
5950
return node;
6051

6152
if (node->nchild == ex->nchild)
6253
{
54+
/*
55+
* Simple case: when same number of children, match if equal.
56+
* (This is reliable when the children were sorted earlier.)
57+
*/
6358
if (QTNEq(node, ex))
6459
{
60+
/* Match; delete node and return a copy of subs instead. */
6561
QTNFree(node);
6662
if (subs)
6763
{
@@ -73,79 +69,92 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
7369
*isfind = true;
7470
}
7571
}
76-
else if (node->nchild > ex->nchild)
72+
else if (node->nchild > ex->nchild && ex->nchild > 0)
7773
{
7874
/*
79-
* AND and NOT are commutative, so we check if a subset of the
80-
* children match. For example, if tnode is A | B | C, and ex is B
81-
* | C, we have a match after we convert tnode to A | (B | C).
75+
* AND and OR are commutative/associative, so we should check if a
76+
* subset of the children match. For example, if node is A|B|C,
77+
* and ex is B|C, we have a match after we notionally convert node
78+
* to A|(B|C). This does not work for NOT or PHRASE nodes, but we
79+
* can't get here for those node types because they have a fixed
80+
* number of children.
81+
*
82+
* Because we expect that the children are sorted, it suffices to
83+
* make one pass through the two lists to find the matches.
8284
*/
83-
int *counters = (int *) palloc(sizeof(int) * node->nchild);
84-
int i;
85-
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
86-
87-
memset(tnode, 0, sizeof(QTNode));
88-
tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
89-
tnode->nchild = ex->nchild;
90-
tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
91-
*(tnode->valnode) = *(ex->valnode);
92-
93-
for (i = 0; i < ex->nchild; i++)
94-
counters[i] = i;
95-
96-
do
85+
bool *matched;
86+
int nmatched;
87+
int i,
88+
j;
89+
90+
/* Assert that the subset rule is OK */
91+
Assert(node->valnode->qoperator.oper == OP_AND ||
92+
node->valnode->qoperator.oper == OP_OR);
93+
94+
/* matched[] will record which children of node matched */
95+
matched = (bool *) palloc0(node->nchild * sizeof(bool));
96+
nmatched = 0;
97+
i = j = 0;
98+
while (i < node->nchild && j < ex->nchild)
9799
{
98-
tnode->sign = 0;
99-
for (i = 0; i < ex->nchild; i++)
100+
int cmp = QTNodeCompare(node->child[i], ex->child[j]);
101+
102+
if (cmp == 0)
100103
{
101-
tnode->child[i] = node->child[counters[i]];
102-
tnode->sign |= tnode->child[i]->sign;
104+
/* match! */
105+
matched[i] = true;
106+
nmatched++;
107+
i++, j++;
103108
}
109+
else if (cmp < 0)
110+
{
111+
/* node->child[i] has no match, ignore it */
112+
i++;
113+
}
114+
else
115+
{
116+
/* ex->child[j] has no match; we can give up immediately */
117+
break;
118+
}
119+
}
104120

105-
if (QTNEq(tnode, ex))
121+
if (nmatched == ex->nchild)
122+
{
123+
/* collapse out the matched children of node */
124+
j = 0;
125+
for (i = 0; i < node->nchild; i++)
106126
{
107-
int j = 0;
108-
109-
pfree(tnode->valnode);
110-
pfree(tnode->child);
111-
pfree(tnode);
112-
if (subs)
113-
{
114-
tnode = QTNCopy(subs);
115-
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
116-
}
127+
if (matched[i])
128+
QTNFree(node->child[i]);
117129
else
118-
tnode = NULL;
119-
120-
node->child[counters[0]] = tnode;
121-
122-
for (i = 1; i < ex->nchild; i++)
123-
node->child[counters[i]] = NULL;
124-
for (i = 0; i < node->nchild; i++)
125-
{
126-
if (node->child[i])
127-
{
128-
node->child[j] = node->child[i];
129-
j++;
130-
}
131-
}
130+
node->child[j++] = node->child[i];
131+
}
132132

133-
node->nchild = j;
133+
/* and instead insert a copy of subs */
134+
if (subs)
135+
{
136+
subs = QTNCopy(subs);
137+
subs->flags |= QTN_NOCHANGE;
138+
node->child[j++] = subs;
139+
}
134140

135-
*isfind = true;
141+
node->nchild = j;
142+
143+
/*
144+
* Re-sort the node to put new child in the right place. This
145+
* is a bit bogus, because it won't matter for findsubquery's
146+
* remaining processing, and it's insufficient to prepare the
147+
* tree for another search (we would need to re-flatten as
148+
* well, and we don't want to do that because we'd lose the
149+
* QTN_NOCHANGE marking on the new child). But it's needed to
150+
* keep the results the same as the regression tests expect.
151+
*/
152+
QTNSort(node);
136153

137-
break;
138-
}
139-
} while (addone(counters, ex->nchild - 1, node->nchild));
140-
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
141-
{
142-
pfree(tnode->valnode);
143-
pfree(tnode->child);
144-
pfree(tnode);
154+
*isfind = true;
145155
}
146-
else
147-
QTNSort(node);
148-
pfree(counters);
156+
157+
pfree(matched);
149158
}
150159
}
151160
else
@@ -173,12 +182,20 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
173182
return node;
174183
}
175184

185+
/*
186+
* Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
187+
* at the root node, and if we failed to do so, recursively match against
188+
* child nodes.
189+
*/
176190
static QTNode *
177191
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
178192
{
179193
/* since this function recurses, it could be driven to stack overflow. */
180194
check_stack_depth();
181195

196+
/* also, since it's a bit expensive, let's check for query cancel. */
197+
CHECK_FOR_INTERRUPTS();
198+
182199
root = findeq(root, ex, subs, isfind);
183200

184201
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
@@ -192,6 +209,10 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
192209
return root;
193210
}
194211

212+
/*
213+
* Delete any void subtrees that may have been inserted when the replacement
214+
* subtree is void.
215+
*/
195216
static QTNode *
196217
dropvoidsubtree(QTNode *root)
197218
{
@@ -231,6 +252,14 @@ dropvoidsubtree(QTNode *root)
231252
return root;
232253
}
233254

255+
/*
256+
* Substitute "subs" for "ex" throughout the QTNode tree at root.
257+
*
258+
* If isfind isn't NULL, set *isfind to show whether we made any substitution.
259+
*
260+
* Both "root" and "ex" must have been through QTNTernary and QTNSort
261+
* to ensure reliable matching.
262+
*/
234263
QTNode *
235264
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
236265
{
@@ -344,6 +373,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
344373
{
345374
/* ready the tree for another pass */
346375
QTNClearFlags(tree, QTN_NOCHANGE);
376+
QTNTernary(tree);
347377
QTNSort(tree);
348378
}
349379
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy