Skip to content

Commit 43af0e8

Browse files
committed
Allow regex operations to be terminated early by query cancel requests.
The regex code didn't have any provision for query cancel, which is unsurprising given its non-Postgres origin, but still problematic since some operations can take a long time. Introduce a callback function to check for a pending query cancel or session termination request, and call it in a couple of strategic spots where we can make the regex code exit with an error indicator. If we ever actually split out the regex code as a standalone library, some additional work will be needed to let the cancel callback function be specified externally to the library. But that's straightforward (certainly so by comparison to putting the locale-dependent character classification logic on a similar arm's-length basis), and there seems no need to do it right now. A bigger issue is that there may be more places than these two where we need to check for cancels. We can always add more checks later, now that the infrastructure is in place. Since there are known examples of not-terribly-long regexes that can lock up a backend for a long time, back-patch to all supported branches. I have hopes of fixing the known performance problems later, but adding query cancel ability seems like a good idea even if they were all fixed.
1 parent 1e0fb6a commit 43af0e8

File tree

8 files changed

+57
-0
lines changed

8 files changed

+57
-0
lines changed

src/backend/regex/regc_nfa.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,23 @@ newstate(struct nfa * nfa)
174174
{
175175
struct state *s;
176176

177+
/*
178+
* This is a handy place to check for operation cancel during regex
179+
* compilation, since no code path will go very long without making a new
180+
* state.
181+
*/
182+
if (CANCEL_REQUESTED(nfa->v->re))
183+
{
184+
NERR(REG_CANCEL);
185+
return NULL;
186+
}
187+
177188
if (TooManyStates(nfa))
178189
{
179190
NERR(REG_ETOOBIG);
180191
return NULL;
181192
}
193+
182194
if (nfa->free != NULL)
183195
{
184196
s = nfa->free;

src/backend/regex/regcomp.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
#include "regex/regguts.h"
3636

37+
#include "miscadmin.h" /* needed by rcancelrequested() */
38+
3739
/*
3840
* forward declarations, up here so forward datatypes etc. are defined early
3941
*/
@@ -67,6 +69,7 @@ static long nfanode(struct vars *, struct subre *, FILE *);
6769
static int newlacon(struct vars *, struct state *, struct state *, int);
6870
static void freelacons(struct subre *, int);
6971
static void rfree(regex_t *);
72+
static int rcancelrequested(void);
7073

7174
#ifdef REG_DEBUG
7275
static void dump(regex_t *, FILE *);
@@ -274,6 +277,7 @@ struct vars
274277
/* static function list */
275278
static struct fns functions = {
276279
rfree, /* regfree insides */
280+
rcancelrequested /* check for cancel request */
277281
};
278282

279283

@@ -1869,6 +1873,22 @@ rfree(regex_t *re)
18691873
}
18701874
}
18711875

1876+
/*
1877+
* rcancelrequested - check for external request to cancel regex operation
1878+
*
1879+
* Return nonzero to fail the operation with error code REG_CANCEL,
1880+
* zero to keep going
1881+
*
1882+
* The current implementation is Postgres-specific. If we ever get around
1883+
* to splitting the regex code out as a standalone library, there will need
1884+
* to be some API to let applications define a callback function for this.
1885+
*/
1886+
static int
1887+
rcancelrequested(void)
1888+
{
1889+
return InterruptPending && (QueryCancelPending || ProcDiePending);
1890+
}
1891+
18721892
#ifdef REG_DEBUG
18731893

18741894
/*

src/backend/regex/regexec.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,10 @@ cdissect(struct vars * v,
710710
assert(t != NULL);
711711
MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
712712

713+
/* handy place to check for operation cancel */
714+
if (CANCEL_REQUESTED(v->re))
715+
return REG_CANCEL;
716+
713717
switch (t->op)
714718
{
715719
case '=': /* terminal node */

src/backend/utils/adt/regexp.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "catalog/pg_type.h"
3333
#include "funcapi.h"
34+
#include "miscadmin.h"
3435
#include "regex/regex.h"
3536
#include "utils/builtins.h"
3637
#include "utils/guc.h"
@@ -183,6 +184,15 @@ RE_compile_and_cache(text *text_re, int cflags)
183184
if (regcomp_result != REG_OKAY)
184185
{
185186
/* re didn't compile (no need for pg_regfree, if so) */
187+
188+
/*
189+
* Here and in other places in this file, do CHECK_FOR_INTERRUPTS
190+
* before reporting a regex error. This is so that if the regex
191+
* library aborts and returns REG_CANCEL, we don't print an error
192+
* message that implies the regex was invalid.
193+
*/
194+
CHECK_FOR_INTERRUPTS();
195+
186196
pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
187197
ereport(ERROR,
188198
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -262,6 +272,7 @@ RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
262272
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
263273
{
264274
/* re failed??? */
275+
CHECK_FOR_INTERRUPTS();
265276
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
266277
ereport(ERROR,
267278
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -1194,6 +1205,7 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive,
11941205

11951206
default:
11961207
/* re failed??? */
1208+
CHECK_FOR_INTERRUPTS();
11971209
pg_regerror(re_result, re, errMsg, sizeof(errMsg));
11981210
ereport(ERROR,
11991211
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

src/backend/utils/adt/varlena.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2785,6 +2785,7 @@ replace_text_regexp(text *src_text, void *regexp,
27852785
{
27862786
char errMsg[100];
27872787

2788+
CHECK_FOR_INTERRUPTS();
27882789
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
27892790
ereport(ERROR,
27902791
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

src/include/regex/regerrs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,7 @@
8181
{
8282
REG_ECOLORS, "REG_ECOLORS", "too many colors"
8383
},
84+
85+
{
86+
REG_CANCEL, "REG_CANCEL", "operation cancelled"
87+
},

src/include/regex/regex.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ typedef struct
153153
#define REG_BADOPT 18 /* invalid embedded option */
154154
#define REG_ETOOBIG 19 /* nfa has too many states */
155155
#define REG_ECOLORS 20 /* too many colors */
156+
#define REG_CANCEL 21 /* operation cancelled */
156157
/* two specials for debugging and testing */
157158
#define REG_ATOI 101 /* convert error-code name to number */
158159
#define REG_ITOA 102 /* convert error-code number to name */

src/include/regex/regguts.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,11 @@ struct subre
403403
struct fns
404404
{
405405
void FUNCPTR(free, (regex_t *));
406+
int FUNCPTR(cancel_requested, (void));
406407
};
407408

409+
#define CANCEL_REQUESTED(re) \
410+
((*((struct fns *) (re)->re_fns)->cancel_requested) ())
408411

409412

410413
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy