Skip to content

Commit 6424337

Browse files
committed
Add assorted new regexp_xxx SQL functions.
This patch adds new functions regexp_count(), regexp_instr(), regexp_like(), and regexp_substr(), and extends regexp_replace() with some new optional arguments. All these functions follow the definitions used in Oracle, although there are small differences in the regexp language due to using our own regexp engine -- most notably, that the default newline-matching behavior is different. Similar functions appear in DB2 and elsewhere, too. Aside from easing portability, these functions are easier to use for certain tasks than our existing regexp_match[es] functions.

Gilles Darold, heavily revised by me

Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
1 parent 9e51cc8 commit 6424337

File tree

8 files changed

+1340
-47
lines changed

8 files changed

+1340
-47
lines changed

doc/src/sgml/func.sgml

Lines changed: 331 additions & 21 deletions
Large diffs are not rendered by default.

src/backend/utils/adt/regexp.c

Lines changed: 455 additions & 18 deletions
Large diffs are not rendered by default.

src/backend/utils/adt/varlena.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
44964496
/*
44974497
* replace_text_regexp
44984498
*
4499-
* replace text that matches to regexp in src_text to replace_text.
4499+
* replace substring(s) in src_text that match regexp with replace_text.
4500+
*
4501+
* search_start: the character (not byte) offset in src_text at which to
4502+
* begin searching.
4503+
* n: if 0, replace all matches; if > 0, replace only the N'th match.
45004504
*
45014505
* Note: to avoid having to include regex.h in builtins.h, we declare
45024506
* the regexp argument as void *, but really it's regex_t *.
45034507
*/
45044508
text *
45054509
replace_text_regexp(text *src_text, void *regexp,
4506-
text *replace_text, bool glob)
4510+
text *replace_text,
4511+
int search_start, int n)
45074512
{
45084513
text *ret_text;
45094514
regex_t *re = (regex_t *) regexp;
45104515
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4516+
int nmatches = 0;
45114517
StringInfoData buf;
45124518
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
45134519
pg_wchar *data;
45144520
size_t data_len;
4515-
int search_start;
45164521
int data_pos;
45174522
char *start_ptr;
45184523
bool have_escape;
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
45304535
start_ptr = (char *) VARDATA_ANY(src_text);
45314536
data_pos = 0;
45324537

4533-
search_start = 0;
45344538
while (search_start <= data_len)
45354539
{
45364540
int regexec_result;
@@ -4560,6 +4564,23 @@ replace_text_regexp(text *src_text, void *regexp,
45604564
errmsg("regular expression failed: %s", errMsg)));
45614565
}
45624566

4567+
/*
4568+
* Count matches, and decide whether to replace this match.
4569+
*/
4570+
nmatches++;
4571+
if (n > 0 && nmatches != n)
4572+
{
4573+
/*
4574+
* No, so advance search_start, but not start_ptr/data_pos. (Thus,
4575+
* we treat the matched text as if it weren't matched, and copy it
4576+
* to the output later.)
4577+
*/
4578+
search_start = pmatch[0].rm_eo;
4579+
if (pmatch[0].rm_so == pmatch[0].rm_eo)
4580+
search_start++;
4581+
continue;
4582+
}
4583+
45634584
/*
45644585
* Copy the text to the left of the match position. Note we are given
45654586
* character not byte indexes.
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
45964617
data_pos = pmatch[0].rm_eo;
45974618

45984619
/*
4599-
* When global option is off, replace the first instance only.
4620+
* If we only want to replace one occurrence, we're done.
46004621
*/
4601-
if (!glob)
4622+
if (n > 0)
46024623
break;
46034624

46044625
/*

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202107261
56+
#define CATALOG_VERSION_NO 202108031
5757

5858
#endif

src/include/catalog/pg_proc.dat

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,6 +3565,18 @@
35653565
{ oid => '2285', descr => 'replace text using regexp',
35663566
proname => 'regexp_replace', prorettype => 'text',
35673567
proargtypes => 'text text text text', prosrc => 'textregexreplace' },
3568+
{ oid => '9611', descr => 'replace text using regexp',
3569+
proname => 'regexp_replace', prorettype => 'text',
3570+
proargtypes => 'text text text int4 int4 text',
3571+
prosrc => 'textregexreplace_extended' },
3572+
{ oid => '9612', descr => 'replace text using regexp',
3573+
proname => 'regexp_replace', prorettype => 'text',
3574+
proargtypes => 'text text text int4 int4',
3575+
prosrc => 'textregexreplace_extended_no_flags' },
3576+
{ oid => '9613', descr => 'replace text using regexp',
3577+
proname => 'regexp_replace', prorettype => 'text',
3578+
proargtypes => 'text text text int4',
3579+
prosrc => 'textregexreplace_extended_no_n' },
35683580
{ oid => '3396', descr => 'find first match for regexp',
35693581
proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
35703582
prosrc => 'regexp_match_no_flags' },
@@ -3579,6 +3591,58 @@
35793591
proname => 'regexp_matches', prorows => '10', proretset => 't',
35803592
prorettype => '_text', proargtypes => 'text text text',
35813593
prosrc => 'regexp_matches' },
3594+
{ oid => '9614', descr => 'count regexp matches',
3595+
proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
3596+
prosrc => 'regexp_count_no_start' },
3597+
{ oid => '9615', descr => 'count regexp matches',
3598+
proname => 'regexp_count', prorettype => 'int4',
3599+
proargtypes => 'text text int4', prosrc => 'regexp_count_no_flags' },
3600+
{ oid => '9616', descr => 'count regexp matches',
3601+
proname => 'regexp_count', prorettype => 'int4',
3602+
proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
3603+
{ oid => '9617', descr => 'position of regexp match',
3604+
proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
3605+
prosrc => 'regexp_instr_no_start' },
3606+
{ oid => '9618', descr => 'position of regexp match',
3607+
proname => 'regexp_instr', prorettype => 'int4',
3608+
proargtypes => 'text text int4', prosrc => 'regexp_instr_no_n' },
3609+
{ oid => '9619', descr => 'position of regexp match',
3610+
proname => 'regexp_instr', prorettype => 'int4',
3611+
proargtypes => 'text text int4 int4', prosrc => 'regexp_instr_no_endoption' },
3612+
{ oid => '9620', descr => 'position of regexp match',
3613+
proname => 'regexp_instr', prorettype => 'int4',
3614+
proargtypes => 'text text int4 int4 int4',
3615+
prosrc => 'regexp_instr_no_flags' },
3616+
{ oid => '9621', descr => 'position of regexp match',
3617+
proname => 'regexp_instr', prorettype => 'int4',
3618+
proargtypes => 'text text int4 int4 int4 text',
3619+
prosrc => 'regexp_instr_no_subexpr' },
3620+
{ oid => '9622', descr => 'position of regexp match',
3621+
proname => 'regexp_instr', prorettype => 'int4',
3622+
proargtypes => 'text text int4 int4 int4 text int4',
3623+
prosrc => 'regexp_instr' },
3624+
{ oid => '9623', descr => 'test for regexp match',
3625+
proname => 'regexp_like', prorettype => 'bool', proargtypes => 'text text',
3626+
prosrc => 'regexp_like_no_flags' },
3627+
{ oid => '9624', descr => 'test for regexp match',
3628+
proname => 'regexp_like', prorettype => 'bool',
3629+
proargtypes => 'text text text', prosrc => 'regexp_like' },
3630+
{ oid => '9625', descr => 'extract substring that matches regexp',
3631+
proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
3632+
prosrc => 'regexp_substr_no_start' },
3633+
{ oid => '9626', descr => 'extract substring that matches regexp',
3634+
proname => 'regexp_substr', prorettype => 'text',
3635+
proargtypes => 'text text int4', prosrc => 'regexp_substr_no_n' },
3636+
{ oid => '9627', descr => 'extract substring that matches regexp',
3637+
proname => 'regexp_substr', prorettype => 'text',
3638+
proargtypes => 'text text int4 int4', prosrc => 'regexp_substr_no_flags' },
3639+
{ oid => '9628', descr => 'extract substring that matches regexp',
3640+
proname => 'regexp_substr', prorettype => 'text',
3641+
proargtypes => 'text text int4 int4 text',
3642+
prosrc => 'regexp_substr_no_subexpr' },
3643+
{ oid => '9629', descr => 'extract substring that matches regexp',
3644+
proname => 'regexp_substr', prorettype => 'text',
3645+
proargtypes => 'text text int4 int4 text int4', prosrc => 'regexp_substr' },
35823646
{ oid => '2088', descr => 'split string by field_sep and return field_num',
35833647
proname => 'split_part', prorettype => 'text',
35843648
proargtypes => 'text text int4', prosrc => 'split_part' },

src/include/utils/varlena.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
3434
extern bool SplitGUCList(char *rawstring, char separator,
3535
List **namelist);
3636
extern text *replace_text_regexp(text *src_text, void *regexp,
37-
text *replace_text, bool glob);
37+
text *replace_text,
38+
int search_start, int n);
3839

3940
#endif

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy